From 3934788a7b4df71f8bd7a2a1f1c0480f06a2076e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 10 Sep 2017 19:02:25 -0700 Subject: [PATCH 0001/2177] net: tcp_input: Neaten DBGUNDO Move the #ifdef into the static void function so that the use of DBGUNDO is validated when FASTRETRANS_DEBUG <= 1. Remove the now unnecessary #else and #define DBGUNDO. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5d7656beeee..bddf724f5c02 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2333,9 +2333,9 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } -#if FASTRETRANS_DEBUG > 1 static void DBGUNDO(struct sock *sk, const char *msg) { +#if FASTRETRANS_DEBUG > 1 struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -2357,10 +2357,8 @@ static void DBGUNDO(struct sock *sk, const char *msg) tp->packets_out); } #endif -} -#else -#define DBGUNDO(x...) do { } while (0) #endif +} static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { From ab076b94c6a3cfc4e5651d559750220543394871 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Tue, 12 Sep 2017 09:32:45 +0800 Subject: [PATCH 0002/2177] dummy: declare dummy devices as enumerated devices Dummy device name is enumerated by the kernel, let user space be aware of the naming scheme used by dummy devices: (visible in /sys/class/net//name_assign_type). Signed-off-by: Zhang Shengju Signed-off-by: David S. Miller --- drivers/net/dummy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index d0a1f9ce3168..e31ab3b94c6f 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -388,7 +388,7 @@ static int __init dummy_init_one(void) int err; dev_dummy = alloc_netdev(sizeof(struct dummy_priv), - "dummy%d", NET_NAME_UNKNOWN, dummy_setup); + "dummy%d", NET_NAME_ENUM, dummy_setup); if (!dev_dummy) return -ENOMEM; From 013955a6556766a76f9f2cc31e740fc6db6ecff4 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 12 Sep 2017 18:54:35 +0900 Subject: [PATCH 0003/2177] net: phy: realtek: rename RTL8211F_PAGE_SELECT to RTL821x_PAGE_SELECT This renames the definition of page select register from RTL8211F_PAGE_SELECT to RTL821x_PAGE_SELECT to use it across models. Signed-off-by: Kunihiko Hayashi Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 9cbe645e3d89..99c3297c957e 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -22,11 +22,11 @@ #define RTL821x_INER 0x12 #define RTL821x_INER_INIT 0x6400 #define RTL821x_INSR 0x13 +#define RTL821x_PAGE_SELECT 0x1f #define RTL8211E_INER_LINK_STATUS 0x400 #define RTL8211F_INER_LINK_STATUS 0x0010 #define RTL8211F_INSR 0x1d -#define RTL8211F_PAGE_SELECT 0x1f #define RTL8211F_TX_DELAY 0x100 MODULE_DESCRIPTION("Realtek PHY driver"); @@ -46,10 +46,10 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev) { int err; - phy_write(phydev, RTL8211F_PAGE_SELECT, 0xa43); + phy_write(phydev, RTL821x_PAGE_SELECT, 0xa43); err = phy_read(phydev, RTL8211F_INSR); /* restore to default page 0 */ - phy_write(phydev, RTL8211F_PAGE_SELECT, 0x0); + phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); return (err < 0) ? 
err : 0; } @@ -102,7 +102,7 @@ static int rtl8211f_config_init(struct phy_device *phydev) if (ret < 0) return ret; - phy_write(phydev, RTL8211F_PAGE_SELECT, 0xd08); + phy_write(phydev, RTL821x_PAGE_SELECT, 0xd08); reg = phy_read(phydev, 0x11); /* enable TX-delay for rgmii-id and rgmii-txid, otherwise disable it */ @@ -114,7 +114,7 @@ static int rtl8211f_config_init(struct phy_device *phydev) phy_write(phydev, 0x11, reg); /* restore to default page 0 */ - phy_write(phydev, RTL8211F_PAGE_SELECT, 0x0); + phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); return 0; } From 513588dd44b09bb5fdd5066a4fbc1e7443b86d1c Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Tue, 12 Sep 2017 18:54:36 +0900 Subject: [PATCH 0004/2177] net: phy: realtek: add RTL8201F phy-id and functions Add RTL8201F phy-id and the related functions to the driver. The original patch is as follows: https://patchwork.kernel.org/patch/2538341/ Signed-off-by: Jongsung Kim Signed-off-by: Jassi Brar Signed-off-by: Kunihiko Hayashi Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 99c3297c957e..d4670ecdb1ba 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -29,10 +29,22 @@ #define RTL8211F_INSR 0x1d #define RTL8211F_TX_DELAY 0x100 +#define RTL8201F_ISR 0x1e +#define RTL8201F_IER 0x13 + MODULE_DESCRIPTION("Realtek PHY driver"); MODULE_AUTHOR("Johnson Leung"); MODULE_LICENSE("GPL"); +static int rtl8201_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read(phydev, RTL8201F_ISR); + + return (err < 0) ? err : 0; +} + static int rtl821x_ack_interrupt(struct phy_device *phydev) { int err; @@ -54,6 +66,25 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev) return (err < 0) ? 
err : 0; } +static int rtl8201_config_intr(struct phy_device *phydev) +{ + int err; + + /* switch to page 7 */ + phy_write(phydev, RTL821x_PAGE_SELECT, 0x7); + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, RTL8201F_IER, + BIT(13) | BIT(12) | BIT(11)); + else + err = phy_write(phydev, RTL8201F_IER, 0); + + /* restore to default page 0 */ + phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); + + return err; +} + static int rtl8211b_config_intr(struct phy_device *phydev) { int err; @@ -128,6 +159,18 @@ static struct phy_driver realtek_drvs[] = { .flags = PHY_HAS_INTERRUPT, .config_aneg = &genphy_config_aneg, .read_status = &genphy_read_status, + }, { + .phy_id = 0x001cc816, + .name = "RTL8201F 10/100Mbps Ethernet", + .phy_id_mask = 0x001fffff, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = &genphy_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &rtl8201_ack_interrupt, + .config_intr = &rtl8201_config_intr, + .suspend = genphy_suspend, + .resume = genphy_resume, }, { .phy_id = 0x001cc912, .name = "RTL8211B Gigabit Ethernet", @@ -181,6 +224,7 @@ static struct phy_driver realtek_drvs[] = { module_phy_driver(realtek_drvs); static struct mdio_device_id __maybe_unused realtek_tbl[] = { + { 0x001cc816, 0x001fffff }, { 0x001cc912, 0x001fffff }, { 0x001cc914, 0x001fffff }, { 0x001cc915, 0x001fffff }, From 785ec87483d1e24a012ecf642ee7d07c4118f142 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 12 Sep 2017 23:02:08 +0300 Subject: [PATCH 0005/2177] ravb: document R8A77970 bindings R-Car V3M (R8A77970) SoC also has the R-Car gen3 compatible EtherAVB device, so document the SoC specific bindings. Signed-off-by: Sergei Shtylyov Acked-by: Simon Horman Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index 16723535e1aa..2689211d324c 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -17,6 +17,7 @@ Required properties: - "renesas,etheravb-r8a7795" for the R8A7795 SoC. - "renesas,etheravb-r8a7796" for the R8A7796 SoC. + - "renesas,etheravb-r8a77970" for the R8A77970 SoC. - "renesas,etheravb-rcar-gen3" as a fallback for the above R-Car Gen3 devices. @@ -40,7 +41,7 @@ Optional properties: - interrupt-parent: the phandle for the interrupt controller that services interrupts for this device. - interrupt-names: A list of interrupt names. - For the R8A779[56] SoCs this property is mandatory; + For the R-Car Gen 3 SoCs this property is mandatory; it should include one entry per channel, named "ch%u", where %u is the channel number ranging from 0 to 24. For other SoCs this property is optional; if present From 7016e0627171878810798a842a416dddee4e3329 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 13 Sep 2017 13:58:15 -0700 Subject: [PATCH 0006/2177] net: Convert int functions to bool Global function ipv6_rcv_saddr_equal and static functions ipv6_rcv_saddr_equal and ipv4_rcv_saddr_equal currently return int. bool is slightly more descriptive for these functions so change their return type from int to bool. Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 4 ++-- net/ipv4/inet_connection_sock.c | 36 ++++++++++++++++----------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f44ff2476758..87981cd63180 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -94,8 +94,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard); +bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b9c64b40a83a..9707372b78ed 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, * and 0.0.0.0 equals to 0.0.0.0 only */ -static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, - const struct in6_addr *sk2_rcv_saddr6, - __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk1_ipv6only, bool sk2_ipv6only, - bool match_wildcard) +static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, + const struct in6_addr *sk2_rcv_saddr6, + __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk1_ipv6only, bool sk2_ipv6only, + bool match_wildcard) { int addr_type = ipv6_addr_type(sk1_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) - return 1; + return true; if (!sk1_rcv_saddr || !sk2_rcv_saddr) return match_wildcard; } - return 0; + return false; } if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) - return 1; + return true; if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; + return true; if (addr_type == IPV6_ADDR_ANY && match_wildcard && !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; + return true; if (sk2_rcv_saddr6 && ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) - return 1; + return true; - return 0; + return false; } #endif @@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, * match_wildcard == false: addresses must be exactly the same, i.e. 
* 0.0.0.0 only equals to 0.0.0.0 */ -static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk2_ipv6only, bool match_wildcard) +static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk2_ipv6only, bool match_wildcard) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) - return 1; + return true; if (!sk1_rcv_saddr || !sk2_rcv_saddr) return match_wildcard; } - return 0; + return false; } -int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard) +bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) From f231c4178a655b09c1fe4dce4b09de7b867c20af Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 14 Sep 2017 09:06:38 +0900 Subject: [PATCH 0007/2177] dt-bindings: net: renesas-ravb: Add support for R8A77995 RAVB Add a new compatible string for the R8A77995 (R-Car D3) RAVB. Signed-off-by: Yoshihiro Shimoda Acked-by: Geert Uytterhoeven Acked-by: Sergei Shtylyov Acked-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index 2689211d324c..c902261893b9 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -18,6 +18,7 @@ Required properties: - "renesas,etheravb-r8a7795" for the R8A7795 SoC. - "renesas,etheravb-r8a7796" for the R8A7796 SoC. - "renesas,etheravb-r8a77970" for the R8A77970 SoC. + - "renesas,etheravb-r8a77995" for the R8A77995 SoC. - "renesas,etheravb-rcar-gen3" as a fallback for the above R-Car Gen3 devices. From a45b3faf16f0cbc4ec48f9ec81e550d430199212 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Mon, 18 Sep 2017 21:18:14 +0200 Subject: [PATCH 0008/2177] s390/qeth: add basic VNICC support VNIC Characteristics (VNICC) are features of HiperSockets that define how packets are handled by the underlying network hardware. For example, if the VNICC flooding is configured on a qeth device, ethernet frames to unknown destination MAC addresses are received. Currently, there is support for seven VNICCs: flooding, multicast flooding, receive broadcast, learning, takeover learning, takeover setvmac, bridge invisible. Also, six IPA commands exist for configuring VNICCs on a qeth device: query characteristics, query commands, enable characteristic, disable characteristic, set timeout, get timeout. This patch adds the basic code infrastructure for VNICC support to qeth. It allows querying VNICC support from the underlying hardware. To this end, it adds: * basic message formats for IPA commands * basic data structures * basic error handling * query characteristics IPA command support The query characteristics IPA command allows requesting the currently supported and currently enabled VNIC characteristics from the underlying hardware. Support for the other IPA commands and for the configuration of VNICCs is added in follow-up patches together with the respective user interface functions. Signed-off-by: Hans Wippel Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core.h | 7 ++ drivers/s390/net/qeth_core_mpc.c | 4 +- drivers/s390/net/qeth_core_mpc.h | 29 +++++++ drivers/s390/net/qeth_l2_main.c | 136 ++++++++++++++++++++++++++++++- 4 files changed, 172 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 59e09854c4f7..b96438df8fd4 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -182,6 +182,12 @@ struct qeth_sbp_info { __u32 reflect_promisc_primary:1; }; +struct qeth_vnicc_info { + /* supported/currently configured VNICCs; updated in IPA exchanges */ + u32 sup_chars; + u32 cur_chars; +}; + static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa, enum qeth_ipa_funcs func) { @@ -673,6 +679,7 @@ struct qeth_card_options { struct qeth_routing_info route6; struct qeth_ipa_info ipa6; struct qeth_sbp_info sbp; /* SETBRIDGEPORT options */ + struct qeth_vnicc_info vnicc; /* VNICC options */ int fake_broadcast; int layer2; int performance_stats; diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 6dd7d05e5693..5f8a2b834d39 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -167,7 +167,7 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_IP_TABLE_FULL, "Add Addr IP Table Full - ipv6"}, {IPA_RC_UNKNOWN_ERROR, "IPA command failed - reason unknown"}, {IPA_RC_UNSUPPORTED_COMMAND, "Command not supported"}, - {IPA_RC_TRACE_ALREADY_ACTIVE, "trace already active"}, + {IPA_RC_VNICC_OOSEQ, "Command issued out of sequence"}, {IPA_RC_INVALID_FORMAT, "invalid format or length"}, {IPA_RC_DUP_IPV6_REMOTE, "ipv6 address already registered remote"}, {IPA_RC_SBP_IQD_NOT_CONFIGURED, "Not configured for bridgeport"}, @@ -193,6 +193,7 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_L2_INVALID_VLAN_ID, "L2 invalid vlan id"}, {IPA_RC_L2_DUP_VLAN_ID, "L2 duplicate vlan id"}, {IPA_RC_L2_VLAN_ID_NOT_FOUND, "L2 vlan id not found"}, + {IPA_RC_VNICC_VNICBP, "VNIC is BridgePort"}, {IPA_RC_SBP_OSA_NOT_CONFIGURED, "Not configured for bridgeport"}, {IPA_RC_SBP_OSA_OS_MISMATCH, "OS mismatch"}, {IPA_RC_SBP_OSA_ANO_DEV_PRIMARY, "Primary bridgeport exists already"}, @@ -253,6 +254,7 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_DELGMAC, "delgmac"}, {IPA_CMD_SETVLAN, "setvlan"}, {IPA_CMD_DELVLAN, "delvlan"}, + {IPA_CMD_VNICC, "vnic_characteristics"}, {IPA_CMD_SETBRIDGEPORT_OSA, "set_bridge_port(osa)"}, {IPA_CMD_SETCCID, "setccid"}, {IPA_CMD_DELCCID, "delccid"}, diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 912e0107de8f..c13816b8c92f 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -90,6 +90,7 @@ enum qeth_ipa_cmds { IPA_CMD_DELGMAC = 0x24, IPA_CMD_SETVLAN = 0x25, IPA_CMD_DELVLAN = 0x26, + IPA_CMD_VNICC = 0x2a, IPA_CMD_SETBRIDGEPORT_OSA = 0x2b, IPA_CMD_SETCCID = 0x41, IPA_CMD_DELCCID = 0x42, @@ -165,6 +166,7 @@ enum qeth_ipa_return_codes { IPA_RC_L2_INVALID_VLAN_ID = 0x2015, IPA_RC_L2_DUP_VLAN_ID = 0x2016, IPA_RC_L2_VLAN_ID_NOT_FOUND = 0x2017, + IPA_RC_VNICC_VNICBP = 0x20B0, IPA_RC_SBP_OSA_NOT_CONFIGURED = 0x2B0C, IPA_RC_SBP_OSA_OS_MISMATCH = 0x2B10, IPA_RC_SBP_OSA_ANO_DEV_PRIMARY = 0x2B14, @@ -197,6 +199,9 @@ enum qeth_ipa_return_codes { IPA_RC_ENOMEM = 0xfffe, IPA_RC_FFFF = 0xffff }; +/* for VNIC Characteristics */ +#define IPA_RC_VNICC_OOSEQ 0x0005 + /* for SET_DIAGNOSTIC_ASSIST */ #define IPA_RC_INVALID_SUBCMD IPA_RC_IP_TABLE_FULL #define IPA_RC_HARDWARE_AUTH_ERROR 
IPA_RC_UNKNOWN_ERROR @@ -551,6 +556,29 @@ struct qeth_ipacmd_diagass { __u8 cdata[64]; } __attribute__ ((packed)); +/* VNIC Characteristics IPA Command: *****************************************/ +/* IPA commands/sub commands for VNICC */ +#define IPA_VNICC_QUERY_CHARS 0x00000000L + +/* VNICC header */ +struct qeth_ipacmd_vnicc_hdr { + u32 sup; + u32 cur; +}; + +/* VNICC sub command header */ +struct qeth_vnicc_sub_hdr { + u16 data_length; + u16 reserved; + u32 sub_command; +}; + +/* complete VNICC IPA command message */ +struct qeth_ipacmd_vnicc { + struct qeth_ipacmd_vnicc_hdr hdr; + struct qeth_vnicc_sub_hdr sub_hdr; +}; + /* SETBRIDGEPORT IPA Command: *********************************************/ enum qeth_ipa_sbp_cmd { IPA_SBP_QUERY_COMMANDS_SUPPORTED = 0x00000000L, @@ -692,6 +720,7 @@ struct qeth_ipa_cmd { struct qeth_ipacmd_diagass diagass; struct qeth_ipacmd_setbridgeport sbp; struct qeth_ipacmd_addr_change addrchange; + struct qeth_ipacmd_vnicc vnicc; } data; } __attribute__ ((packed)); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 760b023eae95..2fa273b40dd1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -33,6 +33,7 @@ static void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd); static void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd); +static void qeth_l2_vnicc_init(struct qeth_card *card); static int qeth_l2_verify_dev(struct net_device *dev) { @@ -1045,9 +1046,14 @@ static int qeth_l2_start_ipassists(struct qeth_card *card) static void qeth_l2_trace_features(struct qeth_card *card) { - QETH_CARD_TEXT(card, 2, "l2featur"); + /* Set BridgePort features */ + QETH_CARD_TEXT(card, 2, "featuSBP"); QETH_CARD_HEX(card, 2, &card->options.sbp.supported_funcs, sizeof(card->options.sbp.supported_funcs)); + /* VNIC Characteristics features */ + QETH_CARD_TEXT(card, 2, "feaVNICC"); + QETH_CARD_HEX(card, 2, &card->options.vnicc.sup_chars, + sizeof(card->options.vnicc.sup_chars)); } static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) @@ -1072,8 +1078,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) if (card->options.sbp.supported_funcs) dev_info(&card->gdev->dev, "The device represents a Bridge Capable Port\n"); - qeth_trace_features(card); - qeth_l2_trace_features(card); if (!card->dev && qeth_l2_setup_netdev(card)) { rc = -ENODEV; @@ -1090,6 +1094,12 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) } else card->info.hwtrap = 0; + /* for the rx_bcast characteristic, init VNICC after setmac */ + qeth_l2_vnicc_init(card); + + qeth_trace_features(card); + qeth_l2_trace_features(card); + qeth_l2_setup_bridgeport_attrs(card); card->state = CARD_STATE_HARDSETUP; @@ -2039,6 +2049,126 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable) } EXPORT_SYMBOL_GPL(qeth_bridgeport_an_set); +/* VNIC Characteristics support */ + +/* handle VNICC IPA command return codes; convert to error codes */ +static int qeth_l2_vnicc_makerc(struct qeth_card *card, int ipa_rc) +{ + int rc; + + switch (ipa_rc) { + case IPA_RC_SUCCESS: + return ipa_rc; + case IPA_RC_L2_UNSUPPORTED_CMD: + case IPA_RC_NOTSUPP: + rc = -EOPNOTSUPP; + break; + case IPA_RC_VNICC_OOSEQ: + rc = -EALREADY; + break; + case IPA_RC_VNICC_VNICBP: + rc = -EBUSY; + break; + case IPA_RC_L2_ADDR_TABLE_FULL: + rc = -ENOSPC; + break; + case IPA_RC_L2_MAC_NOT_AUTH_BY_ADP: + rc = -EACCES; + break; + default: + rc = 
-EIO; + } + + QETH_CARD_TEXT_(card, 2, "err%04x", ipa_rc); + return rc; +} + +/* generic VNICC request call back control */ +struct _qeth_l2_vnicc_request_cbctl { + u32 sub_cmd; +}; + +/* generic VNICC request call back */ +static int qeth_l2_vnicc_request_cb(struct qeth_card *card, + struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + struct qeth_ipacmd_vnicc *rep = &cmd->data.vnicc; + + QETH_CARD_TEXT(card, 2, "vniccrcb"); + if (cmd->hdr.return_code) + return 0; + /* return results to caller */ + card->options.vnicc.sup_chars = rep->hdr.sup; + card->options.vnicc.cur_chars = rep->hdr.cur; + + return 0; +} + +/* generic VNICC request */ +static int qeth_l2_vnicc_request(struct qeth_card *card, + struct _qeth_l2_vnicc_request_cbctl *cbctl) +{ + struct qeth_ipacmd_vnicc *req; + struct qeth_cmd_buffer *iob; + struct qeth_ipa_cmd *cmd; + int rc; + + QETH_CARD_TEXT(card, 2, "vniccreq"); + + /* get new buffer for request */ + iob = qeth_get_ipacmd_buffer(card, IPA_CMD_VNICC, 0); + if (!iob) + return -ENOMEM; + + /* create header for request */ + cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE); + req = &cmd->data.vnicc; + + /* create sub command header for request */ + req->sub_hdr.data_length = sizeof(req->sub_hdr); + req->sub_hdr.sub_command = cbctl->sub_cmd; + + /* create sub command specific request fields */ + switch (cbctl->sub_cmd) { + case IPA_VNICC_QUERY_CHARS: + break; + default: + qeth_release_buffer(iob->channel, iob); + return -EOPNOTSUPP; + } + + /* send request */ + rc = qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, + (void *) cbctl); + + return qeth_l2_vnicc_makerc(card, rc); +} + +/* VNICC query VNIC characteristics request */ +static int qeth_l2_vnicc_query_chars(struct qeth_card *card) +{ + struct _qeth_l2_vnicc_request_cbctl cbctl; + + /* prepare callback control */ + cbctl.sub_cmd = IPA_VNICC_QUERY_CHARS; + + QETH_CARD_TEXT(card, 2, "vniccqch"); + return qeth_l2_vnicc_request(card, &cbctl); +} + +/* (re-)initialize VNICC */ +static void qeth_l2_vnicc_init(struct qeth_card *card) +{ + QETH_CARD_TEXT(card, 2, "vniccini"); + /* initial query and storage of VNIC characteristics */ + if (qeth_l2_vnicc_query_chars(card)) { + card->options.vnicc.sup_chars = 0; + card->options.vnicc.cur_chars = 0; + } +} + module_init(qeth_l2_init); module_exit(qeth_l2_exit); MODULE_AUTHOR("Frank Blaschka "); From caa1f0b10d18f31c5491f84cb2e68a5d2047f437 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Mon, 18 Sep 2017 21:18:15 +0200 Subject: [PATCH 0009/2177] s390/qeth: add VNICC enable/disable support HiperSocket devices allow enabling and disabling so called VNIC Characteristics (VNICC) that influence how the underlying hardware handles packets. These VNICCs are: * Flooding VNICC: Flooding allows specifying if packets to unknown destination MAC addresses are received by the qeth device. * Multicast flooding VNICC: Multicast flooding allows specifying if packets to multicast MAC addresses are received by the qeth device. * Learning VNICC: If learning is enabled on a qeth device, the device learns the source MAC addresses of outgoing packets and incoming packets to those learned MAC addresses are received. * Takeover setvmac VNICC: If takeover setvmac is configured on a qeth device, the MAC address of this device can be configured on a different qeth device with the setvmac IPA command. 
* Takeover by learning VNICC: If takeover learning is enabled on a qeth device, the MAC address of this device can be learned (learning VNICC) on a different qeth device. * BridgePort invisible VNICC: If BridgePort invisible is enabled on a qeth device, (1) packets from this device are not sent to a BridgePort enabled qeth device and (2) packets coming from a BridgePort enabled qeth device are not received by this device. * Receive broadcast VNICC: Receive broadcast allows configuring if a qeth device receives packets with the broadcast destination MAC address. This patch adds support for the IPA commands that are required to enable and disable these VNIC characteristics on qeth devices. As a prerequisite, it also adds the query commands IPA command. The query commands IPA command allows requesting the supported commands for each characteristic from the underlying hardware. Additionally, this patch provides users with a sysfs user interface to enable/disable the VNICCs mentioned above. Signed-off-by: Hans Wippel Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 6 + drivers/s390/net/qeth_core_mpc.h | 31 +++++ drivers/s390/net/qeth_l2.h | 4 + drivers/s390/net/qeth_l2_main.c | 207 +++++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_sys.c | 158 ++++++++++++++++++++--- 5 files changed, 392 insertions(+), 14 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index b96438df8fd4..2236c0c9744a 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -186,6 +186,12 @@ struct qeth_vnicc_info { /* supported/currently configured VNICCs; updated in IPA exchanges */ u32 sup_chars; u32 cur_chars; + /* supported commands: bitmasks which VNICCs support respective cmd */ + u32 set_char_sup; + /* characteristics wanted/configured by user */ + u32 wanted_chars; + /* has user explicitly enabled rx_bcast while online? 
*/ + bool rx_bcast_enabled; }; static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa, diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index c13816b8c92f..7f67a81a2ae6 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -559,6 +559,22 @@ struct qeth_ipacmd_diagass { /* VNIC Characteristics IPA Command: *****************************************/ /* IPA commands/sub commands for VNICC */ #define IPA_VNICC_QUERY_CHARS 0x00000000L +#define IPA_VNICC_QUERY_CMDS 0x00000001L +#define IPA_VNICC_ENABLE 0x00000002L +#define IPA_VNICC_DISABLE 0x00000004L + +/* VNICC flags */ +#define QETH_VNICC_FLOODING 0x80000000 +#define QETH_VNICC_MCAST_FLOODING 0x40000000 +#define QETH_VNICC_LEARNING 0x20000000 +#define QETH_VNICC_TAKEOVER_SETVMAC 0x10000000 +#define QETH_VNICC_TAKEOVER_LEARNING 0x08000000 +#define QETH_VNICC_BRIDGE_INVISIBLE 0x04000000 +#define QETH_VNICC_RX_BCAST 0x02000000 + +/* VNICC default values */ +#define QETH_VNICC_ALL 0xff000000 +#define QETH_VNICC_DEFAULT QETH_VNICC_RX_BCAST /* VNICC header */ struct qeth_ipacmd_vnicc_hdr { @@ -573,10 +589,25 @@ struct qeth_vnicc_sub_hdr { u32 sub_command; }; +/* query supported commands for VNIC characteristic */ +struct qeth_vnicc_query_cmds { + u32 vnic_char; + u32 sup_cmds; +}; + +/* enable/disable VNIC characteristic */ +struct qeth_vnicc_set_char { + u32 vnic_char; +}; + /* complete VNICC IPA command message */ struct qeth_ipacmd_vnicc { struct qeth_ipacmd_vnicc_hdr hdr; struct qeth_vnicc_sub_hdr sub_hdr; + union { + struct qeth_vnicc_query_cmds query_cmds; + struct qeth_vnicc_set_char set_char; + }; }; /* SETBRIDGEPORT IPA Command: *********************************************/ diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h index 0d59f9a45ea9..0619018c76f9 100644 --- a/drivers/s390/net/qeth_l2.h +++ b/drivers/s390/net/qeth_l2.h @@ -14,6 +14,10 @@ int qeth_l2_create_device_attributes(struct device *); void qeth_l2_remove_device_attributes(struct device *); void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card); +int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state); +int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state); +bool qeth_l2_vnicc_is_in_use(struct qeth_card *card); + struct qeth_mac { u8 mac_addr[OSA_ADDR_LEN]; u8 is_uc:1; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2fa273b40dd1..36a7fd7255e3 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -33,6 +33,7 @@ static void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd); static void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd); +static void qeth_l2_vnicc_set_defaults(struct qeth_card *card); static void qeth_l2_vnicc_init(struct qeth_card *card); static int qeth_l2_verify_dev(struct net_device *dev) @@ -920,6 +921,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) hash_init(card->mac_htable); card->options.layer2 = 1; card->info.hwtrap = 0; + qeth_l2_vnicc_set_defaults(card); return 0; } @@ -2049,6 +2051,12 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable) } EXPORT_SYMBOL_GPL(qeth_bridgeport_an_set); +static bool qeth_bridgeport_is_in_use(struct qeth_card *card) +{ + return (card->options.sbp.role || card->options.sbp.reflect_promisc || + card->options.sbp.hostnotification); +} + /* VNIC Characteristics support */ /* handle VNICC IPA command return codes; convert to error codes */ 
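For illustration only, a minimal standalone C sketch of the wanted/current bitmask bookkeeping that this patch builds around the VNIC characteristics. The flag values are the ones defined above in qeth_core_mpc.h; struct vnicc_state, vnicc_set_wanted() and the main() driver are invented for the example and are not part of the qeth driver:

/* Standalone sketch (not driver code): how the VNICC bitmasks combine.
 * Flag values are taken from this patch; helper names are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define QETH_VNICC_FLOODING          0x80000000u
#define QETH_VNICC_MCAST_FLOODING    0x40000000u
#define QETH_VNICC_LEARNING          0x20000000u
#define QETH_VNICC_TAKEOVER_SETVMAC  0x10000000u
#define QETH_VNICC_TAKEOVER_LEARNING 0x08000000u
#define QETH_VNICC_BRIDGE_INVISIBLE  0x04000000u
#define QETH_VNICC_RX_BCAST          0x02000000u
#define QETH_VNICC_DEFAULT           QETH_VNICC_RX_BCAST

struct vnicc_state {
	uint32_t sup_chars;    /* characteristics the hardware supports */
	uint32_t cur_chars;    /* characteristics currently enabled */
	uint32_t wanted_chars; /* characteristics requested by the user */
};

/* Mirrors the wanted_chars bookkeeping in qeth_l2_vnicc_set_state():
 * record the user's wish and report whether an IPA ENABLE/DISABLE
 * request would have to be sent to the card.
 */
static bool vnicc_set_wanted(struct vnicc_state *v, uint32_t vnicc, bool on)
{
	if (!(v->sup_chars & vnicc))
		return false;	/* characteristic not supported */
	if (on)
		v->wanted_chars |= vnicc;
	else
		v->wanted_chars &= ~vnicc;
	return v->wanted_chars != v->cur_chars; /* card update required? */
}

int main(void)
{
	struct vnicc_state v = {
		.sup_chars = QETH_VNICC_FLOODING | QETH_VNICC_LEARNING |
			     QETH_VNICC_RX_BCAST,
		.cur_chars = QETH_VNICC_DEFAULT,
		.wanted_chars = QETH_VNICC_DEFAULT,
	};

	printf("enable learning  -> card update needed: %d\n",
	       vnicc_set_wanted(&v, QETH_VNICC_LEARNING, true));
	printf("disable rx_bcast -> card update needed: %d\n",
	       vnicc_set_wanted(&v, QETH_VNICC_RX_BCAST, false));
	printf("wanted mask now: 0x%08x\n", (unsigned int)v.wanted_chars);
	return 0;
}

Keeping wanted_chars separate from cur_chars is what lets a characteristic be configured while the card is offline and then replayed by qeth_l2_vnicc_init() and its recover helpers once the device comes online again.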
@@ -2086,6 +2094,12 @@ static int qeth_l2_vnicc_makerc(struct qeth_card *card, int ipa_rc) /* generic VNICC request call back control */ struct _qeth_l2_vnicc_request_cbctl { u32 sub_cmd; + struct { + u32 vnic_char; + } param; + struct { + u32 *sup_cmds; + } result; }; /* generic VNICC request call back */ @@ -2093,6 +2107,8 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { + struct _qeth_l2_vnicc_request_cbctl *cbctl = + (struct _qeth_l2_vnicc_request_cbctl *) reply->param; struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_ipacmd_vnicc *rep = &cmd->data.vnicc; @@ -2103,6 +2119,9 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card, card->options.vnicc.sup_chars = rep->hdr.sup; card->options.vnicc.cur_chars = rep->hdr.cur; + if (cbctl->sub_cmd == IPA_VNICC_QUERY_CMDS) + *cbctl->result.sup_cmds = rep->query_cmds.sup_cmds; + return 0; } @@ -2134,6 +2153,15 @@ static int qeth_l2_vnicc_request(struct qeth_card *card, switch (cbctl->sub_cmd) { case IPA_VNICC_QUERY_CHARS: break; + case IPA_VNICC_QUERY_CMDS: + req->sub_hdr.data_length += sizeof(req->query_cmds); + req->query_cmds.vnic_char = cbctl->param.vnic_char; + break; + case IPA_VNICC_ENABLE: + case IPA_VNICC_DISABLE: + req->sub_hdr.data_length += sizeof(req->set_char); + req->set_char.vnic_char = cbctl->param.vnic_char; + break; default: qeth_release_buffer(iob->channel, iob); return -EOPNOTSUPP; @@ -2158,15 +2186,194 @@ static int qeth_l2_vnicc_query_chars(struct qeth_card *card) return qeth_l2_vnicc_request(card, &cbctl); } +/* VNICC query sub commands request */ +static int qeth_l2_vnicc_query_cmds(struct qeth_card *card, u32 vnic_char, + u32 *sup_cmds) +{ + struct _qeth_l2_vnicc_request_cbctl cbctl; + + /* prepare callback control */ + cbctl.sub_cmd = IPA_VNICC_QUERY_CMDS; + cbctl.param.vnic_char = vnic_char; + cbctl.result.sup_cmds = sup_cmds; + + QETH_CARD_TEXT(card, 2, "vniccqcm"); + return qeth_l2_vnicc_request(card, &cbctl); +} + +/* VNICC enable/disable characteristic request */ +static int qeth_l2_vnicc_set_char(struct qeth_card *card, u32 vnic_char, + u32 cmd) +{ + struct _qeth_l2_vnicc_request_cbctl cbctl; + + /* prepare callback control */ + cbctl.sub_cmd = cmd; + cbctl.param.vnic_char = vnic_char; + + QETH_CARD_TEXT(card, 2, "vniccedc"); + return qeth_l2_vnicc_request(card, &cbctl); +} + +/* set current VNICC flag state; called from sysfs store function */ +int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state) +{ + int rc = 0; + u32 cmd; + + QETH_CARD_TEXT(card, 2, "vniccsch"); + + /* do not change anything if BridgePort is enabled */ + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + + /* check if characteristic and enable/disable are supported */ + if (!(card->options.vnicc.sup_chars & vnicc) || + !(card->options.vnicc.set_char_sup & vnicc)) + return -EOPNOTSUPP; + + /* set enable/disable command and store wanted characteristic */ + if (state) { + cmd = IPA_VNICC_ENABLE; + card->options.vnicc.wanted_chars |= vnicc; + } else { + cmd = IPA_VNICC_DISABLE; + card->options.vnicc.wanted_chars &= ~vnicc; + } + + /* do we need to do anything? 
*/ + if (card->options.vnicc.cur_chars == card->options.vnicc.wanted_chars) + return rc; + + /* if card is not ready, simply stop here */ + if (!qeth_card_hw_is_reachable(card)) { + if (state) + card->options.vnicc.cur_chars |= vnicc; + else + card->options.vnicc.cur_chars &= ~vnicc; + return rc; + } + + rc = qeth_l2_vnicc_set_char(card, vnicc, cmd); + if (rc) + card->options.vnicc.wanted_chars = + card->options.vnicc.cur_chars; + else if (state && vnicc == QETH_VNICC_RX_BCAST) + card->options.vnicc.rx_bcast_enabled = true; + + return rc; +} + +/* get current VNICC flag state; called from sysfs show function */ +int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state) +{ + int rc = 0; + + QETH_CARD_TEXT(card, 2, "vniccgch"); + + /* do not get anything if BridgePort is enabled */ + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + + /* check if characteristic is supported */ + if (!(card->options.vnicc.sup_chars & vnicc)) + return -EOPNOTSUPP; + + /* if card is ready, query current VNICC state */ + if (qeth_card_hw_is_reachable(card)) + rc = qeth_l2_vnicc_query_chars(card); + + *state = (card->options.vnicc.cur_chars & vnicc) ? true : false; + return rc; +} + +/* check if VNICC is currently enabled */ +bool qeth_l2_vnicc_is_in_use(struct qeth_card *card) +{ + /* if everything is turned off, VNICC is not active */ + if (!card->options.vnicc.cur_chars) + return false; + /* default values are only OK if rx_bcast was not enabled by user + * or the card is offline. + */ + if (card->options.vnicc.cur_chars == QETH_VNICC_DEFAULT) { + if (!card->options.vnicc.rx_bcast_enabled || + !qeth_card_hw_is_reachable(card)) + return false; + } + return true; +} + +/* recover user characteristic setting */ +static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc, + bool enable) +{ + u32 cmd = enable ? 
IPA_VNICC_ENABLE : IPA_VNICC_DISABLE; + + if (card->options.vnicc.sup_chars & vnicc && + card->options.vnicc.set_char_sup & vnicc && + !qeth_l2_vnicc_set_char(card, vnicc, cmd)) + return false; + card->options.vnicc.wanted_chars &= ~vnicc; + card->options.vnicc.wanted_chars |= QETH_VNICC_DEFAULT & vnicc; + return true; +} + /* (re-)initialize VNICC */ static void qeth_l2_vnicc_init(struct qeth_card *card) { + unsigned int chars_len, i; + unsigned long chars_tmp; + u32 sup_cmds, vnicc; + bool enable, error; + QETH_CARD_TEXT(card, 2, "vniccini"); + /* reset rx_bcast */ + card->options.vnicc.rx_bcast_enabled = 0; /* initial query and storage of VNIC characteristics */ if (qeth_l2_vnicc_query_chars(card)) { + if (card->options.vnicc.wanted_chars != QETH_VNICC_DEFAULT) + dev_err(&card->gdev->dev, "Configuring the VNIC characteristics failed\n"); + /* fail quietly if user didn't change the default config */ card->options.vnicc.sup_chars = 0; card->options.vnicc.cur_chars = 0; + card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT; + return; } + /* get supported commands for each supported characteristic */ + chars_tmp = card->options.vnicc.sup_chars; + chars_len = sizeof(card->options.vnicc.sup_chars) * BITS_PER_BYTE; + for_each_set_bit(i, &chars_tmp, chars_len) { + vnicc = BIT(i); + qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds); + if (!(sup_cmds & IPA_VNICC_ENABLE) || + !(sup_cmds & IPA_VNICC_DISABLE)) + card->options.vnicc.set_char_sup &= ~vnicc; + } + /* enforce assumed default values and recover settings, if changed */ + error = false; + chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT; + chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE; + chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE; + for_each_set_bit(i, &chars_tmp, chars_len) { + vnicc = BIT(i); + enable = card->options.vnicc.wanted_chars & vnicc; + error |= qeth_l2_vnicc_recover_char(card, vnicc, enable); + } + if (error) + dev_err(&card->gdev->dev, "Configuring the VNIC characteristics failed\n"); +} + +/* configure default values of VNIC characteristics */ +static void qeth_l2_vnicc_set_defaults(struct qeth_card *card) +{ + /* characteristics values */ + card->options.vnicc.sup_chars = QETH_VNICC_ALL; + card->options.vnicc.cur_chars = QETH_VNICC_DEFAULT; + /* supported commands */ + card->options.vnicc.set_char_sup = QETH_VNICC_ALL; + /* settings wanted by users */ + card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT; } module_init(qeth_l2_init); diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 9696baa49e2d..01936f7dbe7e 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -20,6 +20,9 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev, if (!card) return -EINVAL; + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + if (qeth_card_hw_is_reachable(card) && card->options.sbp.supported_funcs) rc = qeth_bridgeport_query_ports(card, @@ -60,6 +63,11 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev, static ssize_t qeth_bridge_port_role_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct qeth_card *card = dev_get_drvdata(dev); + + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + return qeth_bridge_port_role_state_show(dev, attr, buf, 0); } @@ -83,7 +91,10 @@ static ssize_t qeth_bridge_port_role_store(struct device *dev, mutex_lock(&card->conf_mutex); - if (card->options.sbp.reflect_promisc) /* 
Forbid direct manipulation */ + if (qeth_l2_vnicc_is_in_use(card)) + rc = -EBUSY; + else if (card->options.sbp.reflect_promisc) + /* Forbid direct manipulation */ rc = -EPERM; else if (qeth_card_hw_is_reachable(card)) { rc = qeth_bridgeport_setrole(card, role); @@ -103,6 +114,11 @@ static DEVICE_ATTR(bridge_role, 0644, qeth_bridge_port_role_show, static ssize_t qeth_bridge_port_state_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct qeth_card *card = dev_get_drvdata(dev); + + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + return qeth_bridge_port_role_state_show(dev, attr, buf, 1); } @@ -118,6 +134,9 @@ static ssize_t qeth_bridgeport_hostnotification_show(struct device *dev, if (!card) return -EINVAL; + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + enabled = card->options.sbp.hostnotification; return sprintf(buf, "%d\n", enabled); @@ -142,7 +161,9 @@ static ssize_t qeth_bridgeport_hostnotification_store(struct device *dev, mutex_lock(&card->conf_mutex); - if (qeth_card_hw_is_reachable(card)) { + if (qeth_l2_vnicc_is_in_use(card)) + rc = -EBUSY; + else if (qeth_card_hw_is_reachable(card)) { rc = qeth_bridgeport_an_set(card, enable); if (!rc) card->options.sbp.hostnotification = enable; @@ -167,6 +188,9 @@ static ssize_t qeth_bridgeport_reflect_show(struct device *dev, if (!card) return -EINVAL; + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + if (card->options.sbp.reflect_promisc) { if (card->options.sbp.reflect_promisc_primary) state = "primary"; @@ -202,7 +226,9 @@ static ssize_t qeth_bridgeport_reflect_store(struct device *dev, mutex_lock(&card->conf_mutex); - if (card->options.sbp.role != QETH_SBP_ROLE_NONE) + if (qeth_l2_vnicc_is_in_use(card)) + rc = -EBUSY; + else if (card->options.sbp.role != QETH_SBP_ROLE_NONE) rc = -EPERM; else { card->options.sbp.reflect_promisc = enable; @@ -231,16 +257,6 @@ static struct attribute_group qeth_l2_bridgeport_attr_group = { .attrs = qeth_l2_bridgeport_attrs, }; -int qeth_l2_create_device_attributes(struct device *dev) -{ - return sysfs_create_group(&dev->kobj, &qeth_l2_bridgeport_attr_group); -} - -void qeth_l2_remove_device_attributes(struct device *dev) -{ - sysfs_remove_group(&dev->kobj, &qeth_l2_bridgeport_attr_group); -} - /** * qeth_l2_setup_bridgeport_attrs() - set/restore attrs when turning online. 
* @card: qeth_card structure pointer @@ -270,10 +286,124 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card) qeth_bridgeport_an_set(card, 0); } +/* VNIC CHARS support */ + +/* convert sysfs attr name to VNIC characteristic */ +static u32 qeth_l2_vnicc_sysfs_attr_to_char(const char *attr_name) +{ + if (sysfs_streq(attr_name, "flooding")) + return QETH_VNICC_FLOODING; + else if (sysfs_streq(attr_name, "mcast_flooding")) + return QETH_VNICC_MCAST_FLOODING; + else if (sysfs_streq(attr_name, "learning")) + return QETH_VNICC_LEARNING; + else if (sysfs_streq(attr_name, "takeover_setvmac")) + return QETH_VNICC_TAKEOVER_SETVMAC; + else if (sysfs_streq(attr_name, "takeover_learning")) + return QETH_VNICC_TAKEOVER_LEARNING; + else if (sysfs_streq(attr_name, "bridge_invisible")) + return QETH_VNICC_BRIDGE_INVISIBLE; + else if (sysfs_streq(attr_name, "rx_bcast")) + return QETH_VNICC_RX_BCAST; + + return 0; +} + +/* get current setting of characteristic */ +static ssize_t qeth_vnicc_char_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct qeth_card *card = dev_get_drvdata(dev); + bool state; + u32 vnicc; + int rc; + + if (!card) + return -EINVAL; + + vnicc = qeth_l2_vnicc_sysfs_attr_to_char(attr->attr.name); + rc = qeth_l2_vnicc_get_state(card, vnicc, &state); + + if (rc == -EBUSY) + return sprintf(buf, "n/a (BridgePort)\n"); + if (rc == -EOPNOTSUPP) + return sprintf(buf, "n/a\n"); + return rc ? rc : sprintf(buf, "%d\n", state); +} + +/* change setting of characteristic */ +static ssize_t qeth_vnicc_char_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct qeth_card *card = dev_get_drvdata(dev); + bool state; + u32 vnicc; + int rc; + + if (!card) + return -EINVAL; + + if (kstrtobool(buf, &state)) + return -EINVAL; + + vnicc = qeth_l2_vnicc_sysfs_attr_to_char(attr->attr.name); + mutex_lock(&card->conf_mutex); + rc = qeth_l2_vnicc_set_state(card, vnicc, state); + mutex_unlock(&card->conf_mutex); + + return rc ? 
rc : count; +} + +static DEVICE_ATTR(flooding, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); +static DEVICE_ATTR(mcast_flooding, 0644, qeth_vnicc_char_show, + qeth_vnicc_char_store); +static DEVICE_ATTR(learning, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); +static DEVICE_ATTR(takeover_setvmac, 0644, qeth_vnicc_char_show, + qeth_vnicc_char_store); +static DEVICE_ATTR(takeover_learning, 0644, qeth_vnicc_char_show, + qeth_vnicc_char_store); +static DEVICE_ATTR(bridge_invisible, 0644, qeth_vnicc_char_show, + qeth_vnicc_char_store); +static DEVICE_ATTR(rx_bcast, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); + +static struct attribute *qeth_l2_vnicc_attrs[] = { + &dev_attr_flooding.attr, + &dev_attr_mcast_flooding.attr, + &dev_attr_learning.attr, + &dev_attr_takeover_setvmac.attr, + &dev_attr_takeover_learning.attr, + &dev_attr_bridge_invisible.attr, + &dev_attr_rx_bcast.attr, + NULL, +}; + +static struct attribute_group qeth_l2_vnicc_attr_group = { + .attrs = qeth_l2_vnicc_attrs, + .name = "vnicc", +}; + +static const struct attribute_group *qeth_l2_only_attr_groups[] = { + &qeth_l2_bridgeport_attr_group, + &qeth_l2_vnicc_attr_group, + NULL, +}; + +int qeth_l2_create_device_attributes(struct device *dev) +{ + return sysfs_create_groups(&dev->kobj, qeth_l2_only_attr_groups); +} + +void qeth_l2_remove_device_attributes(struct device *dev) +{ + sysfs_remove_groups(&dev->kobj, qeth_l2_only_attr_groups); +} + const struct attribute_group *qeth_l2_attr_groups[] = { &qeth_device_attr_group, &qeth_device_blkt_group, - /* l2 specific, see l2_{create,remove}_device_attributes(): */ + /* l2 specific, see qeth_l2_only_attr_groups: */ &qeth_l2_bridgeport_attr_group, + &qeth_l2_vnicc_attr_group, NULL, }; From 349d13d5ab58668ab7c8fadadf292430170c919e Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Mon, 18 Sep 2017 21:18:16 +0200 Subject: [PATCH 0010/2177] s390/qeth: add VNICC get/set timeout support HiperSockets allow configuring so called VNIC Characteristics (VNICC) that influence how the underlying hardware handles packets. For VNICCs, additional commands for getting and setting timeouts are available. Currently, the learning VNICC uses these commands. * Learning VNICC: If learning is enabled on a qeth device, the device learns the source MAC addresses of outgoing packets and incoming packets to those learned MAC addresses are received. For learning, the timeout specifies the idle period in seconds, after which the underlying hardware removes a learned MAC address again. This patch adds support for the IPA commands that are required to get and set the current timeout values for the learning VNIC characteristic. Also, it introduces the sysfs interface that allows users to configure the timeout. Signed-off-by: Hans Wippel Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core.h | 3 + drivers/s390/net/qeth_core_mpc.h | 11 +++ drivers/s390/net/qeth_l2.h | 2 + drivers/s390/net/qeth_l2_main.c | 135 +++++++++++++++++++++++++++++-- drivers/s390/net/qeth_l2_sys.c | 44 ++++++++++ 5 files changed, 190 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 2236c0c9744a..e3d3609cd9e7 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -188,6 +188,9 @@ struct qeth_vnicc_info { u32 cur_chars; /* supported commands: bitmasks which VNICCs support respective cmd */ u32 set_char_sup; + u32 getset_timeout_sup; + /* timeout value for the learning characteristic */ + u32 learning_timeout; /* characteristics wanted/configured by user */ u32 wanted_chars; /* has user explicitly enabled rx_bcast while online? */ diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 7f67a81a2ae6..2f1f0da3d089 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -562,6 +562,8 @@ struct qeth_ipacmd_diagass { #define IPA_VNICC_QUERY_CMDS 0x00000001L #define IPA_VNICC_ENABLE 0x00000002L #define IPA_VNICC_DISABLE 0x00000004L +#define IPA_VNICC_SET_TIMEOUT 0x00000008L +#define IPA_VNICC_GET_TIMEOUT 0x00000010L /* VNICC flags */ #define QETH_VNICC_FLOODING 0x80000000 @@ -575,6 +577,8 @@ struct qeth_ipacmd_diagass { /* VNICC default values */ #define QETH_VNICC_ALL 0xff000000 #define QETH_VNICC_DEFAULT QETH_VNICC_RX_BCAST +/* default VNICC timeout in seconds */ +#define QETH_VNICC_DEFAULT_TIMEOUT 600 /* VNICC header */ struct qeth_ipacmd_vnicc_hdr { @@ -600,6 +604,12 @@ struct qeth_vnicc_set_char { u32 vnic_char; }; +/* get/set timeout for VNIC characteristic */ +struct qeth_vnicc_getset_timeout { + u32 vnic_char; + u32 timeout; +}; + /* complete VNICC IPA command message */ struct qeth_ipacmd_vnicc { struct qeth_ipacmd_vnicc_hdr hdr; @@ -607,6 +617,7 @@ struct qeth_ipacmd_vnicc { union { struct qeth_vnicc_query_cmds query_cmds; struct qeth_vnicc_set_char set_char; + struct qeth_vnicc_getset_timeout getset_timeout; }; }; diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h index 0619018c76f9..241df6b98ab4 100644 --- a/drivers/s390/net/qeth_l2.h +++ b/drivers/s390/net/qeth_l2.h @@ -16,6 +16,8 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card); int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state); int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state); +int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout); +int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout); bool qeth_l2_vnicc_is_in_use(struct qeth_card *card); struct qeth_mac { diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 36a7fd7255e3..25a0f381bcd5 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -35,6 +35,8 @@ static void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd); static void qeth_l2_vnicc_set_defaults(struct qeth_card *card); static void qeth_l2_vnicc_init(struct qeth_card *card); +static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc, + u32 *timeout); static int qeth_l2_verify_dev(struct net_device *dev) { @@ -2096,9 +2098,13 @@ struct _qeth_l2_vnicc_request_cbctl { u32 sub_cmd; struct { u32 vnic_char; + u32 timeout; } param; struct { - u32 *sup_cmds; + union{ + u32 *sup_cmds; + u32 *timeout; + }; } result; }; @@ -2122,6 +2128,9 @@ static int 
qeth_l2_vnicc_request_cb(struct qeth_card *card, if (cbctl->sub_cmd == IPA_VNICC_QUERY_CMDS) *cbctl->result.sup_cmds = rep->query_cmds.sup_cmds; + if (cbctl->sub_cmd == IPA_VNICC_GET_TIMEOUT) + *cbctl->result.timeout = rep->getset_timeout.timeout; + return 0; } @@ -2162,6 +2171,13 @@ static int qeth_l2_vnicc_request(struct qeth_card *card, req->sub_hdr.data_length += sizeof(req->set_char); req->set_char.vnic_char = cbctl->param.vnic_char; break; + case IPA_VNICC_SET_TIMEOUT: + req->getset_timeout.timeout = cbctl->param.timeout; + /* fallthrough */ + case IPA_VNICC_GET_TIMEOUT: + req->sub_hdr.data_length += sizeof(req->getset_timeout); + req->getset_timeout.vnic_char = cbctl->param.vnic_char; + break; default: qeth_release_buffer(iob->channel, iob); return -EOPNOTSUPP; @@ -2215,6 +2231,24 @@ static int qeth_l2_vnicc_set_char(struct qeth_card *card, u32 vnic_char, return qeth_l2_vnicc_request(card, &cbctl); } +/* VNICC get/set timeout for characteristic request */ +static int qeth_l2_vnicc_getset_timeout(struct qeth_card *card, u32 vnicc, + u32 cmd, u32 *timeout) +{ + struct _qeth_l2_vnicc_request_cbctl cbctl; + + /* prepare callback control */ + cbctl.sub_cmd = cmd; + cbctl.param.vnic_char = vnicc; + if (cmd == IPA_VNICC_SET_TIMEOUT) + cbctl.param.timeout = *timeout; + if (cmd == IPA_VNICC_GET_TIMEOUT) + cbctl.result.timeout = timeout; + + QETH_CARD_TEXT(card, 2, "vniccgst"); + return qeth_l2_vnicc_request(card, &cbctl); +} + /* set current VNICC flag state; called from sysfs store function */ int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state) { @@ -2258,8 +2292,14 @@ int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state) if (rc) card->options.vnicc.wanted_chars = card->options.vnicc.cur_chars; - else if (state && vnicc == QETH_VNICC_RX_BCAST) - card->options.vnicc.rx_bcast_enabled = true; + else { + /* successful online VNICC change; handle special cases */ + if (state && vnicc == QETH_VNICC_RX_BCAST) + card->options.vnicc.rx_bcast_enabled = true; + if (!state && vnicc == QETH_VNICC_LEARNING) + qeth_l2_vnicc_recover_timeout(card, vnicc, + &card->options.vnicc.learning_timeout); + } return rc; } @@ -2287,6 +2327,70 @@ int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state) return rc; } +/* set VNICC timeout; called from sysfs store function. Currently, only learning + * supports timeout + */ +int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout) +{ + int rc = 0; + + QETH_CARD_TEXT(card, 2, "vniccsto"); + + /* do not change anything if BridgePort is enabled */ + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + + /* check if characteristic and set_timeout are supported */ + if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || + !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) + return -EOPNOTSUPP; + + /* do we need to do anything? */ + if (card->options.vnicc.learning_timeout == timeout) + return rc; + + /* if card is not ready, simply store the value internally and return */ + if (!qeth_card_hw_is_reachable(card)) { + card->options.vnicc.learning_timeout = timeout; + return rc; + } + + /* send timeout value to card; if successful, store value internally */ + rc = qeth_l2_vnicc_getset_timeout(card, QETH_VNICC_LEARNING, + IPA_VNICC_SET_TIMEOUT, &timeout); + if (!rc) + card->options.vnicc.learning_timeout = timeout; + + return rc; +} + +/* get current VNICC timeout; called from sysfs show function. 
Currently, only + * learning supports timeout + */ +int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout) +{ + int rc = 0; + + QETH_CARD_TEXT(card, 2, "vniccgto"); + + /* do not get anything if BridgePort is enabled */ + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + + /* check if characteristic and get_timeout are supported */ + if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || + !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) + return -EOPNOTSUPP; + /* if card is ready, get timeout. Otherwise, just return stored value */ + *timeout = card->options.vnicc.learning_timeout; + if (qeth_card_hw_is_reachable(card)) + rc = qeth_l2_vnicc_getset_timeout(card, QETH_VNICC_LEARNING, + IPA_VNICC_GET_TIMEOUT, + timeout); + + return rc; +} + /* check if VNICC is currently enabled */ bool qeth_l2_vnicc_is_in_use(struct qeth_card *card) { @@ -2304,6 +2408,19 @@ bool qeth_l2_vnicc_is_in_use(struct qeth_card *card) return true; } +/* recover user timeout setting */ +static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc, + u32 *timeout) +{ + if (card->options.vnicc.sup_chars & vnicc && + card->options.vnicc.getset_timeout_sup & vnicc && + !qeth_l2_vnicc_getset_timeout(card, vnicc, IPA_VNICC_SET_TIMEOUT, + timeout)) + return false; + *timeout = QETH_VNICC_DEFAULT_TIMEOUT; + return true; +} + /* recover user characteristic setting */ static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc, bool enable) @@ -2322,6 +2439,7 @@ static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc, /* (re-)initialize VNICC */ static void qeth_l2_vnicc_init(struct qeth_card *card) { + u32 *timeout = &card->options.vnicc.learning_timeout; unsigned int chars_len, i; unsigned long chars_tmp; u32 sup_cmds, vnicc; @@ -2332,7 +2450,8 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) card->options.vnicc.rx_bcast_enabled = 0; /* initial query and storage of VNIC characteristics */ if (qeth_l2_vnicc_query_chars(card)) { - if (card->options.vnicc.wanted_chars != QETH_VNICC_DEFAULT) + if (card->options.vnicc.wanted_chars != QETH_VNICC_DEFAULT || + *timeout != QETH_VNICC_DEFAULT_TIMEOUT) dev_err(&card->gdev->dev, "Configuring the VNIC characteristics failed\n"); /* fail quietly if user didn't change the default config */ card->options.vnicc.sup_chars = 0; @@ -2346,12 +2465,16 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) for_each_set_bit(i, &chars_tmp, chars_len) { vnicc = BIT(i); qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds); + if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) || + !(sup_cmds & IPA_VNICC_GET_TIMEOUT)) + card->options.vnicc.getset_timeout_sup &= ~vnicc; if (!(sup_cmds & IPA_VNICC_ENABLE) || !(sup_cmds & IPA_VNICC_DISABLE)) card->options.vnicc.set_char_sup &= ~vnicc; } /* enforce assumed default values and recover settings, if changed */ - error = false; + error = qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING, + timeout); chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT; chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE; chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE; @@ -2370,8 +2493,10 @@ static void qeth_l2_vnicc_set_defaults(struct qeth_card *card) /* characteristics values */ card->options.vnicc.sup_chars = QETH_VNICC_ALL; card->options.vnicc.cur_chars = QETH_VNICC_DEFAULT; + card->options.vnicc.learning_timeout = QETH_VNICC_DEFAULT_TIMEOUT; /* supported commands */ card->options.vnicc.set_char_sup = QETH_VNICC_ALL; + card->options.vnicc.getset_timeout_sup = 
QETH_VNICC_LEARNING; /* settings wanted by users */ card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT; } diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 01936f7dbe7e..4608daedb204 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -309,6 +309,47 @@ static u32 qeth_l2_vnicc_sysfs_attr_to_char(const char *attr_name) return 0; } +/* get current timeout setting */ +static ssize_t qeth_vnicc_timeout_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct qeth_card *card = dev_get_drvdata(dev); + u32 timeout; + int rc; + + if (!card) + return -EINVAL; + + rc = qeth_l2_vnicc_get_timeout(card, &timeout); + if (rc == -EBUSY) + return sprintf(buf, "n/a (BridgePort)\n"); + if (rc == -EOPNOTSUPP) + return sprintf(buf, "n/a\n"); + return rc ? rc : sprintf(buf, "%d\n", timeout); +} + +/* change timeout setting */ +static ssize_t qeth_vnicc_timeout_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct qeth_card *card = dev_get_drvdata(dev); + u32 timeout; + int rc; + + if (!card) + return -EINVAL; + + rc = kstrtou32(buf, 10, &timeout); + if (rc) + return rc; + + mutex_lock(&card->conf_mutex); + rc = qeth_l2_vnicc_set_timeout(card, timeout); + mutex_unlock(&card->conf_mutex); + return rc ? rc : count; +} + /* get current setting of characteristic */ static ssize_t qeth_vnicc_char_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -359,6 +400,8 @@ static DEVICE_ATTR(flooding, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); static DEVICE_ATTR(mcast_flooding, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); static DEVICE_ATTR(learning, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); +static DEVICE_ATTR(learning_timeout, 0644, qeth_vnicc_timeout_show, + qeth_vnicc_timeout_store); static DEVICE_ATTR(takeover_setvmac, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); static DEVICE_ATTR(takeover_learning, 0644, qeth_vnicc_char_show, @@ -371,6 +414,7 @@ static struct attribute *qeth_l2_vnicc_attrs[] = { &dev_attr_flooding.attr, &dev_attr_mcast_flooding.attr, &dev_attr_learning.attr, + &dev_attr_learning_timeout.attr, &dev_attr_takeover_setvmac.attr, &dev_attr_takeover_learning.attr, &dev_attr_bridge_invisible.attr, From 9627923062b6a5c40964b188bb222ec8c02fd2d6 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:17 +0200 Subject: [PATCH 0011/2177] s390/qeth: remove unused code in qdio_establish_cq() Storing the number of input buffers into 'i' has no effect, it is immediately re-assigned in the next line. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index bae7440abc01..77032801e30b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4923,7 +4923,6 @@ static void qeth_qdio_establish_cq(struct qeth_card *card, if (card->options.cq == QETH_CQ_ENABLED) { int offset = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1); - i = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1); for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) { in_sbal_ptrs[offset + i] = (struct qdio_buffer *) virt_to_phys(card->qdio.c_q->bufs[i].buffer); From 7c2e9ba373264b29a35d6df06cc5fb125d189b90 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:18 +0200 Subject: [PATCH 0012/2177] s390/qeth: don't take queue lock in send_packet_fast() Locking the output queue prior to TX is needed on OSA devices, to synchronize against a packing flush from the TX completion code (via qeth_check_outbound_queue()). But send_packet_fast() is only used for IQDs, which don't do packing. So remove the locking, and apply some easy cleanups. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 +-- drivers/s390/net/qeth_core_main.c | 22 +++++----------------- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 4 ++-- 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index e3d3609cd9e7..b7e8531625aa 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -962,8 +962,7 @@ int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, int extra_elems, int data_offset); int qeth_get_elements_for_frags(struct sk_buff *); -int qeth_do_send_packet_fast(struct qeth_card *card, - struct qeth_qdio_out_q *queue, struct sk_buff *skb, +int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, unsigned int offset, unsigned int hd_len); int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 77032801e30b..68e118f1202e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4040,35 +4040,23 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue, return flush_cnt; } -int qeth_do_send_packet_fast(struct qeth_card *card, - struct qeth_qdio_out_q *queue, struct sk_buff *skb, +int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, unsigned int offset, unsigned int hd_len) { - struct qeth_qdio_out_buffer *buffer; - int index; + int index = queue->next_buf_to_fill; + struct qeth_qdio_out_buffer *buffer = queue->bufs[index]; - /* spin until we get the queue ... */ - while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED, - QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED); - /* ... 
now we've got the queue */ - index = queue->next_buf_to_fill; - buffer = queue->bufs[queue->next_buf_to_fill]; /* * check if buffer is empty to make sure that we do not 'overtake' * ourselves and try to fill a buffer that is already primed */ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) - goto out; - queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) % - QDIO_MAX_BUFFERS_PER_Q; - atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); + return -EBUSY; + queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q; qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); qeth_flush_buffers(queue, index, 1); return 0; -out: - atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); - return -EBUSY; } EXPORT_SYMBOL_GPL(qeth_do_send_packet_fast); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 25a0f381bcd5..009de7248cc7 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -698,7 +698,7 @@ static int qeth_l2_xmit_iqd(struct qeth_card *card, struct sk_buff *skb, rc = -E2BIG; goto out; } - rc = qeth_do_send_packet_fast(card, queue, skb, hdr, data_offset, + rc = qeth_do_send_packet_fast(queue, skb, hdr, data_offset, sizeof(*hdr) + data_offset); out: if (rc) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index ab661a431f7c..733f94cbd4d4 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2771,8 +2771,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, rc = qeth_do_send_packet(card, queue, new_skb, hdr, hd_len, hd_len, elements); } else - rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr, - data_offset, hd_len); + rc = qeth_do_send_packet_fast(queue, new_skb, hdr, data_offset, + hd_len); if (!rc) { card->stats.tx_packets++; From ab25a5014eddb4da94b7acf0d214265964c5fd21 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:19 +0200 Subject: [PATCH 0013/2177] s390/qeth: simplify L3 sysfs group management Use the right helpers to create/remove all attribute groups in one go. Suggested-by: Hans Wippel Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_l3_sys.c | 45 +++++++++------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index e8bcc314cc5f..0ad0f7f4b23f 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -1028,52 +1028,31 @@ static const struct attribute_group qeth_device_rxip_group = { .attrs = qeth_rxip_device_attrs, }; +static const struct attribute_group *qeth_l3_only_attr_groups[] = { + &qeth_l3_device_attr_group, + &qeth_device_ipato_group, + &qeth_device_vipa_group, + &qeth_device_rxip_group, + NULL, +}; + int qeth_l3_create_device_attributes(struct device *dev) { - int ret; - - ret = sysfs_create_group(&dev->kobj, &qeth_l3_device_attr_group); - if (ret) - return ret; - - ret = sysfs_create_group(&dev->kobj, &qeth_device_ipato_group); - if (ret) { - sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group); - return ret; - } - - ret = sysfs_create_group(&dev->kobj, &qeth_device_vipa_group); - if (ret) { - sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group); - sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group); - return ret; - } - - ret = sysfs_create_group(&dev->kobj, &qeth_device_rxip_group); - if (ret) { - sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group); - sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group); - sysfs_remove_group(&dev->kobj, &qeth_device_vipa_group); - return ret; - } - return 0; + return sysfs_create_groups(&dev->kobj, qeth_l3_only_attr_groups); } void qeth_l3_remove_device_attributes(struct device *dev) { - sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group); - sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group); - sysfs_remove_group(&dev->kobj, &qeth_device_vipa_group); - sysfs_remove_group(&dev->kobj, &qeth_device_rxip_group); + sysfs_remove_groups(&dev->kobj, qeth_l3_only_attr_groups); } const struct attribute_group *qeth_l3_attr_groups[] = { &qeth_device_attr_group, &qeth_device_blkt_group, - /* l3 specific, see l3_{create,remove}_device_attributes(): */ + /* l3 specific, see qeth_l3_only_attr_groups: */ &qeth_l3_device_attr_group, &qeth_device_ipato_group, &qeth_device_vipa_group, &qeth_device_rxip_group, -NULL, + NULL, }; From 2aa4867198c22ecfae1c12f0c96c4c25fea01cb5 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:20 +0200 Subject: [PATCH 0014/2177] s390/qeth: translate SETVLAN/DELVLAN errors Properly return any error encountered during VLAN processing to the the caller. Resulting change in behaviour: if SETVLAN fails while registering a new VLAN ID, the stack no longer creates the corresponding vlan device. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core_mpc.h | 1 + drivers/s390/net/qeth_l2_main.c | 49 +++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 2f1f0da3d089..6a7654df6e78 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -166,6 +166,7 @@ enum qeth_ipa_return_codes { IPA_RC_L2_INVALID_VLAN_ID = 0x2015, IPA_RC_L2_DUP_VLAN_ID = 0x2016, IPA_RC_L2_VLAN_ID_NOT_FOUND = 0x2017, + IPA_RC_L2_VLAN_ID_NOT_ALLOWED = 0x2050, IPA_RC_VNICC_VNICBP = 0x20B0, IPA_RC_SBP_OSA_NOT_CONFIGURED = 0x2B0C, IPA_RC_SBP_OSA_OS_MISMATCH = 0x2B10, diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 009de7248cc7..7b61c2ef4c74 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -78,7 +78,7 @@ static struct net_device *qeth_l2_netdev_by_devno(unsigned char *read_dev_no) return ndev; } -static int qeth_setdel_makerc(struct qeth_card *card, int retcode) +static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode) { int rc; @@ -128,8 +128,8 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac, cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE); cmd->data.setdelmac.mac_length = OSA_ADDR_LEN; memcpy(&cmd->data.setdelmac.mac, mac, OSA_ADDR_LEN); - return qeth_setdel_makerc(card, qeth_send_ipa_cmd(card, iob, - NULL, NULL)); + return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob, + NULL, NULL)); } static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) @@ -289,17 +289,40 @@ static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb, } } -static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card, - struct qeth_reply *reply, unsigned long data) +static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode) { - struct qeth_ipa_cmd *cmd; + if (retcode) + QETH_CARD_TEXT_(card, 2, "err%04x", retcode); + + switch (retcode) { + case IPA_RC_SUCCESS: + return 0; + case IPA_RC_L2_INVALID_VLAN_ID: + return -EINVAL; + case IPA_RC_L2_DUP_VLAN_ID: + return -EEXIST; + case IPA_RC_L2_VLAN_ID_NOT_FOUND: + return -ENOENT; + case IPA_RC_L2_VLAN_ID_NOT_ALLOWED: + return -EPERM; + case -ENOMEM: + return -ENOMEM; + default: + return -EIO; + } +} + +static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card, + struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 2, "L2sdvcb"); - cmd = (struct qeth_ipa_cmd *) data; if (cmd->hdr.return_code) { - QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x. 
" - "Continuing\n", cmd->data.setdelvlan.vlan_id, - QETH_CARD_IFNAME(card), cmd->hdr.return_code); + QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x.\n", + cmd->data.setdelvlan.vlan_id, + QETH_CARD_IFNAME(card), cmd->hdr.return_code); QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command); QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code); } @@ -307,7 +330,7 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card, } static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, - enum qeth_ipa_cmds ipacmd) + enum qeth_ipa_cmds ipacmd) { struct qeth_ipa_cmd *cmd; struct qeth_cmd_buffer *iob; @@ -318,8 +341,8 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, return -ENOMEM; cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE); cmd->data.setdelvlan.vlan_id = i; - return qeth_send_ipa_cmd(card, iob, - qeth_l2_send_setdelvlan_cb, NULL); + return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob, + qeth_l2_send_setdelvlan_cb, NULL)); } static void qeth_l2_process_vlans(struct qeth_card *card) From d7aa9d0bb923d55aa65a6e18428b384038c6523e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:21 +0200 Subject: [PATCH 0015/2177] s390/qeth: fold VLAN handling into l3_rebuild_skb() Move the overly complicated VLAN processing from the L3 RX handler into its l3_rebuild_skb() helper. No change in functionality. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 733f94cbd4d4..270ac9515f6b 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1646,13 +1646,12 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, return 0; } -static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, - struct qeth_hdr *hdr, unsigned short *vlan_id) +static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, + struct qeth_hdr *hdr) { __u16 prot; struct iphdr *ip_hdr; unsigned char tg_addr[MAX_ADDR_LEN]; - int is_vlan = 0; if (!(hdr->hdr.l3.flags & QETH_HDR_PASSTHRU)) { prot = (hdr->hdr.l3.flags & QETH_HDR_IPV6) ? ETH_P_IPV6 : @@ -1706,11 +1705,14 @@ static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, skb->protocol = eth_type_trans(skb, card->dev); - if (hdr->hdr.l3.ext_flags & - (QETH_HDR_EXT_VLAN_FRAME | QETH_HDR_EXT_INCLUDE_VLAN_TAG)) { - *vlan_id = (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_VLAN_FRAME) ? - hdr->hdr.l3.vlan_id : *((u16 *)&hdr->hdr.l3.dest_addr[12]); - is_vlan = 1; + /* copy VLAN tag from hdr into skb */ + if (!card->options.sniffer && + (hdr->hdr.l3.ext_flags & (QETH_HDR_EXT_VLAN_FRAME | + QETH_HDR_EXT_INCLUDE_VLAN_TAG))) { + u16 tag = (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_VLAN_FRAME) ? 
+ hdr->hdr.l3.vlan_id : + *((u16 *)&hdr->hdr.l3.dest_addr[12]); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag); } if (card->dev->features & NETIF_F_RXCSUM) { @@ -1724,7 +1726,6 @@ static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; } else skb->ip_summed = CHECKSUM_NONE; - return is_vlan; } static int qeth_l3_process_inbound_buffer(struct qeth_card *card, @@ -1733,8 +1734,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, int work_done = 0; struct sk_buff *skb; struct qeth_hdr *hdr; - __u16 vlan_tag = 0; - int is_vlan; unsigned int len; __u16 magic; @@ -1764,12 +1763,8 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, card->dev->addr_len); netif_receive_skb(skb); } else { - is_vlan = qeth_l3_rebuild_skb(card, skb, hdr, - &vlan_tag); + qeth_l3_rebuild_skb(card, skb, hdr); len = skb->len; - if (is_vlan && !card->options.sniffer) - __vlan_hwaccel_put_tag(skb, - htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(&card->napi, skb); } break; From e878c5e6003edae23512fe43d3c050622e5ebcf5 Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Mon, 18 Sep 2017 21:18:22 +0200 Subject: [PATCH 0016/2177] s390/qeth: tidy up parameter naming for qeth_do_send_packet() Cppcheck reports the following for drivers/s390/net/qeth_core.h: warning - line 1560 - Function 'qeth_do_send_packet' argument order different: declaration 'card, queue, skb, hdr, hd_len, offset, elements' definition 'card, queue, skb, hdr, offset, hd_len, elements_needed'. Match the naming in the function's declaration against its definition. Signed-off-by: Jens Remus Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index b7e8531625aa..91fcadbede80 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -967,7 +967,8 @@ int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb, unsigned int hd_len); int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, - unsigned int hd_len, unsigned int offset, int elements); + unsigned int offset, unsigned int hd_len, + int elements_needed); int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); int qeth_core_get_sset_count(struct net_device *, int); void qeth_core_get_ethtool_stats(struct net_device *, From 7598b3498bfdad49d2b19bf0cba829922a9689a9 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 14 Sep 2017 23:01:51 -0400 Subject: [PATCH 0017/2177] forcedeth: replace pci_map_single with dma_map_single functions pci_map_single functions are obsolete. So replace them with dma_map_single functions. Signed-off-by: Zhu Yanjun Signed-off-by: David S. 
Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 70 +++++++++++++------------ 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 994a83a1f0a5..b605b94f4567 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1813,12 +1813,12 @@ static int nv_alloc_rx(struct net_device *dev) struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD); if (skb) { np->put_rx_ctx->skb = skb; - np->put_rx_ctx->dma = pci_map_single(np->pci_dev, + np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data, skb_tailroom(skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_rx_ctx->dma)) { + DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma)) { kfree_skb(skb); goto packet_dropped; } @@ -1854,12 +1854,12 @@ static int nv_alloc_rx_optimized(struct net_device *dev) struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD); if (skb) { np->put_rx_ctx->skb = skb; - np->put_rx_ctx->dma = pci_map_single(np->pci_dev, + np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data, skb_tailroom(skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_rx_ctx->dma)) { + DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma)) { kfree_skb(skb); goto packet_dropped; } @@ -1977,9 +1977,9 @@ static void nv_unmap_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb) { if (tx_skb->dma) { if (tx_skb->dma_single) - pci_unmap_single(np->pci_dev, tx_skb->dma, + dma_unmap_single(&np->pci_dev->dev, tx_skb->dma, tx_skb->dma_len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); else pci_unmap_page(np->pci_dev, tx_skb->dma, tx_skb->dma_len, @@ -2047,10 +2047,10 @@ static void nv_drain_rx(struct net_device *dev) } wmb(); if (np->rx_skb[i].skb) { - pci_unmap_single(np->pci_dev, np->rx_skb[i].dma, + dma_unmap_single(&np->pci_dev->dev, np->rx_skb[i].dma, (skb_end_pointer(np->rx_skb[i].skb) - - np->rx_skb[i].skb->data), - PCI_DMA_FROMDEVICE); + np->rx_skb[i].skb->data), + DMA_FROM_DEVICE); dev_kfree_skb(np->rx_skb[i].skb); np->rx_skb[i].skb = NULL; } @@ -2224,10 +2224,11 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) prev_tx = put_tx; prev_tx_ctx = np->put_tx_ctx; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; - np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_tx_ctx->dma)) { + np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, + skb->data + offset, bcnt, + DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma)) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2373,10 +2374,11 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, prev_tx = put_tx; prev_tx_ctx = np->put_tx_ctx; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? 
NV_TX2_TSO_MAX_SIZE : size; - np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_tx_ctx->dma)) { + np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, + skb->data + offset, bcnt, + DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma)) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2810,9 +2812,9 @@ static int nv_rx_process(struct net_device *dev, int limit) * TODO: check if a prefetch of the first cacheline improves * the performance. */ - pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma, - np->get_rx_ctx->dma_len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma, + np->get_rx_ctx->dma_len, + DMA_FROM_DEVICE); skb = np->get_rx_ctx->skb; np->get_rx_ctx->skb = NULL; @@ -2916,9 +2918,9 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) * TODO: check if a prefetch of the first cacheline improves * the performance. */ - pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma, - np->get_rx_ctx->dma_len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma, + np->get_rx_ctx->dma_len, + DMA_FROM_DEVICE); skb = np->get_rx_ctx->skb; np->get_rx_ctx->skb = NULL; @@ -5070,11 +5072,11 @@ static int nv_loopback_test(struct net_device *dev) ret = 0; goto out; } - test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data, + test_dma_addr = dma_map_single(&np->pci_dev->dev, tx_skb->data, skb_tailroom(tx_skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - test_dma_addr)) { + DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + test_dma_addr)) { dev_kfree_skb_any(tx_skb); goto out; } @@ -5129,9 +5131,9 @@ static int nv_loopback_test(struct net_device *dev) } } - pci_unmap_single(np->pci_dev, test_dma_addr, - (skb_end_pointer(tx_skb) - tx_skb->data), - PCI_DMA_TODEVICE); + dma_unmap_single(&np->pci_dev->dev, test_dma_addr, + (skb_end_pointer(tx_skb) - tx_skb->data), + DMA_TO_DEVICE); dev_kfree_skb_any(tx_skb); out: /* stop engines */ From 2df9d6730215db85f7306f0bda03b7391e392837 Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Fri, 15 Sep 2017 07:58:47 +0200 Subject: [PATCH 0018/2177] net/ethernet/freescale: fix warning for ucc_geth uf_info.regs is resource_size_t i.e. phys_addr_t that can be either u32 or u64 according to CONFIG_PHYS_ADDR_T_64BIT. The printk format is thus adaptet to u64 and the regs value cast to u64 to take both u32 and u64 into account. Signed-off-by: Valentin Longchamp Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f77ba9fa257b..a96b838cffce 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3857,8 +3857,9 @@ static int ucc_geth_probe(struct platform_device* ofdev) } if (netif_msg_probe(&debug)) - pr_info("UCC%1d at 0x%8x (irq = %d)\n", - ug_info->uf_info.ucc_num + 1, ug_info->uf_info.regs, + pr_info("UCC%1d at 0x%8llx (irq = %d)\n", + ug_info->uf_info.ucc_num + 1, + (u64)ug_info->uf_info.regs, ug_info->uf_info.irq); /* Create an ethernet device instance */ From ca444073a2de97809d63e613d01203f4f4644cfb Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 17 Sep 2017 12:46:20 +0100 Subject: [PATCH 0019/2177] hamradio: baycom: use new parport device model Modify baycom driver to use the new parallel port device model. Signed-off-by: Sudip Mukherjee Acked-By: Thomas Sailer Signed-off-by: David S. Miller --- drivers/net/hamradio/baycom_epp.c | 50 ++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 1503f10122f7..1e62d00732f2 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -840,6 +840,7 @@ static int epp_open(struct net_device *dev) unsigned char tmp[128]; unsigned char stat; unsigned long tstart; + struct pardev_cb par_cb; if (!pp) { printk(KERN_ERR "%s: parport at 0x%lx unknown\n", bc_drvname, dev->base_addr); @@ -859,8 +860,21 @@ static int epp_open(struct net_device *dev) return -EIO; } memset(&bc->modem, 0, sizeof(bc->modem)); - bc->pdev = parport_register_device(pp, dev->name, NULL, epp_wakeup, - NULL, PARPORT_DEV_EXCL, dev); + memset(&par_cb, 0, sizeof(par_cb)); + par_cb.wakeup = epp_wakeup; + par_cb.private = (void *)dev; + par_cb.flags = PARPORT_DEV_EXCL; + for (i = 0; i < NR_PORTS; i++) + if (baycom_device[i] == dev) + break; + + if (i == NR_PORTS) { + pr_err("%s: no device found\n", bc_drvname); + parport_put_port(pp); + return -ENODEV; + } + + bc->pdev = parport_register_dev_model(pp, dev->name, &par_cb, i); parport_put_port(pp); if (!bc->pdev) { printk(KERN_ERR "%s: cannot register parport at 0x%lx\n", bc_drvname, pp->base); @@ -1185,6 +1199,23 @@ MODULE_LICENSE("GPL"); /* --------------------------------------------------------------------- */ +static int baycom_epp_par_probe(struct pardevice *par_dev) +{ + struct device_driver *drv = par_dev->dev.driver; + int len = strlen(drv->name); + + if (strncmp(par_dev->name, drv->name, len)) + return -ENODEV; + + return 0; +} + +static struct parport_driver baycom_epp_par_driver = { + .name = "bce", + .probe = baycom_epp_par_probe, + .devmodel = true, +}; + static void __init baycom_epp_dev_setup(struct net_device *dev) { struct baycom_state *bc = netdev_priv(dev); @@ -1204,10 +1235,15 @@ static void __init baycom_epp_dev_setup(struct net_device *dev) static int __init init_baycomepp(void) { - int i, found = 0; + int i, found = 0, ret; char set_hw = 1; printk(bc_drvinfo); + + ret = parport_register_driver(&baycom_epp_par_driver); + if (ret) + return ret; + /* * register net devices */ @@ -1241,7 +1277,12 @@ static int __init init_baycomepp(void) found++; } - return found ? 
0 : -ENXIO; + if (found == 0) { + parport_unregister_driver(&baycom_epp_par_driver); + return -ENXIO; + } + + return 0; } static void __exit cleanup_baycomepp(void) @@ -1260,6 +1301,7 @@ static void __exit cleanup_baycomepp(void) printk(paranoia_str, "cleanup_module"); } } + parport_unregister_driver(&baycom_epp_par_driver); } module_init(init_baycomepp); From 7ce103b4cbb20babf19b881e28228b7fd40ce0b3 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:24:15 +0300 Subject: [PATCH 0020/2177] net: korina: don't use overflow and underflow interrupts When such interrupts occur there is not much we can do. Dropping the whole ring doesn't help and only produces high packet loss. If we just ignore the interrupt the mac will drop one or few packets instead of the whole ring. Also this will lower the irq handling load and increase performance. Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 83 +---------------------------------- 1 file changed, 1 insertion(+), 82 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 3c0a6451273d..98d686ed69a9 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -122,8 +122,6 @@ struct korina_private { int rx_irq; int tx_irq; - int ovr_irq; - int und_irq; spinlock_t lock; /* NIC xmit lock */ @@ -891,8 +889,6 @@ static void korina_restart_task(struct work_struct *work) */ disable_irq(lp->rx_irq); disable_irq(lp->tx_irq); - disable_irq(lp->ovr_irq); - disable_irq(lp->und_irq); writel(readl(&lp->tx_dma_regs->dmasm) | DMA_STAT_FINI | DMA_STAT_ERR, @@ -911,40 +907,10 @@ static void korina_restart_task(struct work_struct *work) } korina_multicast_list(dev); - enable_irq(lp->und_irq); - enable_irq(lp->ovr_irq); enable_irq(lp->tx_irq); enable_irq(lp->rx_irq); } -static void korina_clear_and_restart(struct net_device *dev, u32 value) -{ - struct korina_private *lp = netdev_priv(dev); - - netif_stop_queue(dev); - writel(value, &lp->eth_regs->ethintfc); - schedule_work(&lp->restart_task); -} - -/* Ethernet Tx Underflow interrupt */ -static irqreturn_t korina_und_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct korina_private *lp = netdev_priv(dev); - unsigned int und; - - spin_lock(&lp->lock); - - und = readl(&lp->eth_regs->ethintfc); - - if (und & ETH_INT_FC_UND) - korina_clear_and_restart(dev, und & ~ETH_INT_FC_UND); - - spin_unlock(&lp->lock); - - return IRQ_HANDLED; -} - static void korina_tx_timeout(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); @@ -952,25 +918,6 @@ static void korina_tx_timeout(struct net_device *dev) schedule_work(&lp->restart_task); } -/* Ethernet Rx Overflow interrupt */ -static irqreturn_t -korina_ovr_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct korina_private *lp = netdev_priv(dev); - unsigned int ovr; - - spin_lock(&lp->lock); - ovr = readl(&lp->eth_regs->ethintfc); - - if (ovr & ETH_INT_FC_OVR) - korina_clear_and_restart(dev, ovr & ~ETH_INT_FC_OVR); - - spin_unlock(&lp->lock); - - return IRQ_HANDLED; -} - #ifdef CONFIG_NET_POLL_CONTROLLER static void korina_poll_controller(struct net_device *dev) { @@ -993,8 +940,7 @@ static int korina_open(struct net_device *dev) } /* Install the interrupt handler - * that handles the Done Finished - * Ovr and Und Events */ + * that handles the Done Finished */ ret = request_irq(lp->rx_irq, korina_rx_dma_interrupt, 0, "Korina ethernet Rx", dev); if (ret < 0) { @@ -1010,31 +956,10 @@ static 
int korina_open(struct net_device *dev) goto err_free_rx_irq; } - /* Install handler for overrun error. */ - ret = request_irq(lp->ovr_irq, korina_ovr_interrupt, - 0, "Ethernet Overflow", dev); - if (ret < 0) { - printk(KERN_ERR "%s: unable to get OVR IRQ %d\n", - dev->name, lp->ovr_irq); - goto err_free_tx_irq; - } - - /* Install handler for underflow error. */ - ret = request_irq(lp->und_irq, korina_und_interrupt, - 0, "Ethernet Underflow", dev); - if (ret < 0) { - printk(KERN_ERR "%s: unable to get UND IRQ %d\n", - dev->name, lp->und_irq); - goto err_free_ovr_irq; - } mod_timer(&lp->media_check_timer, jiffies + 1); out: return ret; -err_free_ovr_irq: - free_irq(lp->ovr_irq, dev); -err_free_tx_irq: - free_irq(lp->tx_irq, dev); err_free_rx_irq: free_irq(lp->rx_irq, dev); err_release: @@ -1052,8 +977,6 @@ static int korina_close(struct net_device *dev) /* Disable interrupts */ disable_irq(lp->rx_irq); disable_irq(lp->tx_irq); - disable_irq(lp->ovr_irq); - disable_irq(lp->und_irq); korina_abort_tx(dev); tmp = readl(&lp->tx_dma_regs->dmasm); @@ -1073,8 +996,6 @@ static int korina_close(struct net_device *dev) free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); - free_irq(lp->ovr_irq, dev); - free_irq(lp->und_irq, dev); return 0; } @@ -1113,8 +1034,6 @@ static int korina_probe(struct platform_device *pdev) lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx"); lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx"); - lp->ovr_irq = platform_get_irq_byname(pdev, "korina_ovr"); - lp->und_irq = platform_get_irq_byname(pdev, "korina_und"); r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs"); dev->base_addr = r->start; From 364a97f5d1ae3102d53a3ad1efea3fa546781f78 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:24:26 +0300 Subject: [PATCH 0021/2177] net: korina: optimize rx descriptor flags processing Signed-off-by: Roman Yeryomin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/korina.c | 99 ++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 98d686ed69a9..e5466e19994a 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -363,59 +363,60 @@ static int korina_rx(struct net_device *dev, int limit) if ((KORINA_RBSIZE - (u32)DMA_COUNT(rd->control)) == 0) break; - /* Update statistics counters */ - if (devcs & ETH_RX_CRC) - dev->stats.rx_crc_errors++; - if (devcs & ETH_RX_LOR) - dev->stats.rx_length_errors++; - if (devcs & ETH_RX_LE) - dev->stats.rx_length_errors++; - if (devcs & ETH_RX_OVR) - dev->stats.rx_fifo_errors++; - if (devcs & ETH_RX_CV) - dev->stats.rx_frame_errors++; - if (devcs & ETH_RX_CES) - dev->stats.rx_length_errors++; + /* check that this is a whole packet + * WARNING: DMA_FD bit incorrectly set + * in Rc32434 (errata ref #077) */ + if (!(devcs & ETH_RX_LD)) + goto next; + + if (!(devcs & ETH_RX_ROK)) { + /* Update statistics counters */ + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + if (devcs & ETH_RX_CRC) + dev->stats.rx_crc_errors++; + if (devcs & ETH_RX_LE) + dev->stats.rx_length_errors++; + if (devcs & ETH_RX_OVR) + dev->stats.rx_fifo_errors++; + if (devcs & ETH_RX_CV) + dev->stats.rx_frame_errors++; + if (devcs & ETH_RX_CES) + dev->stats.rx_frame_errors++; + + goto next; + } + + pkt_len = RCVPKT_LENGTH(devcs); + + /* must be the (first and) last + * descriptor then */ + pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; + + /* invalidate the cache */ + dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); + + /* Malloc up new buffer. */ + skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); + + if (!skb_new) + break; + /* Do not count the CRC */ + skb_put(skb, pkt_len - 4); + skb->protocol = eth_type_trans(skb, dev); + + /* Pass the packet to upper layers */ + netif_receive_skb(skb); + dev->stats.rx_packets++; + dev->stats.rx_bytes += pkt_len; + + /* Update the mcast stats */ if (devcs & ETH_RX_MP) dev->stats.multicast++; - if ((devcs & ETH_RX_LD) != ETH_RX_LD) { - /* check that this is a whole packet - * WARNING: DMA_FD bit incorrectly set - * in Rc32434 (errata ref #077) */ - dev->stats.rx_errors++; - dev->stats.rx_dropped++; - } else if ((devcs & ETH_RX_ROK)) { - pkt_len = RCVPKT_LENGTH(devcs); - - /* must be the (first and) last - * descriptor then */ - pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; - - /* invalidate the cache */ - dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); - - /* Malloc up new buffer. */ - skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); - - if (!skb_new) - break; - /* Do not count the CRC */ - skb_put(skb, pkt_len - 4); - skb->protocol = eth_type_trans(skb, dev); - - /* Pass the packet to upper layers */ - netif_receive_skb(skb); - dev->stats.rx_packets++; - dev->stats.rx_bytes += pkt_len; - - /* Update the mcast stats */ - if (devcs & ETH_RX_MP) - dev->stats.multicast++; - - lp->rx_skb[lp->rx_next_done] = skb_new; - } + lp->rx_skb[lp->rx_next_done] = skb_new; +next: rd->devcs = 0; /* Restore descriptor's curr_addr */ From d609d2893c25a3336422d06e1aff50d13ba5b7f2 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:24:38 +0300 Subject: [PATCH 0022/2177] net: korina: use NAPI_POLL_WEIGHT Signed-off-by: Roman Yeryomin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/korina.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index e5466e19994a..c210add9b654 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1082,7 +1082,7 @@ static int korina_probe(struct platform_device *pdev) dev->netdev_ops = &korina_netdev_ops; dev->ethtool_ops = &netdev_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; - netif_napi_add(dev, &lp->napi, korina_poll, 64); + netif_napi_add(dev, &lp->napi, korina_poll, NAPI_POLL_WEIGHT); lp->phy_addr = (((lp->rx_irq == 0x2c? 1:0) << 8) | 0x05); lp->mii_if.dev = dev; From 247c78f2bed0c4d72c381c9caf429173513dcc51 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:24:50 +0300 Subject: [PATCH 0023/2177] net: korina: use GRO Performance gain when receiving locally is 55->95Mbps and 50->65Mbps for NAT. Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index c210add9b654..5f36e1703378 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -406,7 +406,7 @@ static int korina_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); /* Pass the packet to upper layers */ - netif_receive_skb(skb); + napi_gro_receive(&lp->napi, skb); dev->stats.rx_packets++; dev->stats.rx_bytes += pkt_len; From 2e5396b14db3c885c5b9de698ecf38652a0f3c15 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:25:02 +0300 Subject: [PATCH 0024/2177] net: korina: whitespace cleanup Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 58 +++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 5f36e1703378..c26f0d84ba6b 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -64,9 +64,9 @@ #include #include -#define DRV_NAME "korina" -#define DRV_VERSION "0.10" -#define DRV_RELDATE "04Mar2008" +#define DRV_NAME "korina" +#define DRV_VERSION "0.10" +#define DRV_RELDATE "04Mar2008" #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \ ((dev)->dev_addr[1])) @@ -75,7 +75,7 @@ ((dev)->dev_addr[4] << 8) | \ ((dev)->dev_addr[5])) -#define MII_CLOCK 1250000 /* no more than 2.5MHz */ +#define MII_CLOCK 1250000 /* no more than 2.5MHz */ /* the following must be powers of two */ #define KORINA_NUM_RDS 64 /* number of receive descriptors */ @@ -87,15 +87,19 @@ #define KORINA_RBSIZE 1536 /* size of one resource buffer = Ether MTU */ #define KORINA_RDS_MASK (KORINA_NUM_RDS - 1) #define KORINA_TDS_MASK (KORINA_NUM_TDS - 1) -#define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc)) +#define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc)) #define TD_RING_SIZE (KORINA_NUM_TDS * sizeof(struct dma_desc)) -#define TX_TIMEOUT (6000 * HZ / 1000) +#define TX_TIMEOUT (6000 * HZ / 1000) -enum chain_status { desc_filled, desc_empty }; -#define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0) -#define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0) -#define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT) +enum chain_status { + desc_filled, + desc_empty +}; + +#define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0) +#define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0) +#define RCVPKT_LENGTH(X) (((X) 
& ETH_RX_LEN) >> ETH_RX_LEN_BIT) /* Information that need to be kept for each board. */ struct korina_private { @@ -123,7 +127,7 @@ struct korina_private { int rx_irq; int tx_irq; - spinlock_t lock; /* NIC xmit lock */ + spinlock_t lock; /* NIC xmit lock */ int dma_halt_cnt; int dma_run_cnt; @@ -146,17 +150,17 @@ static inline void korina_start_dma(struct dma_reg *ch, u32 dma_addr) static inline void korina_abort_dma(struct net_device *dev, struct dma_reg *ch) { - if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) { - writel(0x10, &ch->dmac); + if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) { + writel(0x10, &ch->dmac); - while (!(readl(&ch->dmas) & DMA_STAT_HALT)) - netif_trans_update(dev); + while (!(readl(&ch->dmas) & DMA_STAT_HALT)) + netif_trans_update(dev); - writel(0, &ch->dmas); - } + writel(0, &ch->dmas); + } - writel(0, &ch->dmadptr); - writel(0, &ch->dmandptr); + writel(0, &ch->dmadptr); + writel(0, &ch->dmandptr); } static inline void korina_chain_dma(struct dma_reg *ch, u32 dma_addr) @@ -685,7 +689,7 @@ static int korina_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) /* ethtool helpers */ static void netdev_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) + struct ethtool_drvinfo *info) { struct korina_private *lp = netdev_priv(dev); @@ -728,10 +732,10 @@ static u32 netdev_get_link(struct net_device *dev) } static const struct ethtool_ops netdev_ethtool_ops = { - .get_drvinfo = netdev_get_drvinfo, - .get_link = netdev_get_link, - .get_link_ksettings = netdev_get_link_ksettings, - .set_link_ksettings = netdev_set_link_ksettings, + .get_drvinfo = netdev_get_drvinfo, + .get_link = netdev_get_link, + .get_link_ksettings = netdev_get_link_ksettings, + .set_link_ksettings = netdev_set_link_ksettings, }; static int korina_alloc_ring(struct net_device *dev) @@ -863,7 +867,7 @@ static int korina_init(struct net_device *dev) /* Management Clock Prescaler Divisor * Clock independent setting */ writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1, - &lp->eth_regs->ethmcp); + &lp->eth_regs->ethmcp); /* don't transmit until fifo contains 48b */ writel(48, &lp->eth_regs->ethfifott); @@ -946,14 +950,14 @@ static int korina_open(struct net_device *dev) 0, "Korina ethernet Rx", dev); if (ret < 0) { printk(KERN_ERR "%s: unable to get Rx DMA IRQ %d\n", - dev->name, lp->rx_irq); + dev->name, lp->rx_irq); goto err_release; } ret = request_irq(lp->tx_irq, korina_tx_dma_interrupt, 0, "Korina ethernet Tx", dev); if (ret < 0) { printk(KERN_ERR "%s: unable to get Tx DMA IRQ %d\n", - dev->name, lp->tx_irq); + dev->name, lp->tx_irq); goto err_free_rx_irq; } From 87736fc6f75f23ae2583ee197a0e85515f246ba6 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:25:11 +0300 Subject: [PATCH 0025/2177] net: korina: update authors Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index c26f0d84ba6b..d58aa4bfcb58 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -4,6 +4,7 @@ * Copyright 2004 IDT Inc. 
(rischelp@idt.com) * Copyright 2006 Felix Fietkau * Copyright 2008 Florian Fainelli + * Copyright 2017 Roman Yeryomin * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -1150,5 +1151,6 @@ module_platform_driver(korina_driver); MODULE_AUTHOR("Philip Rischel "); MODULE_AUTHOR("Felix Fietkau "); MODULE_AUTHOR("Florian Fainelli "); +MODULE_AUTHOR("Roman Yeryomin "); MODULE_DESCRIPTION("IDT RC32434 (Korina) Ethernet driver"); MODULE_LICENSE("GPL"); From da1d2def654dc5a9cae346a93f25bd2e8959b080 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:25:21 +0300 Subject: [PATCH 0026/2177] net: korina: bump version Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index d58aa4bfcb58..7cecd9dbc111 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -66,8 +66,8 @@ #include #define DRV_NAME "korina" -#define DRV_VERSION "0.10" -#define DRV_RELDATE "04Mar2008" +#define DRV_VERSION "0.20" +#define DRV_RELDATE "15Sep2017" #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \ ((dev)->dev_addr[1])) From 7e5dd53f6ee81ffa0d1f42f79e6440b6a751ab40 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Sep 2017 12:40:38 +0100 Subject: [PATCH 0027/2177] net_sched: use explicit size of struct tcmsg, remove need to declare tcm Pointer tcm is being initialized and is never read, it is only being used to determine the size of struct tcmsg. Clean this up by removing variable tcm and explicitly using the sizeof struct tcmsg rather than *tcm. Cleans up clang warning: warning: Value stored to 'tcm' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/sched/sch_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c6deb74e3d2f..aa82116ed10c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1500,7 +1500,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int s_idx, s_q_idx; struct net_device *dev; const struct nlmsghdr *nlh = cb->nlh; - struct tcmsg *tcm = nlmsg_data(nlh); struct nlattr *tca[TCA_MAX + 1]; int err; @@ -1510,7 +1509,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); if (err < 0) return err; From 3c75f6ee139d464351f8ab77a042dd3635769d98 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2017 12:36:22 -0700 Subject: [PATCH 0028/2177] net_sched: sch_htb: add per class overlimits counter HTB qdisc overlimits counter is properly increased, but we have no per class counter, meaning it is difficult to diagnose HTB problems. This patch adds this counter, visible in "tc -s class show dev eth0", with current iproute2. Signed-off-by: Eric Dumazet Reported-by: Denys Fedoryshchenko Signed-off-by: David S. 
Miller --- net/sched/sch_htb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 7e148376ba52..c6d7ae81b41f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -142,6 +142,7 @@ struct htb_class { struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ unsigned int drops ____cacheline_aligned_in_smp; + unsigned int overlimits; }; struct htb_level { @@ -533,6 +534,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) if (new_mode == cl->cmode) return; + if (new_mode == HTB_CANT_SEND) + cl->overlimits++; + if (cl->prio_activity) { /* not necessary: speed optimization */ if (cl->cmode != HTB_CANT_SEND) htb_deactivate_prios(q, cl); @@ -1143,6 +1147,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) struct htb_class *cl = (struct htb_class *)arg; struct gnet_stats_queue qs = { .drops = cl->drops, + .overlimits = cl->overlimits, }; __u32 qlen = 0; From 38c5eb93aca9dc1b21a2c96d583ce7f9886a44e6 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 18 Sep 2017 15:36:51 +0200 Subject: [PATCH 0029/2177] net: mvpp2: remove useless goto Remove a goto in the PPv2 tx function which jumps to the next line anyway. This is a cosmetic commit. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index dd0ee2691c86..8041d692db3c 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6452,7 +6452,6 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) { tx_desc_unmap_put(port, txq, tx_desc); frags = 0; - goto out; } } From 173f4c5ebbd803023e42799d956cf174dea92db5 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 18 Sep 2017 20:18:55 +0200 Subject: [PATCH 0030/2177] vsock: vmci: Remove unneeded linux/miscdevice.h include net/vmw_vsock/vmci_transport.c does not use any miscdevice so this patch remove this unnecessary inclusion. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 10ae7823a19d..0206155bff53 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include From e454cf5958538666635488030046b6a84a22d447 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 18 Sep 2017 15:30:55 -0400 Subject: [PATCH 0031/2177] bpf: Implement map_delete_elem for BPF_MAP_TYPE_LPM_TRIE This is a simple non-recursive delete operation. It prunes paths of empty nodes in the tree, but it does not try to further compress the tree as nodes are removed. Signed-off-by: Craig Gallek Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- kernel/bpf/lpm_trie.c | 80 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 1b767844a76f..9d58a576b2ae 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -389,10 +389,84 @@ out: return ret; } -static int trie_delete_elem(struct bpf_map *map, void *key) +/* Called from syscall or from eBPF program */ +static int trie_delete_elem(struct bpf_map *map, void *_key) { - /* TODO */ - return -ENOSYS; + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct bpf_lpm_trie_key *key = _key; + struct lpm_trie_node __rcu **trim; + struct lpm_trie_node *node; + unsigned long irq_flags; + unsigned int next_bit; + size_t matchlen = 0; + int ret = 0; + + if (key->prefixlen > trie->max_prefixlen) + return -EINVAL; + + raw_spin_lock_irqsave(&trie->lock, irq_flags); + + /* Walk the tree looking for an exact key/length match and keeping + * track of where we could begin trimming the tree. The trim-point + * is the sub-tree along the walk consisting of only single-child + * intermediate nodes and ending at a leaf node that we want to + * remove. + */ + trim = &trie->root; + node = rcu_dereference_protected( + trie->root, lockdep_is_held(&trie->lock)); + while (node) { + matchlen = longest_prefix_match(trie, node, key); + + if (node->prefixlen != matchlen || + node->prefixlen == key->prefixlen) + break; + + next_bit = extract_bit(key->data, node->prefixlen); + /* If we hit a node that has more than one child or is a valid + * prefix itself, do not remove it. Reset the root of the trim + * path to its descendant on our path. + */ + if (!(node->flags & LPM_TREE_NODE_FLAG_IM) || + (node->child[0] && node->child[1])) + trim = &node->child[next_bit]; + node = rcu_dereference_protected( + node->child[next_bit], lockdep_is_held(&trie->lock)); + } + + if (!node || node->prefixlen != key->prefixlen || + (node->flags & LPM_TREE_NODE_FLAG_IM)) { + ret = -ENOENT; + goto out; + } + + trie->n_entries--; + + /* If the node we are removing is not a leaf node, simply mark it + * as intermediate and we are done. + */ + if (rcu_access_pointer(node->child[0]) || + rcu_access_pointer(node->child[1])) { + node->flags |= LPM_TREE_NODE_FLAG_IM; + goto out; + } + + /* trim should now point to the slot holding the start of a path from + * zero or more intermediate nodes to our leaf node for deletion. + */ + while ((node = rcu_dereference_protected( + *trim, lockdep_is_held(&trie->lock)))) { + RCU_INIT_POINTER(*trim, NULL); + trim = rcu_access_pointer(node->child[0]) ? + &node->child[0] : + &node->child[1]; + kfree_rcu(node, rcu); + } + +out: + raw_spin_unlock_irqrestore(&trie->lock, irq_flags); + + return ret; } #define LPM_DATA_SIZE_MAX 256 From bae30468dfc1522464f16051addd7bc7da6da75a Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 18 Sep 2017 15:30:56 -0400 Subject: [PATCH 0032/2177] bpf: Add uniqueness invariant to trivial lpm test implementation The 'trivial' lpm implementation in this test allows equivalent nodes to be added (that is, nodes consisting of the same prefix and prefix length). For lookup operations, this is fine because insertion happens at the head of the (singly linked) list and the first, best match is returned. In order to support deletion, the tlpm data structue must first enforce uniqueness. This change modifies the insertion algorithm to search for equivalent nodes and remove them. 
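The insertion-time uniqueness check described above reduces to a single exact-match lookup before a new list head is allocated. The condensed sketch below mirrors the tlpm_add() hunk in the diff further down; struct tlpm_node and tlpm_match() come from the same test file, and the function tail that links a freshly allocated node into the list is reconstructed from context rather than copied verbatim, so treat this as an illustration of the invariant, not the exact upstream code.

	static struct tlpm_node *tlpm_add(struct tlpm_node *list,
					  const uint8_t *key, size_t n_bits)
	{
		struct tlpm_node *node;
		size_t n = (n_bits + 7) / 8;

		/* 'overwrite' an equivalent entry (same key, same prefix
		 * length) if one already exists, so the list never holds
		 * duplicates
		 */
		node = tlpm_match(list, key, n_bits);
		if (node && node->n_bits == n_bits) {
			memcpy(node->key, key, n);
			return list;
		}

		/* otherwise prepend a new node and return it as the new head */
		node = malloc(sizeof(*node) + n);
		assert(node);
		node->next = list;
		node->n_bits = n_bits;
		memcpy(node->key, key, n);
		return node;
	}
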
Note: the BPF_MAP_TYPE_LPM_TRIE already has a uniqueness invariant that is implemented as node replacement. Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_lpm_map.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index e97565243d59..a5ed7c4f1819 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -31,6 +31,10 @@ struct tlpm_node { uint8_t key[]; }; +static struct tlpm_node *tlpm_match(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits); + static struct tlpm_node *tlpm_add(struct tlpm_node *list, const uint8_t *key, size_t n_bits) @@ -38,9 +42,17 @@ static struct tlpm_node *tlpm_add(struct tlpm_node *list, struct tlpm_node *node; size_t n; + n = (n_bits + 7) / 8; + + /* 'overwrite' an equivalent entry if one already exists */ + node = tlpm_match(list, key, n_bits); + if (node && node->n_bits == n_bits) { + memcpy(node->key, key, n); + return list; + } + /* add new entry with @key/@n_bits to @list and return new head */ - n = (n_bits + 7) / 8; node = malloc(sizeof(*node) + n); assert(node); From e8d1749910b56a7248cb9a5392274348d10108bb Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 18 Sep 2017 15:30:57 -0400 Subject: [PATCH 0033/2177] bpf: Test deletion in BPF_MAP_TYPE_LPM_TRIE Extend the 'random' operation tests to include a delete operation (delete half of the nodes from both lpm implementions and ensure that lookups are still equivalent). Also, add a simple IPv4 test which verifies lookup behavior as nodes are deleted from the tree. Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/testing/selftests/bpf/test_lpm_map.c | 187 ++++++++++++++++++++- 1 file changed, 183 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index a5ed7c4f1819..6fedb9fec36b 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -104,6 +104,34 @@ static struct tlpm_node *tlpm_match(struct tlpm_node *list, return best; } +static struct tlpm_node *tlpm_delete(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits) +{ + struct tlpm_node *best = tlpm_match(list, key, n_bits); + struct tlpm_node *node; + + if (!best || best->n_bits != n_bits) + return list; + + if (best == list) { + node = best->next; + free(best); + return node; + } + + for (node = list; node; node = node->next) { + if (node->next == best) { + node->next = best->next; + free(best); + return list; + } + } + /* should never get here */ + assert(0); + return list; +} + static void test_lpm_basic(void) { struct tlpm_node *list = NULL, *t1, *t2; @@ -126,6 +154,13 @@ static void test_lpm_basic(void) assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15)); assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16)); + list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16)); + + list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8); + assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + tlpm_clear(list); } @@ -170,7 +205,7 @@ static void test_lpm_order(void) static void test_lpm_map(int keysize) { - size_t i, j, n_matches, n_nodes, n_lookups; + size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups; struct tlpm_node *t, *list = NULL; struct bpf_lpm_trie_key *key; uint8_t *data, *value; @@ -182,6 +217,7 @@ static void test_lpm_map(int keysize) */ n_matches = 0; + n_matches_after_delete = 0; n_nodes = 1 << 8; n_lookups = 1 << 16; @@ -235,15 +271,54 @@ static void test_lpm_map(int keysize) } } + /* Remove the first half of the elements in the tlpm and the + * corresponding nodes from the bpf-lpm. Then run the same + * large number of random lookups in both and make sure they match. + * Note: we need to count the number of nodes actually inserted + * since there may have been duplicates. + */ + for (i = 0, t = list; t; i++, t = t->next) + ; + for (j = 0; j < i / 2; ++j) { + key->prefixlen = list->n_bits; + memcpy(key->data, list->key, keysize); + r = bpf_map_delete_elem(map, key); + assert(!r); + list = tlpm_delete(list, list->key, list->n_bits); + assert(list); + } + for (i = 0; i < n_lookups; ++i) { + for (j = 0; j < keysize; ++j) + data[j] = rand() & 0xff; + + t = tlpm_match(list, data, 8 * keysize); + + key->prefixlen = 8 * keysize; + memcpy(key->data, data, keysize); + r = bpf_map_lookup_elem(map, key, value); + assert(!r || errno == ENOENT); + assert(!t == !!r); + + if (t) { + ++n_matches_after_delete; + assert(t->n_bits == value[keysize]); + for (j = 0; j < t->n_bits; ++j) + assert((t->key[j / 8] & (1 << (7 - j % 8))) == + (value[j / 8] & (1 << (7 - j % 8)))); + } + } + close(map); tlpm_clear(list); /* With 255 random nodes in the map, we are pretty likely to match * something on every lookup. 
For statistics, use this: * - * printf(" nodes: %zu\n" - * "lookups: %zu\n" - * "matches: %zu\n", n_nodes, n_lookups, n_matches); + * printf(" nodes: %zu\n" + * " lookups: %zu\n" + * " matches: %zu\n" + * "matches(delete): %zu\n", + * n_nodes, n_lookups, n_matches, n_matches_after_delete); */ } @@ -343,6 +418,108 @@ static void test_lpm_ipaddr(void) close(map_fd_ipv6); } +static void test_lpm_delete(void) +{ + struct bpf_lpm_trie_key *key; + size_t key_size; + int map_fd; + __u64 value; + + key_size = sizeof(*key) + sizeof(__u32); + key = alloca(key_size); + + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, + key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); + assert(map_fd >= 0); + + /* Add nodes: + * 192.168.0.0/16 (1) + * 192.168.0.0/24 (2) + * 192.168.128.0/24 (3) + * 192.168.1.0/24 (4) + * + * (1) + * / \ + * (IM) (3) + * / \ + * (2) (4) + */ + value = 1; + key->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 2; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 3; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 4; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + /* remove non-existent node */ + key->prefixlen = 32; + inet_pton(AF_INET, "10.0.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && + errno == ENOENT); + + /* assert initial lookup */ + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 2); + + /* remove leaf node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 1); + + /* remove leaf (and intermediary) node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.1.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 1); + + /* remove root node */ + key->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.128.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 3); + + /* remove last node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.128.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && + errno == ENOENT); + + close(map_fd); +} + int main(void) { struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; @@ -365,6 +542,8 @@ int main(void) test_lpm_ipaddr(); + test_lpm_delete(); + printf("test_lpm: OK\n"); return 0; } From e2b2d35a052d9264a774715bc6aa3395a45dcfa2 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:08 +0200 Subject: [PATCH 0034/2177] mlxsw: spectrum: Change init order The multicast router offloading code is going to require the counter_pools initialization to occur before the router 
initialization, thus, change the spectrum initialization order to fix it. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 696b99e65a5a..97284161ea35 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3693,6 +3693,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_switchdev_init; } + err = mlxsw_sp_counter_pool_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); + goto err_counter_pool_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3711,12 +3717,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_acl_init; } - err = mlxsw_sp_counter_pool_init(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); - goto err_counter_pool_init; - } - err = mlxsw_sp_dpipe_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n"); @@ -3734,14 +3734,14 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, err_ports_create: mlxsw_sp_dpipe_fini(mlxsw_sp); err_dpipe_init: - mlxsw_sp_counter_pool_fini(mlxsw_sp); -err_counter_pool_init: mlxsw_sp_acl_fini(mlxsw_sp); err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); err_span_init: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_counter_pool_fini(mlxsw_sp); +err_counter_pool_init: mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: mlxsw_sp_lag_fini(mlxsw_sp); @@ -3760,10 +3760,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); - mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); From d3b939b8f9a571da82359b6baa5506c9179770d1 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:09 +0200 Subject: [PATCH 0035/2177] mlxsw: spectrum: Move ACL flexible actions instance to spectrum A flexible action instance allows, given a set of ops, creating, committing and sharing a set of ACL action blocks. The flexible action instance in question is using the spectrum KVD linear space to store the flexible action sets. Move this flexible action instance to the common spectrum struct to allow other users (such as multicast router) to get that functionality. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/Makefile | 2 +- .../net/ethernet/mellanox/mlxsw/spectrum.c | 10 ++ .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../ethernet/mellanox/mlxsw/spectrum_acl.c | 93 +------------ .../mlxsw/spectrum_acl_flex_actions.c | 129 ++++++++++++++++++ .../mlxsw/spectrum_acl_flex_actions.h | 44 ++++++ 6 files changed, 186 insertions(+), 93 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 891ff418bb5e..4b88158173f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -17,7 +17,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ - spectrum_ipip.o + spectrum_ipip.o spectrum_acl_flex_actions.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 97284161ea35..6ba6ff276b17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -69,6 +69,7 @@ #include "txheader.h" #include "spectrum_cnt.h" #include "spectrum_dpipe.h" +#include "spectrum_acl_flex_actions.h" #include "../mlxfw/mlxfw.h" #define MLXSW_FWREV_MAJOR 13 @@ -3699,6 +3700,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_counter_pool_init; } + err = mlxsw_sp_afa_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL actions\n"); + goto err_afa_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3740,6 +3747,8 @@ err_acl_init: err_span_init: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_afa_fini(mlxsw_sp); +err_afa_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); err_counter_pool_init: mlxsw_sp_switchdev_fini(mlxsw_sp); @@ -3763,6 +3772,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); mlxsw_sp_lag_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 84ce83acdc19..7180d8f3de75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -152,6 +152,7 @@ struct mlxsw_sp { struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; + struct mlxsw_afa *afa; struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; struct { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 4b2455e3e079..2523785f1904 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -52,7 +52,6 @@ struct mlxsw_sp_acl { struct mlxsw_sp *mlxsw_sp; struct mlxsw_afk *afk; - struct mlxsw_afa *afa; struct mlxsw_sp_fid *dummy_fid; const struct mlxsw_sp_acl_ops *ops; struct rhashtable ruleset_ht; @@ -333,7 
+332,7 @@ mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) rulei = kzalloc(sizeof(*rulei), GFP_KERNEL); if (!rulei) return NULL; - rulei->act_block = mlxsw_afa_block_create(acl->afa); + rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa); if (IS_ERR(rulei->act_block)) { err = PTR_ERR(rulei->act_block); goto err_afa_block_create; @@ -653,85 +652,6 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, return 0; } -#define MLXSW_SP_KDVL_ACT_EXT_SIZE 1 - -static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, - char *enc_actions, bool is_first) -{ - struct mlxsw_sp *mlxsw_sp = priv; - char pefa_pl[MLXSW_REG_PEFA_LEN]; - u32 kvdl_index; - int err; - - /* The first action set of a TCAM entry is stored directly in TCAM, - * not KVD linear area. - */ - if (is_first) - return 0; - - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE, - &kvdl_index); - if (err) - return err; - mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); - if (err) - goto err_pefa_write; - *p_kvdl_index = kvdl_index; - return 0; - -err_pefa_write: - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); - return err; -} - -static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, - bool is_first) -{ - struct mlxsw_sp *mlxsw_sp = priv; - - if (is_first) - return; - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); -} - -static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, - u8 local_port) -{ - struct mlxsw_sp *mlxsw_sp = priv; - char ppbs_pl[MLXSW_REG_PPBS_LEN]; - u32 kvdl_index; - int err; - - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); - if (err) - return err; - mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); - if (err) - goto err_ppbs_write; - *p_kvdl_index = kvdl_index; - return 0; - -err_ppbs_write: - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); - return err; -} - -static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) -{ - struct mlxsw_sp *mlxsw_sp = priv; - - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); -} - -static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { - .kvdl_set_add = mlxsw_sp_act_kvdl_set_add, - .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, - .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, - .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, -}; - int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) { const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops; @@ -753,14 +673,6 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) goto err_afk_create; } - acl->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, - ACL_ACTIONS_PER_SET), - &mlxsw_sp_act_afa_ops, mlxsw_sp); - if (IS_ERR(acl->afa)) { - err = PTR_ERR(acl->afa); - goto err_afa_create; - } - err = rhashtable_init(&acl->ruleset_ht, &mlxsw_sp_acl_ruleset_ht_params); if (err) @@ -792,8 +704,6 @@ err_acl_ops_init: err_fid_get: rhashtable_destroy(&acl->ruleset_ht); err_rhashtable_init: - mlxsw_afa_destroy(acl->afa); -err_afa_create: mlxsw_afk_destroy(acl->afk); err_afk_create: kfree(acl); @@ -810,7 +720,6 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) WARN_ON(!list_empty(&acl->rules)); mlxsw_sp_fid_put(acl->dummy_fid); rhashtable_destroy(&acl->ruleset_ht); - mlxsw_afa_destroy(acl->afa); mlxsw_afk_destroy(acl->afk); kfree(acl); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c new file mode 100644 index 
000000000000..4d3340ed0291 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -0,0 +1,129 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spectrum_acl_flex_actions.h" +#include "core_acl_flex_actions.h" + +#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1 + +static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char pefa_pl[MLXSW_REG_PEFA_LEN]; + u32 kvdl_index; + int err; + + /* The first action set of a TCAM entry is stored directly in TCAM, + * not KVD linear area. 
+ */ + if (is_first) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE, + &kvdl_index); + if (err) + return err; + mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); + if (err) + goto err_pefa_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_pefa_write: + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, + bool is_first) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + if (is_first) + return; + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); +} + +static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, + u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char ppbs_pl[MLXSW_REG_PPBS_LEN]; + u32 kvdl_index; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); + if (err) + return err; + mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); + if (err) + goto err_ppbs_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_ppbs_write: + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); +} + +static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { + .kvdl_set_add = mlxsw_sp_act_kvdl_set_add, + .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, + .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, + .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, +}; + +int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_ACTIONS_PER_SET), + &mlxsw_sp_act_afa_ops, mlxsw_sp); + return PTR_ERR_OR_ZERO(mlxsw_sp->afa); +} + +void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_afa_destroy(mlxsw_sp->afa); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h new file mode 100644 index 000000000000..2726192836ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h @@ -0,0 +1,44 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H +#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H + +#include "spectrum.h" + +int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp); + +#endif From 4b8a79ff27645c1201287c3b17091add748d1fb9 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:10 +0200 Subject: [PATCH 0036/2177] mlxsw: acl: Introduce mcrouter ACL action The Spectrum multicast forwarding is done using an ACL action. Add the mcrouter ACL action that will be used to offload the multicast router logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/core_acl_flex_actions.c | 71 +++++++++++++++++++ .../mellanox/mlxsw/core_acl_flex_actions.h | 3 + 2 files changed, 74 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 5ae110172c22..65a32d7b4350 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -891,3 +891,74 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid) return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set); + +/* MC Routing Action + * ----------------- + * The Multicast router action. Can be used by RMFT_V2 - Router Multicast + * Forwarding Table Version 2 Register. + */ + +#define MLXSW_AFA_MCROUTER_CODE 0x10 +#define MLXSW_AFA_MCROUTER_SIZE 2 + +enum mlxsw_afa_mcrouter_rpf_action { + MLXSW_AFA_MCROUTER_RPF_ACTION_NOP, + MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + MLXSW_AFA_MCROUTER_RPF_ACTION_DISCARD_ERROR, +}; + +/* afa_mcrouter_rpf_action */ +MLXSW_ITEM32(afa, mcrouter, rpf_action, 0x00, 28, 3); + +/* afa_mcrouter_expected_irif */ +MLXSW_ITEM32(afa, mcrouter, expected_irif, 0x00, 0, 16); + +/* afa_mcrouter_min_mtu */ +MLXSW_ITEM32(afa, mcrouter, min_mtu, 0x08, 0, 16); + +enum mlxsw_afa_mrouter_vrmid { + MLXSW_AFA_MCROUTER_VRMID_INVALID, + MLXSW_AFA_MCROUTER_VRMID_VALID +}; + +/* afa_mcrouter_vrmid + * Valid RMID: rigr_rmid_index is used as RMID + */ +MLXSW_ITEM32(afa, mcrouter, vrmid, 0x0C, 31, 1); + +/* afa_mcrouter_rigr_rmid_index + * When the vrmid field is set to invalid, the field is used as pointer to + * Router Interface Group (RIGR) Table in the KVD linear. + * When the vrmid is set to valid, the field is used as RMID index, ranged + * from 0 to max_mid - 1. The index is to the Port Group Table. 
+ */ +MLXSW_ITEM32(afa, mcrouter, rigr_rmid_index, 0x0C, 0, 24); + +static inline void +mlxsw_afa_mcrouter_pack(char *payload, + enum mlxsw_afa_mcrouter_rpf_action rpf_action, + u16 expected_irif, u16 min_mtu, + enum mlxsw_afa_mrouter_vrmid vrmid, u32 rigr_rmid_index) + +{ + mlxsw_afa_mcrouter_rpf_action_set(payload, rpf_action); + mlxsw_afa_mcrouter_expected_irif_set(payload, expected_irif); + mlxsw_afa_mcrouter_min_mtu_set(payload, min_mtu); + mlxsw_afa_mcrouter_vrmid_set(payload, vrmid); + mlxsw_afa_mcrouter_rigr_rmid_index_set(payload, rigr_rmid_index); +} + +int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, + u16 expected_irif, u16 min_mtu, + bool rmid_valid, u32 kvdl_index) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_MCROUTER_CODE, + MLXSW_AFA_MCROUTER_SIZE); + if (!act) + return -ENOBUFS; + mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + expected_irif, min_mtu, rmid_valid, kvdl_index); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index f99c341b2497..5dbb31fa5a27 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -68,5 +68,8 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, u32 counter_index); int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid); +int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, + u16 expected_irif, u16 min_mtu, + bool rmid_valid, u32 kvdl_index); #endif From 9cb3fa940e2c1c62d35972ab8433531a4ba421a5 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:11 +0200 Subject: [PATCH 0037/2177] mlxsw: acl: Change trap ACL action to get the trap_id as a parameter Allow the trap ACL action to be configured with different traps. This allows the multicast router offloading code to use that same ACL action with the multicast router traps. By using different traps, the multicast router can have different trap policies and can handle the packet differently. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 65a32d7b4350..ab3ffe7a8eda 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -712,7 +712,7 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) } EXPORT_SYMBOL(mlxsw_afa_block_append_drop); -int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block) +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) { char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAPDISC_CODE, @@ -722,7 +722,7 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block) return -ENOBUFS; mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, - MLXSW_TRAP_ID_ACL0); + trap_id); return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 5dbb31fa5a27..501819c790d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -60,7 +60,7 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); void mlxsw_afa_block_continue(struct mlxsw_afa_block *block); void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); -int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block); +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 2523785f1904..eede75fbd585 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -396,7 +396,8 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei) { - return mlxsw_afa_block_append_trap(rulei->act_block); + return mlxsw_afa_block_append_trap(rulei->act_block, + MLXSW_TRAP_ID_ACL0); } int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, From 587265655159d73247a56236092917131183496e Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:12 +0200 Subject: [PATCH 0038/2177] mlxsw: reg: Rename the flexible action set length field The MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN is relevant for the multicast router registers too, so rename it to have a general name which is not bound to a specific register. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index cc27c5de5a1d..fb8ab441b11e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2142,15 +2142,14 @@ MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN); */ MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24); -#define MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN 0xA8 +#define MLXSW_REG_FLEX_ACTION_SET_LEN 0xA8 /* reg_pefa_flex_action_set * Action-set to perform when rule is matched. * Must be zero padded if action set is shorter. * Access: RW */ -MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, - MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN); +MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, MLXSW_REG_FLEX_ACTION_SET_LEN); static inline void mlxsw_reg_pefa_pack(char *payload, u32 index, const char *flex_action_set) @@ -2243,7 +2242,7 @@ MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80, * Access: RW */ MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0, - MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN); + MLXSW_REG_FLEX_ACTION_SET_LEN); static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid, enum mlxsw_reg_ptce2_op op, From 46a7054ebace0fcd0d1826881aa5ab219faa6a77 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:13 +0200 Subject: [PATCH 0039/2177] mlxsw: reg: Add The Router TCAM Allocation register This register is used for allocation of regions in the TCAM table and it will be used by the multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 52 +++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index fb8ab441b11e..e9f37eac8788 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4310,6 +4310,57 @@ mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); } +/* RTAR - Router TCAM Allocation Register + * -------------------------------------- + * This register is used for allocation of regions in the TCAM table. + */ +#define MLXSW_REG_RTAR_ID 0x8004 +#define MLXSW_REG_RTAR_LEN 0x20 + +MLXSW_REG_DEFINE(rtar, MLXSW_REG_RTAR_ID, MLXSW_REG_RTAR_LEN); + +enum mlxsw_reg_rtar_op { + MLXSW_REG_RTAR_OP_ALLOCATE, + MLXSW_REG_RTAR_OP_RESIZE, + MLXSW_REG_RTAR_OP_DEALLOCATE, +}; + +/* reg_rtar_op + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, op, 0x00, 28, 4); + +enum mlxsw_reg_rtar_key_type { + MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST = 1, + MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST = 3 +}; + +/* reg_rtar_key_type + * TCAM key type for the region. + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, key_type, 0x00, 0, 8); + +/* reg_rtar_region_size + * TCAM region size. When allocating/resizing this is the requested + * size, the response is the actual size. + * Note: Actual size may be larger than requested. 
+ * Reserved for op = Deallocate + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, region_size, 0x04, 0, 16); + +static inline void mlxsw_reg_rtar_pack(char *payload, + enum mlxsw_reg_rtar_op op, + enum mlxsw_reg_rtar_key_type key_type, + u16 region_size) +{ + MLXSW_REG_ZERO(rtar, payload); + mlxsw_reg_rtar_op_set(payload, op); + mlxsw_reg_rtar_key_type_set(payload, key_type); + mlxsw_reg_rtar_region_size_set(payload, region_size); +} + /* RATR - Router Adjacency Table Register * -------------------------------------- * The RATR register is used to configure the Router Adjacency (next-hop) @@ -6855,6 +6906,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(hpkt), MLXSW_REG(rgcr), MLXSW_REG(ritr), + MLXSW_REG(rtar), MLXSW_REG(ratr), MLXSW_REG(rtdp), MLXSW_REG(ricnt), From 5080c7e91701744ef1a5d7aab51f568f889bfddb Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:14 +0200 Subject: [PATCH 0040/2177] mlxsw: reg: Add the Router Interface Group Version 2 register The RIGR-V2 register is used to add, remove and query egress interface list of a multicast forwarding entry and it will be used by the multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 83 +++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index e9f37eac8788..1778d7f5f843 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5646,6 +5646,88 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); } +/* RIGR-V2 - Router Interface Group Register Version 2 + * --------------------------------------------------- + * The RIGR_V2 register is used to add, remove and query egress interface list + * of a multicast forwarding entry. + */ +#define MLXSW_REG_RIGR2_ID 0x8023 +#define MLXSW_REG_RIGR2_LEN 0xB0 + +#define MLXSW_REG_RIGR2_MAX_ERIFS 32 + +MLXSW_REG_DEFINE(rigr2, MLXSW_REG_RIGR2_ID, MLXSW_REG_RIGR2_LEN); + +/* reg_rigr2_rigr_index + * KVD Linear index. + * Access: Index + */ +MLXSW_ITEM32(reg, rigr2, rigr_index, 0x04, 0, 24); + +/* reg_rigr2_vnext + * Next RIGR Index is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, vnext, 0x08, 31, 1); + +/* reg_rigr2_next_rigr_index + * Next RIGR Index. The index is to the KVD linear. + * Reserved when vnxet = '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, next_rigr_index, 0x08, 0, 24); + +/* reg_rigr2_vrmid + * RMID Index is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, vrmid, 0x20, 31, 1); + +/* reg_rigr2_rmid_index + * RMID Index. + * Range 0 .. max_mid - 1 + * Reserved when vrmid = '0'. + * The index is to the Port Group Table (PGT) + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, rmid_index, 0x20, 0, 16); + +/* reg_rigr2_erif_entry_v + * Egress Router Interface is valid. + * Note that low-entries must be set if high-entries are set. For + * example: if erif_entry[2].v is set then erif_entry[1].v and + * erif_entry[0].v must be set. + * Index can be from 0 to cap_mc_erif_list_entries-1 + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_v, 0x24, 31, 1, 4, 0, false); + +/* reg_rigr2_erif_entry_erif + * Egress Router Interface. 
+ * Valid range is from 0 to cap_max_router_interfaces - 1 + * Index can be from 0 to MLXSW_REG_RIGR2_MAX_ERIFS - 1 + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_erif, 0x24, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_rigr2_pack(char *payload, u32 rigr_index, + bool vnext, u32 next_rigr_index) +{ + MLXSW_REG_ZERO(rigr2, payload); + mlxsw_reg_rigr2_rigr_index_set(payload, rigr_index); + mlxsw_reg_rigr2_vnext_set(payload, vnext); + mlxsw_reg_rigr2_next_rigr_index_set(payload, next_rigr_index); + mlxsw_reg_rigr2_vrmid_set(payload, 0); + mlxsw_reg_rigr2_rmid_index_set(payload, 0); +} + +static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index, + bool v, u16 erif) +{ + mlxsw_reg_rigr2_erif_entry_v_set(payload, index, v); + mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -6917,6 +6999,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rauht), MLXSW_REG(raleu), MLXSW_REG(rauhtd), + MLXSW_REG(rigr2), MLXSW_REG(mfcr), MLXSW_REG(mfsc), MLXSW_REG(mfsm), From 771ced742a4f02ac248ad679325bd434843d78d0 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:15 +0200 Subject: [PATCH 0041/2177] mlxsw: resources: Add multicast ERIF list entries resource The multicast ERIF list entries resource indicates the number of entries that can be put in one rigr2 register operation. While the register can hold up to MLXSW_REG_RIGR2_MAX_ERIFS ( = 32) ERIF entries, the actual number allowed by firmware is indicated with this resource. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/resources.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 9556d934714b..087aad52c195 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -63,6 +63,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_CPU_POLICERS, MLXSW_RES_ID_MAX_VRS, MLXSW_RES_ID_MAX_RIFS, + MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES, MLXSW_RES_ID_MAX_LPM_TREES, /* Internal resources. @@ -100,6 +101,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, [MLXSW_RES_ID_MAX_VRS] = 0x2C01, [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, + [MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10, [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30, }; From 2e654e33c5791332d7abf759fd9d34a39082ffc7 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:16 +0200 Subject: [PATCH 0042/2177] mlxsw: reg: Add the Router Multicast Forwarding Table Version 2 register The RMFT-V2 register is used to configure and query the multicast table and will be used by the multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 142 ++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1778d7f5f843..046525ebe5ac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5728,6 +5728,147 @@ static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index, mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif); } +/* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register + * -------------------------------------------------------------- + * The RMFT_V2 register is used to configure and query the multicast table. + */ +#define MLXSW_REG_RMFT2_ID 0x8027 +#define MLXSW_REG_RMFT2_LEN 0x174 + +MLXSW_REG_DEFINE(rmft2, MLXSW_REG_RMFT2_ID, MLXSW_REG_RMFT2_LEN); + +/* reg_rmft2_v + * Valid + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, v, 0x00, 31, 1); + +enum mlxsw_reg_rmft2_type { + MLXSW_REG_RMFT2_TYPE_IPV4, + MLXSW_REG_RMFT2_TYPE_IPV6 +}; + +/* reg_rmft2_type + * Access: Index + */ +MLXSW_ITEM32(reg, rmft2, type, 0x00, 28, 2); + +enum mlxsw_sp_reg_rmft2_op { + /* For Write: + * Write operation. Used to write a new entry to the table. All RW + * fields are relevant for new entry. Activity bit is set for new + * entries - Note write with v (Valid) 0 will delete the entry. + * For Query: + * Read operation + */ + MLXSW_REG_RMFT2_OP_READ_WRITE, +}; + +/* reg_rmft2_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, rmft2, op, 0x00, 20, 2); + +/* reg_rmft2_a + * Activity. Set for new entries. Set if a packet lookup has hit on the specific + * entry. + * Access: RO + */ +MLXSW_ITEM32(reg, rmft2, a, 0x00, 16, 1); + +/* reg_rmft2_offset + * Offset within the multicast forwarding table to write to. + * Access: Index + */ +MLXSW_ITEM32(reg, rmft2, offset, 0x00, 0, 16); + +/* reg_rmft2_virtual_router + * Virtual Router ID. Range from 0..cap_max_virtual_routers-1 + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, virtual_router, 0x04, 0, 16); + +enum mlxsw_reg_rmft2_irif_mask { + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, + MLXSW_REG_RMFT2_IRIF_MASK_COMPARE +}; + +/* reg_rmft2_irif_mask + * Ingress RIF mask. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1); + +/* reg_rmft2_irif + * Ingress RIF index. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16); + +/* reg_rmft2_dip4 + * Destination IPv4 address + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32); + +/* reg_rmft2_dip4_mask + * A bit that is set directs the TCAM to compare the corresponding bit in key. A + * bit that is clear directs the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32); + +/* reg_rmft2_sip4 + * Source IPv4 address + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32); + +/* reg_rmft2_sip4_mask + * A bit that is set directs the TCAM to compare the corresponding bit in key. A + * bit that is clear directs the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32); + +/* reg_rmft2_flexible_action_set + * ACL action set. The only supported action types in this field and in any + * action-set pointed from here are as follows: + * 00h: ACTION_NULL + * 01h: ACTION_MAC_TTL, only TTL configuration is supported. 
+ * 03h: ACTION_TRAP + * 06h: ACTION_QOS + * 08h: ACTION_POLICING_MONITORING + * 10h: ACTION_ROUTER_MC + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80, + MLXSW_REG_FLEX_ACTION_SET_LEN); + +static inline void +mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router, + enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif, + u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask, + const char *flexible_action_set) +{ + MLXSW_REG_ZERO(rmft2, payload); + mlxsw_reg_rmft2_v_set(payload, v); + mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4); + mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE); + mlxsw_reg_rmft2_offset_set(payload, offset); + mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router); + mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask); + mlxsw_reg_rmft2_irif_set(payload, irif); + mlxsw_reg_rmft2_dip4_set(payload, dip4); + mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask); + mlxsw_reg_rmft2_sip4_set(payload, sip4); + mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask); + if (flexible_action_set) + mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload, + flexible_action_set); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -7000,6 +7141,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(raleu), MLXSW_REG(rauhtd), MLXSW_REG(rigr2), + MLXSW_REG(rmft2), MLXSW_REG(mfcr), MLXSW_REG(mfsc), MLXSW_REG(mfsm), From 4fc92846f65b0a3470b433c54251a40feae7b2d5 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:17 +0200 Subject: [PATCH 0043/2177] mlxsw: reg: Add Router Rules Copy Register The RRCR register is used for copying and moving TCAM multicast routes from different offsets. It will be used to allow routes relocation for parman ops as part of the multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 60 +++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 046525ebe5ac..31d120ae8dc6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4680,6 +4680,65 @@ static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index, MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC); } +/* RRCR - Router Rules Copy Register Layout + * ---------------------------------------- + * This register is used for moving and copying route entry rules. + */ +#define MLXSW_REG_RRCR_ID 0x800F +#define MLXSW_REG_RRCR_LEN 0x24 + +MLXSW_REG_DEFINE(rrcr, MLXSW_REG_RRCR_ID, MLXSW_REG_RRCR_LEN); + +enum mlxsw_reg_rrcr_op { + /* Move rules */ + MLXSW_REG_RRCR_OP_MOVE, + /* Copy rules */ + MLXSW_REG_RRCR_OP_COPY, +}; + +/* reg_rrcr_op + * Access: WO + */ +MLXSW_ITEM32(reg, rrcr, op, 0x00, 28, 4); + +/* reg_rrcr_offset + * Offset within the region from which to copy/move. + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, offset, 0x00, 0, 16); + +/* reg_rrcr_size + * The number of rules to copy/move. + * Access: WO + */ +MLXSW_ITEM32(reg, rrcr, size, 0x04, 0, 16); + +/* reg_rrcr_table_id + * Identifier of the table on which to perform the operation. 
Encoding is the + * same as in RTAR.key_type + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, table_id, 0x10, 0, 4); + +/* reg_rrcr_dest_offset + * Offset within the region to which to copy/move + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, dest_offset, 0x20, 0, 16); + +static inline void mlxsw_reg_rrcr_pack(char *payload, enum mlxsw_reg_rrcr_op op, + u16 offset, u16 size, + enum mlxsw_reg_rtar_key_type table_id, + u16 dest_offset) +{ + MLXSW_REG_ZERO(rrcr, payload); + mlxsw_reg_rrcr_op_set(payload, op); + mlxsw_reg_rrcr_offset_set(payload, offset); + mlxsw_reg_rrcr_size_set(payload, size); + mlxsw_reg_rrcr_table_id_set(payload, table_id); + mlxsw_reg_rrcr_dest_offset_set(payload, dest_offset); +} + /* RALTA - Router Algorithmic LPM Tree Allocation Register * ------------------------------------------------------- * RALTA is used to allocate the LPM trees of the SHSPM method. @@ -7133,6 +7192,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(ratr), MLXSW_REG(rtdp), MLXSW_REG(ricnt), + MLXSW_REG(rrcr), MLXSW_REG(ralta), MLXSW_REG(ralst), MLXSW_REG(raltb), From 4af5964e58884855d28ae68ddf01279868e70853 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:18 +0200 Subject: [PATCH 0044/2177] mlxsw: reg: Configure RIF to forward IPv4 multicast packets by default Turn on two bits on the Spectrum RIF configuration: - IPv4 multicast: when a multicast packet arrives on a RIF, send it to go through multicast routes lookup. - IPv4 multicast forwarding enable: when multicast packet arrives on a RIF, allow it to be forwarded by multicast routes. If this bit is not set, multicast packets will go through multicast routing lookup but will be dropped at the egress of the ports. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 31d120ae8dc6..c203e0dfa827 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3991,6 +3991,12 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); +/* reg_ritr_ipv4_mc + * IPv4 multicast routing enable. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1); + enum mlxsw_reg_ritr_if_type { /* VLAN interface. */ MLXSW_REG_RITR_VLAN_IF, @@ -4048,6 +4054,14 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); +/* reg_ritr_ipv4_mc_fe + * IPv4 Multicast Forwarding Enable. + * When disabled, forwarding is blocked but local traffic (traps and IP to me) + * will be enabled. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1); + /* reg_ritr_lb_en * Loop-back filter enable for unicast packets. 
* If the flag is set then loop-back filter for unicast packets is @@ -4270,11 +4284,13 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_enable_set(payload, enable); mlxsw_reg_ritr_ipv4_set(payload, 1); mlxsw_reg_ritr_ipv6_set(payload, 1); + mlxsw_reg_ritr_ipv4_mc_set(payload, 1); mlxsw_reg_ritr_type_set(payload, type); mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); mlxsw_reg_ritr_ipv6_fe_set(payload, 1); + mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); From 91e4d59a4600afe64b44e013a7c1805bbfe61e59 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:19 +0200 Subject: [PATCH 0045/2177] mlxsw: spectrum_router: Export RIF dev access function The mlxsw_sp_rif struct, defined as private struct in spectrum_router.c will be used in the multicast router source file. Due to the fact that the dev field will be needed by the multicast router logic, add an access function to it. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2cfb3f5d092d..0bd93dc88ffa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5049,6 +5049,11 @@ int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) return rif->dev->ifindex; } +const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +{ + return rif->dev; +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 345fcc4f38e9..ae4c99b3f2fc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -69,6 +69,7 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); +const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, From b48cfc80ce9c27368e331d9aa742314487b0ee12 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:20 +0200 Subject: [PATCH 0046/2177] mlxsw: spectrum: Add multicast router traps and trap groups Add three new traps needed for multicast routing: - PIM: Trap for PIM protocol control packets. - RPF: Trap for packets that fail the RPF check on a specific hardware route entry. - MULTICAST: Generic trap for multicast. It is used for routes that trap the packets to the CPU. The RPF and MULTICAST traps have rate limiters as these traps may have line-rate of packets trapped. The PIM trap has a rate limiter similarly to other L3 control protocols. The rate limiters are implemented by adding three new trap groups for the newly introduced traps. 
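For orientation, here is a stand-alone sketch (editorial illustration only, not driver code) restating the per-group policer settings that the mlxsw_sp_cpu_policers_set() hunk below configures: the PIM control trap and the RPF trap share the low 128/7 rate/burst pair used for the other L3 control protocol groups, while the generic MULTICAST trap is given the larger 1024/7 pair. The struct and program are invented for the example; only the numbers come from the patch, and their exact units are whatever the device policer uses.

#include <stdio.h>

/* Illustrative table only: rate/burst values copied from the hunk below. */
struct policer_cfg {
	const char *group;
	unsigned int rate;
	unsigned int burst_size;
};

static const struct policer_cfg mc_policers[] = {
	{ "PIM",       128,  7 },
	{ "RPF",       128,  7 },
	{ "MULTICAST", 1024, 7 },
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(mc_policers) / sizeof(mc_policers[0]); i++)
		printf("%-10s rate=%u burst_size=%u\n",
		       mc_policers[i].group, mc_policers[i].rate,
		       mc_policers[i].burst_size);
	return 0;
}
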
Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 3 +++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 4 ++++ 3 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index c203e0dfa827..17eba19100de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3681,12 +3681,15 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP, MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM, + MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6ba6ff276b17..e9b94430afed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3421,6 +3421,10 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { false, SP_IP2ME, DISCARD), /* ACL trap */ MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false), + /* Multicast Router Traps */ + MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false), + MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), + MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -3446,6 +3450,8 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: rate = 128; burst_size = 7; break; @@ -3461,6 +3467,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: rate = 1024; burst_size = 7; break; @@ -3506,6 +3513,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: priority = 5; tc = 5; break; @@ -3522,12 +3530,14 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: priority = 2; tc = 2; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: priority = 1; tc = 1; break; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index f396a1fef633..a98103539f6b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -62,6 +62,8 @@ enum { MLXSW_TRAP_ID_TTLERROR = 0x53, 
MLXSW_TRAP_ID_LBERROR = 0x54, MLXSW_TRAP_ID_IPV4_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_PIM = 0x58, + MLXSW_TRAP_ID_RPF = 0x5C, MLXSW_TRAP_ID_IP2ME = 0x5F, MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, @@ -89,6 +91,8 @@ enum { MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, MLXSW_TRAP_ID_ACL0 = 0x1C0, + /* Multicast trap used for routes with trap action */ + MLXSW_TRAP_ID_ACL1 = 0x1C1, MLXSW_TRAP_ID_MAX = 0x1FF }; From bffa72cf7f9df842f0016ba03586039296b4caaf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 05:14:24 -0700 Subject: [PATCH 0047/2177] net: sk_buff rbnode reorg skb->rbnode shares space with skb->next, skb->prev and skb->tstamp Current uses (TCP receive ofo queue and netem) need to save/restore tstamp, while skb->dev is either NULL (TCP) or a constant for a given queue (netem). Since we plan using an RB tree for TCP retransmit queue to speedup SACK processing with large BDP, this patch exchanges skb->dev and skb->tstamp. This saves some overhead in both TCP and netem. v2: removes the swtstamp field from struct tcp_skb_cb Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Wei Wang Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 ++++++++-------- include/net/tcp.h | 6 ------ net/ipv4/tcp_input.c | 27 +++++---------------------- net/sched/sch_netem.c | 7 ++++--- 4 files changed, 17 insertions(+), 39 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 72299ef00061..492828801acb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -661,8 +661,12 @@ struct sk_buff { struct sk_buff *prev; union { - ktime_t tstamp; - u64 skb_mstamp; + struct net_device *dev; + /* Some protocols might use this space to store information, + * while device pointer would be NULL. + * UDP receive path is one user. + */ + unsigned long dev_scratch; }; }; struct rb_node rbnode; /* used in netem & tcp stack */ @@ -670,12 +674,8 @@ struct sk_buff { struct sock *sk; union { - struct net_device *dev; - /* Some protocols might use this space to store information, - * while device pointer would be NULL. - * UDP receive path is one user. - */ - unsigned long dev_scratch; + ktime_t tstamp; + u64 skb_mstamp; }; /* * This is the control buffer. It is free to use for every diff --git a/include/net/tcp.h b/include/net/tcp.h index b510f284427a..49a8a46466f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -797,12 +797,6 @@ struct tcp_skb_cb { u16 tcp_gso_segs; u16 tcp_gso_size; }; - - /* Used to stash the receive timestamp while this skb is in the - * out of order queue, as skb->tstamp is overwritten by the - * rbnode. - */ - ktime_t swtstamp; }; __u8 tcp_flags; /* TCP header flags. 
(tcp[13]) */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bddf724f5c02..db9bb46b5776 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4266,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp) tp->rx_opt.num_sacks = num_sacks; } -enum tcp_queue { - OOO_QUEUE, - RCV_QUEUE, -}; - /** * tcp_try_coalesce - try to merge skb to prior one * @sk: socket @@ -4286,7 +4281,6 @@ enum tcp_queue { * Returns true if caller should free @from instead of queueing it */ static bool tcp_try_coalesce(struct sock *sk, - enum tcp_queue dest, struct sk_buff *to, struct sk_buff *from, bool *fragstolen) @@ -4311,10 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk, if (TCP_SKB_CB(from)->has_rxtstamp) { TCP_SKB_CB(to)->has_rxtstamp = true; - if (dest == OOO_QUEUE) - TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp; - else - to->tstamp = from->tstamp; + to->tstamp = from->tstamp; } return true; @@ -4351,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk) } p = rb_next(p); rb_erase(&skb->rbnode, &tp->out_of_order_queue); - /* Replace tstamp which was stomped by rbnode */ - if (TCP_SKB_CB(skb)->has_rxtstamp) - skb->tstamp = TCP_SKB_CB(skb)->swtstamp; if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); @@ -4365,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk) TCP_SKB_CB(skb)->end_seq); tail = skb_peek_tail(&sk->sk_receive_queue); - eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE, - tail, skb, &fragstolen); + eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) @@ -4420,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) return; } - /* Stash tstamp to avoid being stomped on by rbnode */ - if (TCP_SKB_CB(skb)->has_rxtstamp) - TCP_SKB_CB(skb)->swtstamp = skb->tstamp; - /* Disable header prediction. */ tp->pred_flags = 0; inet_csk_schedule_ack(sk); @@ -4451,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* In the typical case, we are adding an skb to the end of the list. * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. */ - if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, + if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { coalesce_done: tcp_grow_window(sk, skb); @@ -4502,7 +4485,7 @@ coalesce_done: __kfree_skb(skb1); goto merge_right; } - } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, + } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { goto coalesce_done; } @@ -4554,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int __skb_pull(skb, hdrlen); eaten = (tail && - tcp_try_coalesce(sk, RCV_QUEUE, tail, + tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 
1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index b1266e75ca43..063a4bdb9ee6 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -146,7 +146,6 @@ struct netem_sched_data { */ struct netem_skb_cb { psched_time_t time_to_send; - ktime_t tstamp_save; }; @@ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } cb->time_to_send = now + delay; - cb->tstamp_save = skb->tstamp; ++q->counter; tfifo_enqueue(skb, sch); } else { @@ -629,7 +627,10 @@ deliver: qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; skb->prev = NULL; - skb->tstamp = netem_skb_cb(skb)->tstamp_save; + /* skb->dev shares skb->rbnode area, + * we need to restore its value. + */ + skb->dev = qdisc_dev(sch); #ifdef CONFIG_NET_CLS_ACT /* From 69e33b2754ea4fbe4fdd2d18bc8ca05d8670a5c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 19 Sep 2017 14:42:17 +0200 Subject: [PATCH 0048/2177] selftests: rtnetlink.sh: add test case for device ifalias Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 57 ++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 57b5ff576240..4b48de565cae 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -15,6 +15,14 @@ check_err() fi } +# same but inverted -- used when command must fail for test to pass +check_fail() +{ + if [ $1 -eq 0 ]; then + ret=1 + fi +} + kci_add_dummy() { ip link add name "$devdummy" type dummy @@ -235,6 +243,54 @@ kci_test_addrlabel() echo "PASS: ipv6 addrlabel" } +kci_test_ifalias() +{ + ret=0 + namewant=$(uuidgen) + syspathname="/sys/class/net/$devdummy/ifalias" + + ip link set dev "$devdummy" alias "$namewant" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: cannot set interface alias of $devdummy to $namewant" + return 1 + fi + + ip link show "$devdummy" | grep -q "alias $namewant" + check_err $? + + if [ -r "$syspathname" ] ; then + read namehave < "$syspathname" + if [ "$namewant" != "$namehave" ]; then + echo "FAIL: did set ifalias $namewant but got $namehave" + return 1 + fi + + namewant=$(uuidgen) + echo "$namewant" > "$syspathname" + ip link show "$devdummy" | grep -q "alias $namewant" + check_err $? + + # sysfs interface allows to delete alias again + echo "" > "$syspathname" + + ip link show "$devdummy" | grep -q "alias $namewant" + check_fail $? + + # re-add the alias -- kernel should free mem when dummy dev is removed + ip link set dev "$devdummy" alias "$namewant" + check_err $? + fi + + if [ $ret -ne 0 ]; then + echo "FAIL: set interface alias $devdummy to $namewant" + return 1 + fi + + echo "PASS: set ifalias $namewant for $devdummy" +} + kci_test_rtnl() { kci_add_dummy @@ -249,6 +305,7 @@ kci_test_rtnl() kci_test_gre kci_test_bridge kci_test_addrlabel + kci_test_ifalias kci_del_dummy } From f5619866592c65adc087364cc1a3ba709201ea26 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:56:57 -0400 Subject: [PATCH 0049/2177] net: dsa: remove copy of master ethtool_ops There is no need to store a copy of the master ethtool ops, storing the original pointer in DSA and the new one in the master netdev itself is enough. 
In the meantime, set orig_ethtool_ops to NULL when restoring the master ethtool ops and check the presence of the master original ethtool ops as well as its needed functions before calling them. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - net/dsa/dsa.c | 8 ++++---- net/dsa/slave.c | 19 +++++++++++-------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index dd44d6ce1097..8dee216a5a9b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -188,7 +188,6 @@ struct dsa_port { /* * Original copy of the master netdev ethtool_ops */ - struct ethtool_ops ethtool_ops; const struct ethtool_ops *orig_ethtool_ops; }; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 03c58b0eb082..abadf7b49236 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -124,11 +124,10 @@ int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) if (!cpu_ops) return -ENOMEM; - memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops, - sizeof(struct ethtool_ops)); cpu_dp->orig_ethtool_ops = master->ethtool_ops; - memcpy(cpu_ops, &cpu_dp->ethtool_ops, - sizeof(struct ethtool_ops)); + if (cpu_dp->orig_ethtool_ops) + memcpy(cpu_ops, cpu_dp->orig_ethtool_ops, sizeof(*cpu_ops)); + dsa_cpu_port_ethtool_init(cpu_ops); master->ethtool_ops = cpu_ops; @@ -138,6 +137,7 @@ int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp) { cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops; + cpu_dp->orig_ethtool_ops = NULL; } void dsa_cpu_dsa_destroy(struct dsa_port *port) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2afa99506f8b..2ff4f907d137 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -574,12 +574,13 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); struct dsa_switch *ds = cpu_dp->ds; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; s8 cpu_port = cpu_dp->index; int count = 0; - if (cpu_dp->ethtool_ops.get_sset_count) { - count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); - cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data); + if (ops && ops->get_sset_count && ops->get_ethtool_stats) { + count = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_ethtool_stats(dev, stats, data); } if (ds->ops->get_ethtool_stats) @@ -591,10 +592,11 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); struct dsa_switch *ds = cpu_dp->ds; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; int count = 0; - if (cpu_dp->ethtool_ops.get_sset_count) - count += cpu_dp->ethtool_ops.get_sset_count(dev, sset); + if (ops && ops->get_sset_count) + count += ops->get_sset_count(dev, sset); if (sset == ETH_SS_STATS && ds->ops->get_sset_count) count += ds->ops->get_sset_count(ds); @@ -608,6 +610,7 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); struct dsa_switch *ds = cpu_dp->ds; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; s8 cpu_port = cpu_dp->index; int len = ETH_GSTRING_LEN; int mcount = 0, count; @@ -619,9 +622,9 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; - if 
(cpu_dp->ethtool_ops.get_sset_count) { - mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); - cpu_dp->ethtool_ops.get_strings(dev, stringset, data); + if (ops && ops->get_sset_count && ops->get_strings) { + mcount = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_strings(dev, stringset, data); } if (stringset == ETH_SS_STATS && ds->ops->get_strings) { From cd8d7dd41bfddaa02e34db35abfab14ce4584dee Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:56:58 -0400 Subject: [PATCH 0050/2177] net: dsa: setup master ethtool unconditionally When a DSA switch tree is meant to be applied, it already has a CPU port. Thus remove the condition of dst->cpu_dp. Moreover, the next lines access dst->cpu_dp unconditionally. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 873af0108e24..bd19304f862f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -433,11 +433,9 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - if (dst->cpu_dp) { - err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); - if (err) - return err; - } + err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); + if (err) + return err; /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get @@ -474,10 +472,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - if (dst->cpu_dp) { - dsa_cpu_port_ethtool_restore(dst->cpu_dp); - dst->cpu_dp = NULL; - } + dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dst->cpu_dp = NULL; pr_info("DSA: tree %d unapplied\n", dst->tree); dst->applied = false; From 1943563dfd4b3a1f9dc102f056813112d29bb60f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:56:59 -0400 Subject: [PATCH 0051/2177] net: dsa: setup master ethtool after dsa_ptr DSA overrides the master's ethtool ops so that we can inject its CPU port's statistics. Because of that, we need to setup the ethtool ops after the master's dsa_ptr pointer has been assigned, not before. This patch setups the ethtool ops after dsa_ptr is assigned, and restores them before it gets cleared. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 12 +++++++----- net/dsa/legacy.c | 10 +++------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index bd19304f862f..032f8bc3e788 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -433,16 +433,17 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); - if (err) - return err; - /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get * sent to the tag format's receive function. 
*/ wmb(); dst->cpu_dp->netdev->dsa_ptr = dst; + + err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); + if (err) + return err; + dst->applied = true; return 0; @@ -456,6 +457,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) if (!dst->applied) return; + dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dst->cpu_dp->netdev->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype @@ -472,7 +475,6 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - dsa_cpu_port_ethtool_restore(dst->cpu_dp); dst->cpu_dp = NULL; pr_info("DSA: tree %d unapplied\n", dst->tree); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 91e6f7981d39..163910699db7 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -206,10 +206,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", index); - ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp); - if (ret) - return ret; - return 0; } @@ -606,7 +602,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = dst; - return 0; + return dsa_cpu_port_ethtool_setup(dst->cpu_dp); } static int dsa_probe(struct platform_device *pdev) @@ -671,6 +667,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; + dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dst->cpu_dp->netdev->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype @@ -686,8 +684,6 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) dsa_switch_destroy(ds); } - dsa_cpu_port_ethtool_restore(dst->cpu_dp); - dev_put(dst->cpu_dp->netdev); } From f2f2356685bcaf1063859356fc65a5ac808b1382 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:57:00 -0400 Subject: [PATCH 0052/2177] net: dsa: move master ethtool code DSA overrides the master device ethtool ops, so that it can inject stats from its dedicated switch CPU port as well. The related code is currently split in dsa.c and slave.c, but it only scopes the master net device. Move it to a new master.c DSA core file. This file will later be extended with master net device specific code. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S.
Miller --- net/dsa/Makefile | 2 +- net/dsa/dsa.c | 28 ----------- net/dsa/dsa2.c | 4 +- net/dsa/dsa_priv.h | 7 +-- net/dsa/legacy.c | 4 +- net/dsa/master.c | 120 +++++++++++++++++++++++++++++++++++++++++++++ net/dsa/slave.c | 83 ------------------------------- 7 files changed, 129 insertions(+), 119 deletions(-) create mode 100644 net/dsa/master.c diff --git a/net/dsa/Makefile b/net/dsa/Makefile index fcce25da937c..2e7ac8bab19d 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,6 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o +dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index abadf7b49236..81c852e32821 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -112,34 +112,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) return ops; } -int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) -{ - struct dsa_switch *ds = cpu_dp->ds; - struct net_device *master; - struct ethtool_ops *cpu_ops; - - master = cpu_dp->netdev; - - cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL); - if (!cpu_ops) - return -ENOMEM; - - cpu_dp->orig_ethtool_ops = master->ethtool_ops; - if (cpu_dp->orig_ethtool_ops) - memcpy(cpu_ops, cpu_dp->orig_ethtool_ops, sizeof(*cpu_ops)); - - dsa_cpu_port_ethtool_init(cpu_ops); - master->ethtool_ops = cpu_ops; - - return 0; -} - -void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp) -{ - cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops; - cpu_dp->orig_ethtool_ops = NULL; -} - void dsa_cpu_dsa_destroy(struct dsa_port *port) { struct device_node *port_dn = port->dn; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 032f8bc3e788..dcccaebde708 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -440,7 +440,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) wmb(); dst->cpu_dp->netdev->dsa_ptr = dst; - err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); + err = dsa_master_ethtool_setup(dst->cpu_dp->netdev); if (err) return err; @@ -457,7 +457,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) if (!dst->applied) return; - dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dsa_master_ethtool_restore(dst->cpu_dp->netdev); dst->cpu_dp->netdev->dsa_ptr = NULL; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 9c3eeb72462d..f616b3444418 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -97,8 +97,6 @@ struct dsa_slave_priv { int dsa_cpu_dsa_setup(struct dsa_port *port); void dsa_cpu_dsa_destroy(struct dsa_port *dport); const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); -int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp); -void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp); bool dsa_schedule_work(struct work_struct *work); /* legacy.c */ @@ -112,6 +110,10 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); +/* master.c */ +int dsa_master_ethtool_setup(struct net_device *dev); +void dsa_master_ethtool_restore(struct net_device *dev); + /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); @@ -139,7 +141,6 @@ int dsa_port_vlan_del(struct dsa_port *dp, /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); -void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops); int dsa_slave_create(struct dsa_port 
*port, const char *name); void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 163910699db7..ae505d8e4417 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -602,7 +602,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = dst; - return dsa_cpu_port_ethtool_setup(dst->cpu_dp); + return dsa_master_ethtool_setup(dst->cpu_dp->netdev); } static int dsa_probe(struct platform_device *pdev) @@ -667,7 +667,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dsa_master_ethtool_restore(dst->cpu_dp->netdev); dst->cpu_dp->netdev->dsa_ptr = NULL; diff --git a/net/dsa/master.c b/net/dsa/master.c new file mode 100644 index 000000000000..5e5147ec5a44 --- /dev/null +++ b/net/dsa/master.c @@ -0,0 +1,120 @@ +/* + * Handling of a master device, switching frames via its switch fabric CPU port + * + * Copyright (c) 2017 Savoir-faire Linux Inc. + * Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "dsa_priv.h" + +static void dsa_master_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + struct dsa_switch *ds = port->ds; + const struct ethtool_ops *ops = port->orig_ethtool_ops; + int count = 0; + + if (ops && ops->get_sset_count && ops->get_ethtool_stats) { + count = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_ethtool_stats(dev, stats, data); + } + + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, port->index, data + count); +} + +static int dsa_master_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + struct dsa_switch *ds = port->ds; + const struct ethtool_ops *ops = port->orig_ethtool_ops; + int count = 0; + + if (ops && ops->get_sset_count) + count += ops->get_sset_count(dev, sset); + + if (sset == ETH_SS_STATS && ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); + + return count; +} + +static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, + uint8_t *data) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + struct dsa_switch *ds = port->ds; + const struct ethtool_ops *ops = port->orig_ethtool_ops; + int len = ETH_GSTRING_LEN; + int mcount = 0, count; + unsigned int i; + uint8_t pfx[4]; + uint8_t *ndata; + + snprintf(pfx, sizeof(pfx), "p%.2d", port->index); + /* We do not want to be NULL-terminated, since this is a prefix */ + pfx[sizeof(pfx) - 1] = '_'; + + if (ops && ops->get_sset_count && ops->get_strings) { + mcount = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_strings(dev, stringset, data); + } + + if (stringset == ETH_SS_STATS && ds->ops->get_strings) { + ndata = data + mcount * len; + /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle + * the output after to prepend our CPU port prefix we + * constructed earlier + */ + ds->ops->get_strings(ds, port->index, ndata); + count = ds->ops->get_sset_count(ds); + for (i = 0; i < count; i++) { + memmove(ndata + (i * len + sizeof(pfx)), + ndata + i * len, len - 
sizeof(pfx)); + memcpy(ndata + i * len, pfx, sizeof(pfx)); + } + } +} + +int dsa_master_ethtool_setup(struct net_device *dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + struct dsa_switch *ds = port->ds; + struct ethtool_ops *ops; + + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + port->orig_ethtool_ops = dev->ethtool_ops; + if (port->orig_ethtool_ops) + memcpy(ops, port->orig_ethtool_ops, sizeof(*ops)); + + ops->get_sset_count = dsa_master_get_sset_count; + ops->get_ethtool_stats = dsa_master_get_ethtool_stats; + ops->get_strings = dsa_master_get_strings; + + dev->ethtool_ops = ops; + + return 0; +} + +void dsa_master_ethtool_restore(struct net_device *dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + + dev->ethtool_ops = port->orig_ethtool_ops; + port->orig_ethtool_ops = NULL; +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2ff4f907d137..d51b10450e1b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -567,82 +567,6 @@ static void dsa_slave_get_strings(struct net_device *dev, } } -static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - s8 cpu_port = cpu_dp->index; - int count = 0; - - if (ops && ops->get_sset_count && ops->get_ethtool_stats) { - count = ops->get_sset_count(dev, ETH_SS_STATS); - ops->get_ethtool_stats(dev, stats, data); - } - - if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, cpu_port, data + count); -} - -static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - int count = 0; - - if (ops && ops->get_sset_count) - count += ops->get_sset_count(dev, sset); - - if (sset == ETH_SS_STATS && ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds); - - return count; -} - -static void dsa_cpu_port_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - s8 cpu_port = cpu_dp->index; - int len = ETH_GSTRING_LEN; - int mcount = 0, count; - unsigned int i; - uint8_t pfx[4]; - uint8_t *ndata; - - snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port); - /* We do not want to be NULL-terminated, since this is a prefix */ - pfx[sizeof(pfx) - 1] = '_'; - - if (ops && ops->get_sset_count && ops->get_strings) { - mcount = ops->get_sset_count(dev, ETH_SS_STATS); - ops->get_strings(dev, stringset, data); - } - - if (stringset == ETH_SS_STATS && ds->ops->get_strings) { - ndata = data + mcount * len; - /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle - * the output after to prepend our CPU port prefix we - * constructed earlier - */ - ds->ops->get_strings(ds, cpu_port, ndata); - count = ds->ops->get_sset_count(ds); - for (i = 0; i < count; i++) { - memmove(ndata + (i * len + sizeof(pfx)), - ndata + i * len, len - sizeof(pfx)); - memcpy(ndata + i * len, pfx, sizeof(pfx)); - } - } -} - static void dsa_slave_get_ethtool_stats(struct net_device *dev, 
struct ethtool_stats *stats, uint64_t *data) @@ -979,13 +903,6 @@ static void dsa_slave_get_stats64(struct net_device *dev, } } -void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) -{ - ops->get_sset_count = dsa_cpu_port_get_sset_count; - ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats; - ops->get_strings = dsa_cpu_port_get_strings; -} - static int dsa_slave_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc, u32 *rule_locs) { From 7131cc9fc9fd3291bff0e0d3326a1b6983fa0f34 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:43 -0700 Subject: [PATCH 0053/2177] net: dsa: b53: Remove is_cpu_port() This is not used anywhere, so remove it. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_priv.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 01bd8cbe9a3f..7528b22aeb03 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -186,11 +186,6 @@ static inline int is58xx(struct b53_device *dev) #define B53_CPU_PORT_25 5 #define B53_CPU_PORT 8 -static inline int is_cpu_port(struct b53_device *dev, int port) -{ - return dev->cpu_port; -} - struct b53_device *b53_switch_alloc(struct device *base, const struct b53_io_ops *ops, void *priv); From 299752a7d28654dd74304a76d3639e744df52084 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:44 -0700 Subject: [PATCH 0054/2177] net: dsa: b53: Make b53_enable_cpu_port() take a port argument In preparation for future changes allowing the configuring of multiple CPU ports, make b53_enable_cpu_port() take a port argument. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 274f3679f33d..d8bc54cfcfbe 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -538,19 +538,18 @@ static void b53_disable_port(struct dsa_switch *ds, int port, b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } -static void b53_enable_cpu_port(struct b53_device *dev) +static void b53_enable_cpu_port(struct b53_device *dev, int port) { - unsigned int cpu_port = dev->cpu_port; u8 port_ctrl; /* BCM5325 CPU port is at 8 */ - if ((is5325(dev) || is5365(dev)) && cpu_port == B53_CPU_PORT_25) - cpu_port = B53_CPU_PORT; + if ((is5325(dev) || is5365(dev)) && port == B53_CPU_PORT_25) + port = B53_CPU_PORT; port_ctrl = PORT_CTRL_RX_BCST_EN | PORT_CTRL_RX_MCST_EN | PORT_CTRL_RX_UCST_EN; - b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(cpu_port), port_ctrl); + b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl); } static void b53_enable_mib(struct b53_device *dev) @@ -820,7 +819,7 @@ static int b53_setup(struct dsa_switch *ds) if (BIT(port) & ds->enabled_port_mask) b53_enable_port(ds, port, NULL); else if (dsa_is_cpu_port(ds, port)) - b53_enable_cpu_port(dev); + b53_enable_cpu_port(dev, port); else b53_disable_port(ds, port, NULL); } From 34c8befd1365eb38f8dd7f3f81086fa766915610 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:45 -0700 Subject: [PATCH 0055/2177] net: dsa: b53: Defer port enabling to calling port_enable There is no need to configure the enabled ports once in b53_setup() and then a second time around when dsa_switch_ops::port_enable is called, just do it when port_enable is called which is better in terms of power consumption and correctness. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index d8bc54cfcfbe..3297af6aab8a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -815,12 +815,13 @@ static int b53_setup(struct dsa_switch *ds) if (ret) dev_err(ds->dev, "failed to apply configuration\n"); + /* Configure IMP/CPU port, disable unused ports. Enabled + * ports will be configured with .port_enable + */ for (port = 0; port < dev->num_ports; port++) { - if (BIT(port) & ds->enabled_port_mask) - b53_enable_port(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port)) b53_enable_cpu_port(dev, port); - else + else if (!(BIT(port) & ds->enabled_port_mask)) b53_disable_port(ds, port, NULL); } From e85ec74ace29acfda4afdfd56df122a0a62e6a1a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:46 -0700 Subject: [PATCH 0056/2177] net: dsa: bcm_sf2: Defer port enabling to calling port_enable There is no need to configure the enabled ports once in bcm_sf2_sw_setup() and then a second time around when dsa_switch_ops::port_enable is called, just do it when port_enable is called which is better in terms of power consumption and correctness. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/bcm_sf2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index d7b53d53c116..8acbd17bc1fd 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -890,14 +890,11 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; - /* Enable all valid ports and disable those unused */ + /* Disable unused ports and configure IMP port */ for (port = 0; port < priv->hw_params.num_ports; port++) { - /* IMP port receives special treatment */ - if ((1 << port) & ds->enabled_port_mask) - bcm_sf2_port_setup(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port)) bcm_sf2_imp_setup(ds, port); - else + else if (!((1 << port) & ds->enabled_port_mask)) bcm_sf2_port_disable(ds, port, NULL); } From 5345862e9af02ce3780639bfe350ed2da9f8af13 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:47 -0700 Subject: [PATCH 0057/2177] net: dsa: b53: Use a macro to define I/O operations Instead of repeating the same pattern: acquire mutex, read/write, release mutex, define a macro: b53_build_op() which takes the type (read|write), I/O size, and value (scalar or pointer). This helps with fixing bugs that could exist (e.g: missing barrier, lock etc.). Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_priv.h | 131 ++++++--------------------------- 1 file changed, 21 insertions(+), 110 deletions(-) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 7528b22aeb03..5bebe97900e8 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -199,119 +199,30 @@ static inline void b53_switch_remove(struct b53_device *dev) dsa_unregister_switch(dev->ds); } -static inline int b53_read8(struct b53_device *dev, u8 page, u8 reg, u8 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read8(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; +#define b53_build_op(type_op_size, val_type) \ +static inline int b53_##type_op_size(struct b53_device *dev, u8 page, \ + u8 reg, val_type val) \ +{ \ + int ret; \ + \ + mutex_lock(&dev->reg_mutex); \ + ret = dev->ops->type_op_size(dev, page, reg, val); \ + mutex_unlock(&dev->reg_mutex); \ + \ + return ret; \ } -static inline int b53_read16(struct b53_device *dev, u8 page, u8 reg, u16 *val) -{ - int ret; +b53_build_op(read8, u8 *); +b53_build_op(read16, u16 *); +b53_build_op(read32, u32 *); +b53_build_op(read48, u64 *); +b53_build_op(read64, u64 *); - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read16(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_read32(struct b53_device *dev, u8 page, u8 reg, u32 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read32(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_read48(struct b53_device *dev, u8 page, u8 reg, u64 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read48(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_read64(struct b53_device *dev, u8 page, u8 reg, u64 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read64(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write8(struct b53_device 
*dev, u8 page, u8 reg, u8 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write8(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write16(struct b53_device *dev, u8 page, u8 reg, - u16 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write16(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write32(struct b53_device *dev, u8 page, u8 reg, - u32 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write32(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write48(struct b53_device *dev, u8 page, u8 reg, - u64 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write48(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write64(struct b53_device *dev, u8 page, u8 reg, - u64 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write64(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} +b53_build_op(write8, u8); +b53_build_op(write16, u16); +b53_build_op(write32, u32); +b53_build_op(write48, u64); +b53_build_op(write64, u64); struct b53_arl_entry { u8 port; From b409a9efa183d92d99bc314fb26ebc1c2a8b4379 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:48 -0700 Subject: [PATCH 0058/2177] net: dsa: b53: Move Broadcom header setup to b53 The code to enable Broadcom tags/headers is largely switch independent, and in preparation for enabling it for multiple devices with b53, move the code we have in bcm_sf2.c to b53_common.c Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 47 ++++++++++++++++++++++++++++++++ drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/b53/b53_regs.h | 7 +++++ drivers/net/dsa/bcm_sf2.c | 43 ++--------------------------- drivers/net/dsa/bcm_sf2_regs.h | 8 ------ 5 files changed, 57 insertions(+), 49 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 3297af6aab8a..aa2187c71ea5 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -538,6 +538,53 @@ static void b53_disable_port(struct dsa_switch *ds, int port, b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + u8 hdr_ctl, val; + u16 reg; + + /* Resolve which bit controls the Broadcom tag */ + switch (port) { + case 8: + val = BRCM_HDR_P8_EN; + break; + case 7: + val = BRCM_HDR_P7_EN; + break; + case 5: + val = BRCM_HDR_P5_EN; + break; + default: + val = 0; + break; + } + + /* Enable Broadcom tags for IMP port */ + b53_read8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, &hdr_ctl); + hdr_ctl |= val; + b53_write8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, hdr_ctl); + + /* Registers below are only accessible on newer devices */ + if (!is58xx(dev)) + return; + + /* Enable reception Broadcom tag for CPU TX (switch RX) to + * allow us to tag outgoing frames + */ + b53_read16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_RX_DIS, ®); + reg &= ~BIT(port); + b53_write16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_RX_DIS, reg); + + /* Enable transmission of Broadcom tags from the switch (CPU RX) to + * allow delivering frames to the per-port net_devices + */ + b53_read16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_TX_DIS, ®); + reg &= ~BIT(port); + b53_write16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_TX_DIS, reg); +} +EXPORT_SYMBOL(b53_brcm_hdr_setup); + static void b53_enable_cpu_port(struct b53_device *dev, int port) { u8 port_ctrl; diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 5bebe97900e8..77102f685da0 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -309,5 +309,6 @@ int b53_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, bool ingress); void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); +void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); #endif diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index e5c86d44667a..5e8b8e31fee8 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -210,6 +210,7 @@ #define B53_BRCM_HDR 0x03 #define BRCM_HDR_P8_EN BIT(0) /* Enable tagging on port 8 */ #define BRCM_HDR_P5_EN BIT(1) /* Enable tagging on port 5 */ +#define BRCM_HDR_P7_EN BIT(2) /* Enable tagging on port 7 */ /* Mirror capture control register (16 bit) */ #define B53_MIR_CAP_CTL 0x10 @@ -249,6 +250,12 @@ /* Revision ID register (8 bit) */ #define B53_REV_ID 0x40 +/* Broadcom header RX control (16 bit) */ +#define B53_BRCM_HDR_RX_DIS 0x60 + +/* Broadcom header TX control (16 bit) */ +#define B53_BRCM_HDR_TX_DIS 0x62 + /************************************************************************* * ARL Access Page Registers *************************************************************************/ diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 8acbd17bc1fd..49cb51223f70 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -60,45 +60,6 @@ static void bcm_sf2_imp_vlan_setup(struct 
dsa_switch *ds, int cpu_port) } } -static void bcm_sf2_brcm_hdr_setup(struct bcm_sf2_priv *priv, int port) -{ - u32 reg, val; - - /* Resolve which bit controls the Broadcom tag */ - switch (port) { - case 8: - val = BRCM_HDR_EN_P8; - break; - case 7: - val = BRCM_HDR_EN_P7; - break; - case 5: - val = BRCM_HDR_EN_P5; - break; - default: - val = 0; - break; - } - - /* Enable Broadcom tags for IMP port */ - reg = core_readl(priv, CORE_BRCM_HDR_CTRL); - reg |= val; - core_writel(priv, reg, CORE_BRCM_HDR_CTRL); - - /* Enable reception Broadcom tag for CPU TX (switch RX) to - * allow us to tag outgoing frames - */ - reg = core_readl(priv, CORE_BRCM_HDR_RX_DIS); - reg &= ~(1 << port); - core_writel(priv, reg, CORE_BRCM_HDR_RX_DIS); - - /* Enable transmission of Broadcom tags from the switch (CPU RX) to - * allow delivering frames to the per-port net_devices - */ - reg = core_readl(priv, CORE_BRCM_HDR_TX_DIS); - reg &= ~(1 << port); - core_writel(priv, reg, CORE_BRCM_HDR_TX_DIS); -} static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) { @@ -138,7 +99,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) reg |= i << (PRT_TO_QID_SHIFT * i); core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port)); - bcm_sf2_brcm_hdr_setup(priv, port); + b53_brcm_hdr_setup(ds, port); /* Force link status for IMP port */ reg = core_readl(priv, offset); @@ -247,7 +208,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, /* Enable Broadcom tags for that port if requested */ if (priv->brcm_tag_mask & BIT(port)) - bcm_sf2_brcm_hdr_setup(priv, port); + b53_brcm_hdr_setup(ds, port); /* Configure Traffic Class to QoS mapping, allow each priority to map * to a different queue number diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 49695fcc2ea8..788361ad68a0 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -205,16 +205,8 @@ enum bcm_sf2_reg_offs { #define CORE_IMP0_PRT_ID 0x0804 -#define CORE_BRCM_HDR_CTRL 0x0080c -#define BRCM_HDR_EN_P8 (1 << 0) -#define BRCM_HDR_EN_P5 (1 << 1) -#define BRCM_HDR_EN_P7 (1 << 2) - #define CORE_RST_MIB_CNT_EN 0x0950 -#define CORE_BRCM_HDR_RX_DIS 0x0980 -#define CORE_BRCM_HDR_TX_DIS 0x0988 - #define CORE_ARLA_VTBL_RWCTRL 0x1600 #define ARLA_VTBL_CMD_WRITE 0 #define ARLA_VTBL_CMD_READ 1 From 909d812a668e8950ed8bb33ecdc9afbcdb0d371b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:49 -0700 Subject: [PATCH 0059/2177] net: dsa: b53: Define EEE register page In preparation for migrating the EEE code from bcm_sf2 to b53, define the full EEE register page and offsets within that page. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_regs.h | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 5e8b8e31fee8..2a9f421680aa 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -50,6 +50,9 @@ /* Jumbo Frame Registers */ #define B53_JUMBO_PAGE 0x40 +/* EEE Control Registers Page */ +#define B53_EEE_PAGE 0x92 + /* CFP Configuration Registers Page */ #define B53_CFP_PAGE 0xa1 @@ -471,6 +474,44 @@ #define JMS_MIN_SIZE 1518 #define JMS_MAX_SIZE 9724 +/************************************************************************* + * EEE Configuration Page Registers + *************************************************************************/ + +/* EEE Enable control register (16 bit) */ +#define B53_EEE_EN_CTRL 0x00 + +/* EEE LPI assert status register (16 bit) */ +#define B53_EEE_LPI_ASSERT_STS 0x02 + +/* EEE LPI indicate status register (16 bit) */ +#define B53_EEE_LPI_INDICATE 0x4 + +/* EEE Receiving idle symbols status register (16 bit) */ +#define B53_EEE_RX_IDLE_SYM_STS 0x6 + +/* EEE Pipeline timer register (32 bit) */ +#define B53_EEE_PIP_TIMER 0xC + +/* EEE Sleep timer Gig register (32 bit) */ +#define B53_EEE_SLEEP_TIMER_GIG(i) (0x10 + 4 * (i)) + +/* EEE Sleep timer FE register (32 bit) */ +#define B53_EEE_SLEEP_TIMER_FE(i) (0x34 + 4 * (i)) + +/* EEE Minimum LP timer Gig register (32 bit) */ +#define B53_EEE_MIN_LP_TIMER_GIG(i) (0x58 + 4 * (i)) + +/* EEE Minimum LP timer FE register (32 bit) */ +#define B53_EEE_MIN_LP_TIMER_FE(i) (0x7c + 4 * (i)) + +/* EEE Wake timer Gig register (16 bit) */ +#define B53_EEE_WAKE_TIMER_GIG(i) (0xa0 + 2 * (i)) + +/* EEE Wake timer FE register (16 bit) */ +#define B53_EEE_WAKE_TIMER_FE(i) (0xb2 + 2 * (i)) + + /************************************************************************* * CFP Configuration Page Registers *************************************************************************/ From 22256b0afb12333571ad11799fa68fd27e4f4e80 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:50 -0700 Subject: [PATCH 0060/2177] net: dsa: b53: Move EEE functions to b53 Move the bcm_sf2 EEE-related functions to the b53 driver because this is shared code amongst Gigabit capable switch, only 5325 and 5365 are too old to support that. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 63 ++++++++++++++++++++++++++++++ drivers/net/dsa/b53/b53_priv.h | 5 +++ drivers/net/dsa/bcm_sf2.c | 66 +++----------------------------- drivers/net/dsa/bcm_sf2.h | 2 - drivers/net/dsa/bcm_sf2_regs.h | 3 -- 5 files changed, 74 insertions(+), 65 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index aa2187c71ea5..491e4ffa8a0e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1531,6 +1531,69 @@ void b53_mirror_del(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_mirror_del); +void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable) +{ + struct b53_device *dev = ds->priv; + u16 reg; + + b53_read16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, ®); + if (enable) + reg |= BIT(port); + else + reg &= ~BIT(port); + b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg); +} +EXPORT_SYMBOL(b53_eee_enable_set); + + +/* Returns 0 if EEE was not enabled, or 1 otherwise + */ +int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) +{ + int ret; + + ret = phy_init_eee(phy, 0); + if (ret) + return 0; + + b53_eee_enable_set(ds, port, true); + + return 1; +} +EXPORT_SYMBOL(b53_eee_init); + +int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) +{ + struct b53_device *dev = ds->priv; + struct ethtool_eee *p = &dev->ports[port].eee; + u16 reg; + + if (is5325(dev) || is5365(dev)) + return -EOPNOTSUPP; + + b53_read16(dev, B53_EEE_PAGE, B53_EEE_LPI_INDICATE, ®); + e->eee_enabled = p->eee_enabled; + e->eee_active = !!(reg & BIT(port)); + + return 0; +} +EXPORT_SYMBOL(b53_get_mac_eee); + +int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) +{ + struct b53_device *dev = ds->priv; + struct ethtool_eee *p = &dev->ports[port].eee; + + if (is5325(dev) || is5365(dev)) + return -EOPNOTSUPP; + + p->eee_enabled = e->eee_enabled; + b53_eee_enable_set(ds, port, e->eee_enabled); + + return 0; +} +EXPORT_SYMBOL(b53_set_mac_eee); + static const struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 77102f685da0..aabe80eab25d 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -70,6 +70,7 @@ enum { struct b53_port { u16 vlan_ctl_mask; + struct ethtool_eee eee; }; struct b53_vlan { @@ -310,5 +311,9 @@ int b53_mirror_add(struct dsa_switch *ds, int port, void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); +void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable); +int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy); +int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); +int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); #endif diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 49cb51223f70..4e8ef4c07eab 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -107,19 +107,6 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) core_writel(priv, reg, offset); } -static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - u32 reg; - - reg = core_readl(priv, CORE_EEE_EN_CTRL); - if (enable) - reg |= 1 << port; - else - reg &= ~(1 << port); - core_writel(priv, reg, 
CORE_EEE_EN_CTRL); -} - static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); @@ -256,8 +243,8 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, bcm_sf2_imp_vlan_setup(ds, cpu_port); /* If EEE was enabled, restore it */ - if (priv->port_sts[port].eee.eee_enabled) - bcm_sf2_eee_enable_set(ds, port, true); + if (priv->dev->ports[port].eee.eee_enabled) + b53_eee_enable_set(ds, port, true); return 0; } @@ -292,47 +279,6 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); } -/* Returns 0 if EEE was not enabled, or 1 otherwise - */ -static int bcm_sf2_eee_init(struct dsa_switch *ds, int port, - struct phy_device *phy) -{ - int ret; - - ret = phy_init_eee(phy, 0); - if (ret) - return 0; - - bcm_sf2_eee_enable_set(ds, port, true); - - return 1; -} - -static int bcm_sf2_sw_get_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_eee *p = &priv->port_sts[port].eee; - u32 reg; - - reg = core_readl(priv, CORE_EEE_LPI_INDICATE); - e->eee_enabled = p->eee_enabled; - e->eee_active = !!(reg & (1 << port)); - - return 0; -} - -static int bcm_sf2_sw_set_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_eee *p = &priv->port_sts[port].eee; - - p->eee_enabled = e->eee_enabled; - bcm_sf2_eee_enable_set(ds, port, e->eee_enabled); - - return 0; -} static int bcm_sf2_sw_indir_rw(struct bcm_sf2_priv *priv, int op, int addr, int regnum, u16 val) @@ -567,7 +513,7 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_eee *p = &priv->port_sts[port].eee; + struct ethtool_eee *p = &priv->dev->ports[port].eee; u32 id_mode_dis = 0, port_mode; const char *str = NULL; u32 reg, offset; @@ -649,7 +595,7 @@ force_link: core_writel(priv, reg, offset); if (!phydev->is_pseudo_fixed_link) - p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev); + p->eee_enabled = b53_eee_init(ds, port, phydev); } static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, @@ -978,8 +924,8 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .set_wol = bcm_sf2_sw_set_wol, .port_enable = bcm_sf2_port_setup, .port_disable = bcm_sf2_port_disable, - .get_mac_eee = bcm_sf2_sw_get_mac_eee, - .set_mac_eee = bcm_sf2_sw_set_mac_eee, + .get_mac_eee = b53_get_mac_eee, + .set_mac_eee = b53_set_mac_eee, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_stp_state_set = b53_br_set_stp_state, diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 02c499f9c56b..1922e027ff59 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -48,8 +48,6 @@ struct bcm_sf2_hw_params { struct bcm_sf2_port_status { unsigned int link; - - struct ethtool_eee eee; }; struct bcm_sf2_cfp_priv { diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 788361ad68a0..d8b8074a47b9 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -244,9 +244,6 @@ enum bcm_sf2_reg_offs { #define CORE_JOIN_ALL_VLAN_EN 0xd140 -#define CORE_EEE_EN_CTRL 0x24800 -#define CORE_EEE_LPI_INDICATE 0x24810 - #define CORE_CFP_ACC 0x28000 #define OP_STR_DONE (1 << 0) #define OP_SEL_SHIFT 1 From f43a2dbe9597038578171e70cb8fe751a0383fe2 Mon Sep 17 00:00:00 2001 From: 
Florian Fainelli Date: Tue, 19 Sep 2017 10:46:51 -0700 Subject: [PATCH 0061/2177] net: dsa: b53: Wire-up EEE Add support for enabling and disabling EEE, as well as re-negotiating it in .adjust_link() and in .port_enable(). Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 491e4ffa8a0e..4e37ec27e496 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -523,6 +523,10 @@ static int b53_enable_port(struct dsa_switch *ds, int port, b53_imp_vlan_setup(ds, cpu_port); + /* If EEE was enabled, restore it */ + if (dev->ports[port].eee.eee_enabled) + b53_eee_enable_set(ds, port, true); + return 0; } @@ -879,6 +883,7 @@ static void b53_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct b53_device *dev = ds->priv; + struct ethtool_eee *p = &dev->ports[port].eee; u8 rgmii_ctrl = 0, reg = 0, off; if (!phy_is_pseudo_fixed_link(phydev)) @@ -1000,6 +1005,9 @@ static void b53_adjust_link(struct dsa_switch *ds, int port, b53_write8(dev, B53_CTRL_PAGE, po_reg, gmii_po); } } + + /* Re-negotiate EEE if it was enabled already */ + p->eee_enabled = b53_eee_init(ds, port, phydev); } int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) @@ -1605,6 +1613,8 @@ static const struct dsa_switch_ops b53_switch_ops = { .adjust_link = b53_adjust_link, .port_enable = b53_enable_port, .port_disable = b53_disable_port, + .get_mac_eee = b53_get_mac_eee, + .set_mac_eee = b53_set_mac_eee, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_stp_state_set = b53_br_set_stp_state, From aac028672cbea038f23e378ddb3af08c1dbe668c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:52 -0700 Subject: [PATCH 0062/2177] net: dsa: b53: Export b53_imp_vlan_setup() bcm_sf2 and b53 do exactly the same thing, so share that piece. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 3 ++- drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/bcm_sf2.c | 23 +---------------------- 3 files changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 4e37ec27e496..c3f1cd2c33ea 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -484,7 +484,7 @@ static int b53_fast_age_vlan(struct b53_device *dev, u16 vid) return b53_flush_arl(dev, FAST_AGE_VLAN); } -static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) +void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { struct b53_device *dev = ds->priv; unsigned int i; @@ -500,6 +500,7 @@ static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(i), pvlan); } } +EXPORT_SYMBOL(b53_imp_vlan_setup); static int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index aabe80eab25d..8f4f83e2e4bd 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -284,6 +284,7 @@ static inline int b53_switch_get_reset_gpio(struct b53_device *dev) #endif /* Exported functions towards other drivers */ +void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port); void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data); void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_get_sset_count(struct dsa_switch *ds); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 4e8ef4c07eab..08639674947a 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -40,27 +40,6 @@ static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds) return DSA_TAG_PROTO_BRCM; } -static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - unsigned int i; - u32 reg; - - /* Enable the IMP Port to be in the same VLAN as the other ports - * on a per-port basis such that we only have Port i and IMP in - * the same VLAN. - */ - for (i = 0; i < priv->hw_params.num_ports; i++) { - if (!((1 << i) & ds->enabled_port_mask)) - continue; - - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); - reg |= (1 << cpu_port); - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); - } -} - - static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); @@ -240,7 +219,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg |= priv->dev->ports[port].vlan_ctl_mask; core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port)); - bcm_sf2_imp_vlan_setup(ds, cpu_port); + b53_imp_vlan_setup(ds, cpu_port); /* If EEE was enabled, restore it */ if (priv->dev->ports[port].eee.eee_enabled) From 152b6fd60ae075dbe41707ed9c7caddc18b03b35 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:53 -0700 Subject: [PATCH 0063/2177] net: dsa: bcm_sf2: Use SF2_NUM_EGRESS_QUEUES for CFP The magic number 8 in 3 locations in bcm_sf2_cfp.c actually designates the number of switch port egress queues, so use that define instead of open-coding it. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 8a1da7e67707..94649e1481ec 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -144,7 +144,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, * destination port is enabled and that we are within the * number of ports supported by the switch */ - port_num = fs->ring_cookie / 8; + port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES; if (fs->ring_cookie == RX_CLS_FLOW_DISC || !(BIT(port_num) & ds->enabled_port_mask) || @@ -280,7 +280,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, * We have a small oddity where Port 6 just does not have a * valid bit here (so we subtract by one). */ - queue_num = fs->ring_cookie % 8; + queue_num = fs->ring_cookie % SF2_NUM_EGRESS_QUEUES; if (port_num >= 7) port_num -= 1; @@ -401,7 +401,7 @@ static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, /* There is no Port 6, so we compensate for that here */ if (nfc->fs.ring_cookie >= 6) nfc->fs.ring_cookie++; - nfc->fs.ring_cookie *= 8; + nfc->fs.ring_cookie *= SF2_NUM_EGRESS_QUEUES; /* Extract the destination queue */ queue_num = (reg >> NEW_TC_SHIFT) & NEW_TC_MASK; From f86ad77faf248d6101394f8c79d03f16a9ea461d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:54 -0700 Subject: [PATCH 0064/2177] net: dsa: bcm_sf2: Utilize b53_{enable, disable}_port Export b53_{enable,disable}_port and use these two functions in bcm_sf2_port_setup and bcm_sf2_port_disable. The generic functions cannot be used without wrapping because we need to manage additional switch integration details (PHY, Broadcom tag etc.). Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 8 ++++---- drivers/net/dsa/b53/b53_priv.h | 2 ++ drivers/net/dsa/bcm_sf2.c | 26 ++------------------------ 3 files changed, 8 insertions(+), 28 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c3f1cd2c33ea..a9f2a5b55a5e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -502,8 +502,7 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) } EXPORT_SYMBOL(b53_imp_vlan_setup); -static int b53_enable_port(struct dsa_switch *ds, int port, - struct phy_device *phy) +int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; unsigned int cpu_port = dev->cpu_port; @@ -530,9 +529,9 @@ static int b53_enable_port(struct dsa_switch *ds, int port, return 0; } +EXPORT_SYMBOL(b53_enable_port); -static void b53_disable_port(struct dsa_switch *ds, int port, - struct phy_device *phy) +void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; u8 reg; @@ -542,6 +541,7 @@ static void b53_disable_port(struct dsa_switch *ds, int port, reg |= PORT_CTRL_RX_DISABLE | PORT_CTRL_TX_DISABLE; b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +EXPORT_SYMBOL(b53_disable_port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) { diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 8f4f83e2e4bd..603c66d240d8 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -311,6 +311,8 @@ int b53_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, bool ingress); void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); +int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); +void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable); int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 08639674947a..0072a959db5b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -163,7 +163,6 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, struct phy_device *phy) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - s8 cpu_port = ds->dst->cpu_dp->index; unsigned int i; u32 reg; @@ -184,9 +183,6 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg |= i << (PRT_TO_QID_SHIFT * i); core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port)); - /* Clear the Rx and Tx disable bits and set to no spanning tree */ - core_writel(priv, 0, CORE_G_PCTL_PORT(port)); - /* Re-enable the GPHY and re-apply workarounds */ if (priv->int_phy_mask & 1 << port && priv->hw_params.num_gphy == 1) { bcm_sf2_gphy_enable_set(ds, true); @@ -209,23 +205,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, if (port == priv->moca_port) bcm_sf2_port_intr_enable(priv, port); - /* Set this port, and only this one to be in the default VLAN, - * if member of a bridge, restore its membership prior to - * bringing down this port. 
- */ - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); - reg &= ~PORT_VLAN_CTRL_MASK; - reg |= (1 << port); - reg |= priv->dev->ports[port].vlan_ctl_mask; - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port)); - - b53_imp_vlan_setup(ds, cpu_port); - - /* If EEE was enabled, restore it */ - if (priv->dev->ports[port].eee.eee_enabled) - b53_eee_enable_set(ds, port, true); - - return 0; + return b53_enable_port(ds, port, phy); } static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, @@ -248,9 +228,7 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, else off = CORE_G_PCTL_PORT(port); - reg = core_readl(priv, off); - reg |= RX_DIS | TX_DIS; - core_writel(priv, reg, off); + b53_disable_port(ds, port, phy); /* Power down the port memory */ reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL); From 7e936bd73483eb1873b9ab6f375c9a1d781bc5cb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Sep 2017 01:12:11 +0200 Subject: [PATCH 0065/2177] test_rhashtable: don't allocate huge static array Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 0ffca990a833..c40d6e636f33 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -72,8 +72,6 @@ struct thread_data { struct test_obj *objs; }; -static struct test_obj array[MAX_ENTRIES]; - static struct rhashtable_params test_rht_params = { .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), @@ -85,7 +83,7 @@ static struct rhashtable_params test_rht_params = { static struct semaphore prestart_sem; static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0); -static int insert_retry(struct rhashtable *ht, struct rhash_head *obj, +static int insert_retry(struct rhashtable *ht, struct test_obj *obj, const struct rhashtable_params params) { int err, retries = -1, enomem_retries = 0; @@ -93,7 +91,7 @@ static int insert_retry(struct rhashtable *ht, struct rhash_head *obj, do { retries++; cond_resched(); - err = rhashtable_insert_fast(ht, obj, params); + err = rhashtable_insert_fast(ht, &obj->node, params); if (err == -ENOMEM && enomem_retry) { enomem_retries++; err = -EBUSY; @@ -107,7 +105,7 @@ static int insert_retry(struct rhashtable *ht, struct rhash_head *obj, return err ? 
: retries; } -static int __init test_rht_lookup(struct rhashtable *ht) +static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array) { unsigned int i; @@ -186,7 +184,7 @@ static void test_bucket_stats(struct rhashtable *ht) pr_warn("Test failed: Total count mismatch ^^^"); } -static s64 __init test_rhashtable(struct rhashtable *ht) +static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array) { struct test_obj *obj; int err; @@ -203,7 +201,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht) struct test_obj *obj = &array[i]; obj->value.id = i * 2; - err = insert_retry(ht, &obj->node, test_rht_params); + err = insert_retry(ht, obj, test_rht_params); if (err > 0) insert_retries += err; else if (err) @@ -216,7 +214,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht) test_bucket_stats(ht); rcu_read_lock(); - test_rht_lookup(ht); + test_rht_lookup(ht, array); rcu_read_unlock(); test_bucket_stats(ht); @@ -286,7 +284,7 @@ static int threadfunc(void *data) for (i = 0; i < entries; i++) { tdata->objs[i].value.id = i; tdata->objs[i].value.tid = tdata->id; - err = insert_retry(&ht, &tdata->objs[i].node, test_rht_params); + err = insert_retry(&ht, &tdata->objs[i], test_rht_params); if (err > 0) { insert_retries += err; } else if (err) { @@ -349,6 +347,10 @@ static int __init test_rht_init(void) test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries); test_rht_params.nelem_hint = size; + objs = vzalloc((test_rht_params.max_size + 1) * sizeof(struct test_obj)); + if (!objs) + return -ENOMEM; + pr_info("Running rhashtable test nelem=%d, max_size=%d, shrinking=%d\n", size, max_size, shrinking); @@ -356,7 +358,8 @@ static int __init test_rht_init(void) s64 time; pr_info("Test %02d:\n", i); - memset(&array, 0, sizeof(array)); + memset(objs, 0, test_rht_params.max_size * sizeof(struct test_obj)); + err = rhashtable_init(&ht, &test_rht_params); if (err < 0) { pr_warn("Test failed: Unable to initialize hashtable: %d\n", @@ -364,9 +367,10 @@ static int __init test_rht_init(void) continue; } - time = test_rhashtable(&ht); + time = test_rhashtable(&ht, objs); rhashtable_destroy(&ht); if (time < 0) { + vfree(objs); pr_warn("Test failed: return code %lld\n", time); return -EINVAL; } @@ -374,6 +378,7 @@ static int __init test_rht_init(void) total_time += time; } + vfree(objs); do_div(total_time, runs); pr_info("Average test time: %llu\n", total_time); From f651616e799ff01d1e21abc053ee1e23ac1a2344 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Sep 2017 01:12:12 +0200 Subject: [PATCH 0066/2177] test_rhashtable: don't use global entries variable pass the entries to test as an argument instead. Followup patch will add an rhlist test case; rhlist delete opererations are slow so we need to use a smaller number to test it. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- lib/test_rhashtable.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index c40d6e636f33..69f5b3849980 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -28,9 +28,9 @@ #define MAX_ENTRIES 1000000 #define TEST_INSERT_FAIL INT_MAX -static int entries = 50000; -module_param(entries, int, 0); -MODULE_PARM_DESC(entries, "Number of entries to add (default: 50000)"); +static int parm_entries = 50000; +module_param(parm_entries, int, 0); +MODULE_PARM_DESC(parm_entries, "Number of entries to add (default: 50000)"); static int runs = 4; module_param(runs, int, 0); @@ -67,6 +67,7 @@ struct test_obj { }; struct thread_data { + unsigned int entries; int id; struct task_struct *task; struct test_obj *objs; @@ -105,11 +106,12 @@ static int insert_retry(struct rhashtable *ht, struct test_obj *obj, return err ? : retries; } -static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array) +static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array, + unsigned int entries) { unsigned int i; - for (i = 0; i < entries * 2; i++) { + for (i = 0; i < entries; i++) { struct test_obj *obj; bool expected = !(i % 2); struct test_obj_val key = { @@ -142,7 +144,7 @@ static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array) return 0; } -static void test_bucket_stats(struct rhashtable *ht) +static void test_bucket_stats(struct rhashtable *ht, unsigned int entries) { unsigned int err, total = 0, chain_len = 0; struct rhashtable_iter hti; @@ -184,7 +186,8 @@ static void test_bucket_stats(struct rhashtable *ht) pr_warn("Test failed: Total count mismatch ^^^"); } -static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array) +static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array, + unsigned int entries) { struct test_obj *obj; int err; @@ -212,12 +215,12 @@ static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array) pr_info(" %u insertions retried due to memory pressure\n", insert_retries); - test_bucket_stats(ht); + test_bucket_stats(ht, entries); rcu_read_lock(); - test_rht_lookup(ht, array); + test_rht_lookup(ht, array, entries); rcu_read_unlock(); - test_bucket_stats(ht); + test_bucket_stats(ht, entries); pr_info(" Deleting %d keys\n", entries); for (i = 0; i < entries; i++) { @@ -245,6 +248,7 @@ static struct rhashtable ht; static int thread_lookup_test(struct thread_data *tdata) { + unsigned int entries = tdata->entries; int i, err = 0; for (i = 0; i < entries; i++) { @@ -281,7 +285,7 @@ static int threadfunc(void *data) if (down_interruptible(&startup_sem)) pr_err(" thread[%d]: down_interruptible failed\n", tdata->id); - for (i = 0; i < entries; i++) { + for (i = 0; i < tdata->entries; i++) { tdata->objs[i].value.id = i; tdata->objs[i].value.tid = tdata->id; err = insert_retry(&ht, &tdata->objs[i], test_rht_params); @@ -305,7 +309,7 @@ static int threadfunc(void *data) } for (step = 10; step > 0; step--) { - for (i = 0; i < entries; i += step) { + for (i = 0; i < tdata->entries; i += step) { if (tdata->objs[i].value.id == TEST_INSERT_FAIL) continue; err = rhashtable_remove_fast(&ht, &tdata->objs[i].node, @@ -336,12 +340,16 @@ out: static int __init test_rht_init(void) { + unsigned int entries; int i, err, started_threads = 0, failed_threads = 0; u64 total_time = 0; struct thread_data *tdata; struct test_obj *objs; - entries = min(entries, MAX_ENTRIES); + if 
(parm_entries < 0) + parm_entries = 1; + + entries = min(parm_entries, MAX_ENTRIES); test_rht_params.automatic_shrinking = shrinking; test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries); @@ -367,7 +375,7 @@ static int __init test_rht_init(void) continue; } - time = test_rhashtable(&ht, objs); + time = test_rhashtable(&ht, objs, entries); rhashtable_destroy(&ht); if (time < 0) { vfree(objs); @@ -409,6 +417,7 @@ static int __init test_rht_init(void) } for (i = 0; i < tcount; i++) { tdata[i].id = i; + tdata[i].entries = entries; tdata[i].objs = objs + i * entries; tdata[i].task = kthread_run(threadfunc, &tdata[i], "rhashtable_thrad[%d]", i); From a6359bd8dd1c3a15c09e7dbb533bf89d13b42acd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Sep 2017 01:12:13 +0200 Subject: [PATCH 0067/2177] test_rhashtable: add a check for max_size add a test that tries to insert more than max_size elements. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 69f5b3849980..1eee90e6e394 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -246,6 +246,43 @@ static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array, static struct rhashtable ht; +static int __init test_rhashtable_max(struct test_obj *array, + unsigned int entries) +{ + unsigned int i, insert_retries = 0; + int err; + + test_rht_params.max_size = roundup_pow_of_two(entries / 8); + err = rhashtable_init(&ht, &test_rht_params); + if (err) + return err; + + for (i = 0; i < ht.max_elems; i++) { + struct test_obj *obj = &array[i]; + + obj->value.id = i * 2; + err = insert_retry(&ht, obj, test_rht_params); + if (err > 0) + insert_retries += err; + else if (err) + return err; + } + + err = insert_retry(&ht, &array[ht.max_elems], test_rht_params); + if (err == -E2BIG) { + err = 0; + } else { + pr_info("insert element %u should have failed with %d, got %d\n", + ht.max_elems, -E2BIG, err); + if (err == 0) + err = -1; + } + + rhashtable_destroy(&ht); + + return err; +} + static int thread_lookup_test(struct thread_data *tdata) { unsigned int entries = tdata->entries; @@ -386,7 +423,11 @@ static int __init test_rht_init(void) total_time += time; } + pr_info("test if its possible to exceed max_size %d: %s\n", + test_rht_params.max_size, test_rhashtable_max(objs, entries) == 0 ? + "no, ok" : "YES, failed"); vfree(objs); + do_div(total_time, runs); pr_info("Average test time: %llu\n", total_time); From cdd4de372ea06a18e104100b9f2c442527d26db1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Sep 2017 01:12:14 +0200 Subject: [PATCH 0068/2177] test_rhashtable: add test case for rhl_table interface also test rhltable. rhltable remove operations are slow as deletions require a list walk, thus test with 1/16th of the given entry count number to get a run duration similar to rhashtable one. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- lib/test_rhashtable.c | 196 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 194 insertions(+), 2 deletions(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 1eee90e6e394..de4d0584631a 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #define MAX_ENTRIES 1000000 @@ -66,6 +67,11 @@ struct test_obj { struct rhash_head node; }; +struct test_obj_rhl { + struct test_obj_val value; + struct rhlist_head list_node; +}; + struct thread_data { unsigned int entries; int id; @@ -245,6 +251,186 @@ static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array, } static struct rhashtable ht; +static struct rhltable rhlt __initdata; + +static int __init test_rhltable(unsigned int entries) +{ + struct test_obj_rhl *rhl_test_objects; + unsigned long *obj_in_table; + unsigned int i, j, k; + int ret, err; + + if (entries == 0) + entries = 1; + + rhl_test_objects = vzalloc(sizeof(*rhl_test_objects) * entries); + if (!rhl_test_objects) + return -ENOMEM; + + ret = -ENOMEM; + obj_in_table = vzalloc(BITS_TO_LONGS(entries) * sizeof(unsigned long)); + if (!obj_in_table) + goto out_free; + + /* nulls_base not supported in rhlist interface */ + test_rht_params.nulls_base = 0; + err = rhltable_init(&rhlt, &test_rht_params); + if (WARN_ON(err)) + goto out_free; + + k = prandom_u32(); + ret = 0; + for (i = 0; i < entries; i++) { + rhl_test_objects[i].value.id = k; + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, + test_rht_params); + if (WARN(err, "error %d on element %d\n", err, i)) + break; + if (err == 0) + set_bit(i, obj_in_table); + } + + if (err) + ret = err; + + pr_info("test %d add/delete pairs into rhlist\n", entries); + for (i = 0; i < entries; i++) { + struct rhlist_head *h, *pos; + struct test_obj_rhl *obj; + struct test_obj_val key = { + .id = k, + }; + bool found; + + rcu_read_lock(); + h = rhltable_lookup(&rhlt, &key, test_rht_params); + if (WARN(!h, "key not found during iteration %d of %d", i, entries)) { + rcu_read_unlock(); + break; + } + + if (i) { + j = i - 1; + rhl_for_each_entry_rcu(obj, pos, h, list_node) { + if (WARN(pos == &rhl_test_objects[j].list_node, "old element found, should be gone")) + break; + } + } + + cond_resched_rcu(); + + found = false; + + rhl_for_each_entry_rcu(obj, pos, h, list_node) { + if (pos == &rhl_test_objects[i].list_node) { + found = true; + break; + } + } + + rcu_read_unlock(); + + if (WARN(!found, "element %d not found", i)) + break; + + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + WARN(err, "rhltable_remove: err %d for iteration %d\n", err, i); + if (err == 0) + clear_bit(i, obj_in_table); + } + + if (ret == 0 && err) + ret = err; + + for (i = 0; i < entries; i++) { + WARN(test_bit(i, obj_in_table), "elem %d allegedly still present", i); + + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, + test_rht_params); + if (WARN(err, "error %d on element %d\n", err, i)) + break; + if (err == 0) + set_bit(i, obj_in_table); + } + + pr_info("test %d random rhlist add/delete operations\n", entries); + for (j = 0; j < entries; j++) { + u32 i = prandom_u32_max(entries); + u32 prand = prandom_u32(); + + cond_resched(); + + if (prand == 0) + prand = prandom_u32(); + + if (prand & 1) { + prand >>= 1; + continue; + } + + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + if (test_bit(i, obj_in_table)) { + clear_bit(i, obj_in_table); + if (WARN(err, "cannot remove 
element at slot %d", i)) + continue; + } else { + if (WARN(err != -ENOENT, "removed non-existant element %d, error %d not %d", + i, err, -ENOENT)) + continue; + } + + if (prand & 1) { + prand >>= 1; + continue; + } + + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + if (err == 0) { + if (WARN(test_and_set_bit(i, obj_in_table), "succeeded to insert same object %d", i)) + continue; + } else { + if (WARN(!test_bit(i, obj_in_table), "failed to insert object %d", i)) + continue; + } + + if (prand & 1) { + prand >>= 1; + continue; + } + + i = prandom_u32_max(entries); + if (test_bit(i, obj_in_table)) { + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + WARN(err, "cannot remove element at slot %d", i); + if (err == 0) + clear_bit(i, obj_in_table); + } else { + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + WARN(err, "failed to insert object %d", i); + if (err == 0) + set_bit(i, obj_in_table); + } + } + + for (i = 0; i < entries; i++) { + cond_resched(); + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + if (test_bit(i, obj_in_table)) { + if (WARN(err, "cannot remove element at slot %d", i)) + continue; + } else { + if (WARN(err != -ENOENT, "removed non-existant element, error %d not %d", + err, -ENOENT)) + continue; + } + } + + rhltable_destroy(&rhlt); +out_free: + vfree(rhl_test_objects); + vfree(obj_in_table); + return ret; +} static int __init test_rhashtable_max(struct test_obj *array, unsigned int entries) @@ -480,11 +666,17 @@ static int __init test_rht_init(void) failed_threads++; } } - pr_info("Started %d threads, %d failed\n", - started_threads, failed_threads); rhashtable_destroy(&ht); vfree(tdata); vfree(objs); + + /* + * rhltable_remove is very expensive, default values can cause test + * to run for 2 minutes or more, use a smaller number instead. + */ + err = test_rhltable(entries / 16); + pr_info("Started %d threads, %d failed, rhltable test returns %d\n", + started_threads, failed_threads, err); return 0; } From bd7d2106b63adfd0dfd08331344e356461c29d70 Mon Sep 17 00:00:00 2001 From: Jim Hanko Date: Tue, 19 Sep 2017 11:33:39 -0700 Subject: [PATCH 0069/2177] team: fall back to hash if table entry is empty If the hash to port mapping table does not have a valid port (i.e. when a port goes down), fall back to the simple hashing mechanism to avoid dropping packets. Signed-off-by: Jim Hanko Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/team/team_mode_loadbalance.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 1468ddf424cc..a5ef97010eb3 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -137,7 +137,13 @@ static struct team_port *lb_htpm_select_tx_port(struct team *team, struct sk_buff *skb, unsigned char hash) { - return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash)); + struct team_port *port; + + port = rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash)); + if (likely(port)) + return port; + /* If no valid port in the table, fall back to simple hash */ + return lb_hash_select_tx_port(team, lb_priv, skb, hash); } struct lb_select_tx_port { From 752fbcc33405d6f8249465e4b2c4e420091bb825 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 19 Sep 2017 13:15:42 -0700 Subject: [PATCH 0070/2177] net_sched: no need to free qdisc in RCU callback gen estimator has been rewritten in commit 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators"), the caller no longer needs to wait for a grace period. So this patch gets rid of it. Cc: Jamal Hadi Salim Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 - net/sched/sch_generic.c | 10 ++-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 135f5a2dd931..684d8ed27eaa 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -93,7 +93,6 @@ struct Qdisc { unsigned long state; struct Qdisc *next_sched; struct sk_buff *skb_bad_txq; - struct rcu_head rcu_head; int padded; refcount_t refcnt; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 92237e75dbbc..1fb0c754b7fd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -688,10 +688,8 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); -static void qdisc_rcu_free(struct rcu_head *head) +static void qdisc_free(struct Qdisc *qdisc) { - struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); - if (qdisc_is_percpu_stats(qdisc)) { free_percpu(qdisc->cpu_bstats); free_percpu(qdisc->cpu_qstats); @@ -724,11 +722,7 @@ void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(qdisc->gso_skb); kfree_skb(qdisc->skb_bad_txq); - /* - * gen_estimator est_timer() might access qdisc->q.lock, - * wait a RCU grace period before freeing qdisc. - */ - call_rcu(&qdisc->rcu_head, qdisc_rcu_free); + qdisc_free(qdisc); } EXPORT_SYMBOL(qdisc_destroy); From 16dff336b33d87c15d9cbe933cfd275aae2a8251 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:03 -0700 Subject: [PATCH 0071/2177] kobject: add kobject_uevent_net_broadcast() This removes some #ifdef pollution and will ease follow up patches. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- lib/kobject_uevent.c | 96 ++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index e590523ea476..4f48cc3b11d5 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -294,6 +294,57 @@ static void cleanup_uevent_env(struct subprocess_info *info) } #endif +static int kobject_uevent_net_broadcast(struct kobject *kobj, + struct kobj_uevent_env *env, + const char *action_string, + const char *devpath) +{ + int retval = 0; +#if defined(CONFIG_NET) + struct uevent_sock *ue_sk; + + /* send netlink message */ + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + struct sock *uevent_sock = ue_sk->sk; + struct sk_buff *skb; + size_t len; + + if (!netlink_has_listeners(uevent_sock, 1)) + continue; + + /* allocate message with the maximum possible size */ + len = strlen(action_string) + strlen(devpath) + 2; + skb = alloc_skb(len + env->buflen, GFP_KERNEL); + if (skb) { + char *scratch; + int i; + + /* add header */ + scratch = skb_put(skb, len); + sprintf(scratch, "%s@%s", action_string, devpath); + + /* copy keys to our continuous event payload buffer */ + for (i = 0; i < env->envp_idx; i++) { + len = strlen(env->envp[i]) + 1; + scratch = skb_put(skb, len); + strcpy(scratch, env->envp[i]); + } + + NETLINK_CB(skb).dst_group = 1; + retval = netlink_broadcast_filtered(uevent_sock, skb, + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); + /* ENOBUFS should be handled in userspace */ + if (retval == -ENOBUFS || retval == -ESRCH) + retval = 0; + } else + retval = -ENOMEM; + } +#endif + return retval; +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -316,9 +367,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, const struct kset_uevent_ops *uevent_ops; int i = 0; int retval = 0; -#ifdef CONFIG_NET - struct uevent_sock *ue_sk; -#endif pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -427,46 +475,8 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, mutex_unlock(&uevent_sock_mutex); goto exit; } - -#if defined(CONFIG_NET) - /* send netlink message */ - list_for_each_entry(ue_sk, &uevent_sock_list, list) { - struct sock *uevent_sock = ue_sk->sk; - struct sk_buff *skb; - size_t len; - - if (!netlink_has_listeners(uevent_sock, 1)) - continue; - - /* allocate message with the maximum possible size */ - len = strlen(action_string) + strlen(devpath) + 2; - skb = alloc_skb(len + env->buflen, GFP_KERNEL); - if (skb) { - char *scratch; - - /* add header */ - scratch = skb_put(skb, len); - sprintf(scratch, "%s@%s", action_string, devpath); - - /* copy keys to our continuous event payload buffer */ - for (i = 0; i < env->envp_idx; i++) { - len = strlen(env->envp[i]) + 1; - scratch = skb_put(skb, len); - strcpy(scratch, env->envp[i]); - } - - NETLINK_CB(skb).dst_group = 1; - retval = netlink_broadcast_filtered(uevent_sock, skb, - 0, 1, GFP_KERNEL, - kobj_bcast_filter, - kobj); - /* ENOBUFS should be handled in userspace */ - if (retval == -ENOBUFS || retval == -ESRCH) - retval = 0; - } else - retval = -ENOMEM; - } -#endif + retval = kobject_uevent_net_broadcast(kobj, env, action_string, + devpath); mutex_unlock(&uevent_sock_mutex); #ifdef CONFIG_UEVENT_HELPER From 4a336a23d619e96aef37d4d054cfadcdd1b581ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:04 -0700 Subject: [PATCH 0072/2177] kobject: copy env blob in one go No need to iterate over strings, just copy in one 
efficient memcpy() call. Tested: time perf record "(for f in `seq 1 3000` ; do ip netns add tast$f; done)" [ perf record: Woken up 10 times to write data ] [ perf record: Captured and wrote 8.224 MB perf.data (~359301 samples) ] real 0m52.554s # instead of 1m7.492s user 0m0.309s sys 0m51.375s # instead of 1m6.875s 9.88% ip [kernel.kallsyms] [k] netlink_broadcast_filtered 8.86% ip [kernel.kallsyms] [k] string 7.37% ip [kernel.kallsyms] [k] __ip6addrlbl_add 5.68% ip [kernel.kallsyms] [k] netlink_has_listeners 5.52% ip [kernel.kallsyms] [k] memcpy_erms 4.76% ip [kernel.kallsyms] [k] __alloc_skb 4.54% ip [kernel.kallsyms] [k] vsnprintf 3.94% ip [kernel.kallsyms] [k] format_decode 3.80% ip [kernel.kallsyms] [k] kmem_cache_alloc_node_trace 3.71% ip [kernel.kallsyms] [k] kmem_cache_alloc_node 3.66% ip [kernel.kallsyms] [k] kobject_uevent_env 3.38% ip [kernel.kallsyms] [k] strlen 2.65% ip [kernel.kallsyms] [k] _raw_spin_lock_irqsave 2.20% ip [kernel.kallsyms] [k] kfree 2.09% ip [kernel.kallsyms] [k] memset_erms 2.07% ip [kernel.kallsyms] [k] ___cache_free 1.95% ip [kernel.kallsyms] [k] kmem_cache_free 1.91% ip [kernel.kallsyms] [k] _raw_read_lock 1.45% ip [kernel.kallsyms] [k] ksize 1.25% ip [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 1.00% ip [kernel.kallsyms] [k] widen_string Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 4f48cc3b11d5..78b2a7e378c0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -317,18 +317,12 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, skb = alloc_skb(len + env->buflen, GFP_KERNEL); if (skb) { char *scratch; - int i; /* add header */ scratch = skb_put(skb, len); sprintf(scratch, "%s@%s", action_string, devpath); - /* copy keys to our continuous event payload buffer */ - for (i = 0; i < env->envp_idx; i++) { - len = strlen(env->envp[i]) + 1; - scratch = skb_put(skb, len); - strcpy(scratch, env->envp[i]); - } + skb_put_data(skb, env->buf, env->buflen); NETLINK_CB(skb).dst_group = 1; retval = netlink_broadcast_filtered(uevent_sock, skb, From d464e84eed02993d40ad55fdc19f4523e4deee5b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:05 -0700 Subject: [PATCH 0073/2177] kobject: factorize skb setup in kobject_uevent_net_broadcast() We can build one skb and let it be cloned in netlink. 
This is much faster, and use less memory (all clones will share the same skb->head) Tested: time perf record (for f in `seq 1 3000` ; do ip netns add tast$f; done) [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 4.110 MB perf.data (~179584 samples) ] real 0m24.227s # instead of 0m52.554s user 0m0.329s sys 0m23.753s # instead of 0m51.375s 14.77% ip [kernel.kallsyms] [k] __ip6addrlbl_add 14.56% ip [kernel.kallsyms] [k] netlink_broadcast_filtered 11.65% ip [kernel.kallsyms] [k] netlink_has_listeners 6.19% ip [kernel.kallsyms] [k] _raw_spin_lock_irqsave 5.66% ip [kernel.kallsyms] [k] kobject_uevent_env 4.97% ip [kernel.kallsyms] [k] memset_erms 4.67% ip [kernel.kallsyms] [k] refcount_sub_and_test 4.41% ip [kernel.kallsyms] [k] _raw_read_lock 3.59% ip [kernel.kallsyms] [k] refcount_inc_not_zero 3.13% ip [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 1.55% ip [kernel.kallsyms] [k] __wake_up 1.20% ip [kernel.kallsyms] [k] strlen 1.03% ip [kernel.kallsyms] [k] __wake_up_common 0.93% ip [kernel.kallsyms] [k] consume_skb 0.92% ip [kernel.kallsyms] [k] netlink_trim 0.87% ip [kernel.kallsyms] [k] insert_header 0.63% ip [kernel.kallsyms] [k] unmap_page_range Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 78b2a7e378c0..147db91c10d0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -301,23 +301,26 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, { int retval = 0; #if defined(CONFIG_NET) + struct sk_buff *skb = NULL; struct uevent_sock *ue_sk; /* send netlink message */ list_for_each_entry(ue_sk, &uevent_sock_list, list) { struct sock *uevent_sock = ue_sk->sk; - struct sk_buff *skb; - size_t len; if (!netlink_has_listeners(uevent_sock, 1)) continue; - /* allocate message with the maximum possible size */ - len = strlen(action_string) + strlen(devpath) + 2; - skb = alloc_skb(len + env->buflen, GFP_KERNEL); - if (skb) { + if (!skb) { + /* allocate message with the maximum possible size */ + size_t len = strlen(action_string) + strlen(devpath) + 2; char *scratch; + retval = -ENOMEM; + skb = alloc_skb(len + env->buflen, GFP_KERNEL); + if (!skb) + continue; + /* add header */ scratch = skb_put(skb, len); sprintf(scratch, "%s@%s", action_string, devpath); @@ -325,16 +328,17 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, skb_put_data(skb, env->buf, env->buflen); NETLINK_CB(skb).dst_group = 1; - retval = netlink_broadcast_filtered(uevent_sock, skb, - 0, 1, GFP_KERNEL, - kobj_bcast_filter, - kobj); - /* ENOBUFS should be handled in userspace */ - if (retval == -ENOBUFS || retval == -ESRCH) - retval = 0; - } else - retval = -ENOMEM; + } + + retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb), + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); + /* ENOBUFS should be handled in userspace */ + if (retval == -ENOBUFS || retval == -ESRCH) + retval = 0; } + consume_skb(skb); #endif return retval; } From a90c9347e90ed1e9323d71402ed18023bc910cd8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:06 -0700 Subject: [PATCH 0074/2177] ipv6: addrlabel: per netns list Having a global list of labels do not scale to thousands of netns in the cloud era. This causes quadratic behavior on netns creation and deletion. This is time having a per netns list of ~10 labels. 
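For readers skimming the flattened diff that follows, the change is an instance of the standard per-netns state pattern: the label table (head, lock, sequence counter) moves out of a file-scope global and into per-namespace storage, so namespace teardown only touches its own ~10 entries. The patch itself embeds the table directly in struct netns_ipv6, as the diff shows; a minimal sketch of the equivalent net_generic() form, with invented names and not taken from the addrlabel code, would look roughly like this:

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>
	#include <net/net_namespace.h>
	#include <net/netns/generic.h>

	/* invented example of per-netns state, not the actual addrlabel table */
	struct example_pernet {
		struct hlist_head head;
		spinlock_t lock;
		u32 seq;
	};

	static unsigned int example_net_id __read_mostly;

	static int __net_init example_net_init(struct net *net)
	{
		struct example_pernet *pn = net_generic(net, example_net_id);

		INIT_HLIST_HEAD(&pn->head);
		spin_lock_init(&pn->lock);
		pn->seq = 0;
		return 0;
	}

	static void __net_exit example_net_exit(struct net *net)
	{
		/* only this namespace's entries need tearing down here;
		 * there is no global list to scan under a shared lock.
		 */
	}

	static struct pernet_operations example_net_ops = {
		.init = example_net_init,
		.exit = example_net_exit,
		.id   = &example_net_id,
		.size = sizeof(struct example_pernet),
	};

	/* registered once at subsystem init:
	 *	register_pernet_subsys(&example_net_ops);
	 */

Either form removes the need to tag each entry with its owning netns and to filter by net_eq() on every lookup and on namespace exit, which is where the quadratic cost came from.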
Tested: $ time perf record (for f in `seq 1 3000` ; do ip netns add tast$f; done) [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 3.637 MB perf.data (~158898 samples) ] real 0m20.837s # instead of 0m24.227s user 0m0.328s sys 0m20.338s # instead of 0m23.753s 16.17% ip [kernel.kallsyms] [k] netlink_broadcast_filtered 12.30% ip [kernel.kallsyms] [k] netlink_has_listeners 6.76% ip [kernel.kallsyms] [k] _raw_spin_lock_irqsave 5.78% ip [kernel.kallsyms] [k] memset_erms 5.77% ip [kernel.kallsyms] [k] kobject_uevent_env 5.18% ip [kernel.kallsyms] [k] refcount_sub_and_test 4.96% ip [kernel.kallsyms] [k] _raw_read_lock 3.82% ip [kernel.kallsyms] [k] refcount_inc_not_zero 3.33% ip [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 2.11% ip [kernel.kallsyms] [k] unmap_page_range 1.77% ip [kernel.kallsyms] [k] __wake_up 1.69% ip [kernel.kallsyms] [k] strlen 1.17% ip [kernel.kallsyms] [k] __wake_up_common 1.09% ip [kernel.kallsyms] [k] insert_header 1.04% ip [kernel.kallsyms] [k] page_remove_rmap 1.01% ip [kernel.kallsyms] [k] consume_skb 0.98% ip [kernel.kallsyms] [k] netlink_trim 0.51% ip [kernel.kallsyms] [k] kernfs_link_sibling 0.51% ip [kernel.kallsyms] [k] filemap_map_pages 0.46% ip [kernel.kallsyms] [k] memcpy_erms Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 5 +++ net/ipv6/addrlabel.c | 81 +++++++++++++++------------------------- 2 files changed, 35 insertions(+), 51 deletions(-) diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2544f9760a42..2ea1ed341ef8 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -89,6 +89,11 @@ struct netns_ipv6 { atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; struct fib_notifier_ops *notifier_ops; + struct { + struct hlist_head head; + spinlock_t lock; + u32 seq; + } ip6addrlbl_table; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index b055bc79f56d..c6311d7108f6 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -30,7 +30,6 @@ * Policy Table */ struct ip6addrlbl_entry { - possible_net_t lbl_net; struct in6_addr prefix; int prefixlen; int ifindex; @@ -41,19 +40,6 @@ struct ip6addrlbl_entry { struct rcu_head rcu; }; -static struct ip6addrlbl_table -{ - struct hlist_head head; - spinlock_t lock; - u32 seq; -} ip6addrlbl_table; - -static inline -struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) -{ - return read_pnet(&lbl->lbl_net); -} - /* * Default policy table (RFC6724 + extensions) * @@ -148,13 +134,10 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) } /* Find label */ -static bool __ip6addrlbl_match(struct net *net, - const struct ip6addrlbl_entry *p, +static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p, const struct in6_addr *addr, int addrtype, int ifindex) { - if (!net_eq(ip6addrlbl_net(p), net)) - return false; if (p->ifindex && p->ifindex != ifindex) return false; if (p->addrtype && p->addrtype != addrtype) @@ -169,8 +152,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, int type, int ifindex) { struct ip6addrlbl_entry *p; - hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { - if (__ip6addrlbl_match(net, p, addr, type, ifindex)) + + hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { + if (__ip6addrlbl_match(p, addr, type, ifindex)) return p; } return NULL; @@ -196,8 +180,7 @@ u32 ipv6_addr_label(struct net *net, } /* allocate one entry */ -static struct ip6addrlbl_entry 
*ip6addrlbl_alloc(struct net *net, - const struct in6_addr *prefix, +static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label) { @@ -236,24 +219,23 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); - write_pnet(&newp->lbl_net, net); refcount_set(&newp->refcnt, 1); return newp; } /* add a label */ -static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) +static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, + int replace) { - struct hlist_node *n; struct ip6addrlbl_entry *last = NULL, *p = NULL; + struct hlist_node *n; int ret = 0; ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, replace); - hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && - net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && p->ifindex == newp->ifindex && ipv6_addr_equal(&p->prefix, &newp->prefix)) { if (!replace) { @@ -273,10 +255,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) if (last) hlist_add_behind_rcu(&newp->list, &last->list); else - hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); + hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head); out: if (!ret) - ip6addrlbl_table.seq++; + net->ipv6.ip6addrlbl_table.seq++; return ret; } @@ -292,12 +274,12 @@ static int ip6addrlbl_add(struct net *net, __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); - newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); + newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) return PTR_ERR(newp); - spin_lock(&ip6addrlbl_table.lock); - ret = __ip6addrlbl_add(newp, replace); - spin_unlock(&ip6addrlbl_table.lock); + spin_lock(&net->ipv6.ip6addrlbl_table.lock); + ret = __ip6addrlbl_add(net, newp, replace); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); if (ret) ip6addrlbl_free(newp); return ret; @@ -315,9 +297,8 @@ static int __ip6addrlbl_del(struct net *net, ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); - hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && - net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && ipv6_addr_equal(&p->prefix, prefix)) { hlist_del_rcu(&p->list); @@ -340,9 +321,9 @@ static int ip6addrlbl_del(struct net *net, __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); - spin_lock(&ip6addrlbl_table.lock); + spin_lock(&net->ipv6.ip6addrlbl_table.lock); ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); - spin_unlock(&ip6addrlbl_table.lock); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); return ret; } @@ -354,6 +335,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net) ADDRLABEL(KERN_DEBUG "%s\n", __func__); + spin_lock_init(&net->ipv6.ip6addrlbl_table.lock); + INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); + for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { int ret = ip6addrlbl_add(net, ip6addrlbl_init_table[i].prefix, @@ -373,14 +357,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net) struct hlist_node *n; /* Remove all labels belonging to the exiting net */ - spin_lock(&ip6addrlbl_table.lock); - 
hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { - if (net_eq(ip6addrlbl_net(p), net)) { - hlist_del_rcu(&p->list); - ip6addrlbl_put(p); - } + spin_lock(&net->ipv6.ip6addrlbl_table.lock); + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + ip6addrlbl_put(p); } - spin_unlock(&ip6addrlbl_table.lock); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); } static struct pernet_operations ipv6_addr_label_ops = { @@ -390,8 +372,6 @@ static struct pernet_operations ipv6_addr_label_ops = { int __init ipv6_addr_label_init(void) { - spin_lock_init(&ip6addrlbl_table.lock); - return register_pernet_subsys(&ipv6_addr_label_ops); } @@ -510,11 +490,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) int err; rcu_read_lock(); - hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { - if (idx >= s_idx && - net_eq(ip6addrlbl_net(p), net)) { + hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { + if (idx >= s_idx) { err = ip6addrlbl_fill(skb, p, - ip6addrlbl_table.seq, + net->ipv6.ip6addrlbl_table.seq, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, @@ -571,7 +550,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); if (p && !ip6addrlbl_hold(p)) p = NULL; - lseq = ip6addrlbl_table.seq; + lseq = net->ipv6.ip6addrlbl_table.seq; rcu_read_unlock(); if (!p) { From 789e6ddb0b2fb5d5024b760b178a47876e4de7a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:07 -0700 Subject: [PATCH 0075/2177] tcp: batch tcp_net_metrics_exit When dealing with a list of dismantling netns, we can scan tcp_metrics once, saving cpu cycles. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 102b2c90bb80..0ab78abc811b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -892,10 +892,14 @@ static void tcp_metrics_flush_all(struct net *net) for (row = 0; row < max_rows; row++, hb++) { struct tcp_metrics_block __rcu **pp; + bool match; + spin_lock_bh(&tcp_metrics_lock); pp = &hb->chain; for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { - if (net_eq(tm_net(tm), net)) { + match = net ? net_eq(tm_net(tm), net) : + !atomic_read(&tm_net(tm)->count); + if (match) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); } else { @@ -1018,14 +1022,14 @@ static int __net_init tcp_net_metrics_init(struct net *net) return 0; } -static void __net_exit tcp_net_metrics_exit(struct net *net) +static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list) { - tcp_metrics_flush_all(net); + tcp_metrics_flush_all(NULL); } static __net_initdata struct pernet_operations tcp_net_metrics_ops = { - .init = tcp_net_metrics_init, - .exit = tcp_net_metrics_exit, + .init = tcp_net_metrics_init, + .exit_batch = tcp_net_metrics_exit_batch, }; void __init tcp_metrics_init(void) From bb401caefe9d2c65e0c0fa23b21deecfbfa473fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:08 -0700 Subject: [PATCH 0076/2177] ipv6: speedup ipv6 tunnels dismantle Implement exit_batch() method to dismantle more devices per round. (rtnl_lock() ... unregister_netdevice_many() ... 
rtnl_unlock()) Tested: $ cat add_del_unshare.sh for i in `seq 1 40` do (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) & done wait ; grep net_namespace /proc/slabinfo Before patch : $ time ./add_del_unshare.sh net_namespace 110 267 5504 1 2 : tunables 8 4 0 : slabdata 110 267 0 real 3m25.292s user 0m0.644s sys 0m40.153s After patch: $ time ./add_del_unshare.sh net_namespace 126 282 5504 1 2 : tunables 8 4 0 : slabdata 126 282 0 real 1m38.965s user 0m0.688s sys 0m37.017s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 8 +++++--- net/ipv6/ip6_tunnel.c | 20 +++++++++++--------- net/ipv6/ip6_vti.c | 23 ++++++++++++++--------- net/ipv6/sit.c | 9 ++++++--- 4 files changed, 36 insertions(+), 24 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b7a72d409334..c82d41ef25e2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1155,19 +1155,21 @@ err_alloc_dev: return err; } -static void __net_exit ip6gre_exit_net(struct net *net) +static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list) { + struct net *net; LIST_HEAD(list); rtnl_lock(); - ip6gre_destroy_tunnels(net, &list); + list_for_each_entry(net, net_list, exit_list) + ip6gre_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations ip6gre_net_ops = { .init = ip6gre_init_net, - .exit = ip6gre_exit_net, + .exit_batch = ip6gre_exit_batch_net, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ae73164559d5..3d6df489b39f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2167,17 +2167,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .priority = 1, }; -static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) +static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct net_device *dev, *aux; int h; struct ip6_tnl *t; - LIST_HEAD(list); for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &ip6_link_ops) - unregister_netdevice_queue(dev, &list); + unregister_netdevice_queue(dev, list); for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); @@ -2186,12 +2185,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) * been added to the list by the previous loop. 
*/ if (!net_eq(dev_net(t->dev), net)) - unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_queue(t->dev, list); t = rtnl_dereference(t->next); } } - - unregister_netdevice_many(&list); } static int __net_init ip6_tnl_init_net(struct net *net) @@ -2235,16 +2232,21 @@ err_alloc_dev: return err; } -static void __net_exit ip6_tnl_exit_net(struct net *net) +static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list) { + struct net *net; + LIST_HEAD(list); + rtnl_lock(); - ip6_tnl_destroy_tunnels(net); + list_for_each_entry(net, net_list, exit_list) + ip6_tnl_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, - .exit = ip6_tnl_exit_net, + .exit_batch = ip6_tnl_exit_batch_net, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), }; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 79444a4bfd6d..714914d1bb98 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1052,23 +1052,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .get_link_net = ip6_tnl_get_link_net, }; -static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) +static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n, + struct list_head *list) { int h; struct ip6_tnl *t; - LIST_HEAD(list); for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { - unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_queue(t->dev, list); t = rtnl_dereference(t->next); } } t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, &list); - unregister_netdevice_many(&list); + unregister_netdevice_queue(t->dev, list); } static int __net_init vti6_init_net(struct net *net) @@ -1108,18 +1107,24 @@ err_alloc_dev: return err; } -static void __net_exit vti6_exit_net(struct net *net) +static void __net_exit vti6_exit_batch_net(struct list_head *net_list) { - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n; + struct net *net; + LIST_HEAD(list); rtnl_lock(); - vti6_destroy_tunnels(ip6n); + list_for_each_entry(net, net_list, exit_list) { + ip6n = net_generic(net, vti6_net_id); + vti6_destroy_tunnels(ip6n, &list); + } + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations vti6_net_ops = { .init = vti6_init_net, - .exit = vti6_exit_net, + .exit_batch = vti6_exit_batch_net, .id = &vti6_net_id, .size = sizeof(struct vti6_net), }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ac912bb21747..a799f5258614 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1848,19 +1848,22 @@ err_alloc_dev: return err; } -static void __net_exit sit_exit_net(struct net *net) +static void __net_exit sit_exit_batch_net(struct list_head *net_list) { LIST_HEAD(list); + struct net *net; rtnl_lock(); - sit_destroy_tunnels(net, &list); + list_for_each_entry(net, net_list, exit_list) + sit_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, - .exit = sit_exit_net, + .exit_batch = sit_exit_batch_net, .id = &sit_net_id, .size = sizeof(struct sit_net), }; From 64bc17811b72758753e2b64cd8f2a63812c61fe1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:09 -0700 Subject: [PATCH 0077/2177] ipv4: speedup ipv6 tunnels dismantle Implement exit_batch() method to dismantle more devices per round. (rtnl_lock() ... unregister_netdevice_many() ... 
rtnl_unlock()) Tested: $ cat add_del_unshare.sh for i in `seq 1 40` do (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) & done wait ; grep net_namespace /proc/slabinfo Before patch : $ time ./add_del_unshare.sh net_namespace 126 282 5504 1 2 : tunables 8 4 0 : slabdata 126 282 0 real 1m38.965s user 0m0.688s sys 0m37.017s After patch: $ time ./add_del_unshare.sh net_namespace 135 291 5504 1 2 : tunables 8 4 0 : slabdata 135 291 0 real 0m22.117s user 0m0.728s sys 0m35.328s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 ++- net/ipv4/ip_gre.c | 22 +++++++++------------- net/ipv4/ip_tunnel.c | 12 +++++++++--- net/ipv4/ip_vti.c | 7 +++---- net/ipv4/ipip.c | 7 +++---- 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 992652856fe8..b41a1e057fce 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -258,7 +258,8 @@ int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); -void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); +void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, + struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0162fb955b33..9cee986ac6b8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1013,15 +1013,14 @@ static int __net_init ipgre_init_net(struct net *net) return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } -static void __net_exit ipgre_exit_net(struct net *net) +static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); - ip_tunnel_delete_net(itn, &ipgre_link_ops); + ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, - .exit = ipgre_exit_net, + .exit_batch = ipgre_exit_batch_net, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1540,15 +1539,14 @@ static int __net_init ipgre_tap_init_net(struct net *net) return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } -static void __net_exit ipgre_tap_exit_net(struct net *net) +static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); - ip_tunnel_delete_net(itn, &ipgre_tap_ops); + ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); } static struct pernet_operations ipgre_tap_net_ops = { .init = ipgre_tap_init_net, - .exit = ipgre_tap_exit_net, + .exit_batch = ipgre_tap_exit_batch_net, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1559,16 +1557,14 @@ static int __net_init erspan_init_net(struct net *net) &erspan_link_ops, "erspan0"); } -static void __net_exit erspan_exit_net(struct net *net) +static void __net_exit erspan_exit_batch_net(struct list_head *net_list) { - struct ip_tunnel_net *itn = net_generic(net, erspan_net_id); - - ip_tunnel_delete_net(itn, &erspan_link_ops); + ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); } static struct pernet_operations erspan_net_ops = { .init = erspan_init_net, - .exit = erspan_exit_net, + .exit_batch = erspan_exit_batch_net, .id = &erspan_net_id, .size = sizeof(struct 
ip_tunnel_net), }; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e9805ad664ac..fe6fee728ce4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, } } -void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) +void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, + struct rtnl_link_ops *ops) { + struct ip_tunnel_net *itn; + struct net *net; LIST_HEAD(list); rtnl_lock(); - ip_tunnel_destroy(itn, &list, ops); + list_for_each_entry(net, net_list, exit_list) { + itn = net_generic(net, id); + ip_tunnel_destroy(itn, &list, ops); + } unregister_netdevice_many(&list); rtnl_unlock(); } -EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); +EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5ed63d250950..02d70ca99db1 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -452,15 +452,14 @@ static int __net_init vti_init_net(struct net *net) return 0; } -static void __net_exit vti_exit_net(struct net *net) +static void __net_exit vti_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - ip_tunnel_delete_net(itn, &vti_link_ops); + ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops); } static struct pernet_operations vti_net_ops = { .init = vti_init_net, - .exit = vti_exit_net, + .exit_batch = vti_exit_batch_net, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index fb1ad22b5e29..1e47818e38c7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -634,15 +634,14 @@ static int __net_init ipip_init_net(struct net *net) return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } -static void __net_exit ipip_exit_net(struct net *net) +static void __net_exit ipip_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); - ip_tunnel_delete_net(itn, &ipip_link_ops); + ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, - .exit = ipip_exit_net, + .exit_batch = ipip_exit_batch_net, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), }; From 91f4aa977947f046dc144fa9e3b06f0ffb53be79 Mon Sep 17 00:00:00 2001 From: Diogenes Pereira Date: Wed, 9 Aug 2017 13:19:24 -0300 Subject: [PATCH 0078/2177] mac802154: replace hardcoded value with macro Use IEEE802154_SCF_SECLEVEL_NONE macro defined at ieee802154.h file. 
Signed-off-by: Diogenes Pereira Signed-off-by: Stefan Schmidt --- net/mac802154/llsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 1e1c9b20bab7..edec2f9919d0 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -713,7 +713,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb) if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA) return -EINVAL; - if (!hdr.fc.security_enabled || hdr.sec.level == 0) { + if (!hdr.fc.security_enabled || + (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) { skb_push(skb, hlen); return 0; } From 3e4962667efb0f6c09fa3111e6ee53838118b227 Mon Sep 17 00:00:00 2001 From: Diogenes Pereira Date: Tue, 5 Sep 2017 09:18:04 -0300 Subject: [PATCH 0079/2177] mac802154: Fix MAC header and payload encrypted According to the 802.15.4-2003/2006/2015 specifications, the MAC frame is composed of MHR, MAC payload and MFR, and only the outgoing MAC payload must be encrypted. If communication is secure, the sender builds the Auxiliary Security Header (ASH), inserts it next to the standard MHR header with the security enabled bit set, and secures frames before transmitting them. Using the information carried within the ASH, the recipient retrieves the right cryptographic key and correctly un-secures MAC frames. The error scenario occurs on Linux with the IEEE802154_SCF_SECLEVEL_ENC (4) security level, where the llsec_do_encrypt_unauth() function builds these MAC frames incorrectly. On recipients these MAC frames are discarded, logging "got invalid frame" messages. Signed-off-by: Diogenes Pereira Signed-off-by: Stefan Schmidt --- net/mac802154/llsec.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index edec2f9919d0..2fb703d70803 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, u8 iv[16]; struct scatterlist src; SKCIPHER_REQUEST_ON_STACK(req, key->tfm0); - int err; + int err, datalen; + unsigned char *data; llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); - sg_init_one(&src, skb->data, skb->len); + /* Compute data payload offset and data length */ + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + sg_init_one(&src, data, datalen); + skcipher_request_set_tfm(req, key->tfm0); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &src, &src, skb->len, iv); + skcipher_request_set_crypt(req, &src, &src, datalen, iv); err = crypto_skcipher_encrypt(req); skcipher_request_zero(req); return err; From d5dd29e4dafef4baad7bf529ad73cafeb13e1aa8 Mon Sep 17 00:00:00 2001 From: Josef Filzmaier Date: Thu, 14 Sep 2017 14:32:10 +0200 Subject: [PATCH 0080/2177] ieee802154: atusb: Driver for Busware HUL dongle Busware manufactured a USB dongle that is quite similar to the atben and rzusb USB dongles that are already supported. This patch aims to support the Busware HUL dongle (called hulusb) alongside atusb and rzusb. hulusb uses the at86rf212 transceiver, which is specifically designed to support the 700/800/900 MHz band. The source code is heavily inspired by the existing atusb and at86rf2xx drivers. 
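The atusb diff that follows introduces a per-chip parameter and ops table (struct atusb_chip_data), selected from the firmware hardware type, so the 2.4 GHz AT86RF230/231 dongles and the sub-GHz AT86RF212-based HULUSB can share one driver. The sketch below distills that dispatch idea under simplified, hypothetical types; struct chip_data, struct dongle and dongle_set_channel are illustrative stand-ins, not the driver's real structures.

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Per-chip knobs and callbacks, mirroring the atusb_chip_data idea. */
struct chip_data {
        u16 t_channel_switch_ms;        /* settle time after a channel change */
        int rssi_base_val;              /* chip-specific RSSI base (dBm) */
        int (*set_channel)(void *priv, u8 page, u8 channel);
        int (*set_txpower)(void *priv, s32 mbm);
};

struct dongle {
        void *priv;                     /* bus/transceiver handle */
        const struct chip_data *data;   /* chosen once from the HW type */
};

/* Generic entry point: defer to whichever chip variant was detected. */
static int dongle_set_channel(struct dongle *d, u8 page, u8 channel)
{
        int ret;

        if (!d->data)
                return -EOPNOTSUPP;

        ret = d->data->set_channel(d->priv, page, channel);
        if (ret < 0)
                return ret;

        /* Chip-specific settling time (1 ms for ATUSB, 11 ms for HULUSB). */
        msleep(d->data->t_channel_switch_ms);
        return 0;
}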
Signed-off-by: Josef Filzmaier Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 317 ++++++++++++++++++++++++++++----- drivers/net/ieee802154/atusb.h | 8 + 2 files changed, 285 insertions(+), 40 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index ef688518ad77..115fa3f37a86 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -21,6 +21,9 @@ * * USB initialization is * Copyright (c) 2013 Alexander Aring + * + * Busware HUL support is + * Copyright (c) 2017 Josef Filzmaier */ #include @@ -42,9 +45,12 @@ #define ATUSB_ALLOC_DELAY_MS 100 /* delay after failed allocation */ #define ATUSB_TX_TIMEOUT_MS 200 /* on the air timeout */ +struct atusb_chip_data; + struct atusb { struct ieee802154_hw *hw; struct usb_device *usb_dev; + struct atusb_chip_data *data; int shutdown; /* non-zero if shutting down */ int err; /* set by first error */ @@ -65,6 +71,14 @@ struct atusb { unsigned char fw_hw_type; /* Firmware hardware type */ }; +struct atusb_chip_data { + u16 t_channel_switch; + int rssi_base_val; + + int (*set_channel)(struct ieee802154_hw*, u8, u8); + int (*set_txpower)(struct ieee802154_hw*, s32); +}; + /* ----- USB commands without data ----------------------------------------- */ /* To reduce the number of error checks in the code, we record the first error @@ -163,6 +177,18 @@ static int atusb_write_subreg(struct atusb *atusb, uint8_t reg, uint8_t mask, return ret; } +static int atusb_read_subreg(struct atusb *lp, + unsigned int addr, unsigned int mask, + unsigned int shift) +{ + int rc; + + rc = atusb_read_reg(lp, addr); + rc = (rc & mask) >> shift; + + return rc; +} + static int atusb_get_and_clear_error(struct atusb *atusb) { int err = atusb->err; @@ -379,18 +405,6 @@ static int atusb_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) return ret; } -static int atusb_channel(struct ieee802154_hw *hw, u8 page, u8 channel) -{ - struct atusb *atusb = hw->priv; - int ret; - - ret = atusb_write_subreg(atusb, SR_CHANNEL, channel); - if (ret < 0) - return ret; - msleep(1); /* @@@ ugly synchronization */ - return 0; -} - static int atusb_ed(struct ieee802154_hw *hw, u8 *level) { BUG_ON(!level); @@ -474,6 +488,17 @@ static const s32 atusb_powers[ATUSB_MAX_TX_POWERS + 1] = { -900, -1200, -1700, }; +static int +atusb_txpower(struct ieee802154_hw *hw, s32 mbm) +{ + struct atusb *atusb = hw->priv; + + if (atusb->data) + return atusb->data->set_txpower(hw, mbm); + else + return -ENOTSUPP; +} + static int atusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) { @@ -488,12 +513,43 @@ atusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) return -EINVAL; } +static int +hulusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) +{ + u32 i; + + for (i = 0; i < hw->phy->supported.tx_powers_size; i++) { + if (hw->phy->supported.tx_powers[i] == mbm) + return atusb_write_subreg(hw->priv, SR_TX_PWR_212, i); + } + + return -EINVAL; +} + #define ATUSB_MAX_ED_LEVELS 0xF static const s32 atusb_ed_levels[ATUSB_MAX_ED_LEVELS + 1] = { -9100, -8900, -8700, -8500, -8300, -8100, -7900, -7700, -7500, -7300, -7100, -6900, -6700, -6500, -6300, -6100, }; +#define AT86RF212_MAX_TX_POWERS 0x1F +static const s32 at86rf212_powers[AT86RF212_MAX_TX_POWERS + 1] = { + 500, 400, 300, 200, 100, 0, -100, -200, -300, -400, -500, -600, -700, + -800, -900, -1000, -1100, -1200, -1300, -1400, -1500, -1600, -1700, + -1800, -1900, -2000, -2100, -2200, -2300, -2400, -2500, -2600, +}; + +#define AT86RF2XX_MAX_ED_LEVELS 0xF +static const s32 
at86rf212_ed_levels_100[AT86RF2XX_MAX_ED_LEVELS + 1] = { + -10000, -9800, -9600, -9400, -9200, -9000, -8800, -8600, -8400, -8200, + -8000, -7800, -7600, -7400, -7200, -7000, +}; + +static const s32 at86rf212_ed_levels_98[AT86RF2XX_MAX_ED_LEVELS + 1] = { + -9800, -9600, -9400, -9200, -9000, -8800, -8600, -8400, -8200, -8000, + -7800, -7600, -7400, -7200, -7000, -6800, +}; + static int atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca) { @@ -527,6 +583,30 @@ atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca) return atusb_write_subreg(atusb, SR_CCA_MODE, val); } +static int hulusb_set_cca_ed_level(struct atusb *lp, int rssi_base_val) +{ + unsigned int cca_ed_thres; + + cca_ed_thres = atusb_read_subreg(lp, SR_CCA_ED_THRES); + + switch (rssi_base_val) { + case -98: + lp->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_98; + lp->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_98); + lp->hw->phy->cca_ed_level = at86rf212_ed_levels_98[cca_ed_thres]; + break; + case -100: + lp->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_100; + lp->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_100); + lp->hw->phy->cca_ed_level = at86rf212_ed_levels_100[cca_ed_thres]; + break; + default: + WARN_ON(1); + } + + return 0; +} + static int atusb_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) { @@ -541,6 +621,92 @@ atusb_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) return -EINVAL; } +static int atusb_channel(struct ieee802154_hw *hw, u8 page, u8 channel) +{ + struct atusb *atusb = hw->priv; + int ret = -ENOTSUPP; + + if (atusb->data) { + ret = atusb->data->set_channel(hw, page, channel); + /* @@@ ugly synchronization */ + msleep(atusb->data->t_channel_switch); + } + + return ret; +} + +static int atusb_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) +{ + struct atusb *atusb = hw->priv; + int ret; + + ret = atusb_write_subreg(atusb, SR_CHANNEL, channel); + if (ret < 0) + return ret; + return 0; +} + +static int hulusb_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) +{ + int rc; + int rssi_base_val; + + struct atusb *lp = hw->priv; + + if (channel == 0) + rc = atusb_write_subreg(lp, SR_SUB_MODE, 0); + else + rc = atusb_write_subreg(lp, SR_SUB_MODE, 1); + if (rc < 0) + return rc; + + if (page == 0) { + rc = atusb_write_subreg(lp, SR_BPSK_QPSK, 0); + rssi_base_val = -100; + } else { + rc = atusb_write_subreg(lp, SR_BPSK_QPSK, 1); + rssi_base_val = -98; + } + if (rc < 0) + return rc; + + rc = hulusb_set_cca_ed_level(lp, rssi_base_val); + if (rc < 0) + return rc; + + /* This sets the symbol_duration according frequency on the 212. + * TODO move this handling while set channel and page in cfg802154. + * We can do that, this timings are according 802.15.4 standard. + * If we do that in cfg802154, this is a more generic calculation. + * + * This should also protected from ifs_timer. Means cancel timer and + * init with a new value. For now, this is okay. 
+ */ + if (channel == 0) { + if (page == 0) { + /* SUB:0 and BPSK:0 -> BPSK-20 */ + lp->hw->phy->symbol_duration = 50; + } else { + /* SUB:1 and BPSK:0 -> BPSK-40 */ + lp->hw->phy->symbol_duration = 25; + } + } else { + if (page == 0) + /* SUB:0 and BPSK:1 -> OQPSK-100/200/400 */ + lp->hw->phy->symbol_duration = 40; + else + /* SUB:1 and BPSK:1 -> OQPSK-250/500/1000 */ + lp->hw->phy->symbol_duration = 16; + } + + lp->hw->phy->lifs_period = IEEE802154_LIFS_PERIOD * + lp->hw->phy->symbol_duration; + lp->hw->phy->sifs_period = IEEE802154_SIFS_PERIOD * + lp->hw->phy->symbol_duration; + + return atusb_write_subreg(lp, SR_CHANNEL, channel); +} + static int atusb_set_csma_params(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries) { @@ -558,6 +724,14 @@ atusb_set_csma_params(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries return atusb_write_subreg(atusb, SR_MAX_CSMA_RETRIES, retries); } +static int +hulusb_set_lbt(struct ieee802154_hw *hw, bool on) +{ + struct atusb *atusb = hw->priv; + + return atusb_write_subreg(atusb, SR_CSMA_LBT_MODE, on); +} + static int atusb_set_frame_retries(struct ieee802154_hw *hw, s8 retries) { @@ -593,6 +767,20 @@ atusb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) return 0; } +struct atusb_chip_data atusb_chip_data = { + .t_channel_switch = 1, + .rssi_base_val = -91, + .set_txpower = atusb_set_txpower, + .set_channel = atusb_set_channel, +}; + +struct atusb_chip_data hulusb_chip_data = { + .t_channel_switch = 11, + .rssi_base_val = -100, + .set_txpower = hulusb_set_txpower, + .set_channel = hulusb_set_channel, +}; + static const struct ieee802154_ops atusb_ops = { .owner = THIS_MODULE, .xmit_async = atusb_xmit, @@ -601,7 +789,8 @@ static const struct ieee802154_ops atusb_ops = { .start = atusb_start, .stop = atusb_stop, .set_hw_addr_filt = atusb_set_hw_addr_filt, - .set_txpower = atusb_set_txpower, + .set_txpower = atusb_txpower, + .set_lbt = hulusb_set_lbt, .set_cca_mode = atusb_set_cca_mode, .set_cca_ed_level = atusb_set_cca_ed_level, .set_csma_params = atusb_set_csma_params, @@ -614,6 +803,7 @@ static const struct ieee802154_ops atusb_ops = { static int atusb_get_and_show_revision(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; + char *hw_name; unsigned char *buffer; int ret; @@ -630,9 +820,31 @@ static int atusb_get_and_show_revision(struct atusb *atusb) atusb->fw_ver_min = buffer[1]; atusb->fw_hw_type = buffer[2]; + switch (atusb->fw_hw_type) { + case ATUSB_HW_TYPE_100813: + case ATUSB_HW_TYPE_101216: + case ATUSB_HW_TYPE_110131: + hw_name = "ATUSB"; + atusb->data = &atusb_chip_data; + break; + case ATUSB_HW_TYPE_RZUSB: + hw_name = "RZUSB"; + atusb->data = &atusb_chip_data; + break; + case ATUSB_HW_TYPE_HULUSB: + hw_name = "HULUSB"; + atusb->data = &hulusb_chip_data; + break; + default: + hw_name = "UNKNOWN"; + atusb->err = -ENOTSUPP; + ret = -ENOTSUPP; + break; + } + dev_info(&usb_dev->dev, - "Firmware: major: %u, minor: %u, hardware type: %u\n", - atusb->fw_ver_maj, atusb->fw_ver_min, atusb->fw_hw_type); + "Firmware: major: %u, minor: %u, hardware type: %s (%d)\n", + atusb->fw_ver_maj, atusb->fw_ver_min, hw_name, atusb->fw_hw_type); } if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 2) { dev_info(&usb_dev->dev, @@ -667,11 +879,12 @@ static int atusb_get_and_show_build(struct atusb *atusb) return ret; } -static int atusb_get_and_show_chip(struct atusb *atusb) +static int atusb_get_and_conf_chip(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; uint8_t man_id_0, man_id_1, part_num, 
version_num; const char *chip; + struct ieee802154_hw *hw = atusb->hw; man_id_0 = atusb_read_reg(atusb, RG_MAN_ID_0); man_id_1 = atusb_read_reg(atusb, RG_MAN_ID_1); @@ -681,6 +894,22 @@ static int atusb_get_and_show_chip(struct atusb *atusb) if (atusb->err) return atusb->err; + hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | + IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS; + + hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | + WPAN_PHY_FLAG_CCA_MODE; + + hw->phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) | + BIT(NL802154_CCA_CARRIER) | + BIT(NL802154_CCA_ENERGY_CARRIER); + hw->phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) | + BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR); + + hw->phy->cca.mode = NL802154_CCA_ENERGY; + + hw->phy->current_page = 0; + if ((man_id_1 << 8 | man_id_0) != ATUSB_JEDEC_ATMEL) { dev_err(&usb_dev->dev, "non-Atmel transceiver xxxx%02x%02x\n", @@ -691,9 +920,36 @@ static int atusb_get_and_show_chip(struct atusb *atusb) switch (part_num) { case 2: chip = "AT86RF230"; + atusb->hw->phy->supported.channels[0] = 0x7FFF800; + atusb->hw->phy->current_channel = 11; /* reset default */ + atusb->hw->phy->symbol_duration = 16; + atusb->hw->phy->supported.tx_powers = atusb_powers; + atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); + hw->phy->supported.cca_ed_levels = atusb_ed_levels; + hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels); break; case 3: chip = "AT86RF231"; + atusb->hw->phy->supported.channels[0] = 0x7FFF800; + atusb->hw->phy->current_channel = 11; /* reset default */ + atusb->hw->phy->symbol_duration = 16; + atusb->hw->phy->supported.tx_powers = atusb_powers; + atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); + hw->phy->supported.cca_ed_levels = atusb_ed_levels; + hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels); + break; + case 7: + chip = "AT86RF212"; + atusb->hw->flags |= IEEE802154_HW_LBT; + atusb->hw->phy->supported.channels[0] = 0x00007FF; + atusb->hw->phy->supported.channels[2] = 0x00007FF; + atusb->hw->phy->current_channel = 5; + atusb->hw->phy->symbol_duration = 25; + atusb->hw->phy->supported.lbt = NL802154_SUPPORTED_BOOL_BOTH; + atusb->hw->phy->supported.tx_powers = at86rf212_powers; + atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(at86rf212_powers); + atusb->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_100; + atusb->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_100); break; default: dev_err(&usb_dev->dev, @@ -702,6 +958,9 @@ static int atusb_get_and_show_chip(struct atusb *atusb) goto fail; } + hw->phy->transmit_power = hw->phy->supported.tx_powers[0]; + hw->phy->cca_ed_level = hw->phy->supported.cca_ed_levels[7]; + dev_info(&usb_dev->dev, "ATUSB: %s version %d\n", chip, version_num); return 0; @@ -794,32 +1053,9 @@ static int atusb_probe(struct usb_interface *interface, goto fail; hw->parent = &usb_dev->dev; - hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | - IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS; - - hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | - WPAN_PHY_FLAG_CCA_MODE; - - hw->phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) | - BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER); - hw->phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) | - BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR); - - hw->phy->supported.cca_ed_levels = atusb_ed_levels; - hw->phy->supported.cca_ed_levels_size = 
ARRAY_SIZE(atusb_ed_levels); - - hw->phy->cca.mode = NL802154_CCA_ENERGY; - - hw->phy->current_page = 0; - hw->phy->current_channel = 11; /* reset default */ - hw->phy->supported.channels[0] = 0x7FFF800; - hw->phy->supported.tx_powers = atusb_powers; - hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); - hw->phy->transmit_power = hw->phy->supported.tx_powers[0]; - hw->phy->cca_ed_level = hw->phy->supported.cca_ed_levels[7]; atusb_command(atusb, ATUSB_RF_RESET, 0); - atusb_get_and_show_chip(atusb); + atusb_get_and_conf_chip(atusb); atusb_get_and_show_revision(atusb); atusb_get_and_show_build(atusb); atusb_set_extended_addr(atusb); @@ -941,5 +1177,6 @@ MODULE_AUTHOR("Alexander Aring "); MODULE_AUTHOR("Richard Sharpe "); MODULE_AUTHOR("Stefan Schmidt "); MODULE_AUTHOR("Werner Almesberger "); +MODULE_AUTHOR("Josef Filzmaier "); MODULE_DESCRIPTION("ATUSB IEEE 802.15.4 Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ieee802154/atusb.h b/drivers/net/ieee802154/atusb.h index b22bbaa77590..555d14bf14a3 100644 --- a/drivers/net/ieee802154/atusb.h +++ b/drivers/net/ieee802154/atusb.h @@ -50,6 +50,14 @@ enum atusb_requests { ATUSB_EUI64_READ, }; +enum { + ATUSB_HW_TYPE_100813, /* 2010-08-13 */ + ATUSB_HW_TYPE_101216, /* 2010-12-16 */ + ATUSB_HW_TYPE_110131, /* 2011-01-31, ATmega32U2-based */ + ATUSB_HW_TYPE_RZUSB, /* Atmel Raven USB dongle with at86rf230 */ + ATUSB_HW_TYPE_HULUSB, /* Busware HUL USB dongle with at86rf212 */ +}; + /* * Direction bRequest wValue wIndex wLength * From 421eedff1180ffa8f1780932d0f2561d67a6b44f Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:21 +0530 Subject: [PATCH 0081/2177] rsi: add p2p support parameters to mac80211 This patch adds p2p supported parameters to mac80211 hw and wiphy structures during mac80211 registration. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 37 ++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index fa12c05d9e23..992ac6cf9a34 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -139,6 +139,32 @@ static const u32 rsi_max_ap_stas[16] = { 4, /* 14 - AP + BT Dual */ }; +static const struct ieee80211_iface_limit rsi_iface_limits[] = { + { + .max = 1, + .types = BIT(NL80211_IFTYPE_STATION), + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_P2P_CLIENT) | + BIT(NL80211_IFTYPE_P2P_GO), + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_P2P_DEVICE), + }, +}; + +static const struct ieee80211_iface_combination rsi_iface_combinations[] = { + { + .num_different_channels = 1, + .max_interfaces = 3, + .limits = rsi_iface_limits, + .n_limits = ARRAY_SIZE(rsi_iface_limits), + }, +}; + /** * rsi_is_cipher_wep() - This function determines if the cipher is WEP or not. * @common: Pointer to the driver private structure. 
@@ -1581,7 +1607,11 @@ int rsi_mac80211_attach(struct rsi_common *common) ether_addr_copy(hw->wiphy->addr_mask, addr_mask); wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | - BIT(NL80211_IFTYPE_AP); + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_P2P_DEVICE) | + BIT(NL80211_IFTYPE_P2P_CLIENT) | + BIT(NL80211_IFTYPE_P2P_GO); + wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; wiphy->retry_short = RETRY_SHORT; wiphy->retry_long = RETRY_LONG; @@ -1608,6 +1638,11 @@ int rsi_mac80211_attach(struct rsi_common *common) wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); + /* Wi-Fi direct parameters */ + hw->max_listen_interval = 10; + wiphy->iface_combinations = rsi_iface_combinations; + wiphy->n_iface_combinations = ARRAY_SIZE(rsi_iface_combinations); + status = ieee80211_register_hw(hw); if (status) return status; From b8bd3a439f3593a5d40e45ce14a17a086a0f6fe2 Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:22 +0530 Subject: [PATCH 0082/2177] rsi: add/remove interface enhancements for p2p STA_OPMODE and AP_OPMODE macros are renamed to RSI_OPMODE_STA and RSI_OPMODE_AP. New opmodes are added to this list for P2P support. Mapping of mac80211 interface types to rsi interface types are added. Add and remove interface callbacks are handled for P2P mode. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 117 ++++++++++++-------- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 12 +- drivers/net/wireless/rsi/rsi_main.h | 6 +- drivers/net/wireless/rsi/rsi_mgmt.h | 9 +- 4 files changed, 89 insertions(+), 55 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 992ac6cf9a34..db69d87bd12e 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -355,6 +355,24 @@ static void rsi_mac80211_stop(struct ieee80211_hw *hw) mutex_unlock(&common->mutex); } +static int rsi_map_intf_mode(enum nl80211_iftype vif_type) +{ + switch (vif_type) { + case NL80211_IFTYPE_STATION: + return RSI_OPMODE_STA; + case NL80211_IFTYPE_AP: + return RSI_OPMODE_AP; + case NL80211_IFTYPE_P2P_DEVICE: + return RSI_OPMODE_P2P_CLIENT; + case NL80211_IFTYPE_P2P_CLIENT: + return RSI_OPMODE_P2P_CLIENT; + case NL80211_IFTYPE_P2P_GO: + return RSI_OPMODE_P2P_GO; + default: + return RSI_OPMODE_UNSUPPORTED; + } +} + /** * rsi_mac80211_add_interface() - This function is called when a netdevice * attached to the hardware is enabled. 
@@ -368,54 +386,62 @@ static int rsi_mac80211_add_interface(struct ieee80211_hw *hw, { struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; + struct vif_priv *vif_info = (struct vif_priv *)vif->drv_priv; enum opmode intf_mode; - int ret = -EOPNOTSUPP; + enum vap_status vap_status; + int vap_idx = -1, i; vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; mutex_lock(&common->mutex); - if (adapter->sc_nvifs > 1) { - mutex_unlock(&common->mutex); - return -EOPNOTSUPP; - } - - switch (vif->type) { - case NL80211_IFTYPE_STATION: - rsi_dbg(INFO_ZONE, "Station Mode"); - intf_mode = STA_OPMODE; - break; - case NL80211_IFTYPE_AP: - rsi_dbg(INFO_ZONE, "AP Mode"); - intf_mode = AP_OPMODE; - break; - default: + intf_mode = rsi_map_intf_mode(vif->type); + if (intf_mode == RSI_OPMODE_UNSUPPORTED) { rsi_dbg(ERR_ZONE, "%s: Interface type %d not supported\n", __func__, vif->type); - goto out; + mutex_unlock(&common->mutex); + return -EOPNOTSUPP; } + if ((vif->type == NL80211_IFTYPE_P2P_DEVICE) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT) || + (vif->type == NL80211_IFTYPE_P2P_GO)) + common->p2p_enabled = true; - adapter->vifs[adapter->sc_nvifs++] = vif; - ret = rsi_set_vap_capabilities(common, intf_mode, common->mac_addr, - 0, VAP_ADD); - if (ret) { + /* Get free vap index */ + for (i = 0; i < RSI_MAX_VIFS; i++) { + if (!adapter->vifs[i]) { + vap_idx = i; + break; + } + } + if (vap_idx < 0) { + rsi_dbg(ERR_ZONE, "Reject: Max VAPs reached\n"); + mutex_unlock(&common->mutex); + return -EOPNOTSUPP; + } + vif_info->vap_id = vap_idx; + adapter->vifs[vap_idx] = vif; + adapter->sc_nvifs++; + vap_status = VAP_ADD; + + if (rsi_set_vap_capabilities(common, intf_mode, vif->addr, + vif_info->vap_id, vap_status)) { rsi_dbg(ERR_ZONE, "Failed to set VAP capabilities\n"); - goto out; + mutex_unlock(&common->mutex); + return -EINVAL; } - if (vif->type == NL80211_IFTYPE_AP) { - int i; - + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { rsi_send_rx_filter_frame(common, DISALLOW_BEACONS); common->min_rate = RSI_RATE_AUTO; for (i = 0; i < common->max_stations; i++) common->stations[i].sta = NULL; } -out: mutex_unlock(&common->mutex); - return ret; + return 0; } /** @@ -432,6 +458,7 @@ static void rsi_mac80211_remove_interface(struct ieee80211_hw *hw, struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; enum opmode opmode; + int i; rsi_dbg(INFO_ZONE, "Remove Interface Called\n"); @@ -442,23 +469,22 @@ static void rsi_mac80211_remove_interface(struct ieee80211_hw *hw, return; } - switch (vif->type) { - case NL80211_IFTYPE_STATION: - opmode = STA_OPMODE; - break; - case NL80211_IFTYPE_AP: - opmode = AP_OPMODE; - break; - default: + opmode = rsi_map_intf_mode(vif->type); + if (opmode == RSI_OPMODE_UNSUPPORTED) { + rsi_dbg(ERR_ZONE, "Opmode error : %d\n", opmode); mutex_unlock(&common->mutex); return; } - rsi_set_vap_capabilities(common, opmode, vif->addr, - 0, VAP_DELETE); - adapter->sc_nvifs--; - - if (!memcmp(adapter->vifs[0], vif, sizeof(struct ieee80211_vif))) - adapter->vifs[0] = NULL; + for (i = 0; i < RSI_MAX_VIFS; i++) { + if (!adapter->vifs[i]) + continue; + if (vif == adapter->vifs[i]) { + rsi_set_vap_capabilities(common, opmode, vif->addr, + i, VAP_DELETE); + adapter->sc_nvifs--; + adapter->vifs[i] = NULL; + } + } mutex_unlock(&common->mutex); } @@ -652,7 +678,7 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw, rsi_send_rx_filter_frame(common, rx_filter_word); } rsi_inform_bss_status(common, - STA_OPMODE, + 
RSI_OPMODE_STA, bss_conf->assoc, bss_conf->bssid, bss_conf->qos, @@ -1285,8 +1311,9 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, /* Send peer notify to device */ rsi_dbg(INFO_ZONE, "Indicate bss status to device\n"); - rsi_inform_bss_status(common, AP_OPMODE, 1, sta->addr, - sta->wme, sta->aid, sta, sta_idx); + rsi_inform_bss_status(common, RSI_OPMODE_AP, 1, + sta->addr, sta->wme, sta->aid, + sta, sta_idx); if (common->key) { struct ieee80211_key_conf *key = common->key; @@ -1358,7 +1385,7 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, if (!rsta->sta) continue; if (!memcmp(rsta->sta->addr, sta->addr, ETH_ALEN)) { - rsi_inform_bss_status(common, AP_OPMODE, 0, + rsi_inform_bss_status(common, RSI_OPMODE_AP, 0, sta->addr, sta->wme, sta->aid, sta, sta_idx); rsta->sta = NULL; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index f7b550f900c4..082329d1b42b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -482,9 +482,9 @@ static int rsi_hal_send_sta_notify_frame(struct rsi_common *common, memset(skb->data, 0, frame_len); peer_notify = (struct rsi_peer_notify *)skb->data; - if (opmode == STA_OPMODE) + if (opmode == RSI_OPMODE_STA) peer_notify->command = cpu_to_le16(PEER_TYPE_AP << 1); - else if (opmode == AP_OPMODE) + else if (opmode == RSI_OPMODE_AP) peer_notify->command = cpu_to_le16(PEER_TYPE_STA << 1); switch (notify_event) { @@ -1321,7 +1321,7 @@ void rsi_inform_bss_status(struct rsi_common *common, u16 sta_id) { if (status) { - if (opmode == STA_OPMODE) + if (opmode == RSI_OPMODE_STA) common->hw_data_qs_blocked = true; rsi_hal_send_sta_notify_frame(common, opmode, @@ -1331,12 +1331,12 @@ void rsi_inform_bss_status(struct rsi_common *common, aid, sta_id); if (common->min_rate == 0xffff) rsi_send_auto_rate_request(common, sta, sta_id); - if (opmode == STA_OPMODE) { + if (opmode == RSI_OPMODE_STA) { if (!rsi_send_block_unblock_frame(common, false)) common->hw_data_qs_blocked = false; } } else { - if (opmode == STA_OPMODE) + if (opmode == RSI_OPMODE_STA) common->hw_data_qs_blocked = true; rsi_hal_send_sta_notify_frame(common, opmode, @@ -1344,7 +1344,7 @@ void rsi_inform_bss_status(struct rsi_common *common, addr, qos_enable, aid, sta_id); - if (opmode == STA_OPMODE) + if (opmode == RSI_OPMODE_STA) rsi_send_block_unblock_frame(common, true); } } diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index 2c18dde633ea..a4837fab1d50 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -60,7 +60,7 @@ enum RSI_FSM_STATES { extern u32 rsi_zone_enabled; extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...); -#define RSI_MAX_VIFS 1 +#define RSI_MAX_VIFS 3 #define NUM_EDCA_QUEUES 4 #define IEEE80211_ADDR_LEN 6 #define FRAME_DESC_SZ 16 @@ -157,6 +157,7 @@ struct vif_priv { bool is_ht; bool sgi; u16 seq_start; + int vap_id; }; struct rsi_event { @@ -270,6 +271,9 @@ struct rsi_common { int num_stations; int max_stations; struct ieee80211_key_conf *key; + + /* Wi-Fi direct mode related */ + bool p2p_enabled; }; enum host_intf { diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index c6e1fa669a27..8bd7dfacf259 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -200,8 +200,11 @@ #define RSI_DATA_DESC_INSERT_SEQ_NO BIT(2) enum opmode { - AP_OPMODE = 0, - STA_OPMODE, + RSI_OPMODE_UNSUPPORTED = -1, + RSI_OPMODE_AP = 0, + 
RSI_OPMODE_STA, + RSI_OPMODE_P2P_GO, + RSI_OPMODE_P2P_CLIENT }; enum vap_status { @@ -363,9 +366,9 @@ struct rsi_vap_caps { u8 vif_type; u8 channel_bw; __le16 antenna_info; + __le16 token; u8 radioid_macid; u8 vap_id; - __le16 reserved3; u8 mac_addr[6]; __le16 keep_alive_period; u8 bssid[6]; From df771911914ab9f80dd38a2710e50c5a418200ba Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:23 +0530 Subject: [PATCH 0083/2177] rsi: add support for p2p listen Remain-on-channel and cancel-remain-on-channel are implemented to support p2p listen phase. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_core.c | 15 ++ drivers/net/wireless/rsi/rsi_91x_mac80211.c | 188 +++++++++++++++++--- drivers/net/wireless/rsi/rsi_91x_main.c | 9 +- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 4 +- drivers/net/wireless/rsi/rsi_common.h | 4 +- drivers/net/wireless/rsi/rsi_main.h | 2 + drivers/net/wireless/rsi/rsi_mgmt.h | 2 +- 7 files changed, 199 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index 2b0516d2f63d..d2121965265b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -334,6 +334,21 @@ struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr) return NULL; } +struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac) +{ + struct ieee80211_vif *vif; + int i; + + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + if (!memcmp(vif->addr, mac, ETH_ALEN)) + return vif; + } + return NULL; +} + /** * rsi_core_xmit() - This function transmits the packets received from mac80211 * @common: Pointer to the driver private structure. 
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index db69d87bd12e..6780c1cd8c62 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -503,35 +503,44 @@ static int rsi_channel_change(struct ieee80211_hw *hw) int status = -EOPNOTSUPP; struct ieee80211_channel *curchan = hw->conf.chandef.chan; u16 channel = curchan->hw_value; - struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf; + struct ieee80211_vif *vif; + struct ieee80211_bss_conf *bss; + bool assoc = false; + int i; rsi_dbg(INFO_ZONE, "%s: Set channel: %d MHz type: %d channel_no %d\n", __func__, curchan->center_freq, curchan->flags, channel); - if (bss->assoc) { + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + if (vif->type == NL80211_IFTYPE_STATION) { + bss = &vif->bss_conf; + if (bss->assoc) { + assoc = true; + break; + } + } + } + if (assoc) { if (!common->hw_data_qs_blocked && - (rsi_get_connected_channel(adapter) != channel)) { + (rsi_get_connected_channel(vif) != channel)) { rsi_dbg(INFO_ZONE, "blk data q %d\n", channel); if (!rsi_send_block_unblock_frame(common, true)) common->hw_data_qs_blocked = true; } } - status = rsi_band_check(common); + status = rsi_band_check(common, curchan); if (!status) status = rsi_set_channel(adapter->priv, curchan); - if (bss->assoc) { + if (assoc) { if (common->hw_data_qs_blocked && - (rsi_get_connected_channel(adapter) == channel)) { - rsi_dbg(INFO_ZONE, "unblk data q %d\n", channel); - if (!rsi_send_block_unblock_frame(common, false)) - common->hw_data_qs_blocked = false; - } - } else { - if (common->hw_data_qs_blocked) { + (rsi_get_connected_channel(vif) == channel)) { rsi_dbg(INFO_ZONE, "unblk data q %d\n", channel); if (!rsi_send_block_unblock_frame(common, false)) common->hw_data_qs_blocked = false; @@ -632,16 +641,42 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw, * * Return: Current connected AP's channel number is returned. 
*/ -u16 rsi_get_connected_channel(struct rsi_hw *adapter) +u16 rsi_get_connected_channel(struct ieee80211_vif *vif) { - struct ieee80211_vif *vif = adapter->vifs[0]; - if (vif) { - struct ieee80211_bss_conf *bss = &vif->bss_conf; - struct ieee80211_channel *channel = bss->chandef.chan; - return channel->hw_value; - } + struct ieee80211_bss_conf *bss; + struct ieee80211_channel *channel; - return 0; + if (!vif) + return 0; + + bss = &vif->bss_conf; + channel = bss->chandef.chan; + + if (!channel) + return 0; + + return channel->hw_value; +} + +static void rsi_switch_channel(struct rsi_hw *adapter, + struct ieee80211_vif *vif) +{ + struct rsi_common *common = adapter->priv; + struct ieee80211_channel *channel; + + if (common->iface_down) + return; + if (!vif) + return; + + channel = vif->bss_conf.chandef.chan; + + if (!channel) + return; + + rsi_band_check(common, channel); + rsi_set_channel(common, channel); + rsi_dbg(INFO_ZONE, "Switched to channel - %d\n", channel->hw_value); } /** @@ -1561,6 +1596,114 @@ static void rsi_mac80211_rfkill_poll(struct ieee80211_hw *hw) mutex_unlock(&common->mutex); } +static void rsi_resume_conn_channel(struct rsi_common *common) +{ + struct rsi_hw *adapter = common->priv; + struct ieee80211_vif *vif; + int cnt; + + for (cnt = 0; cnt < RSI_MAX_VIFS; cnt++) { + vif = adapter->vifs[cnt]; + if (!vif) + continue; + + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { + rsi_switch_channel(adapter, vif); + break; + } + if (((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) && + vif->bss_conf.assoc) { + rsi_switch_channel(adapter, vif); + break; + } + } +} + +void rsi_roc_timeout(unsigned long data) +{ + struct rsi_common *common = (struct rsi_common *)data; + + rsi_dbg(INFO_ZONE, "Remain on channel expired\n"); + + mutex_lock(&common->mutex); + ieee80211_remain_on_channel_expired(common->priv->hw); + + if (timer_pending(&common->roc_timer)) + del_timer(&common->roc_timer); + + rsi_resume_conn_channel(common); + mutex_unlock(&common->mutex); +} + +static int rsi_mac80211_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_channel *chan, int duration, + enum ieee80211_roc_type type) +{ + struct rsi_hw *adapter = (struct rsi_hw *)hw->priv; + struct rsi_common *common = (struct rsi_common *)adapter->priv; + int status = 0; + + rsi_dbg(INFO_ZONE, "***** Remain on channel *****\n"); + + mutex_lock(&common->mutex); + rsi_dbg(INFO_ZONE, "%s: channel: %d duration: %dms\n", + __func__, chan->hw_value, duration); + + if (timer_pending(&common->roc_timer)) { + rsi_dbg(INFO_ZONE, "Stop on-going ROC\n"); + del_timer(&common->roc_timer); + } + common->roc_timer.expires = msecs_to_jiffies(duration) + jiffies; + add_timer(&common->roc_timer); + + /* Configure band */ + if (rsi_band_check(common, chan)) { + rsi_dbg(ERR_ZONE, "Failed to set band\n"); + status = -EINVAL; + goto out; + } + + /* Configure channel */ + if (rsi_set_channel(common, chan)) { + rsi_dbg(ERR_ZONE, "Failed to set the channel\n"); + status = -EINVAL; + goto out; + } + + common->roc_vif = vif; + ieee80211_ready_on_channel(hw); + rsi_dbg(INFO_ZONE, "%s: Ready on channel :%d\n", + __func__, chan->hw_value); + +out: + mutex_unlock(&common->mutex); + + return status; +} + +static int rsi_mac80211_cancel_roc(struct ieee80211_hw *hw) +{ + struct rsi_hw *adapter = hw->priv; + struct rsi_common *common = adapter->priv; + + rsi_dbg(INFO_ZONE, "Cancel remain on channel\n"); + + mutex_lock(&common->mutex); + if 
(!timer_pending(&common->roc_timer)) { + mutex_unlock(&common->mutex); + return 0; + } + + del_timer(&common->roc_timer); + + rsi_resume_conn_channel(common); + mutex_unlock(&common->mutex); + + return 0; +} + static const struct ieee80211_ops mac80211_ops = { .tx = rsi_mac80211_tx, .start = rsi_mac80211_start, @@ -1580,6 +1723,8 @@ static const struct ieee80211_ops mac80211_ops = { .set_antenna = rsi_mac80211_set_antenna, .get_antenna = rsi_mac80211_get_antenna, .rfkill_poll = rsi_mac80211_rfkill_poll, + .remain_on_channel = rsi_mac80211_roc, + .cancel_remain_on_channel = rsi_mac80211_cancel_roc, }; /** @@ -1666,6 +1811,9 @@ int rsi_mac80211_attach(struct rsi_common *common) wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); /* Wi-Fi direct parameters */ + wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; + wiphy->flags |= WIPHY_FLAG_OFFCHAN_TX; + wiphy->max_remain_on_channel_duration = 10000; hw->max_listen_interval = 10; wiphy->iface_combinations = rsi_iface_combinations; wiphy->n_iface_combinations = ARRAY_SIZE(rsi_iface_combinations); diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index 3e1e80888d98..b57bfdcf3549 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -74,6 +74,8 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common, struct skb_info *rx_params; struct sk_buff *skb = NULL; u8 payload_offset; + struct ieee80211_vif *vif; + struct ieee80211_hdr *wh; if (WARN(!pkt_len, "%s: Dummy pkt received", __func__)) return NULL; @@ -92,11 +94,13 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common, payload_offset = (extended_desc + FRAME_DESC_SZ); skb_put(skb, pkt_len); memcpy((skb->data), (buffer + payload_offset), skb->len); + wh = (struct ieee80211_hdr *)skb->data; + vif = rsi_get_vif(common->priv, wh->addr1); info = IEEE80211_SKB_CB(skb); rx_params = (struct skb_info *)info->driver_data; rx_params->rssi = rsi_get_rssi(buffer); - rx_params->channel = rsi_get_connected_channel(common->priv); + rx_params->channel = rsi_get_connected_channel(vif); return skb; } @@ -233,6 +237,9 @@ struct rsi_hw *rsi_91x_init(void) rsi_default_ps_params(adapter); spin_lock_init(&adapter->ps_lock); + common->roc_timer.data = (unsigned long)common; + common->roc_timer.function = (void *)&rsi_roc_timeout; + init_timer(&common->roc_timer); common->init_done = true; return adapter; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 082329d1b42b..aaf5f32aca54 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -926,13 +926,13 @@ static int rsi_send_reset_mac(struct rsi_common *common) * * Return: 0 on success, corresponding error code on failure. 
*/ -int rsi_band_check(struct rsi_common *common) +int rsi_band_check(struct rsi_common *common, + struct ieee80211_channel *curchan) { struct rsi_hw *adapter = common->priv; struct ieee80211_hw *hw = adapter->hw; u8 prev_bw = common->channel_width; u8 prev_ep = common->endpoint; - struct ieee80211_channel *curchan = hw->conf.chandef.chan; int status = 0; if (common->band != curchan->band) { diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index e579d694d13c..272e18d750ff 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -79,9 +79,11 @@ static inline int rsi_kill_thread(struct rsi_thread *handle) } void rsi_mac80211_detach(struct rsi_hw *hw); -u16 rsi_get_connected_channel(struct rsi_hw *adapter); +u16 rsi_get_connected_channel(struct ieee80211_vif *vif); struct rsi_hw *rsi_91x_init(void); void rsi_91x_deinit(struct rsi_hw *adapter); int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len); struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr); +struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac); +void rsi_roc_timeout(unsigned long data); #endif diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index a4837fab1d50..fd07b377d8c0 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -274,6 +274,8 @@ struct rsi_common { /* Wi-Fi direct mode related */ bool p2p_enabled; + struct timer_list roc_timer; + struct ieee80211_vif *roc_vif; }; enum host_intf { diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index 8bd7dfacf259..08e3b43b6a10 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -633,7 +633,7 @@ void rsi_core_qos_processor(struct rsi_common *common); void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb); int rsi_send_mgmt_pkt(struct rsi_common *common, struct sk_buff *skb); int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb); -int rsi_band_check(struct rsi_common *common); +int rsi_band_check(struct rsi_common *common, struct ieee80211_channel *chan); int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word); int rsi_send_radio_params_update(struct rsi_common *common); int rsi_set_antenna(struct rsi_common *common, u8 antenna); From 4671c209ac461c8826c1241ba423e75f84ae486b Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:24 +0530 Subject: [PATCH 0084/2177] rsi: handle peer connection and disconnection in p2p mode Parameter 'vif' is passed to inform_bss_status function to check the type of vif and to get vap_id. 
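The hunks that follow thread the mac80211 vif through the TX and peer-notify paths so that firmware descriptors carry the right VAP id. Two small idioms recur: per-vif driver state lives in vif->drv_priv (the area mac80211 reserves for the driver), and the VAP id is packed into the top two bits of the 16-bit rate_info word. A self-contained sketch of both, using illustrative EX_* names that mirror the RSI_DESC_VAP_ID_* definitions added below:

#include <linux/types.h>
#include <net/mac80211.h>

/* Per-vif driver state kept in vif->drv_priv (illustrative layout). */
struct example_vif_priv {
        int vap_id;
};

/* Same bit layout the patch adds as RSI_DESC_VAP_ID_MASK/OFST. */
#define EX_DESC_VAP_ID_OFST     14
#define EX_DESC_VAP_ID_MASK     0xC000

static __le16 example_pack_rate_info(struct ieee80211_vif *vif, u16 rate)
{
        struct example_vif_priv *vp = (struct example_vif_priv *)vif->drv_priv;
        u16 word = rate;

        /* The VAP id occupies the top two bits of the descriptor word. */
        word |= (vp->vap_id << EX_DESC_VAP_ID_OFST) & EX_DESC_VAP_ID_MASK;
        return cpu_to_le16(word);
}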
Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_core.c | 9 ++++- drivers/net/wireless/rsi/rsi_91x_hal.c | 45 ++++++++++++--------- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 22 ++++++---- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 15 ++++--- drivers/net/wireless/rsi/rsi_hal.h | 3 +- drivers/net/wireless/rsi/rsi_main.h | 2 + drivers/net/wireless/rsi/rsi_mgmt.h | 5 ++- 7 files changed, 63 insertions(+), 38 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index d2121965265b..432d6ebd14a3 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -361,8 +361,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) struct rsi_hw *adapter = common->priv; struct ieee80211_tx_info *info; struct skb_info *tx_params; - struct ieee80211_hdr *wh; - struct ieee80211_vif *vif = adapter->vifs[0]; + struct ieee80211_hdr *wh = NULL; + struct ieee80211_vif *vif; u8 q_num, tid = 0; struct rsi_sta *rsta = NULL; @@ -381,6 +381,11 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) wh = (struct ieee80211_hdr *)&skb->data[0]; tx_params->sta_id = 0; + vif = rsi_get_vif(adapter, wh->addr2); + if (!vif) + goto xmit_fail; + tx_params->vif = vif; + tx_params->vap_id = ((struct vif_priv *)vif->drv_priv)->vap_id; if ((ieee80211_is_mgmt(wh->frame_control)) || (ieee80211_is_ctl(wh->frame_control)) || (ieee80211_is_qos_nullfunc(wh->frame_control))) { diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 070dfd68bb83..d0b119e3c6df 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -42,7 +42,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) struct ieee80211_hdr *wh = NULL; struct ieee80211_tx_info *info; struct ieee80211_conf *conf = &adapter->hw->conf; - struct ieee80211_vif *vif = adapter->vifs[0]; + struct ieee80211_vif *vif; struct rsi_mgmt_desc *mgmt_desc; struct skb_info *tx_params; struct ieee80211_bss_conf *bss = NULL; @@ -57,6 +57,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); tx_params = (struct skb_info *)info->driver_data; + vif = tx_params->vif; /* Update header size */ header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc); @@ -78,7 +79,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) tx_params->internal_hdr_size = header_size; memset(&skb->data[0], 0, header_size); - bss = &info->control.vif->bss_conf; + bss = &vif->bss_conf; wh = (struct ieee80211_hdr *)&skb->data[header_size]; mgmt_desc = (struct rsi_mgmt_desc *)skb->data; @@ -95,10 +96,10 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) mgmt_desc->seq_ctrl = cpu_to_le16(IEEE80211_SEQ_TO_SN(le16_to_cpu(wh->seq_ctrl))); - if (common->band == NL80211_BAND_2GHZ) - mgmt_desc->rate_info = RSI_RATE_1; + if ((common->band == NL80211_BAND_2GHZ) && !common->p2p_enabled) + mgmt_desc->rate_info = cpu_to_le16(RSI_RATE_1); else - mgmt_desc->rate_info = RSI_RATE_6; + mgmt_desc->rate_info = cpu_to_le16(RSI_RATE_6); if (conf_is_ht40(conf)) mgmt_desc->bbp_info = cpu_to_le16(FULL40M_ENABLE); @@ -121,7 +122,8 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) xtend_desc->retry_cnt = PROBE_RESP_RETRY_CNT; } - if ((vif->type == NL80211_IFTYPE_AP) && + if 
(((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) && (ieee80211_is_action(wh->frame_control))) { struct rsi_sta *rsta = rsi_find_sta(common, wh->addr1); @@ -130,6 +132,10 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) else return -EINVAL; } + mgmt_desc->rate_info |= + cpu_to_le16((tx_params->vap_id << RSI_DESC_VAP_ID_OFST) & + RSI_DESC_VAP_ID_MASK); + return 0; } @@ -306,21 +312,11 @@ int rsi_send_mgmt_pkt(struct rsi_common *common, struct ieee80211_tx_info *info; struct skb_info *tx_params; int status = -E2BIG; - u8 extnd_size; info = IEEE80211_SKB_CB(skb); tx_params = (struct skb_info *)info->driver_data; - extnd_size = ((uintptr_t)skb->data & 0x3); if (tx_params->flags & INTERNAL_MGMT_PKT) { - skb->data[1] |= BIT(7); /* Immediate Wakeup bit*/ - if ((extnd_size) > skb_headroom(skb)) { - rsi_dbg(ERR_ZONE, "%s: Unable to send pkt\n", __func__); - dev_kfree_skb(skb); - return -ENOSPC; - } - skb_push(skb, extnd_size); - skb->data[extnd_size + 4] = extnd_size; status = adapter->host_intf_ops->write_pkt(common->priv, (u8 *)skb->data, skb->len); @@ -352,12 +348,23 @@ int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb) struct rsi_data_desc *bcn_frm; struct ieee80211_hw *hw = common->priv->hw; struct ieee80211_conf *conf = &hw->conf; + struct ieee80211_vif *vif; struct sk_buff *mac_bcn; - u8 vap_id = 0; - u16 tim_offset; + u8 vap_id = 0, i; + u16 tim_offset = 0; + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) + break; + } + if (!vif) + return -EINVAL; mac_bcn = ieee80211_beacon_get_tim(adapter->hw, - adapter->vifs[adapter->sc_nvifs - 1], + vif, &tim_offset, NULL); if (!mac_bcn) { rsi_dbg(ERR_ZONE, "Failed to get beacon from mac80211\n"); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 6780c1cd8c62..ce2f911eace1 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -718,7 +718,7 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw, bss_conf->bssid, bss_conf->qos, bss_conf->aid, - NULL, 0); + NULL, 0, vif); adapter->ps_info.dtim_interval_duration = bss->dtim_period; adapter->ps_info.listen_interval = conf->listen_interval; @@ -862,7 +862,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw, rsi_dbg(ERR_ZONE, "%s: Cipher 0x%x key_type: %d key_len: %d\n", __func__, key->cipher, key_type, key->keylen); - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { if (sta) { rsta = rsi_find_sta(adapter->priv, sta->addr); if (rsta) @@ -1297,7 +1298,8 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, mutex_lock(&common->mutex); - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { u8 cnt; int sta_idx = -1; int free_index = -1; @@ -1348,7 +1350,7 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, rsi_dbg(INFO_ZONE, "Indicate bss status to device\n"); rsi_inform_bss_status(common, RSI_OPMODE_AP, 1, sta->addr, sta->wme, sta->aid, - sta, sta_idx); + sta, sta_idx, vif); if (common->key) { struct ieee80211_key_conf *key = common->key; @@ -1368,7 +1370,8 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, } } - if (vif->type == NL80211_IFTYPE_STATION) { + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == 
NL80211_IFTYPE_P2P_CLIENT)) { rsi_set_min_rate(hw, sta, common); if (sta->ht_cap.ht_supported) { common->vif_info[0].is_ht = true; @@ -1409,7 +1412,8 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, mutex_lock(&common->mutex); - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { u8 sta_idx, cnt; /* Send peer notify to device */ @@ -1422,7 +1426,8 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, if (!memcmp(rsta->sta->addr, sta->addr, ETH_ALEN)) { rsi_inform_bss_status(common, RSI_OPMODE_AP, 0, sta->addr, sta->wme, - sta->aid, sta, sta_idx); + sta->aid, sta, sta_idx, + vif); rsta->sta = NULL; rsta->sta_id = -1; for (cnt = 0; cnt < IEEE80211_NUM_TIDS; cnt++) @@ -1436,7 +1441,8 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, rsi_dbg(ERR_ZONE, "%s: No station found\n", __func__); } - if (vif->type == NL80211_IFTYPE_STATION) { + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) { /* Resetting all the fields to default values */ memcpy((u8 *)bss->bssid, (u8 *)sta->addr, ETH_ALEN); bss->qos = sta->wme; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index aaf5f32aca54..428369bf02f0 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -460,12 +460,12 @@ static int rsi_hal_send_sta_notify_frame(struct rsi_common *common, const unsigned char *bssid, u8 qos_enable, u16 aid, - u16 sta_id) + u16 sta_id, + struct ieee80211_vif *vif) { - struct ieee80211_vif *vif = common->priv->vifs[0]; struct sk_buff *skb = NULL; struct rsi_peer_notify *peer_notify; - u16 vap_id = 0; + u16 vap_id = ((struct vif_priv *)vif->drv_priv)->vap_id; int status; u16 frame_len = sizeof(struct rsi_peer_notify); @@ -1318,7 +1318,8 @@ void rsi_inform_bss_status(struct rsi_common *common, u8 qos_enable, u16 aid, struct ieee80211_sta *sta, - u16 sta_id) + u16 sta_id, + struct ieee80211_vif *vif) { if (status) { if (opmode == RSI_OPMODE_STA) @@ -1328,7 +1329,8 @@ void rsi_inform_bss_status(struct rsi_common *common, STA_CONNECTED, addr, qos_enable, - aid, sta_id); + aid, sta_id, + vif); if (common->min_rate == 0xffff) rsi_send_auto_rate_request(common, sta, sta_id); if (opmode == RSI_OPMODE_STA) { @@ -1343,7 +1345,8 @@ void rsi_inform_bss_status(struct rsi_common *common, STA_DISCONNECTED, addr, qos_enable, - aid, sta_id); + aid, sta_id, + vif); if (opmode == RSI_OPMODE_STA) rsi_send_block_unblock_frame(common, true); } diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h index 7c145053da6d..ad0d6537a678 100644 --- a/drivers/net/wireless/rsi/rsi_hal.h +++ b/drivers/net/wireless/rsi/rsi_hal.h @@ -121,8 +121,7 @@ struct rsi_mgmt_desc { u8 xtend_desc_size; u8 header_len; __le16 frame_info; - u8 rate_info; - u8 reserved1; + __le16 rate_info; __le16 bbp_info; __le16 seq_ctrl; u8 reserved2; diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index fd07b377d8c0..34089ab111aa 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -124,6 +124,8 @@ struct skb_info { s8 tid; s8 sta_id; u8 internal_hdr_size; + struct ieee80211_vif *vif; + u8 vap_id; }; enum edca_queue { diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index 08e3b43b6a10..a82552cfb9d7 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -189,6 +189,8 @@ 
IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \ IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) +#define RSI_DESC_VAP_ID_MASK 0xC000 +#define RSI_DESC_VAP_ID_OFST 14 #define RSI_DATA_DESC_MAC_BBP_INFO BIT(0) #define RSI_DATA_DESC_NO_ACK_IND BIT(9) #define RSI_DATA_DESC_QOS_EN BIT(12) @@ -623,7 +625,8 @@ int rsi_send_vap_dynamic_update(struct rsi_common *common); int rsi_send_block_unblock_frame(struct rsi_common *common, bool event); void rsi_inform_bss_status(struct rsi_common *common, enum opmode opmode, u8 status, const u8 *addr, u8 qos_enable, u16 aid, - struct ieee80211_sta *sta, u16 sta_id); + struct ieee80211_sta *sta, u16 sta_id, + struct ieee80211_vif *vif); void rsi_indicate_pkt_to_os(struct rsi_common *common, struct sk_buff *skb); int rsi_mac80211_attach(struct rsi_common *common); void rsi_indicate_tx_status(struct rsi_hw *common, struct sk_buff *skb, From eac4eed3224b1bc769489ae2e146105cbba3a8ad Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:25 +0530 Subject: [PATCH 0085/2177] rsi: tx and rx path enhancements for p2p mode Data descriptor is updated to include vap_id. TX command frame key config also updated to include vap_id. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_core.c | 9 +++++-- drivers/net/wireless/rsi/rsi_91x_hal.c | 26 ++++++++++++++------- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 23 ++++++++++++++---- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 4 ++-- drivers/net/wireless/rsi/rsi_mgmt.h | 3 ++- 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index 432d6ebd14a3..bc18a191aef9 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -95,6 +95,8 @@ static u32 rsi_get_num_pkts_dequeue(struct rsi_common *common, u8 q_num) s16 txop = common->tx_qinfo[q_num].txop * 32; __le16 r_txop; struct ieee80211_rate rate; + struct ieee80211_hdr *wh; + struct ieee80211_vif *vif; rate.bitrate = RSI_RATE_MCS0 * 5 * 10; /* Convert to Kbps */ if (q_num == VI_Q) @@ -106,8 +108,10 @@ static u32 rsi_get_num_pkts_dequeue(struct rsi_common *common, u8 q_num) return 0; do { + wh = (struct ieee80211_hdr *)skb->data; + vif = rsi_get_vif(adapter, wh->addr2); r_txop = ieee80211_generic_frame_duration(adapter->hw, - adapter->vifs[0], + vif, common->band, skb->len, &rate); txop -= le16_to_cpu(r_txop); @@ -403,7 +407,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) q_num = skb->priority; tx_params->tid = tid; - if ((vif->type == NL80211_IFTYPE_AP) && + if (((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) && (!is_broadcast_ether_addr(wh->addr1)) && (!is_multicast_ether_addr(wh->addr1))) { rsta = rsi_find_sta(common, wh->addr1); diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index d0b119e3c6df..7e8e5d4f5f3d 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -157,7 +157,8 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) u16 seq_num; info = IEEE80211_SKB_CB(skb); - bss = &info->control.vif->bss_conf; + vif = info->control.vif; + bss = &vif->bss_conf; tx_params = (struct skb_info *)info->driver_data; header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc); @@ -181,7 +182,6 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) xtend_desc = (struct 
xtended_desc *)&skb->data[FRAME_DESC_SZ]; wh = (struct ieee80211_hdr *)&skb->data[header_size]; seq_num = IEEE80211_SEQ_TO_SN(le16_to_cpu(wh->seq_ctrl)); - vif = adapter->vifs[0]; data_desc->xtend_desc_size = header_size - FRAME_DESC_SZ; @@ -190,7 +190,8 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) data_desc->mac_flags |= cpu_to_le16(RSI_QOS_ENABLE); } - if ((vif->type == NL80211_IFTYPE_STATION) && + if (((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) && (adapter->ps_state == PS_ENABLED)) wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE); @@ -246,17 +247,23 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) data_desc->frame_info |= cpu_to_le16(RSI_BROADCAST_PKT); data_desc->sta_id = vap_id; - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { if (common->band == NL80211_BAND_5GHZ) data_desc->rate_info = cpu_to_le16(RSI_RATE_6); else data_desc->rate_info = cpu_to_le16(RSI_RATE_1); } } - if ((vif->type == NL80211_IFTYPE_AP) && + if (((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) && (ieee80211_has_moredata(wh->frame_control))) data_desc->frame_info |= cpu_to_le16(MORE_DATA_PRESENT); + data_desc->rate_info |= + cpu_to_le16((tx_params->vap_id << RSI_DESC_VAP_ID_OFST) & + RSI_DESC_VAP_ID_MASK); + return 0; } @@ -264,7 +271,7 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb) { struct rsi_hw *adapter = common->priv; - struct ieee80211_vif *vif = adapter->vifs[0]; + struct ieee80211_vif *vif; struct ieee80211_tx_info *info; struct ieee80211_bss_conf *bss; int status = -EINVAL; @@ -277,9 +284,12 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); if (!info->control.vif) goto err; - bss = &info->control.vif->bss_conf; + vif = info->control.vif; + bss = &vif->bss_conf; - if ((vif->type == NL80211_IFTYPE_STATION) && (!bss->assoc)) + if (((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) && + (!bss->assoc)) goto err; status = rsi_prepare_data_desc(common, skb); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index ce2f911eace1..b1550bc243dd 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -879,7 +879,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw, RSI_PAIRWISE_KEY, key->keyidx, key->cipher, - sta_id); + sta_id, + vif); if (status) return status; } @@ -891,7 +892,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw, key_type, key->keyidx, key->cipher, - sta_id); + sta_id, + vif); } /** @@ -1165,7 +1167,9 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw, struct rsi_common *common, struct ieee80211_rx_status *rxs) { - struct ieee80211_bss_conf *bss = &common->priv->vifs[0]->bss_conf; + struct rsi_hw *adapter = common->priv; + struct ieee80211_vif *vif; + struct ieee80211_bss_conf *bss = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct skb_info *rx_params = (struct skb_info *)info->driver_data; struct ieee80211_hdr *hdr; @@ -1173,6 +1177,7 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw, u8 hdrlen = 0; u8 channel = rx_params->channel; s32 freq; + int i; hdr = ((struct ieee80211_hdr *)(skb->data)); hdrlen = 
ieee80211_hdrlen(hdr->frame_control); @@ -1201,6 +1206,15 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw, rxs->flag |= RX_FLAG_IV_STRIPPED; } + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + if (vif->type == NL80211_IFTYPE_STATION) + bss = &vif->bss_conf; + } + if (!bss) + return; /* CQM only for connected AP beacons, the RSSI is a weighted avg */ if (bss->assoc && !(memcmp(bss->bssid, hdr->addr2, ETH_ALEN))) { if (ieee80211_is_beacon(hdr->frame_control)) @@ -1363,7 +1377,8 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, RSI_PAIRWISE_KEY, key->keyidx, key->cipher, - sta_idx); + sta_idx, + vif); } common->num_stations++; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 428369bf02f0..4e50cfdc2f47 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -716,9 +716,9 @@ int rsi_hal_load_key(struct rsi_common *common, u8 key_type, u8 key_id, u32 cipher, - s16 sta_id) + s16 sta_id, + struct ieee80211_vif *vif) { - struct ieee80211_vif *vif = common->priv->vifs[0]; struct sk_buff *skb = NULL; struct rsi_set_key *set_key; u16 key_descriptor = 0; diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index a82552cfb9d7..331d64b05649 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -618,7 +618,8 @@ int rsi_send_aggregation_params_frame(struct rsi_common *common, u16 tid, u16 ssn, u8 buf_size, u8 event, u8 sta_id); int rsi_hal_load_key(struct rsi_common *common, u8 *data, u16 key_len, - u8 key_type, u8 key_id, u32 cipher, s16 sta_id); + u8 key_type, u8 key_id, u32 cipher, s16 sta_id, + struct ieee80211_vif *vif); int rsi_set_channel(struct rsi_common *common, struct ieee80211_channel *channel); int rsi_send_vap_dynamic_update(struct rsi_common *common); From efe877aa0f40dc1f2be450aba5baf8d90e7d9707 Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:26 +0530 Subject: [PATCH 0086/2177] rsi: disallow power save config when AP vap running When an AP or P2P GO VAP is running, power save configuration should be disallowed. The 'vif' parameter is passed so that the interface type can be checked during power save configuration.
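As an illustration of the gating logic this patch introduces (a hypothetical helper; the patch itself open-codes the same loop inside rsi_mac80211_config(), as the diff below shows), power save is only configured when no AP-style interface is active:

static bool rsi_ap_or_p2p_go_present(struct rsi_hw *adapter)
{
	int i;

	for (i = 0; i < RSI_MAX_VIFS; i++) {
		struct ieee80211_vif *vif = adapter->vifs[i];

		if (!vif)
			continue;
		/* AP and P2P GO interfaces must keep the radio awake */
		if (vif->type == NL80211_IFTYPE_AP ||
		    vif->type == NL80211_IFTYPE_P2P_GO)
			return true;
	}
	return false;
}

Factoring the test out like this would avoid repeating it, though the patch keeps the check inline in the IEEE80211_CONF_CHANGE_PS handler.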
Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 33 ++++++++++++++------- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 5 ++-- drivers/net/wireless/rsi/rsi_91x_ps.c | 15 +++++----- drivers/net/wireless/rsi/rsi_mgmt.h | 2 ++ drivers/net/wireless/rsi/rsi_ps.h | 7 ++--- 5 files changed, 39 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index b1550bc243dd..09ad909e27ed 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -592,7 +592,6 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw, { struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; - struct ieee80211_vif *vif = adapter->vifs[0]; struct ieee80211_conf *conf = &hw->conf; int status = -EOPNOTSUPP; @@ -608,16 +607,30 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw, } /* Power save parameters */ - if ((changed & IEEE80211_CONF_CHANGE_PS) && - (vif->type == NL80211_IFTYPE_STATION)) { + if (changed & IEEE80211_CONF_CHANGE_PS) { + struct ieee80211_vif *vif; unsigned long flags; + int i, set_ps = 1; - spin_lock_irqsave(&adapter->ps_lock, flags); - if (conf->flags & IEEE80211_CONF_PS) - rsi_enable_ps(adapter); - else - rsi_disable_ps(adapter); - spin_unlock_irqrestore(&adapter->ps_lock, flags); + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + /* Don't go to power save if AP vap exists */ + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { + set_ps = 0; + break; + } + } + if (set_ps) { + spin_lock_irqsave(&adapter->ps_lock, flags); + if (conf->flags & IEEE80211_CONF_PS) + rsi_enable_ps(adapter, vif); + else + rsi_disable_ps(adapter, vif); + spin_unlock_irqrestore(&adapter->ps_lock, flags); + } } /* RTS threshold */ @@ -726,7 +739,7 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw, if (bss->assoc) { if (common->uapsd_bitmap) { rsi_dbg(INFO_ZONE, "Configuring UAPSD\n"); - rsi_conf_uapsd(adapter); + rsi_conf_uapsd(adapter, vif); } } else { common->uapsd_bitmap = 0; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 4e50cfdc2f47..383cd432b237 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1474,10 +1474,11 @@ int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word) return rsi_send_internal_mgmt_frame(common, skb); } -int rsi_send_ps_request(struct rsi_hw *adapter, bool enable) +int rsi_send_ps_request(struct rsi_hw *adapter, bool enable, + struct ieee80211_vif *vif) { struct rsi_common *common = adapter->priv; - struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf; + struct ieee80211_bss_conf *bss = &vif->bss_conf; struct rsi_request_ps *ps; struct rsi_ps_info *ps_info; struct sk_buff *skb; diff --git a/drivers/net/wireless/rsi/rsi_91x_ps.c b/drivers/net/wireless/rsi/rsi_91x_ps.c index 48c79f035c59..523f5329d2b7 100644 --- a/drivers/net/wireless/rsi/rsi_91x_ps.c +++ b/drivers/net/wireless/rsi/rsi_91x_ps.c @@ -67,7 +67,7 @@ void rsi_default_ps_params(struct rsi_hw *adapter) ps_info->deep_sleep_wakeup_period = RSI_DEF_DS_WAKEUP_PERIOD; } -void rsi_enable_ps(struct rsi_hw *adapter) +void rsi_enable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif) { if (adapter->ps_state != PS_NONE) { rsi_dbg(ERR_ZONE, @@ -76,7 +76,7 @@ void rsi_enable_ps(struct rsi_hw *adapter) return; } - if 
(rsi_send_ps_request(adapter, true)) { + if (rsi_send_ps_request(adapter, true, vif)) { rsi_dbg(ERR_ZONE, "%s: Failed to send PS request to device\n", __func__); @@ -86,7 +86,8 @@ void rsi_enable_ps(struct rsi_hw *adapter) rsi_modify_ps_state(adapter, PS_ENABLE_REQ_SENT); } -void rsi_disable_ps(struct rsi_hw *adapter) +/* This function is used to disable power save */ +void rsi_disable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif) { if (adapter->ps_state != PS_ENABLED) { rsi_dbg(ERR_ZONE, @@ -95,7 +96,7 @@ void rsi_disable_ps(struct rsi_hw *adapter) return; } - if (rsi_send_ps_request(adapter, false)) { + if (rsi_send_ps_request(adapter, false, vif)) { rsi_dbg(ERR_ZONE, "%s: Failed to send PS request to device\n", __func__); @@ -105,16 +106,16 @@ void rsi_disable_ps(struct rsi_hw *adapter) rsi_modify_ps_state(adapter, PS_DISABLE_REQ_SENT); } -void rsi_conf_uapsd(struct rsi_hw *adapter) +void rsi_conf_uapsd(struct rsi_hw *adapter, struct ieee80211_vif *vif) { int ret; if (adapter->ps_state != PS_ENABLED) return; - ret = rsi_send_ps_request(adapter, false); + ret = rsi_send_ps_request(adapter, false, vif); if (!ret) - ret = rsi_send_ps_request(adapter, true); + ret = rsi_send_ps_request(adapter, true, vif); if (ret) rsi_dbg(ERR_ZONE, "%s: Failed to send PS request to device\n", diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index 331d64b05649..b9d0802c1b0f 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -641,4 +641,6 @@ int rsi_band_check(struct rsi_common *common, struct ieee80211_channel *chan); int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word); int rsi_send_radio_params_update(struct rsi_common *common); int rsi_set_antenna(struct rsi_common *common, u8 antenna); +int rsi_send_ps_request(struct rsi_hw *adapter, bool enable, + struct ieee80211_vif *vif); #endif diff --git a/drivers/net/wireless/rsi/rsi_ps.h b/drivers/net/wireless/rsi/rsi_ps.h index d8475873df36..98ff6a4ced57 100644 --- a/drivers/net/wireless/rsi/rsi_ps.h +++ b/drivers/net/wireless/rsi/rsi_ps.h @@ -55,10 +55,9 @@ struct rsi_ps_info { } __packed; char *str_psstate(enum ps_state state); -void rsi_enable_ps(struct rsi_hw *adapter); -void rsi_disable_ps(struct rsi_hw *adapter); +void rsi_enable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif); +void rsi_disable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif); int rsi_handle_ps_confirm(struct rsi_hw *adapter, u8 *msg); void rsi_default_ps_params(struct rsi_hw *hw); -int rsi_send_ps_request(struct rsi_hw *adapter, bool enable); -void rsi_conf_uapsd(struct rsi_hw *adapter); +void rsi_conf_uapsd(struct rsi_hw *adapter, struct ieee80211_vif *vif); #endif From c7245c0975f134cb10f113b0626ebd11799c8931 Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:27 +0530 Subject: [PATCH 0087/2177] rsi: aggregation changes for p2p mode The P2P GO condition is added wherever the AP mode check exists in the aggregation path.
Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 09ad909e27ed..79426a2defc4 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1005,7 +1005,8 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, if (ssn != NULL) seq_no = *ssn; - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { rsta = rsi_find_sta(common, sta->addr); if (!rsta) { rsi_dbg(ERR_ZONE, "No station mapped\n"); @@ -1039,9 +1040,11 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, break; case IEEE80211_AMPDU_TX_START: - if (vif->type == NL80211_IFTYPE_STATION) + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) common->vif_info[ii].seq_start = seq_no; - else if (vif->type == NL80211_IFTYPE_AP) + else if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) rsta->seq_start[tid] = seq_no; ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); status = 0; @@ -1061,9 +1064,11 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, break; case IEEE80211_AMPDU_TX_OPERATIONAL: - if (vif->type == NL80211_IFTYPE_STATION) + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) seq_start = common->vif_info[ii].seq_start; - else if (vif->type == NL80211_IFTYPE_AP) + else if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) seq_start = rsta->seq_start[tid]; status = rsi_send_aggregation_params_frame(common, tid, From af75687286bfea63c428591f08e9df6a7e7a9ff2 Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:28 +0530 Subject: [PATCH 0088/2177] rsi: miscellaneous changes for p2p mode Add P2P_GO condition as well when handling BEACON_ENABLE from mac80211. Also passing 'vif' for auto rate request. 
Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 15 +++++++++------ drivers/net/wireless/rsi/rsi_91x_mgmt.c | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 79426a2defc4..b1f5dbbde3cb 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -756,7 +756,8 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw, } if ((changed & BSS_CHANGED_BEACON_ENABLED) && - (vif->type == NL80211_IFTYPE_AP)) { + ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO))) { if (bss->enable_beacon) { rsi_dbg(INFO_ZONE, "===> BEACON ENABLED <===\n"); common->beacon_enabled = 1; @@ -1147,9 +1148,9 @@ static int rsi_mac80211_set_rate_mask(struct ieee80211_hw *hw, */ static void rsi_perform_cqm(struct rsi_common *common, u8 *bssid, - s8 rssi) + s8 rssi, + struct ieee80211_vif *vif) { - struct rsi_hw *adapter = common->priv; s8 last_event = common->cqm_info.last_cqm_event_rssi; int thold = common->cqm_info.rssi_thold; u32 hyst = common->cqm_info.rssi_hyst; @@ -1165,7 +1166,7 @@ static void rsi_perform_cqm(struct rsi_common *common, common->cqm_info.last_cqm_event_rssi = rssi; rsi_dbg(INFO_ZONE, "CQM: Notifying event: %d\n", event); - ieee80211_cqm_rssi_notify(adapter->vifs[0], event, rssi, GFP_KERNEL); + ieee80211_cqm_rssi_notify(vif, event, rssi, GFP_KERNEL); return; } @@ -1228,15 +1229,17 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw, vif = adapter->vifs[i]; if (!vif) continue; - if (vif->type == NL80211_IFTYPE_STATION) + if (vif->type == NL80211_IFTYPE_STATION) { bss = &vif->bss_conf; + break; + } } if (!bss) return; /* CQM only for connected AP beacons, the RSSI is a weighted avg */ if (bss->assoc && !(memcmp(bss->bssid, hdr->addr2, ETH_ALEN))) { if (ieee80211_is_beacon(hdr->frame_control)) - rsi_perform_cqm(common, hdr->addr2, rxs->signal); + rsi_perform_cqm(common, hdr->addr2, rxs->signal, vif); } return; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 383cd432b237..4b94190c9797 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1160,9 +1160,9 @@ static bool rsi_map_rates(u16 rate, int *offset) */ static int rsi_send_auto_rate_request(struct rsi_common *common, struct ieee80211_sta *sta, - u16 sta_id) + u16 sta_id, + struct ieee80211_vif *vif) { - struct ieee80211_vif *vif = common->priv->vifs[0]; struct sk_buff *skb; struct rsi_auto_rate *auto_rate; int ii = 0, jj = 0, kk = 0; @@ -1332,7 +1332,7 @@ void rsi_inform_bss_status(struct rsi_common *common, aid, sta_id, vif); if (common->min_rate == 0xffff) - rsi_send_auto_rate_request(common, sta, sta_id); + rsi_send_auto_rate_request(common, sta, sta_id, vif); if (opmode == RSI_OPMODE_STA) { if (!rsi_send_block_unblock_frame(common, false)) common->hw_data_qs_blocked = false; From 6508497cbdc70b92130fbca57402af6a94e05d20 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Sep 2017 16:24:52 +0100 Subject: [PATCH 0089/2177] rsi: fix a dereference on adapter before it has been null checked The assignment of dev is dereferencing adapter before adapter has been null checked, potentially leading to a null pointer dereference. Fix this by simply moving the assignment of dev to a later point after the sanity null check of adapter. 
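To make the bug class concrete, here is a standalone sketch with simplified stand-in types (not the driver code itself): initializing the local pointer in its declaration dereferences adapter before the NULL check, while assigning it after the check is safe.

struct usbdev {
	int write_fail;
};

struct hw {
	struct usbdev *rsi_dev;
};

static int write_multiple(struct hw *adapter)
{
	struct usbdev *dev;	/* no initializer: adapter is not touched yet */

	if (!adapter)
		return -1;	/* -ENODEV in the driver */

	dev = adapter->rsi_dev;	/* safe: adapter is known to be non-NULL here */
	if (dev->write_fail)
		return -2;	/* -ENETDOWN in the driver */

	return 0;
}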
Detected by CoverityScan CID#1398383 ("Dereference before null check") Fixes: dad0d04fa7ba ("rsi: Add RS9113 wireless driver") Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index 81df09dd2636..08730227cd18 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -73,8 +73,7 @@ static int rsi_write_multiple(struct rsi_hw *adapter, u8 *data, u32 count) { - struct rsi_91x_usbdev *dev = - (struct rsi_91x_usbdev *)adapter->rsi_dev; + struct rsi_91x_usbdev *dev; if (!adapter) return -ENODEV; @@ -82,6 +81,7 @@ static int rsi_write_multiple(struct rsi_hw *adapter, if (endpoint == 0) return -EINVAL; + dev = (struct rsi_91x_usbdev *)adapter->rsi_dev; if (dev->write_fail) return -ENETDOWN; From e31fbe1034d9066cfff0d0a4f7ce440ddf75643f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Sep 2017 19:15:50 +0100 Subject: [PATCH 0090/2177] b43: fix uninitialized reads of ret by initializing the array to zero The u8 char array ret is not being initialized and elements outside the range start to end contain just garbage values from the stack. This results in a later scan of the array reading potentially uninitialized values. Fix this by initializing the array to zero. This seems to have been an issue since the very first commit. Detected by CoverityScan CID#139652 ("Uninitialized scalar variable") Signed-off-by: Colin Ian King Reviewed-by: Michael Buesch Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43/phy_g.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/phy_g.c b/drivers/net/wireless/broadcom/b43/phy_g.c index 822dcaa8ace6..f59c02166462 100644 --- a/drivers/net/wireless/broadcom/b43/phy_g.c +++ b/drivers/net/wireless/broadcom/b43/phy_g.c @@ -2297,7 +2297,7 @@ static u8 b43_gphy_aci_detect(struct b43_wldev *dev, u8 channel) static u8 b43_gphy_aci_scan(struct b43_wldev *dev) { struct b43_phy *phy = &dev->phy; - u8 ret[13]; + u8 ret[13] = { 0 }; unsigned int channel = phy->channel; unsigned int i, j, start, end; From e3ae1c7720462136aaa1119c656704bd96952f4d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Sep 2017 19:16:58 +0100 Subject: [PATCH 0091/2177] b43legacy: fix uninitialized reads of ret by initializing the array to zero The u8 char array ret is not being initialized and elements outside the range start to end contain just garbage values from the stack. This results in a later scan of the array reading potentially uninitialized values. Fix this by initializing the array to zero. This seems to have been an issue since the very first commit.
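A standalone sketch of the defect both b43 patches address (illustrative code, not the driver's): only part of the array is written, so a later scan over all elements reads stack garbage unless the array is zero-initialized up front.

#include <stdint.h>

static uint8_t aci_scan_example(unsigned int start, unsigned int end)
{
	uint8_t ret[13] = { 0 };	/* the fix: zero every element */
	unsigned int i;

	for (i = start; i < end && i < 13; i++)
		ret[i] = 1;		/* stands in for the per-channel result */

	/* the later scan reads all 13 entries, not just [start, end) */
	for (i = 0; i < 13; i++)
		if (ret[i])
			return (uint8_t)i;

	return 0;
}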
Detected by CoverityScan CID#139653 ("Uninitialized scalar variable") Signed-off-by: Colin Ian King Reviewed-by: Michael Buesch Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43legacy/radio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/radio.c b/drivers/net/wireless/broadcom/b43legacy/radio.c index 9501420340a9..eab1c9387846 100644 --- a/drivers/net/wireless/broadcom/b43legacy/radio.c +++ b/drivers/net/wireless/broadcom/b43legacy/radio.c @@ -280,7 +280,7 @@ u8 b43legacy_radio_aci_detect(struct b43legacy_wldev *dev, u8 channel) u8 b43legacy_radio_aci_scan(struct b43legacy_wldev *dev) { struct b43legacy_phy *phy = &dev->phy; - u8 ret[13]; + u8 ret[13] = { 0 }; unsigned int channel = phy->channel; unsigned int i; unsigned int j; From 0f61953dd0f5fbed6ac833f58dc590293b7569b0 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 31 Aug 2017 09:48:59 -0500 Subject: [PATCH 0092/2177] rtlwifi: btcoexist: 23b 1ant: fix duplicated code for different branches A typo led to this issue, which was detected with the help of Coccinelle. In addition to fixing the error, the code is refactored to eliminate an if statement. Addresses-Coverity-ID: 1226788 Reported-by: Gustavo A. R. Silva Signed-off-by: Larry Finger Cc: Ping-Ke Shih Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- .../realtek/rtlwifi/btcoexist/halbtc8723b1ant.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c index c04425236ce4..5f726f6d3567 100644 --- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c +++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c @@ -2260,14 +2260,11 @@ static void halbtc8723b1ant_run_coexist_mechanism(struct btc_coexist *btcoexist) if (iot_peer != BTC_IOT_PEER_CISCO && iot_peer != BTC_IOT_PEER_BROADCOM) { - if (bt_link_info->sco_exist) - halbtc8723b1ant_limited_rx(btcoexist, - NORMAL_EXEC, false, - false, 0x5); - else - halbtc8723b1ant_limited_rx(btcoexist, - NORMAL_EXEC, false, - false, 0x5); + bool sco_exist = bt_link_info->sco_exist; + + halbtc8723b1ant_limited_rx(btcoexist, + NORMAL_EXEC, sco_exist, + false, 0x5); } else { if (bt_link_info->sco_exist) { halbtc8723b1ant_limited_rx(btcoexist, From 519ce2f933fa14acf69d5c8cabcc18711943d629 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 14 Sep 2017 13:17:44 -0500 Subject: [PATCH 0093/2177] rtlwifi: rtl8192ee: Fix memory leak when loading firmware In routine rtl92ee_set_fw_rsvdpagepkt(), the driver allocates an skb, but never calls rtl_cmd_send_packet(), which will free the buffer. All other rtlwifi drivers perform this operation correctly. This problem has been in the driver since it was included in the kernel. Fortunately, each firmware load only leaks 4 buffers, which likely explains why it has not previously been detected. 
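For clarity, the corrected flow looks roughly like the fragment below, based on the diff that follows (the allocation-failure check is an extra precaution, not part of the patch). The key point from the commit message is that rtl_cmd_send_packet() is the call that ultimately frees the skb, so every buffer allocated here must be handed to it.

	skb = dev_alloc_skb(totalpacketlen);
	if (!skb)
		return;					/* nothing allocated, nothing to leak */
	skb_put_data(skb, &reserved_page_packet, totalpacketlen);

	rtstatus = rtl_cmd_send_packet(hw, skb);	/* transmits and later frees skb */
	if (rtstatus)
		b_dlok = true;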
Cc: Stable # 3.18+ Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c index 7eae27f8e173..f9563ae301ad 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c @@ -682,7 +682,7 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct sk_buff *skb = NULL; - + bool rtstatus; u32 totalpacketlen; u8 u1rsvdpageloc[5] = { 0 }; bool b_dlok = false; @@ -768,7 +768,9 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) skb = dev_alloc_skb(totalpacketlen); skb_put_data(skb, &reserved_page_packet, totalpacketlen); - b_dlok = true; + rtstatus = rtl_cmd_send_packet(hw, skb); + if (rtstatus) + b_dlok = true; if (b_dlok) { RT_TRACE(rtlpriv, COMP_POWER, DBG_LOUD , From 31726ff20190bafcf41d5eb5a7615f052f15bdd3 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Thu, 31 Aug 2017 01:08:35 +0530 Subject: [PATCH 0094/2177] mwifiex: notify cfg80211 about scan abort Driver sends a series of scan commands to firmware to serve a user scan request. If an intermediate scan command fails, driver aborts the scan but it is not being informed to cfg80211. This will cause issues in applications performing periodic scans. Fix this by informing scan abort. Signed-off-by: Cathy Luo Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 0fba5b10ef2d..1bd4e13b8449 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -70,11 +70,7 @@ mwifiex_process_cmdresp_error(struct mwifiex_private *priv, break; case HostCmd_CMD_802_11_SCAN: case HostCmd_CMD_802_11_SCAN_EXT: - mwifiex_cancel_pending_scan_cmd(adapter); - - spin_lock_irqsave(&adapter->mwifiex_cmd_lock, flags); - adapter->scan_processing = false; - spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, flags); + mwifiex_cancel_scan(adapter); break; case HostCmd_CMD_MAC_CONTROL: From 26177d7f3969da1827bc2b5923edcfc2ff4c3a61 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Fri, 8 Sep 2017 02:02:43 +0530 Subject: [PATCH 0095/2177] mwifiex: check for mfg_mode in add_virtual_intf If driver is loaded with 'mfg_mode' enabled, then the sending commands are not allowed. So, skip sending commands, to firmware in mwifiex_add_virtual_intf if 'mfg_mode' is enabled. 
Fixes: 7311ea850079 ("mwifiex: fix AP start problem for newly added interface") Signed-off-by: Ganapathi Bhat Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- .../net/wireless/marvell/mwifiex/cfg80211.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 32c5074da84c..ad1ebd8844d0 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2959,18 +2959,21 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, } mwifiex_init_priv_params(priv, dev); - mwifiex_set_mac_address(priv, dev); priv->netdev = dev; - ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE, - HostCmd_ACT_GEN_SET, 0, NULL, true); - if (ret) - goto err_set_bss_mode; + if (!adapter->mfg_mode) { + mwifiex_set_mac_address(priv, dev); - ret = mwifiex_sta_init_cmd(priv, false, false); - if (ret) - goto err_sta_init; + ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE, + HostCmd_ACT_GEN_SET, 0, NULL, true); + if (ret) + goto err_set_bss_mode; + + ret = mwifiex_sta_init_cmd(priv, false, false); + if (ret) + goto err_sta_init; + } mwifiex_setup_ht_caps(&wiphy->bands[NL80211_BAND_2GHZ]->ht_cap, priv); if (adapter->is_hw_11ac_capable) From 85dafc12919659ec744e111b1714792cd3d0a9ca Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Mon, 11 Sep 2017 18:16:04 +0530 Subject: [PATCH 0096/2177] mwifiex: remove unnecessary call to memset A call to memset() to zero memory immediately after allocating it with kzalloc() is unnecessary, as kzalloc() returns zeroed memory. Semantic patch used to resolve this issue: @@ expression e,e2; constant c; statement S; @@ e = kzalloc(e2, c); if(e == NULL) S - memset(e, 0, e2); Signed-off-by: Himanshu Jha Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/scan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index c9d41ed77fc7..8838b88cd998 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -1936,8 +1936,6 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv) if (!user_scan_cfg) return -ENOMEM; - memset(user_scan_cfg, 0, sizeof(*user_scan_cfg)); - for (id = 0; id < MWIFIEX_USER_SCAN_CHAN_MAX; id++) { if (!priv->hidden_chan[id].chan_number) break; From d157bcfaf8542612fd9ffc0cbbc1e52b85157640 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 16 Sep 2017 16:34:24 +0100 Subject: [PATCH 0097/2177] mwifiex: make const arrays static to shrink object code size Don't populate const arrays on the stack; instead, make them static. This makes the object code smaller by nearly 300 bytes: Before: text data bss dec hex filename 69260 16149 576 85985 14fe1 cfg80211.o After: text data bss dec hex filename 68385 16725 576 85686 14eb6 cfg80211.o Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index ad1ebd8844d0..a3d142bdf946 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -142,7 +142,7 @@ mwifiex_cfg80211_del_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool pairwise, const u8 
*mac_addr) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); - const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const u8 *peer_mac = pairwise ? mac_addr : bc_mac; if (mwifiex_set_encode(priv, NULL, NULL, 0, key_index, peer_mac, 1)) { @@ -454,7 +454,7 @@ mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, { struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); struct mwifiex_wep_key *wep_key; - const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const u8 *peer_mac = pairwise ? mac_addr : bc_mac; if (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP && @@ -3253,8 +3253,8 @@ static int mwifiex_set_wowlan_mef_entry(struct mwifiex_private *priv, int i, filt_num = 0, ret = 0; bool first_pat = true; u8 byte_seq[MWIFIEX_MEF_MAX_BYTESEQ + 1]; - const u8 ipv4_mc_mac[] = {0x33, 0x33}; - const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e}; + static const u8 ipv4_mc_mac[] = {0x33, 0x33}; + static const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e}; mef_entry->mode = MEF_MODE_HOST_SLEEP; mef_entry->action = MEF_ACTION_ALLOW_AND_WAKEUP_HOST; @@ -3547,9 +3547,9 @@ static int mwifiex_set_rekey_data(struct wiphy *wiphy, struct net_device *dev, static int mwifiex_get_coalesce_pkt_type(u8 *byte_seq) { - const u8 ipv4_mc_mac[] = {0x33, 0x33}; - const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e}; - const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff}; + static const u8 ipv4_mc_mac[] = {0x33, 0x33}; + static const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e}; + static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff}; if ((byte_seq[0] & 0x01) && (byte_seq[MWIFIEX_COALESCE_MAX_BYTESEQ] == 1)) From e251a882c0bae39d3d31efe993e977104605a9b3 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 18 Sep 2017 12:25:02 +0530 Subject: [PATCH 0098/2177] mwifiex: avoid storing random_mac in private Application will keep track of whether MAC address randomization is enabled or not during scan. But at present driver is storing 'random_mac' in mwifiex_private which implies even after scan is done driver has some reference to the earlier 'scan request'. To avoid this, make use of 'mac_addr' variable in 'scan_request' to store 'random_mac'. This structure will be freed by cfg80211 once scan is done. 
Signed-off-by: Ganapathi Bhat Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 11 ++++------- drivers/net/wireless/marvell/mwifiex/main.h | 1 - drivers/net/wireless/marvell/mwifiex/scan.c | 3 ++- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a3d142bdf946..f28040c45d86 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2529,15 +2529,12 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->scan_request = request; if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - ether_addr_copy(priv->random_mac, request->mac_addr); for (i = 0; i < ETH_ALEN; i++) { - priv->random_mac[i] &= request->mac_addr_mask[i]; - priv->random_mac[i] |= get_random_int() & - ~(request->mac_addr_mask[i]); + request->mac_addr[i] &= request->mac_addr_mask[i]; + request->mac_addr[i] |= + get_random_int() & ~(request->mac_addr_mask[i]); } - ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); - } else { - eth_zero_addr(priv->random_mac); + ether_addr_copy(user_scan_cfg->random_mac, request->mac_addr); } user_scan_cfg->num_ssids = request->n_ssids; diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index a76bd797e454..a34de8582e91 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -680,7 +680,6 @@ struct mwifiex_private { struct mwifiex_user_scan_chan hidden_chan[MWIFIEX_USER_SCAN_CHAN_MAX]; u8 assoc_resp_ht_param; bool ht_param_present; - u8 random_mac[ETH_ALEN]; }; diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 8838b88cd998..d7ce7f75ae38 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -1946,7 +1946,8 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv) adapter->active_scan_triggered = true; if (priv->scan_request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) - ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); + ether_addr_copy(user_scan_cfg->random_mac, + priv->scan_request->mac_addr); user_scan_cfg->num_ssids = priv->scan_request->n_ssids; user_scan_cfg->ssid_list = priv->scan_request->ssids; From e9a3846afaa4d2d44f33a6aa2dd919e481be599e Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 18 Sep 2017 13:12:17 +0530 Subject: [PATCH 0099/2177] mwifiex: use get_random_mask_addr() helper Avoid calculating random MAC address in driver. Instead make use of 'get_random_mask_addr()' function. 
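The masking rule behind both the old open-coded loop and the helper is simple. A hypothetical local function showing it (an illustration, not the kernel's get_random_mask_addr() implementation): bits set in the mask are taken from the template address, cleared bits are randomized.

static void fill_masked_random_addr(u8 *out, const u8 *addr, const u8 *mask)
{
	int i;

	for (i = 0; i < ETH_ALEN; i++)
		out[i] = (addr[i] & mask[i]) |
			 (get_random_int() & 0xff & ~mask[i]);
}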
Signed-off-by: Ganapathi Bhat Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index f28040c45d86..ac01af4d7bfb 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2503,6 +2503,7 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, struct ieee80211_channel *chan; struct ieee_types_header *ie; struct mwifiex_user_scan_cfg *user_scan_cfg; + u8 mac_addr[ETH_ALEN]; mwifiex_dbg(priv->adapter, CMD, "info: received scan request on %s\n", dev->name); @@ -2529,12 +2530,10 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->scan_request = request; if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - for (i = 0; i < ETH_ALEN; i++) { - request->mac_addr[i] &= request->mac_addr_mask[i]; - request->mac_addr[i] |= - get_random_int() & ~(request->mac_addr_mask[i]); - } - ether_addr_copy(user_scan_cfg->random_mac, request->mac_addr); + get_random_mask_addr(mac_addr, request->mac_addr, + request->mac_addr_mask); + ether_addr_copy(request->mac_addr, mac_addr); + ether_addr_copy(user_scan_cfg->random_mac, mac_addr); } user_scan_cfg->num_ssids = request->n_ssids; From d4e1b299ec2853dd3d90b71ae86fa2626e60dc25 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 20 Sep 2017 12:18:17 -0400 Subject: [PATCH 0100/2177] ipv6: Use ipv6_authlen for len in ipv6_skip_exthdr In ipv6_skip_exthdr, the length of the AH header is computed manually as (hp->hdrlen+2)<<2. However, in include/linux/ipv6.h, a macro named ipv6_authlen is already defined for exactly the same job. This commit replaces the manual computation code with the macro. Signed-off-by: Xiang Gao Signed-off-by: David S. Miller --- net/ipv6/exthdrs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 305e2ed730bf..115d60919f72 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, break; hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; + hdrlen = ipv6_authlen(hp); else hdrlen = ipv6_optlen(hp); From 07850a4f74ea0433fe26dccb1ab9556c738a6584 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Wed, 20 Sep 2017 08:12:23 +0800 Subject: [PATCH 0101/2177] macvlan: code refine to check data before using This patch checks data once at one place and returns if it is null. Signed-off-by: Zhang Shengju Signed-off-by: David S. 
Miller --- drivers/net/macvlan.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d2aea961e0f4..1ffe77e95d46 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1231,11 +1231,14 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], return -EADDRNOTAVAIL; } - if (data && data[IFLA_MACVLAN_FLAGS] && + if (!data) + return 0; + + if (data[IFLA_MACVLAN_FLAGS] && nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC) return -EINVAL; - if (data && data[IFLA_MACVLAN_MODE]) { + if (data[IFLA_MACVLAN_MODE]) { switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) { case MACVLAN_MODE_PRIVATE: case MACVLAN_MODE_VEPA: @@ -1248,7 +1251,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], } } - if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { + if (data[IFLA_MACVLAN_MACADDR_MODE]) { switch (nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE])) { case MACVLAN_MACADDR_ADD: case MACVLAN_MACADDR_DEL: @@ -1260,7 +1263,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], } } - if (data && data[IFLA_MACVLAN_MACADDR]) { + if (data[IFLA_MACVLAN_MACADDR]) { if (nla_len(data[IFLA_MACVLAN_MACADDR]) != ETH_ALEN) return -EINVAL; @@ -1268,7 +1271,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], return -EADDRNOTAVAIL; } - if (data && data[IFLA_MACVLAN_MACADDR_COUNT]) + if (data[IFLA_MACVLAN_MACADDR_COUNT]) return -EINVAL; return 0; From 00ba4cb36da682c68dc87d1703a8aaffe2b4e9c5 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 16 Sep 2017 16:18:33 +0200 Subject: [PATCH 0102/2177] bridge: also trigger RTM_NEWLINK when interface is released from bridge Currently, when an interface is released from a bridge via ioctl(), we get an RTM_DELLINK event through netlink: Deleted 2: dummy0: mtu 1500 master bridge0 state UNKNOWN link/ether 6e:23:c2:54:3a:b3 Userspace has to interpret that as a removal from the bridge, not as a complete removal of the interface. When a bridged interface is completely removed, we get two events: Deleted 2: dummy0: mtu 1500 master bridge0 state DOWN link/ether 6e:23:c2:54:3a:b3 Deleted 2: dummy0: mtu 1500 qdisc noop state DOWN group default link/ether 6e:23:c2:54:3a:b3 brd ff:ff:ff:ff:ff:ff In contrast, when an interface is released from a bond, we get an RTM_NEWLINK with only the new characteristics (no master): 3: dummy1: mtu 1500 qdisc noqueue master bond0 state UNKNOWN group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff 3: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff 4: bond0: mtu 1500 qdisc noqueue state UP group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff 3: dummy1: mtu 1500 qdisc noqueue state DOWN group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff 3: dummy1: mtu 1500 qdisc noqueue state DOWN group default link/ether ca:c8:7b:66:f8:25 brd ff:ff:ff:ff:ff:ff 4: bond0: mtu 1500 qdisc noqueue state UP group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff Userland may be confused by the fact that we say a link is deleted while its characteristics are only modified. A first solution would have been to turn the RTM_DELLINK event in del_nbp() into an RTM_NEWLINK event. However, maybe some piece of userland is relying on this RTM_DELLINK to detect when a bridged interface is released. Instead, we also emit an RTM_NEWLINK event once the interface is released (without master info). 
Deleted 2: dummy0: mtu 1500 master bridge0 state UNKNOWN link/ether 8a:bb:e7:94:b1:f8 2: dummy0: mtu 1500 qdisc noqueue state UNKNOWN group default link/ether 8a:bb:e7:94:b1:f8 brd ff:ff:ff:ff:ff:ff This is done only when using ioctl(). When using Netlink, such an event is already automatically emitted in do_setlink(). Signed-off-by: Vincent Bernat Signed-off-by: David S. Miller --- net/bridge/br_ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 7970f8540cbb..3148cb3a8e82 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -99,8 +99,10 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (isadd) ret = br_add_if(br, dev); - else + else { ret = br_del_if(br, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL); + } return ret; } From 4d6a78b477dd9686e7034aa140057ce7a5da5fd3 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 19 Sep 2017 10:09:24 +0200 Subject: [PATCH 0103/2177] net: dsa: lan9303: Add adjust_link() method Make the driver react to device tree "fixed-link" declaration on CPU port. - turn off autonegotiation - force speed 10 or 100 mb/s - force duplex mode Signed-off-by: Egil Hjelmeland Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303-core.c | 40 ++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index b471413d3df9..07355db2ad81 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "lan9303.h" @@ -57,6 +58,7 @@ #define LAN9303_SWITCH_CSR_CMD_LANES (BIT(19) | BIT(18) | BIT(17) | BIT(16)) #define LAN9303_VIRT_PHY_BASE 0x70 #define LAN9303_VIRT_SPECIAL_CTRL 0x77 +#define LAN9303_VIRT_SPECIAL_TURBO BIT(10) /*Turbo MII Enable*/ /*13.4 Switch Fabric Control and Status Registers * Accessed indirectly via SWITCH_CSR_CMD, SWITCH_CSR_DATA. 
@@ -760,6 +762,43 @@ static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum, return chip->ops->phy_write(chip, phy, regnum, val); } +static void lan9303_adjust_link(struct dsa_switch *ds, int port, + struct phy_device *phydev) +{ + struct lan9303 *chip = ds->priv; + int ctl, res; + + if (!phy_is_pseudo_fixed_link(phydev)) + return; + + ctl = lan9303_phy_read(ds, port, MII_BMCR); + + ctl &= ~BMCR_ANENABLE; + + if (phydev->speed == SPEED_100) + ctl |= BMCR_SPEED100; + else if (phydev->speed == SPEED_10) + ctl &= ~BMCR_SPEED100; + else + dev_err(ds->dev, "unsupported speed: %d\n", phydev->speed); + + if (phydev->duplex == DUPLEX_FULL) + ctl |= BMCR_FULLDPLX; + else + ctl &= ~BMCR_FULLDPLX; + + res = lan9303_phy_write(ds, port, MII_BMCR, ctl); + + if (port == chip->phy_addr_sel_strap) { + /* Virtual Phy: Remove Turbo 200Mbit mode */ + lan9303_read(chip->regmap, LAN9303_VIRT_SPECIAL_CTRL, &ctl); + + ctl &= ~LAN9303_VIRT_SPECIAL_TURBO; + res = regmap_write(chip->regmap, + LAN9303_VIRT_SPECIAL_CTRL, ctl); + } +} + static int lan9303_port_enable(struct dsa_switch *ds, int port, struct phy_device *phy) { @@ -803,6 +842,7 @@ static const struct dsa_switch_ops lan9303_switch_ops = { .get_strings = lan9303_get_strings, .phy_read = lan9303_phy_read, .phy_write = lan9303_phy_write, + .adjust_link = lan9303_adjust_link, .get_ethtool_stats = lan9303_get_ethtool_stats, .get_sset_count = lan9303_get_sset_count, .port_enable = lan9303_port_enable, From 9457642a405fd68d5ce90f5c432e462277fb666e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Sep 2017 17:42:41 +0800 Subject: [PATCH 0104/2177] virtio-net: remove unnecessary parameter of virtnet_xdp_xmit() CC: John Fastabend Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 511f8339fa96..a0ef4b08f0e9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -373,7 +373,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, } static bool virtnet_xdp_xmit(struct virtnet_info *vi, - struct receive_queue *rq, struct xdp_buff *xdp) { struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -542,7 +541,7 @@ static struct sk_buff *receive_small(struct net_device *dev, delta = orig_data - xdp.data; break; case XDP_TX: - if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp))) + if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); rcu_read_unlock(); goto xdp_xmit; @@ -677,7 +676,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp))) + if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); if (unlikely(xdp_page != page)) From 312403453532b209879aa7faeff296bd3dea78af Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Sep 2017 17:42:42 +0800 Subject: [PATCH 0105/2177] virtio-net: add packet len average only when needed during XDP There's no need to add packet len average in the case of XDP_PASS since it will be done soon after skb is created. Cc: John Fastabend Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a0ef4b08f0e9..db5924c085aa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -656,6 +656,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, xdp.data_end = xdp.data + (len - vi->hdr_len); act = bpf_prog_run_xdp(xdp_prog, &xdp); + if (act != XDP_PASS) + ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); + switch (act) { case XDP_PASS: /* recalculate offset to account for any header @@ -671,14 +674,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, offset, len, PAGE_SIZE); - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); return head_skb; } break; case XDP_TX: if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); if (unlikely(xdp_page != page)) goto err_xdp; rcu_read_unlock(); @@ -690,7 +691,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, case XDP_DROP: if (unlikely(xdp_page != page)) __free_pages(xdp_page, 0); - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); goto err_xdp; } } From 186b3c998c50fc241b51b905081c748455d16b4a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Sep 2017 17:42:43 +0800 Subject: [PATCH 0106/2177] virtio-net: support XDP_REDIRECT This patch tries to add XDP_REDIRECT for virtio-net. The changes are not complex as we could use exist XDP_TX helpers for most of the work. The rest is passing the XDP_TX to NAPI handler for implementing batching. Cc: John Fastabend Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net. | 0 drivers/net/virtio_net.c | 77 ++++++++++++++++++++++++++++++++-------- 2 files changed, 62 insertions(+), 15 deletions(-) create mode 100644 drivers/net/virtio_net. diff --git a/drivers/net/virtio_net. b/drivers/net/virtio_net. new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index db5924c085aa..f6c1f135a024 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -29,6 +29,7 @@ #include #include #include +#include #include static int napi_weight = NAPI_POLL_WEIGHT; @@ -372,8 +373,20 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, return skb; } -static bool virtnet_xdp_xmit(struct virtnet_info *vi, - struct xdp_buff *xdp) +static void virtnet_xdp_flush(struct net_device *dev) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct send_queue *sq; + unsigned int qp; + + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); + sq = &vi->sq[qp]; + + virtqueue_kick(sq->vq); +} + +static bool __virtnet_xdp_xmit(struct virtnet_info *vi, + struct xdp_buff *xdp) { struct virtio_net_hdr_mrg_rxbuf *hdr; unsigned int len; @@ -407,10 +420,19 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi, return false; } - virtqueue_kick(sq->vq); return true; } +static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +{ + struct virtnet_info *vi = netdev_priv(dev); + bool sent = __virtnet_xdp_xmit(vi, xdp); + + if (!sent) + return -ENOSPC; + return 0; +} + static unsigned int virtnet_get_headroom(struct virtnet_info *vi) { return vi->xdp_queue_pairs ? 
VIRTIO_XDP_HEADROOM : 0; @@ -483,7 +505,8 @@ static struct sk_buff *receive_small(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, void *ctx, - unsigned int len) + unsigned int len, + bool *xdp_xmit) { struct sk_buff *skb; struct bpf_prog *xdp_prog; @@ -493,7 +516,7 @@ static struct sk_buff *receive_small(struct net_device *dev, unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page = virt_to_head_page(buf); - unsigned int delta = 0; + unsigned int delta = 0, err; struct page *xdp_page; len -= vi->hdr_len; @@ -541,8 +564,16 @@ static struct sk_buff *receive_small(struct net_device *dev, delta = orig_data - xdp.data; break; case XDP_TX: - if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) + if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); + else + *xdp_xmit = true; + rcu_read_unlock(); + goto xdp_xmit; + case XDP_REDIRECT: + err = xdp_do_redirect(dev, &xdp, xdp_prog); + if (!err) + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; default: @@ -603,7 +634,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct receive_queue *rq, void *buf, void *ctx, - unsigned int len) + unsigned int len, + bool *xdp_xmit) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); @@ -613,6 +645,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); + int err; head_skb = NULL; @@ -678,12 +711,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) + if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); + else + *xdp_xmit = true; if (unlikely(xdp_page != page)) goto err_xdp; rcu_read_unlock(); goto xdp_xmit; + case XDP_REDIRECT: + err = xdp_do_redirect(dev, &xdp, xdp_prog); + if (err) + *xdp_xmit = true; + rcu_read_unlock(); + goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: @@ -788,7 +829,7 @@ xdp_xmit: } static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, - void *buf, unsigned int len, void **ctx) + void *buf, unsigned int len, void **ctx, bool *xdp_xmit) { struct net_device *dev = vi->dev; struct sk_buff *skb; @@ -809,11 +850,11 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, } if (vi->mergeable_rx_bufs) - skb = receive_mergeable(dev, vi, rq, buf, ctx, len); + skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit); else if (vi->big_packets) skb = receive_big(dev, vi, rq, buf, len); else - skb = receive_small(dev, vi, rq, buf, ctx, len); + skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit); if (unlikely(!skb)) return 0; @@ -1071,7 +1112,7 @@ static void refill_work(struct work_struct *work) } } -static int virtnet_receive(struct receive_queue *rq, int budget) +static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) { struct virtnet_info *vi = rq->vq->vdev->priv; unsigned int len, received = 0, bytes = 0; @@ -1083,13 +1124,13 @@ static int virtnet_receive(struct receive_queue *rq, int budget) while (received < budget && (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { - bytes += receive_buf(vi, rq, buf, len, ctx); + bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit); received++; } } else { while (received < budget && (buf = 
virtqueue_get_buf(rq->vq, &len)) != NULL) { - bytes += receive_buf(vi, rq, buf, len, NULL); + bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit); received++; } } @@ -1161,15 +1202,19 @@ static int virtnet_poll(struct napi_struct *napi, int budget) struct receive_queue *rq = container_of(napi, struct receive_queue, napi); unsigned int received; + bool xdp_xmit = false; virtnet_poll_cleantx(rq); - received = virtnet_receive(rq, budget); + received = virtnet_receive(rq, budget, &xdp_xmit); /* Out of packets? */ if (received < budget) virtqueue_napi_complete(napi, rq->vq, received); + if (xdp_xmit) + xdp_do_flush_map(); + return received; } @@ -2069,6 +2114,8 @@ static const struct net_device_ops virtnet_netdev = { .ndo_poll_controller = virtnet_netpoll, #endif .ndo_xdp = virtnet_xdp, + .ndo_xdp_xmit = virtnet_xdp_xmit, + .ndo_xdp_flush = virtnet_xdp_flush, .ndo_features_check = passthru_features_check, }; From 0d4a6608f68c7532dcbfec2ea1150c9761767d03 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 19 Sep 2017 12:11:43 +0200 Subject: [PATCH 0107/2177] udp: do rmem bulk free even if the rx sk queue is empty The commit 6b229cf77d68 ("udp: add batching to udp_rmem_release()") reduced greatly the cacheline contention between the BH and the US reader batching the rmem updates in most scenarios. Such optimization is explicitly avoided if the US reader is faster then BH processing. My fault, I initially suggested this kind of behavior due to concerns of possible regressions with small sk_rcvbuf values. Tests showed such concerns are misplaced, so this commit relaxes the condition for rmem bulk updates, obtaining small but measurable performance gain in the scenario described above. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ef29df8648e4..784ced0b9150 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1212,8 +1212,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; - if (size < (sk->sk_rcvbuf >> 2) && - !skb_queue_empty(&up->reader_queue)) + if (size < (sk->sk_rcvbuf >> 2)) return; } else { size += up->forward_deficit; From eccaa9e51b77e504fa8e8357e4979bde724e22a8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 20 Sep 2017 15:39:59 -0700 Subject: [PATCH 0108/2177] Revert "bridge: also trigger RTM_NEWLINK when interface is released from bridge" This reverts commit 00ba4cb36da682c68dc87d1703a8aaffe2b4e9c5. Discussion with David Ahern determined that this change is actually not needed. Signed-off-by: David S. Miller --- net/bridge/br_ioctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 3148cb3a8e82..7970f8540cbb 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -99,10 +99,8 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (isadd) ret = br_add_if(br, dev); - else { + else ret = br_del_if(br, dev); - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL); - } return ret; } From 53bade8a3372eea0f1276bc96892735c517330fe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 18:00:37 -0700 Subject: [PATCH 0109/2177] net: dsa: Utilize dsa_slave_dev_check() Instead of open coding the check. Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d51b10450e1b..6fc9eb094267 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1294,7 +1294,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - if (dev->netdev_ops != &dsa_slave_netdev_ops) + if (!dsa_slave_dev_check(dev)) return NOTIFY_DONE; if (event == NETDEV_CHANGEUPPER) From 5c1adf606d6ff519b5c1b9b22b8055f161dd98dd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 18:03:45 -0700 Subject: [PATCH 0110/2177] blackfin: tcm-bf518: Remove dsa.h inclusion Nothing in that file uses definitions from that header, so just get rid of it. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- arch/blackfin/mach-bf518/boards/tcm-bf518.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/blackfin/mach-bf518/boards/tcm-bf518.c b/arch/blackfin/mach-bf518/boards/tcm-bf518.c index 240d5cb1f02c..37d868085f6a 100644 --- a/arch/blackfin/mach-bf518/boards/tcm-bf518.c +++ b/arch/blackfin/mach-bf518/boards/tcm-bf518.c @@ -25,7 +25,6 @@ #include #include #include -#include /* * Name the Board for the /proc/cpuinfo From 637ae0f44580040702b53e8f0593a5a5fb00473e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 18:03:46 -0700 Subject: [PATCH 0111/2177] blackfin: ezbrd: Remove non-functional DSA/KSZ8893M code There is no in tree driver for the KSZ8893M switch driver, so just get rid of the code in that board file. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- arch/blackfin/mach-bf518/boards/ezbrd.c | 47 ------------------------- 1 file changed, 47 deletions(-) diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c index d022112927c2..c51d1b810ac3 100644 --- a/arch/blackfin/mach-bf518/boards/ezbrd.c +++ b/arch/blackfin/mach-bf518/boards/ezbrd.c @@ -25,7 +25,6 @@ #include #include #include -#include /* * Name the Board for the /proc/cpuinfo @@ -105,11 +104,7 @@ static const unsigned short bfin_mac_peripherals[] = { static struct bfin_phydev_platform_data bfin_phydev_data[] = { { -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) - .addr = 3, -#else .addr = 1, -#endif .irq = IRQ_MAC_PHYINT, }, }; @@ -119,9 +114,6 @@ static struct bfin_mii_bus_platform_data bfin_mii_bus_data = { .phydev_data = bfin_phydev_data, .phy_mode = PHY_INTERFACE_MODE_MII, .mac_peripherals = bfin_mac_peripherals, -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) - .phy_mask = 0xfff7, /* Only probe the port phy connect to the on chip MAC */ -#endif .vlan1_mask = 1, .vlan2_mask = 2, }; @@ -140,29 +132,6 @@ static struct platform_device bfin_mac_device = { } }; -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) -static struct dsa_chip_data ksz8893m_switch_chip_data = { - .mii_bus = &bfin_mii_bus.dev, - .port_names = { - NULL, - "eth%d", - "eth%d", - "cpu", - }, -}; -static struct dsa_platform_data ksz8893m_switch_data = { - .nr_chips = 1, - .netdev = &bfin_mac_device.dev, - .chip = &ksz8893m_switch_chip_data, -}; - -static struct platform_device ksz8893m_switch_device = { - .name = "dsa", - .id = 0, - .num_resources = 0, - .dev.platform_data = &ksz8893m_switch_data, -}; -#endif #endif #if IS_ENABLED(CONFIG_MTD_M25P80) @@ -228,19 +197,6 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = { }, #endif -#if IS_ENABLED(CONFIG_BFIN_MAC) -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) - { - .modalias = "ksz8893m", - .max_speed_hz = 5000000, - 
.bus_num = 0, - .chip_select = 1, - .platform_data = NULL, - .mode = SPI_MODE_3, - }, -#endif -#endif - #if IS_ENABLED(CONFIG_MMC_SPI) { .modalias = "mmc_spi", @@ -714,9 +670,6 @@ static struct platform_device *stamp_devices[] __initdata = { #if IS_ENABLED(CONFIG_BFIN_MAC) &bfin_mii_bus, &bfin_mac_device, -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) - &ksz8893m_switch_device, -#endif #endif #if IS_ENABLED(CONFIG_SPI_BFIN5XX) From 34929cb4d691f7f9e217ba0e3f536978cd56aa6c Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 20 Sep 2017 11:32:07 +0530 Subject: [PATCH 0112/2177] cxgb4: add new T5 pci device id's Add 0x50a5, 0x50a6, 0x50a7, 0x50a8 and 0x50a9 T5 device id's. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index aa28299aef5f..37d90d63e4a3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -176,6 +176,11 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x50a2), /* Custom T540-KR4 */ CH_PCI_ID_TABLE_FENTRY(0x50a3), /* Custom T580-KR4 */ CH_PCI_ID_TABLE_FENTRY(0x50a4), /* Custom 2x T540-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50a5), /* Custom T522-BT */ + CH_PCI_ID_TABLE_FENTRY(0x50a6), /* Custom T522-BT-SO */ + CH_PCI_ID_TABLE_FENTRY(0x50a7), /* Custom T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50a8), /* Custom T580-KR */ + CH_PCI_ID_TABLE_FENTRY(0x50a9), /* Custom T580-KR */ /* T6 adapters: */ From dff37b58ca53b1978e02edf6f8c1dd681799342b Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:01 +0200 Subject: [PATCH 0113/2177] mlxsw: spectrum_switchdev: Change mc_router to mrouter Change the naming of mc_router to mrouter to keep consistency. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index d39ffbfcc436..22f8d7428d96 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -699,10 +699,10 @@ static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, return -EINVAL; } -static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, - struct net_device *orig_dev, - bool is_port_mc_router) +static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + bool is_port_mrouter) { struct mlxsw_sp_bridge_port *bridge_port; int err; @@ -720,12 +720,12 @@ static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, MLXSW_SP_FLOOD_TYPE_MC, - is_port_mc_router); + is_port_mrouter); if (err) return err; out: - bridge_port->mrouter = is_port_mc_router; + bridge_port->mrouter = is_port_mrouter; return 0; } @@ -793,9 +793,9 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, attr->u.vlan_filtering); break; case SWITCHDEV_ATTR_ID_PORT_MROUTER: - err = mlxsw_sp_port_attr_mc_router_set(mlxsw_sp_port, trans, - attr->orig_dev, - attr->u.mrouter); + err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, trans, + attr->orig_dev, + attr->u.mrouter); break; case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans, From 4cdc35e4ebf2e6b1cf4fe028eb9e711723f9199a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:02 +0200 Subject: [PATCH 0114/2177] mlxsw: spectrum_switchdev: Add a ports bitmap to the mid db Add a bitmap of ports to the mid struct to hold the ports that are registered to this mid. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../mellanox/mlxsw/spectrum_switchdev.c | 20 ++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 7180d8f3de75..0424bee88407 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -95,6 +95,7 @@ struct mlxsw_sp_mid { u16 fid; u16 mid; unsigned int ref_count; + unsigned long *ports_in_mid; /* bits array */ }; enum mlxsw_sp_span_type { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 22f8d7428d96..0fde16a22b72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1239,6 +1239,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, u16 fid) { struct mlxsw_sp_mid *mid; + size_t alloc_size; u16 mid_idx; mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, @@ -1250,6 +1251,14 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, if (!mid) return NULL; + alloc_size = sizeof(unsigned long) * + BITS_TO_LONGS(mlxsw_core_max_ports(mlxsw_sp->core)); + mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL); + if (!mid->ports_in_mid) { + kfree(mid); + return NULL; + } + set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); ether_addr_copy(mid->addr, addr); mid->fid = fid; @@ -1260,12 +1269,16 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, return mid; } -static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp *mlxsw_sp, +static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_mid *mid) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); if (--mid->ref_count == 0) { list_del(&mid->list); clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); + kfree(mid->ports_in_mid); kfree(mid); return 1; } @@ -1311,6 +1324,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, } } mid->ref_count++; + set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, mid->ref_count == 1); @@ -1331,7 +1345,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, return 0; err_out: - __mlxsw_sp_mc_dec_ref(mlxsw_sp, mid); + __mlxsw_sp_mc_dec_ref(mlxsw_sp_port, mid); return err; } @@ -1437,7 +1451,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to remove port from SMID\n"); mid_idx = mid->mid; - if (__mlxsw_sp_mc_dec_ref(mlxsw_sp, mid)) { + if (__mlxsw_sp_mc_dec_ref(mlxsw_sp_port, mid)) { err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, mid_idx, false); if (err) From 0161b9505ab59d4bfc0de66073c9240d1b05040d Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:03 +0200 Subject: [PATCH 0115/2177] mlxsw: spectrum_switchdev: Remove reference count from mid Since there is a bitmap for the ports registered to each mid, there is no need for a ref count, since it will always be the number of set bits in this bitmap. Any check of the ref count was replaced with checking if the bitmap is empty. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 - .../mellanox/mlxsw/spectrum_switchdev.c | 20 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 0424bee88407..6fd0afe4b7a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -94,7 +94,6 @@ struct mlxsw_sp_mid { unsigned char addr[ETH_ALEN]; u16 fid; u16 mid; - unsigned int ref_count; unsigned long *ports_in_mid; /* bits array */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 0fde16a22b72..cb2275ed42ab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1263,19 +1263,19 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, ether_addr_copy(mid->addr, addr); mid->fid = fid; mid->mid = mid_idx; - mid->ref_count = 0; list_add_tail(&mid->list, &mlxsw_sp->bridge->mids_list); return mid; } -static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_mid *mid) +static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mid *mid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); - if (--mid->ref_count == 0) { + if (bitmap_empty(mid->ports_in_mid, + mlxsw_core_max_ports(mlxsw_sp->core))) { list_del(&mid->list); clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); kfree(mid->ports_in_mid); @@ -1296,6 +1296,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; + bool is_new_mid = false; u16 fid_index; int err = 0; @@ -1322,18 +1323,17 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; } + is_new_mid = true; } - mid->ref_count++; set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, - mid->ref_count == 1); + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, is_new_mid); if (err) { netdev_err(dev, "Unable to set SMID\n"); goto err_out; } - if (mid->ref_count == 1) { + if (is_new_mid) { err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, mid->mid, true); if (err) { @@ -1345,7 +1345,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, return 0; err_out: - __mlxsw_sp_mc_dec_ref(mlxsw_sp_port, mid); + mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); return err; } @@ -1451,7 +1451,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to remove port from SMID\n"); mid_idx = mid->mid; - if (__mlxsw_sp_mc_dec_ref(mlxsw_sp_port, mid)) { + if (mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid)) { err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, mid_idx, false); if (err) From b80888a9194f3aecd5edf6a7ede5c23d77bade8b Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:04 +0200 Subject: [PATCH 0116/2177] mlxsw: spectrum_switchdev: Save mids list per bridge device Instead of saving all the mids in the same list, save them per vlan device. This change allows a more efficient mid find. 
Also, in the next patches, there will be added a lot of loops over all the mids in bridge device for multicast disable, mrouter change and ndb flush. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index cb2275ed42ab..2ba8a44e1933 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -67,7 +67,6 @@ struct mlxsw_sp_bridge { u32 ageing_time; bool vlan_enabled_exists; struct list_head bridges_list; - struct list_head mids_list; DECLARE_BITMAP(mids_bitmap, MLXSW_SP_MID_MAX); const struct mlxsw_sp_bridge_ops *bridge_8021q_ops; const struct mlxsw_sp_bridge_ops *bridge_8021d_ops; @@ -77,6 +76,7 @@ struct mlxsw_sp_bridge_device { struct net_device *dev; struct list_head list; struct list_head ports_list; + struct list_head mids_list; u8 vlan_enabled:1, multicast_enabled:1; const struct mlxsw_sp_bridge_ops *ops; @@ -161,6 +161,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, } else { bridge_device->ops = bridge->bridge_8021d_ops; } + INIT_LIST_HEAD(&bridge_device->mids_list); list_add(&bridge_device->list, &bridge->bridges_list); return bridge_device; @@ -170,10 +171,17 @@ static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { + struct mlxsw_sp_mid *mid, *tmp; + list_del(&bridge_device->list); if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; WARN_ON(!list_empty(&bridge_device->ports_list)); + list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) { + list_del(&mid->list); + clear_bit(mid->mid, bridge->mids_bitmap); + kfree(mid); + } kfree(bridge_device); } @@ -1221,22 +1229,25 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, return err; } -static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, - const unsigned char *addr, - u16 fid) +static struct +mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device, + const unsigned char *addr, + u16 fid) { struct mlxsw_sp_mid *mid; - list_for_each_entry(mid, &mlxsw_sp->bridge->mids_list, list) { + list_for_each_entry(mid, &bridge_device->mids_list, list) { if (ether_addr_equal(mid->addr, addr) && mid->fid == fid) return mid; } return NULL; } -static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, - const unsigned char *addr, - u16 fid) +static struct +mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + const unsigned char *addr, + u16 fid) { struct mlxsw_sp_mid *mid; size_t alloc_size; @@ -1263,7 +1274,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, ether_addr_copy(mid->addr, addr); mid->fid = fid; mid->mid = mid_idx; - list_add_tail(&mid->list, &mlxsw_sp->bridge->mids_list); + list_add_tail(&mid->list, &bridge_device->mids_list); return mid; } @@ -1316,9 +1327,10 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index); + mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index); if (!mid) { - mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid_index); + mid = 
__mlxsw_sp_mc_alloc(mlxsw_sp, bridge_device, mdb->addr, + fid_index); if (!mid) { netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; @@ -1440,7 +1452,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index); + mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index); if (!mid) { netdev_err(dev, "Unable to remove port from MC DB\n"); return -EINVAL; @@ -1995,17 +2007,6 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) } -static void mlxsw_sp_mids_fini(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_mid *mid, *tmp; - - list_for_each_entry_safe(mid, tmp, &mlxsw_sp->bridge->mids_list, list) { - list_del(&mid->list); - clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); - kfree(mid); - } -} - int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_bridge *bridge; @@ -2017,7 +2018,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) bridge->mlxsw_sp = mlxsw_sp; INIT_LIST_HEAD(&mlxsw_sp->bridge->bridges_list); - INIT_LIST_HEAD(&mlxsw_sp->bridge->mids_list); bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops; bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops; @@ -2028,7 +2028,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_fdb_fini(mlxsw_sp); - mlxsw_sp_mids_fini(mlxsw_sp); WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list)); kfree(mlxsw_sp->bridge); } From 5f9abc597cdd7a63433b7ebfcf26ee2746a76638 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:05 +0200 Subject: [PATCH 0117/2177] mlxsw: spectrum_switchdev: Break smid write function Break the smid write function into two, one that cleans the ports that might be still written there and one that changes an exiting mid entry. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 2ba8a44e1933..09ead97d9442 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1190,7 +1190,7 @@ mlxsw_sp_port_fdb_set(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, - u16 fid, u16 mid, bool adding) + u16 fid, u16 mid_idx, bool adding) { char *sfd_pl; int err; @@ -1201,16 +1201,16 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid, - MLXSW_REG_SFD_REC_ACTION_NOP, mid); + MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; } -static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, - bool add, bool clear_all_ports) +/* clean the an entry from the HW and write there a full new entry */ +static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, + u16 mid_idx) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *smid_pl; int err, i; @@ -1218,12 +1218,29 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, if (!smid_pl) return -ENOMEM; - mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add); - if (clear_all_ports) { - for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) - if (mlxsw_sp->ports[i]) - mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); + mlxsw_reg_smid_pack(smid_pl, mid_idx, 0, false); + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { + if (mlxsw_sp->ports[i]) + mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); + kfree(smid_pl); + return err; +} + +static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 mid_idx, bool add) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *smid_pl; + int err; + + smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL); + if (!smid_pl) + return -ENOMEM; + + mlxsw_reg_smid_pack(smid_pl, mid_idx, mlxsw_sp_port->local_port, add); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); kfree(smid_pl); return err; @@ -1336,10 +1353,11 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOMEM; } is_new_mid = true; + mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid->mid); } set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, is_new_mid); + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true); if (err) { netdev_err(dev, "Unable to set SMID\n"); goto err_out; @@ -1458,7 +1476,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, return -EINVAL; } - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false, false); + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); if (err) netdev_err(dev, "Unable to remove port from SMID\n"); From 73b433e803d2a3547ee38d1fb2a0bc6f3b03a6d9 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:06 +0200 Subject: [PATCH 0118/2177] mlxsw: spectrum_switchdev: Attach mid id allocation to HW write Attach mid getting and releasing mid id to the HW write / remove, and add a flag 
to indicate whether the mid is in the HW. It is done because mid id is also HW index to this mid. This change allows adding in the following patches the ability to have a mid in the mdb cache but not in the HW. It will be useful for being able to disable the multicast. It means that the mdb is being written / delete to the HW in the mid allocation / removing function, not after them. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../mellanox/mlxsw/spectrum_switchdev.c | 90 ++++++++++++------- 2 files changed, 57 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 6fd0afe4b7a3..e907ec446a73 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -94,6 +94,7 @@ struct mlxsw_sp_mid { unsigned char addr[ETH_ALEN]; u16 fid; u16 mid; + bool in_hw; unsigned long *ports_in_mid; /* bits array */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 09ead97d9442..9dd05d87b662 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1260,6 +1260,42 @@ mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device, return NULL; } +static bool +mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mid *mid) +{ + u16 mid_idx; + int err; + + mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, + MLXSW_SP_MID_MAX); + if (mid_idx == MLXSW_SP_MID_MAX) + return false; + + mid->mid = mid_idx; + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx); + if (err) + return false; + + err = mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid_idx, + true); + if (err) + return false; + + set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); + mid->in_hw = true; + return true; +} + +static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mid *mid) +{ + clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); + mid->in_hw = false; + return mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid->mid, + false); +} + static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_device *bridge_device, @@ -1268,12 +1304,6 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_mid *mid; size_t alloc_size; - u16 mid_idx; - - mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, - MLXSW_SP_MID_MAX); - if (mid_idx == MLXSW_SP_MID_MAX) - return NULL; mid = kzalloc(sizeof(*mid), GFP_KERNEL); if (!mid) @@ -1281,36 +1311,43 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, alloc_size = sizeof(unsigned long) * BITS_TO_LONGS(mlxsw_core_max_ports(mlxsw_sp->core)); - mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL); - if (!mid->ports_in_mid) { - kfree(mid); - return NULL; - } - set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); + mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL); + if (!mid->ports_in_mid) + goto err_ports_in_mid_alloc; + ether_addr_copy(mid->addr, addr); mid->fid = fid; - mid->mid = mid_idx; + mid->in_hw = false; + if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid)) + goto err_write_mdb_entry; + list_add_tail(&mid->list, &bridge_device->mids_list); return mid; + +err_write_mdb_entry: + kfree(mid->ports_in_mid); +err_ports_in_mid_alloc: + kfree(mid); + return NULL; } 
static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_mid *mid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err = 0; clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); if (bitmap_empty(mid->ports_in_mid, mlxsw_core_max_ports(mlxsw_sp->core))) { + err = mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); list_del(&mid->list); - clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); kfree(mid->ports_in_mid); kfree(mid); - return 1; } - return 0; + return err; } static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -1324,7 +1361,6 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; - bool is_new_mid = false; u16 fid_index; int err = 0; @@ -1352,8 +1388,6 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; } - is_new_mid = true; - mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid->mid); } set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); @@ -1363,15 +1397,6 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, goto err_out; } - if (is_new_mid) { - err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, - mid->mid, true); - if (err) { - netdev_err(dev, "Unable to set MC SFD\n"); - goto err_out; - } - } - return 0; err_out: @@ -1481,12 +1506,9 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to remove port from SMID\n"); mid_idx = mid->mid; - if (mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid)) { - err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, - mid_idx, false); - if (err) - netdev_err(dev, "Unable to remove MC SFD\n"); - } + err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); + if (err) + netdev_err(dev, "Unable to remove MC SFD\n"); return err; } From 061e55bfb83e632afcd34130bb19fe7a32325b02 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:07 +0200 Subject: [PATCH 0119/2177] mlxsw: spectrum_switchdev: Break mid deletion into two function Break mid deletion into two function, so it will be possible in the future to delete a mid entry for other reasons then switchdev command (like port deletion). Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 9dd05d87b662..7f622de6331c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1468,6 +1468,25 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int +__mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_mid *mid) +{ + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); + + err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); + if (err) + netdev_err(dev, "Unable to remove MC SFD\n"); + + return err; +} + static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_mdb *mdb) { @@ -1479,8 +1498,6 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; u16 fid_index; - u16 mid_idx; - int err = 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); if (!bridge_port) @@ -1501,16 +1518,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, return -EINVAL; } - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); - - mid_idx = mid->mid; - err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); - if (err) - netdev_err(dev, "Unable to remove MC SFD\n"); - - return err; + return __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid); } static int mlxsw_sp_port_obj_del(struct net_device *dev, From 846fd8a0e7dcd9f455a86dc17ddf0a51c124f9c0 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:08 +0200 Subject: [PATCH 0120/2177] mlxsw: spectrum_switchdev: Don't write mids to the HW when mc is disabled Don't write multicast related data to the HW when mc is disabled. Also, don't allocate mid id to new mids (so the remove function could know that they weren't wrote to the HW) Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7f622de6331c..cea257a77d09 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1290,6 +1290,9 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mid *mid) { + if (!mid->in_hw) + return 0; + clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); mid->in_hw = false; return mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid->mid, @@ -1319,11 +1322,15 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, ether_addr_copy(mid->addr, addr); mid->fid = fid; mid->in_hw = false; + + if (!bridge_device->multicast_enabled) + goto out; + if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid)) goto err_write_mdb_entry; +out: list_add_tail(&mid->list, &bridge_device->mids_list); - return mid; err_write_mdb_entry: @@ -1391,6 +1398,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, } set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); + if (!bridge_device->multicast_enabled) + return 0; + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true); if (err) { netdev_err(dev, "Unable to set SMID\n"); @@ -1476,9 +1486,12 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *dev = mlxsw_sp_port->dev; int err; - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); + if (bridge_port->bridge_device->multicast_enabled) { + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); + + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); + } err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); if (err) From 2e3496cd3488729200ff0f1c6381b7016ecd41bd Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:09 +0200 Subject: [PATCH 0121/2177] mlxsw: spectrum_switchdev: Disable mdb when mc is disabled Remove all the mdb entries from the HW when mc is being disabled and re-write them when it is being enabled. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index cea257a77d09..79806af87b93 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -121,6 +121,11 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_port *bridge_port, u16 fid_index); +static void +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_device + *bridge_device); + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, const struct net_device *br_dev) @@ -757,6 +762,12 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, if (!bridge_device) return 0; + if (bridge_device->multicast_enabled != !mc_disabled) { + bridge_device->multicast_enabled = !mc_disabled; + mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp_port, + bridge_device); + } + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; bool member = mc_disabled ? true : bridge_port->mrouter; @@ -1207,9 +1218,8 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, return err; } -/* clean the an entry from the HW and write there a full new entry */ -static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, - u16 mid_idx) +static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx, + long *ports_bitmap) { char *smid_pl; int err, i; @@ -1224,6 +1234,9 @@ static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } + for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core)) + mlxsw_reg_smid_port_set(smid_pl, i, 1); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); kfree(smid_pl); return err; @@ -1273,7 +1286,8 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, return false; mid->mid = mid_idx; - err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx); + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, + mid->ports_in_mid); if (err) return false; @@ -1414,6 +1428,25 @@ err_out: return err; } +static void +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_device + *bridge_device) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_mid *mid; + bool mc_enabled; + + mc_enabled = bridge_device->multicast_enabled; + + list_for_each_entry(mid, &bridge_device->mids_list, list) { + if (mc_enabled) + mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid); + else + mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); + } +} + static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) From 218a8f8a6379bfd359e58f369b7b7660cd12e865 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:10 +0200 Subject: [PATCH 0122/2177] mlxsw: spectrum_switchdev: Use generic mc flood function Use the generic mc flood function to decide whether to flood mc to a port when mc is being enabled / disabled. Move this function in the file to avoid forward declaration. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 79806af87b93..19ac206879ff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -742,6 +742,14 @@ out: return 0; } +static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) +{ + const struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = bridge_port->bridge_device; + return !bridge_device->multicast_enabled ? true : bridge_port->mrouter; +} + static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, struct net_device *orig_dev, @@ -770,7 +778,7 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; - bool member = mc_disabled ? true : bridge_port->mrouter; + bool member = mlxsw_sp_mc_flood(bridge_port); err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, @@ -829,14 +837,6 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, return err; } -static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) -{ - const struct mlxsw_sp_bridge_device *bridge_device; - - bridge_device = bridge_port->bridge_device; - return !bridge_device->multicast_enabled ? true : bridge_port->mrouter; -} - static int mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, struct mlxsw_sp_bridge_port *bridge_port) From 9dad51bdaa4b1846bd9d5307b54dca74efa555ea Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:11 +0200 Subject: [PATCH 0123/2177] mlxsw: spectrum_switchdev: Flood mc when mc is disabled by user flag When multicast is disabled, flood mc packets only to port that are marked BR_MCAST_FLOOD (instead to all). Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 19ac206879ff..50c4d7c735df 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -262,7 +262,8 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, bridge_port->dev = brport_dev; bridge_port->bridge_device = bridge_device; bridge_port->stp_state = BR_STATE_DISABLED; - bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC; + bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC | + BR_MCAST_FLOOD; INIT_LIST_HEAD(&bridge_port->vlans_list); list_add(&bridge_port->list, &bridge_device->ports_list); bridge_port->ref_count = 1; @@ -468,7 +469,8 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev, &attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: - attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD; + attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD | + BR_MCAST_FLOOD; break; default: return -EOPNOTSUPP; @@ -653,8 +655,18 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; - memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags)); + if (bridge_port->bridge_device->multicast_enabled) + goto out; + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, + MLXSW_SP_FLOOD_TYPE_MC, + brport_flags & + BR_MCAST_FLOOD); + if (err) + return err; + +out: + memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags)); return 0; } @@ -747,7 +759,8 @@ static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) const struct mlxsw_sp_bridge_device *bridge_device; bridge_device = bridge_port->bridge_device; - return !bridge_device->multicast_enabled ? true : bridge_port->mrouter; + return bridge_device->multicast_enabled ? bridge_port->mrouter : + bridge_port->flags & BR_MCAST_FLOOD; } static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, From bb5355b27c9da3786a2b5e1583c9d64f492ac7ad Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:12 +0200 Subject: [PATCH 0124/2177] mlxsw: spectrum_switchdev: Flush the mdb when a port is being removed When a port is being removed from a bridge, flush the bridge mdb to remove the mids of that port. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 50c4d7c735df..bc0787312a06 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -121,6 +121,10 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_port *bridge_port, u16 fid_index); +static void +mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port); + static void mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_device @@ -176,17 +180,11 @@ static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { - struct mlxsw_sp_mid *mid, *tmp; - list_del(&bridge_device->list); if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; WARN_ON(!list_empty(&bridge_device->ports_list)); - list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) { - list_del(&mid->list); - clear_bit(mid->mid, bridge->mids_bitmap); - kfree(mid); - } + WARN_ON(!list_empty(&bridge_device->mids_list)); kfree(bridge_device); } @@ -987,24 +985,28 @@ mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) struct mlxsw_sp_bridge_vlan *bridge_vlan; struct mlxsw_sp_bridge_port *bridge_port; u16 vid = mlxsw_sp_port_vlan->vid; - bool last; + bool last_port, last_vlan; if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021Q && mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021D)) return; bridge_port = mlxsw_sp_port_vlan->bridge_port; + last_vlan = list_is_singular(&bridge_port->vlans_list); bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid); - last = list_is_singular(&bridge_vlan->port_vlan_list); + last_port = list_is_singular(&bridge_vlan->port_vlan_list); list_del(&mlxsw_sp_port_vlan->bridge_vlan_node); mlxsw_sp_bridge_vlan_put(bridge_vlan); mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED); mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); - if (last) + if (last_port) mlxsw_sp_bridge_port_fdb_flush(mlxsw_sp_port->mlxsw_sp, bridge_port, mlxsw_sp_fid_index(fid)); + if (last_vlan) + mlxsw_sp_bridge_port_mdb_flush(mlxsw_sp_port, bridge_port); + mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan); mlxsw_sp_bridge_port_put(mlxsw_sp_port->mlxsw_sp->bridge, bridge_port); @@ -1580,6 +1582,23 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, return __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid); } +static void +mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_mid *mid, *tmp; + + bridge_device = bridge_port->bridge_device; + + list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) { + if (test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) { + __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, + mid); + } + } +} + static int mlxsw_sp_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj) { From 3fba877cb68cfbc1826dd4abc7b1a8fe862adb2a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:13 +0200 Subject: [PATCH 0125/2177] mlxsw: spectrum_switchdev: Flood all mc packets to mrouter ports When mc is enabled, whenever a 
mc packet doesn't hit any mdb entry it is being flood to the ports marked as mrouters. However, all mc packets should be flooded to them even if they match an entry in the mdb. This patch adds the mrouter ports to every mdb entry that is being written to the HW. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 65 +++++++++++++++++-- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index bc0787312a06..146beaa6b2da 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1288,10 +1288,55 @@ mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device, return NULL; } +static void +mlxsw_sp_bridge_port_get_ports_bitmap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_port *bridge_port, + unsigned long *ports_bitmap) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u64 max_lag_members, i; + int lag_id; + + if (!bridge_port->lagged) { + set_bit(bridge_port->system_port, ports_bitmap); + } else { + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + lag_id = bridge_port->lag_id; + for (i = 0; i < max_lag_members; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, + lag_id, i); + if (mlxsw_sp_port) + set_bit(mlxsw_sp_port->local_port, + ports_bitmap); + } + } +} + +static void +mlxsw_sp_mc_get_mrouters_bitmap(unsigned long *flood_bitmap, + struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_bridge_port *bridge_port; + + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { + if (bridge_port->mrouter) { + mlxsw_sp_bridge_port_get_ports_bitmap(mlxsw_sp, + bridge_port, + flood_bitmap); + } + } +} + static bool mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_mid *mid) + struct mlxsw_sp_mid *mid, + struct mlxsw_sp_bridge_device *bridge_device) { + long *flood_bitmap; + int num_of_ports; + int alloc_size; u16 mid_idx; int err; @@ -1300,9 +1345,18 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, if (mid_idx == MLXSW_SP_MID_MAX) return false; + num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core); + alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports); + flood_bitmap = kzalloc(alloc_size, GFP_KERNEL); + if (!flood_bitmap) + return false; + + bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports); + mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp); + mid->mid = mid_idx; - err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, - mid->ports_in_mid); + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap); + kfree(flood_bitmap); if (err) return false; @@ -1355,7 +1409,7 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, if (!bridge_device->multicast_enabled) goto out; - if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid)) + if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device)) goto err_write_mdb_entry; out: @@ -1456,7 +1510,8 @@ mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, list_for_each_entry(mid, &bridge_device->mids_list, list) { if (mc_enabled) - mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid); + mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, + bridge_device); else mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); } From 3ddda1178e41bbe26bb5c6ebf66ae3d0a87ac410 Mon Sep 17 00:00:00 2001 From: Nogah Frankel 
Date: Wed, 20 Sep 2017 16:15:14 +0200 Subject: [PATCH 0126/2177] mlxsw: spectrum_switchdev: Update the mdb of mrouter port change Whenever a port starts / stops being mrouter, update all the mdb entries in the HW to flood / stop flooding mc packets there. The change should happen only if the port is not in the mid. (If it is, the mid should flood mc packets to this port anyway) Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 146beaa6b2da..bf1a17557fb3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -130,6 +130,11 @@ mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_device *bridge_device); +static void +mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool add); + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, const struct net_device *br_dev) @@ -747,6 +752,8 @@ static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; + mlxsw_sp_port_mrouter_update_mdb(mlxsw_sp_port, bridge_port, + is_port_mrouter); out: bridge_port->mrouter = is_port_mrouter; return 0; @@ -1517,6 +1524,22 @@ mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, } } +static void +mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool add) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_mid *mid; + + bridge_device = bridge_port->bridge_device; + + list_for_each_entry(mid, &bridge_device->mids_list, list) { + if (!test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) + mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, add); + } +} + static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) From 0166277706e57779f06b741d25a9e86d99726e2a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:15 +0200 Subject: [PATCH 0127/2177] mlxsw: spectrum_switchdev: Remove mrouter flood in mdb flush In mdb flush the port is being removed from all the mids it is registered to. But if the port is mrouter, all the mids floods to it. This patch remove mrouter ports from mids it is not registered to in the mdb flush. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index bf1a17557fb3..459cedc23c47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1673,6 +1673,9 @@ mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, if (test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) { __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid); + } else if (bridge_device->multicast_enabled && + bridge_port->mrouter) { + mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); } } } From ded711c87a0411a7f3f56f8c575d7b642ee0110e Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:16 +0200 Subject: [PATCH 0128/2177] mlxsw: spectrum_switchdev: Consider mrouter status for mdb changes When a mrouter is registered or leaves a mid, don't update the HW. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 459cedc23c47..0f9eac5f4ebf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1491,6 +1491,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, if (!bridge_device->multicast_enabled) return 0; + if (bridge_port->mrouter) + return 0; + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true); if (err) { netdev_err(dev, "Unable to set SMID\n"); @@ -1613,10 +1616,12 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, int err; if (bridge_port->bridge_device->multicast_enabled) { - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); - - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); + if (bridge_port->bridge_device->multicast_enabled) { + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, + false); + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); + } } err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); From 79af1f866193de29e65a4dba7d0dab14b0c0ff93 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Jun 2017 12:20:29 +0200 Subject: [PATCH 0129/2177] mac80211: avoid allocating TXQs that won't be used For AP_VLAN and monitor interfaces we'll never use the TXQs we allocated, so avoid doing so. 
Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f75029abf728..2619daa29961 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1772,7 +1772,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sizeof(void *)); int txq_size = 0; - if (local->ops->wake_tx_queue) + if (local->ops->wake_tx_queue && + type != NL80211_IFTYPE_AP_VLAN && + type != NL80211_IFTYPE_MONITOR) txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; From 2512b1b18d0748d867bb22387db7c86b903291ad Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sat, 5 Aug 2017 11:44:31 +0300 Subject: [PATCH 0130/2177] mac80211: extend ieee80211_ie_split to support EXTENSION Current ieee80211_ie_split() implementation doesn't account for elements that are sub-elements of the EXTENSION IE. To extend support to these IEs as well, treat the WLAN_EID_EXTENSION ids in the %ids array as indicating that the next id in the array is a sub-element of the EXTENSION IE. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 +++-- net/wireless/util.c | 54 +++++++++++++++++++++++++++++++++++------- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f12fa5245a45..aa9d993e519a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5934,7 +5934,8 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before - * the split + * the split. A WLAN_EID_EXTENSION value means that the next + * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @after_ric: array IE types that come after the RIC element * @n_after_ric: size of the @after_ric array @@ -5965,7 +5966,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before - * the split + * the split. A WLAN_EID_EXTENSION value means that the next + * EID in the list is a sub-element of the EXTENSION IE. 
* @n_ids: the size of the element ID array * @offset: offset where to start splitting in the buffer * diff --git a/net/wireless/util.c b/net/wireless/util.c index bcb1284c3415..4aab793c2f00 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1367,13 +1367,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, } EXPORT_SYMBOL(cfg80211_get_p2p_attr); -static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) +static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext) { int i; - for (i = 0; i < n_ids; i++) - if (ids[i] == id) + /* Make sure array values are legal */ + if (WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION)) + return false; + + i = 0; + while (i < n_ids) { + if (ids[i] == WLAN_EID_EXTENSION) { + if (id_ext && (ids[i + 1] == id)) + return true; + + i += 2; + continue; + } + + if (ids[i] == id && !id_ext) return true; + + i++; + } return false; } @@ -1403,14 +1419,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, { size_t pos = offset; - while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { + while (pos < ielen) { + u8 ext = 0; + + if (ies[pos] == WLAN_EID_EXTENSION) + ext = 2; + if ((pos + ext) >= ielen) + break; + + if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext], + ies[pos] == WLAN_EID_EXTENSION)) + break; + if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { pos = skip_ie(ies, ielen, pos); - while (pos < ielen && - !ieee80211_id_in_list(after_ric, n_after_ric, - ies[pos])) - pos = skip_ie(ies, ielen, pos); + while (pos < ielen) { + if (ies[pos] == WLAN_EID_EXTENSION) + ext = 2; + else + ext = 0; + + if ((pos + ext) >= ielen) + break; + + if (!ieee80211_id_in_list(after_ric, + n_after_ric, + ies[pos + ext], + ext == 2)) + pos = skip_ie(ies, ielen, pos); + } } else { pos = skip_ie(ies, ielen, pos); } From a7f26d8050c4f172d2dc523aabf45c5cbd9558ac Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 5 Aug 2017 11:44:32 +0300 Subject: [PATCH 0131/2177] mac80211: simplify and clarify IE splitting There's no need to split off IEs from the ones obtained from userspace, if they were already split off, so for example IEs that went before HT don't have to be listed again to go before VHT. Simplify the code here so it's clearer. While at it, also clarify the comments regarding the DMG (60 GHz) elements. 
Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 19 +++++++------------ net/mac80211/util.c | 21 +++++++++------------ 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b8e2709d8de..ee5ca1bc5a20 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -780,11 +780,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) WLAN_EID_SUPPORTED_REGULATORY_CLASSES, WLAN_EID_HT_CAPABILITY, WLAN_EID_BSS_COEX_2040, + /* luckily this is almost always there */ WLAN_EID_EXT_CAPABILITY, WLAN_EID_QOS_TRAFFIC_CAPA, WLAN_EID_TIM_BCAST_REQ, WLAN_EID_INTERWORKING, - /* 60GHz doesn't happen right now */ + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ WLAN_EID_VHT_CAPABILITY, WLAN_EID_OPMODE_NOTIF, }; @@ -811,22 +812,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) /* if present, add any custom IEs that go before VHT */ if (assoc_data->ie_len) { static const u8 before_vht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_EXT_SUPP_RATES, - WLAN_EID_PWR_CAPABILITY, - WLAN_EID_SUPPORTED_CHANNELS, - WLAN_EID_RSN, - WLAN_EID_QOS_CAPA, - WLAN_EID_RRM_ENABLED_CAPABILITIES, - WLAN_EID_MOBILITY_DOMAIN, - WLAN_EID_SUPPORTED_REGULATORY_CLASSES, - WLAN_EID_HT_CAPABILITY, + /* + * no need to list the ones split off before HT + * or generated here + */ WLAN_EID_BSS_COEX_2040, WLAN_EID_EXT_CAPABILITY, WLAN_EID_QOS_TRAFFIC_CAPA, WLAN_EID_TIM_BCAST_REQ, WLAN_EID_INTERWORKING, + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ }; /* RIC already taken above, so no need to handle here anymore */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6aef6793d052..bfecc3e86318 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* insert custom IEs that go before HT */ if (ie && ie_len) { static const u8 before_ht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_REQUEST, - WLAN_EID_EXT_SUPP_RATES, + /* + * no need to list the ones split off already + * (or generated here) + */ WLAN_EID_DS_PARAMS, WLAN_EID_SUPPORTED_REGULATORY_CLASSES, }; @@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* insert custom IEs that go before VHT */ if (ie && ie_len) { static const u8 before_vht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_REQUEST, - WLAN_EID_EXT_SUPP_RATES, - WLAN_EID_DS_PARAMS, - WLAN_EID_SUPPORTED_REGULATORY_CLASSES, - WLAN_EID_HT_CAPABILITY, + /* + * no need to list the ones split off already + * (or generated here) + */ WLAN_EID_BSS_COEX_2040, WLAN_EID_EXT_CAPABILITY, WLAN_EID_SSID_LIST, WLAN_EID_CHANNEL_USAGE, WLAN_EID_INTERWORKING, WLAN_EID_MESH_ID, - /* 60 GHz can't happen here right now */ + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ }; noffset = ieee80211_ie_split(ie, ie_len, before_vht, ARRAY_SIZE(before_vht), From ffa4629e0c2b8b015f5fa174149c6dd269b4142c Mon Sep 17 00:00:00 2001 From: Tova Mussai Date: Sat, 5 Aug 2017 11:44:38 +0300 Subject: [PATCH 0132/2177] nl80211: return error for invalid center_freq in 40 MHz When NL80211_ATTR_WIPHY_CHANNEL_TYPE is given, nl80211 would parse the channel definition the old way, discarding NL80211_ATTR_CENTER_FREQ1, NL80211_ATTR_CENTER_FREQ2 etc. However, it is possible that user space added both NL80211_ATTR_WIPHY_CHANNEL_TYPE and NL80211_ATTR_CENTER_FREQ1 or NL80211_ATTR_CENTER_FREQ2 assuming that all settings would be honored. 
In such a case, validate that NL80211_ATTR_CENTER_FREQ1 and NL80211_ATTR_CENTER_FREQ2 values match the channel configuration, as otherwise user space would assume that the desired configuration was applied. For example, when trying to start ap with NL80211_ATTR_WIPHY_CHANNEL_TYPE = NL80211_CHAN_HT40MINUS, NL80211_ATTR_WIPHY_FREQ = 5180 and NL80211_ATTR_CENTER_FREQ1 = 5250 without this fix, the ap will start on channel 36 (center_freq1 will be corrected to 5180). With this fix, we will throw an error instead. Signed-off-by: Tova Mussai Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0df8023f480b..66e97136ab44 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2122,6 +2122,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, case NL80211_CHAN_HT40MINUS: cfg80211_chandef_create(chandef, chandef->chan, chantype); + /* user input for center_freq is incorrect */ + if (info->attrs[NL80211_ATTR_CENTER_FREQ1] && + chandef->center_freq1 != nla_get_u32( + info->attrs[NL80211_ATTR_CENTER_FREQ1])) + return -EINVAL; + /* center_freq2 must be zero */ + if (info->attrs[NL80211_ATTR_CENTER_FREQ2] && + nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2])) + return -EINVAL; break; default: return -EINVAL; From 2d23d0736e3a4a0fdb92b8e46ea476639f16aae8 Mon Sep 17 00:00:00 2001 From: Roee Zamir Date: Sun, 6 Aug 2017 11:38:22 +0300 Subject: [PATCH 0133/2177] nl80211: add OCE scan and capability flags Add Optimized Connectivity Experience (OCE) scan and capability flags. Some of them unique to OCE and some are stand alone. And add scan flags to enable/disable them. Signed-off-by: Roee Zamir Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 37 +++++++++- net/wireless/nl80211.c | 137 ++++++++++++++++++++--------------- 2 files changed, 111 insertions(+), 63 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 51626b4175c0..76404d8a8863 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4914,6 +4914,15 @@ enum nl80211_feature_flags { * handshake with 802.1X in station mode (will pass EAP frames to the host * and accept the set_pmk/del_pmk commands), doing it in the host might not * be supported. + * @NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME: Driver is capable of overriding + * the max channel attribute in the FILS request params IE with the + * actual dwell time. + * @NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP: Driver accepts broadcast probe + * response + * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE: Driver supports sending + * the first probe request in each channel at rate of at least 5.5Mbps. + * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports + * probe request tx deferral and suppression * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
@@ -4936,6 +4945,10 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_FILS_SK_OFFLOAD, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X, + NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME, + NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -5012,12 +5025,28 @@ enum nl80211_timeout_reason { * locally administered 1, multicast 0) is assumed. * This flag must not be requested when the feature isn't supported, check * the nl80211 feature flags for the device. + * @NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME: fill the dwell time in the FILS + * request parameters IE in the probe request + * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses + * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at + * rate of at least 5.5M. In case non OCE AP is dicovered in the channel, + * only the first probe req in the channel will be sent in high rate. + * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request + * tx deferral (dot11FILSProbeDelay shall be set to 15ms) + * and suppression (if it has received a broadcast Probe Response frame, + * Beacon frame or FILS Discovery frame from an AP that the STA considers + * a suitable candidate for (re-)association - suitable in terms of + * SSID and/or RSSI */ enum nl80211_scan_flags { - NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, - NL80211_SCAN_FLAG_FLUSH = 1<<1, - NL80211_SCAN_FLAG_AP = 1<<2, - NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3, + NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, + NL80211_SCAN_FLAG_FLUSH = 1<<1, + NL80211_SCAN_FLAG_AP = 1<<2, + NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3, + NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME = 1<<4, + NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 66e97136ab44..2e6f5f4065f9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6619,6 +6619,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev) return regulatory_pre_cac_allowed(wdev->wiphy); } +static int +nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, + void *request, struct nlattr **attrs, + bool is_sched_scan) +{ + u8 *mac_addr, *mac_addr_mask; + u32 *flags; + enum nl80211_feature_flags randomness_flag; + + if (!attrs[NL80211_ATTR_SCAN_FLAGS]) + return 0; + + if (is_sched_scan) { + struct cfg80211_sched_scan_request *req = request; + + randomness_flag = wdev ? 
+ NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR : + NL80211_FEATURE_ND_RANDOM_MAC_ADDR; + flags = &req->flags; + mac_addr = req->mac_addr; + mac_addr_mask = req->mac_addr_mask; + } else { + struct cfg80211_scan_request *req = request; + + randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; + flags = &req->flags; + mac_addr = req->mac_addr; + mac_addr_mask = req->mac_addr_mask; + } + + *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]); + + if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) + return -EOPNOTSUPP; + + if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + int err; + + if (!(wiphy->features & randomness_flag) || + (wdev && wdev->current_bss)) + return -EOPNOTSUPP; + + err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask); + if (err) + return err; + } + + if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE)) + return -EOPNOTSUPP; + + return 0; +} + static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6824,34 +6895,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]); } - if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { - request->flags = nla_get_u32( - info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - if (!(wiphy->features & - NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (wdev->current_bss) { - err = -EOPNOTSUPP; - goto out_free; - } - - err = nl80211_parse_random_mac(info->attrs, - request->mac_addr, - request->mac_addr_mask); - if (err) - goto out_free; - } - } + err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs, + false); + if (err) + goto out_free; request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); @@ -7299,37 +7346,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->ie_len); } - if (attrs[NL80211_ATTR_SCAN_FLAGS]) { - request->flags = nla_get_u32( - attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR; - - if (!wdev) /* must be net-detect */ - flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR; - - if (!(wiphy->features & flg)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (wdev && wdev->current_bss) { - err = -EOPNOTSUPP; - goto out_free; - } - - err = nl80211_parse_random_mac(attrs, request->mac_addr, - request->mac_addr_mask); - if (err) - goto out_free; - 
} - } + err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true); + if (err) + goto out_free; if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY]) request->delay = From 40b0bd24973487272167a09db040a70c053bedbe Mon Sep 17 00:00:00 2001 From: Roee Zamir Date: Sun, 6 Aug 2017 11:38:23 +0300 Subject: [PATCH 0134/2177] mac80211: oce: enable receiving of bcast probe resp One of OCE's optimizations is acception of broadcast probe responses. Accept broadcast probe responses but don't set NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP. Because a device's firmware may filter out the broadcast probe resp - drivers should set this flag. Signed-off-by: Roee Zamir Signed-off-by: Luca Coelho [johannes: make accepting broadcast conditional on the nl80211 scan flag that was added for that specific purpose] Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 47d2ed570470..ef2becaade50 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -7,7 +7,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright 2016 Intel Deutschland GmbH + * Copyright 2016-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local, return bss; } +static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, + u32 scan_flags, const u8 *da) +{ + if (!sdata) + return false; + /* accept broadcast for OCE */ + if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP && + is_broadcast_ether_addr(da)) + return true; + if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR) + return true; + return ether_addr_equal(da, sdata->vif.addr); +} + void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); @@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; + u32 scan_req_flags = 0, sched_scan_req_flags = 0; scan_req = rcu_dereference(local->scan_req); sched_scan_req = rcu_dereference(local->sched_scan_req); - /* ignore ProbeResp to foreign address unless scanning - * with randomised address + if (scan_req) + scan_req_flags = scan_req->flags; + + if (sched_scan_req) + sched_scan_req_flags = sched_scan_req->flags; + + /* ignore ProbeResp to foreign address or non-bcast (OCE) + * unless scanning with randomised address */ - if (!(sdata1 && - (ether_addr_equal(mgmt->da, sdata1->vif.addr) || - scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) && - !(sdata2 && - (ether_addr_equal(mgmt->da, sdata2->vif.addr) || - sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR))) + if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags, + mgmt->da) && + !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags, + mgmt->da)) return; elements = mgmt->u.probe_resp.variable; From 1272c5d89b597995cb10db87dd4a1adc91d36006 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 18 Aug 2017 15:33:56 +0300 Subject: [PATCH 0135/2177] mac80211: add documentation to ieee80211_rx_ba_offl() Add documentation to ieee80211_rx_ba_offl() function and, while at it, rename the bit argument to 
tid, for consistency. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 +++++++- net/mac80211/agg-rx.c | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 885690fa39c8..cc9073e45be9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5441,8 +5441,14 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, */ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); +/** + * ieee80211_manage_rx_ba_offl - helper to queue an RX BA work + * @vif: &struct ieee80211_vif pointer from the add_interface callback + * @addr: station mac address + * @tid: the rx tid + */ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, - unsigned int bit); + unsigned int tid); /** * ieee80211_start_rx_ba_session_offl - start a Rx BA session diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 2849a1fc41c5..88cc1ae935ea 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -459,7 +459,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, - const u8 *addr, unsigned int bit) + const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; @@ -470,7 +470,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, if (!sta) goto unlock; - set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl); + set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl); ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); From 1281103770e909e064edbb22a1115a0c14eca081 Mon Sep 17 00:00:00 2001 From: Ilan peer Date: Wed, 6 Sep 2017 17:18:33 +0300 Subject: [PATCH 0136/2177] mac80211: Simplify locking in ieee80211_sta_tear_down_BA_sessions() Simplify the locking in ieee80211_sta_tear_down_BA_sessions() and lock sta->ampdu_mlme.mtx over the entire function instead of locking/unlocking it for each TID etc. 
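The change depends on the usual mac80211 split between locking wrappers and lock-held helpers; a simplified sketch of that convention (names shortened, shape assumed, not verbatim kernel code) is:

	/* Sketch only: ___stop_ba_session() expects sta->ampdu_mlme.mtx to be
	 * held by the caller, while __stop_ba_session() is the convenience
	 * wrapper that takes it around a single call. Hoisting the mutex in
	 * the teardown loop lets each TID use the inner variant under one
	 * lock/unlock pair.
	 */
	static int __stop_ba_session(struct sta_info *sta, int tid,
				     enum ieee80211_agg_stop_reason reason)
	{
		int ret;

		mutex_lock(&sta->ampdu_mlme.mtx);
		ret = ___stop_ba_session(sta, tid, reason); /* mtx held here */
		mutex_unlock(&sta->ampdu_mlme.mtx);

		return ret;
	}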
Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index d6d0b4201e40..41f5e48f8021 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -290,13 +290,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, { int i; + mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - __ieee80211_stop_tx_ba_session(sta, i, reason); - __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, - reason != AGG_STOP_DESTROY_STA && - reason != AGG_STOP_PEER_REQUEST); + ___ieee80211_stop_tx_ba_session(sta, i, reason); + ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); } + mutex_unlock(&sta->ampdu_mlme.mtx); /* stopping might queue the work again - so cancel only afterwards */ cancel_work_sync(&sta->ampdu_mlme.work); From 4c121fd690d9c465e4cb09b7859adfdd6a0aee1d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 Sep 2017 11:54:46 +0200 Subject: [PATCH 0137/2177] mac80211: use offsetofend() This was created using the following spatch: @find@ type S; expression M, M2; position p; @@ offsetof(S, M) + sizeof(M2)@p @script:python@ m << find.M; m2 << find.M2; @@ if not m2.endswith('-> ' + m): cocci.include_match(False) @change@ type find.S; expression find.M, find.M2; position find.p; @@ -offsetof(S, M) + sizeof(M2)@p +offsetofend(S, M) Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 3 +-- net/mac80211/mesh_hwmp.c | 8 ++++---- net/mac80211/mesh_plink.c | 3 +-- net/mac80211/util.c | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index a550c707cd8a..7a76c4a6df30 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -675,8 +675,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) enum nl80211_band band; u8 *pos; struct ieee80211_sub_if_data *sdata; - int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + - sizeof(mgmt->u.beacon); + int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); rcu_read_lock(); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d8bbd0d2225a..146ec6c0f12f 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + - sizeof(mgmt->u.action.u.mesh_action); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.mesh_action); skb = dev_alloc_skb(local->tx_headroom + hdr_len + @@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + - sizeof(mgmt->u.action.u.mesh_action); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.mesh_action); if (time_before(jiffies, ifmsh->next_perr)) return -EAGAIN; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index f69c6c38ca43..dc8e10f87207 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct 
ieee80211_sub_if_data *sdata, bool include_plid = false; u16 peering_proto = 0; u8 *pos, ie_len = 4; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + - sizeof(mgmt->u.action.u.self_prot); + int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; skb = dev_alloc_skb(local->tx_headroom + diff --git a/net/mac80211/util.c b/net/mac80211/util.c index bfecc3e86318..d57e5f6bd8b6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2977,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; struct ieee80211_local *local = sdata->local; int freq; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) + - sizeof(mgmt->u.action.u.chan_switch); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.chan_switch); u8 *pos; if (sdata->vif.type != NL80211_IFTYPE_ADHOC && From 62b093b375e3c10ff39896f2a172146cf5df529f Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 18 Sep 2017 15:56:51 +0200 Subject: [PATCH 0138/2177] mac80211_hwsim: use dyndbg for debug messages The mac80211_hwsim doesn't offer a way to disable the debugging output. Unfortunately, it's pretty chatty, dumping a lot of stuff into the message buffer. This patch changes it to use dyndbg for controlling the debug output. It's disabled by default, but can be enabled by a module parameter (1), at runtime (2) or persisted in modprobe.conf (3). (1) modprobe mac80211_hwsim dyndbg=+p (2) echo "module mac80211_hwsim +p" >/sys/kernel/debug/dynamic_debug/control (3) echo "options mac80211_hwsim dyndbg=+p" >>/etc/modprobe.d/my.conf Signed-off-by: Lubomir Rintel Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 192 +++++++++++++------------- 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6467ffac9811..ec2f4c31425a 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -396,7 +396,7 @@ static int mac80211_hwsim_vendor_cmd_test(struct wiphy *wiphy, if (!tb[QCA_WLAN_VENDOR_ATTR_TEST]) return -EINVAL; val = nla_get_u32(tb[QCA_WLAN_VENDOR_ATTR_TEST]); - wiphy_debug(wiphy, "%s: test=%u\n", __func__, val); + wiphy_dbg(wiphy, "%s: test=%u\n", __func__, val); /* Send a vendor event as a test. 
Note that this would not normally be * done within a command handler, but rather, based on some other @@ -643,9 +643,9 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) if (!vp->assoc) return; - wiphy_debug(data->hw->wiphy, - "%s: send PS-Poll to %pM for aid %d\n", - __func__, vp->bssid, vp->aid); + wiphy_dbg(data->hw->wiphy, + "%s: send PS-Poll to %pM for aid %d\n", + __func__, vp->bssid, vp->aid); skb = dev_alloc_skb(sizeof(*pspoll)); if (!skb) @@ -674,9 +674,9 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, if (!vp->assoc) return; - wiphy_debug(data->hw->wiphy, - "%s: send data::nullfunc to %pM ps=%d\n", - __func__, vp->bssid, ps); + wiphy_dbg(data->hw->wiphy, + "%s: send data::nullfunc to %pM ps=%d\n", + __func__, vp->bssid, ps); skb = dev_alloc_skb(sizeof(*hdr)); if (!skb) @@ -1034,7 +1034,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, msg_head = genlmsg_put(skb, 0, 0, &hwsim_genl_family, 0, HWSIM_CMD_FRAME); if (msg_head == NULL) { - printk(KERN_DEBUG "mac80211_hwsim: problem with msg_head\n"); + pr_debug("mac80211_hwsim: problem with msg_head\n"); goto nla_put_failure; } @@ -1093,7 +1093,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, nla_put_failure: nlmsg_free(skb); err_free_txskb: - printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__); + pr_debug("mac80211_hwsim: error occurred in %s\n", __func__); ieee80211_free_txskb(hw, my_skb); data->tx_failed++; } @@ -1347,7 +1347,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, } if (data->idle && !data->tmp_chan) { - wiphy_debug(hw->wiphy, "Trying to TX when idle - reject\n"); + wiphy_dbg(hw->wiphy, "Trying to TX when idle - reject\n"); ieee80211_free_txskb(hw, skb); return; } @@ -1408,7 +1408,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, static int mac80211_hwsim_start(struct ieee80211_hw *hw) { struct mac80211_hwsim_data *data = hw->priv; - wiphy_debug(hw->wiphy, "%s\n", __func__); + wiphy_dbg(hw->wiphy, "%s\n", __func__); data->started = true; return 0; } @@ -1419,16 +1419,16 @@ static void mac80211_hwsim_stop(struct ieee80211_hw *hw) struct mac80211_hwsim_data *data = hw->priv; data->started = false; tasklet_hrtimer_cancel(&data->beacon_timer); - wiphy_debug(hw->wiphy, "%s\n", __func__); + wiphy_dbg(hw->wiphy, "%s\n", __func__); } static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { - wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", - __func__, ieee80211_vif_type_p2p(vif), - vif->addr); + wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", + __func__, ieee80211_vif_type_p2p(vif), + vif->addr); hwsim_set_magic(vif); vif->cab_queue = 0; @@ -1447,9 +1447,9 @@ static int mac80211_hwsim_change_interface(struct ieee80211_hw *hw, bool newp2p) { newtype = ieee80211_iftype_p2p(newtype, newp2p); - wiphy_debug(hw->wiphy, - "%s (old type=%d, new type=%d, mac_addr=%pM)\n", - __func__, ieee80211_vif_type_p2p(vif), + wiphy_dbg(hw->wiphy, + "%s (old type=%d, new type=%d, mac_addr=%pM)\n", + __func__, ieee80211_vif_type_p2p(vif), newtype, vif->addr); hwsim_check_magic(vif); @@ -1465,9 +1465,9 @@ static int mac80211_hwsim_change_interface(struct ieee80211_hw *hw, static void mac80211_hwsim_remove_interface( struct ieee80211_hw *hw, struct ieee80211_vif *vif) { - wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", - __func__, ieee80211_vif_type_p2p(vif), - vif->addr); + wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", + __func__, ieee80211_vif_type_p2p(vif), + 
vif->addr); hwsim_check_magic(vif); hwsim_clear_magic(vif); } @@ -1589,23 +1589,23 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) int idx; if (conf->chandef.chan) - wiphy_debug(hw->wiphy, - "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n", - __func__, - conf->chandef.chan->center_freq, - conf->chandef.center_freq1, - conf->chandef.center_freq2, - hwsim_chanwidths[conf->chandef.width], - !!(conf->flags & IEEE80211_CONF_IDLE), - !!(conf->flags & IEEE80211_CONF_PS), - smps_modes[conf->smps_mode]); + wiphy_dbg(hw->wiphy, + "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n", + __func__, + conf->chandef.chan->center_freq, + conf->chandef.center_freq1, + conf->chandef.center_freq2, + hwsim_chanwidths[conf->chandef.width], + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); else - wiphy_debug(hw->wiphy, - "%s (freq=0 idle=%d ps=%d smps=%s)\n", - __func__, - !!(conf->flags & IEEE80211_CONF_IDLE), - !!(conf->flags & IEEE80211_CONF_PS), - smps_modes[conf->smps_mode]); + wiphy_dbg(hw->wiphy, + "%s (freq=0 idle=%d ps=%d smps=%s)\n", + __func__, + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); data->idle = !!(conf->flags & IEEE80211_CONF_IDLE); @@ -1659,7 +1659,7 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw, { struct mac80211_hwsim_data *data = hw->priv; - wiphy_debug(hw->wiphy, "%s\n", __func__); + wiphy_dbg(hw->wiphy, "%s\n", __func__); data->rx_filter = 0; if (*total_flags & FIF_ALLMULTI) @@ -1688,25 +1688,25 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, hwsim_check_magic(vif); - wiphy_debug(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n", - __func__, changed, vif->addr); + wiphy_dbg(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n", + __func__, changed, vif->addr); if (changed & BSS_CHANGED_BSSID) { - wiphy_debug(hw->wiphy, "%s: BSSID changed: %pM\n", - __func__, info->bssid); + wiphy_dbg(hw->wiphy, "%s: BSSID changed: %pM\n", + __func__, info->bssid); memcpy(vp->bssid, info->bssid, ETH_ALEN); } if (changed & BSS_CHANGED_ASSOC) { - wiphy_debug(hw->wiphy, " ASSOC: assoc=%d aid=%d\n", - info->assoc, info->aid); + wiphy_dbg(hw->wiphy, " ASSOC: assoc=%d aid=%d\n", + info->assoc, info->aid); vp->assoc = info->assoc; vp->aid = info->aid; } if (changed & BSS_CHANGED_BEACON_ENABLED) { - wiphy_debug(hw->wiphy, " BCN EN: %d (BI=%u)\n", - info->enable_beacon, info->beacon_int); + wiphy_dbg(hw->wiphy, " BCN EN: %d (BI=%u)\n", + info->enable_beacon, info->beacon_int); vp->bcn_en = info->enable_beacon; if (data->started && !hrtimer_is_queued(&data->beacon_timer.timer) && @@ -1725,8 +1725,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, ieee80211_iterate_active_interfaces_atomic( data->hw, IEEE80211_IFACE_ITER_NORMAL, mac80211_hwsim_bcn_en_iter, &count); - wiphy_debug(hw->wiphy, " beaconing vifs remaining: %u", - count); + wiphy_dbg(hw->wiphy, " beaconing vifs remaining: %u", + count); if (count == 0) { tasklet_hrtimer_cancel(&data->beacon_timer); data->beacon_int = 0; @@ -1735,31 +1735,31 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_ERP_CTS_PROT) { - wiphy_debug(hw->wiphy, " ERP_CTS_PROT: %d\n", - info->use_cts_prot); + wiphy_dbg(hw->wiphy, " ERP_CTS_PROT: %d\n", + info->use_cts_prot); } if (changed & BSS_CHANGED_ERP_PREAMBLE) { - wiphy_debug(hw->wiphy, " ERP_PREAMBLE: %d\n", - info->use_short_preamble); + wiphy_dbg(hw->wiphy, " 
ERP_PREAMBLE: %d\n", + info->use_short_preamble); } if (changed & BSS_CHANGED_ERP_SLOT) { - wiphy_debug(hw->wiphy, " ERP_SLOT: %d\n", info->use_short_slot); + wiphy_dbg(hw->wiphy, " ERP_SLOT: %d\n", info->use_short_slot); } if (changed & BSS_CHANGED_HT) { - wiphy_debug(hw->wiphy, " HT: op_mode=0x%x\n", - info->ht_operation_mode); + wiphy_dbg(hw->wiphy, " HT: op_mode=0x%x\n", + info->ht_operation_mode); } if (changed & BSS_CHANGED_BASIC_RATES) { - wiphy_debug(hw->wiphy, " BASIC_RATES: 0x%llx\n", - (unsigned long long) info->basic_rates); + wiphy_dbg(hw->wiphy, " BASIC_RATES: 0x%llx\n", + (unsigned long long) info->basic_rates); } if (changed & BSS_CHANGED_TXPOWER) - wiphy_debug(hw->wiphy, " TX Power: %d dBm\n", info->txpower); + wiphy_dbg(hw->wiphy, " TX Power: %d dBm\n", info->txpower); } static int mac80211_hwsim_sta_add(struct ieee80211_hw *hw, @@ -1813,11 +1813,11 @@ static int mac80211_hwsim_conf_tx( struct ieee80211_vif *vif, u16 queue, const struct ieee80211_tx_queue_params *params) { - wiphy_debug(hw->wiphy, - "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n", - __func__, queue, - params->txop, params->cw_min, - params->cw_max, params->aifs); + wiphy_dbg(hw->wiphy, + "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n", + __func__, queue, + params->txop, params->cw_min, + params->cw_max, params->aifs); return 0; } @@ -1981,7 +1981,7 @@ static void hw_scan_work(struct work_struct *work) .aborted = false, }; - wiphy_debug(hwsim->hw->wiphy, "hw scan complete\n"); + wiphy_dbg(hwsim->hw->wiphy, "hw scan complete\n"); ieee80211_scan_completed(hwsim->hw, &info); hwsim->hw_scan_request = NULL; hwsim->hw_scan_vif = NULL; @@ -1990,8 +1990,8 @@ static void hw_scan_work(struct work_struct *work) return; } - wiphy_debug(hwsim->hw->wiphy, "hw scan %d MHz\n", - req->channels[hwsim->scan_chan_idx]->center_freq); + wiphy_dbg(hwsim->hw->wiphy, "hw scan %d MHz\n", + req->channels[hwsim->scan_chan_idx]->center_freq); hwsim->tmp_chan = req->channels[hwsim->scan_chan_idx]; if (hwsim->tmp_chan->flags & (IEEE80211_CHAN_NO_IR | @@ -2060,7 +2060,7 @@ static int mac80211_hwsim_hw_scan(struct ieee80211_hw *hw, memset(hwsim->survey_data, 0, sizeof(hwsim->survey_data)); mutex_unlock(&hwsim->mutex); - wiphy_debug(hw->wiphy, "hwsim hw_scan request\n"); + wiphy_dbg(hw->wiphy, "hwsim hw_scan request\n"); ieee80211_queue_delayed_work(hwsim->hw, &hwsim->hw_scan, 0); @@ -2075,7 +2075,7 @@ static void mac80211_hwsim_cancel_hw_scan(struct ieee80211_hw *hw, .aborted = true, }; - wiphy_debug(hw->wiphy, "hwsim cancel_hw_scan\n"); + wiphy_dbg(hw->wiphy, "hwsim cancel_hw_scan\n"); cancel_delayed_work_sync(&hwsim->hw_scan); @@ -2096,11 +2096,11 @@ static void mac80211_hwsim_sw_scan(struct ieee80211_hw *hw, mutex_lock(&hwsim->mutex); if (hwsim->scanning) { - printk(KERN_DEBUG "two hwsim sw_scans detected!\n"); + pr_debug("two hwsim sw_scans detected!\n"); goto out; } - printk(KERN_DEBUG "hwsim sw_scan request, prepping stuff\n"); + pr_debug("hwsim sw_scan request, prepping stuff\n"); memcpy(hwsim->scan_addr, mac_addr, ETH_ALEN); hwsim->scanning = true; @@ -2117,7 +2117,7 @@ static void mac80211_hwsim_sw_scan_complete(struct ieee80211_hw *hw, mutex_lock(&hwsim->mutex); - printk(KERN_DEBUG "hwsim sw_scan_complete\n"); + pr_debug("hwsim sw_scan_complete\n"); hwsim->scanning = false; eth_zero_addr(hwsim->scan_addr); @@ -2131,7 +2131,7 @@ static void hw_roc_start(struct work_struct *work) mutex_lock(&hwsim->mutex); - wiphy_debug(hwsim->hw->wiphy, "hwsim ROC begins\n"); + wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC begins\n"); 
hwsim->tmp_chan = hwsim->roc_chan; ieee80211_ready_on_channel(hwsim->hw); @@ -2151,7 +2151,7 @@ static void hw_roc_done(struct work_struct *work) hwsim->tmp_chan = NULL; mutex_unlock(&hwsim->mutex); - wiphy_debug(hwsim->hw->wiphy, "hwsim ROC expired\n"); + wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC expired\n"); } static int mac80211_hwsim_roc(struct ieee80211_hw *hw, @@ -2172,8 +2172,8 @@ static int mac80211_hwsim_roc(struct ieee80211_hw *hw, hwsim->roc_duration = duration; mutex_unlock(&hwsim->mutex); - wiphy_debug(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n", - chan->center_freq, duration); + wiphy_dbg(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n", + chan->center_freq, duration); ieee80211_queue_delayed_work(hw, &hwsim->roc_start, HZ/50); return 0; @@ -2190,7 +2190,7 @@ static int mac80211_hwsim_croc(struct ieee80211_hw *hw) hwsim->tmp_chan = NULL; mutex_unlock(&hwsim->mutex); - wiphy_debug(hw->wiphy, "hwsim ROC canceled\n"); + wiphy_dbg(hw->wiphy, "hwsim ROC canceled\n"); return 0; } @@ -2199,20 +2199,20 @@ static int mac80211_hwsim_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { hwsim_set_chanctx_magic(ctx); - wiphy_debug(hw->wiphy, - "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", - ctx->def.chan->center_freq, ctx->def.width, - ctx->def.center_freq1, ctx->def.center_freq2); + wiphy_dbg(hw->wiphy, + "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", + ctx->def.chan->center_freq, ctx->def.width, + ctx->def.center_freq1, ctx->def.center_freq2); return 0; } static void mac80211_hwsim_remove_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { - wiphy_debug(hw->wiphy, - "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", - ctx->def.chan->center_freq, ctx->def.width, - ctx->def.center_freq1, ctx->def.center_freq2); + wiphy_dbg(hw->wiphy, + "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", + ctx->def.chan->center_freq, ctx->def.width, + ctx->def.center_freq1, ctx->def.center_freq2); hwsim_check_chanctx_magic(ctx); hwsim_clear_chanctx_magic(ctx); } @@ -2222,10 +2222,10 @@ static void mac80211_hwsim_change_chanctx(struct ieee80211_hw *hw, u32 changed) { hwsim_check_chanctx_magic(ctx); - wiphy_debug(hw->wiphy, - "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", - ctx->def.chan->center_freq, ctx->def.width, - ctx->def.center_freq1, ctx->def.center_freq2); + wiphy_dbg(hw->wiphy, + "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", + ctx->def.chan->center_freq, ctx->def.width, + ctx->def.center_freq1, ctx->def.center_freq2); } static int mac80211_hwsim_assign_vif_chanctx(struct ieee80211_hw *hw, @@ -2479,7 +2479,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, ops = &mac80211_hwsim_mchan_ops; hw = ieee80211_alloc_hw_nm(sizeof(*data), ops, param->hwname); if (!hw) { - printk(KERN_DEBUG "mac80211_hwsim: ieee80211_alloc_hw failed\n"); + pr_debug("mac80211_hwsim: ieee80211_alloc_hw failed\n"); err = -ENOMEM; goto failed; } @@ -2507,7 +2507,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, data->dev->driver = &mac80211_hwsim_driver.driver; err = device_bind_driver(data->dev); if (err != 0) { - printk(KERN_DEBUG "mac80211_hwsim: device_bind_driver failed (%d)\n", + pr_debug("mac80211_hwsim: device_bind_driver failed (%d)\n", err); goto failed_bind; } @@ -2698,12 +2698,12 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, err = ieee80211_register_hw(hw); if (err < 0) { - printk(KERN_DEBUG "mac80211_hwsim: 
ieee80211_register_hw failed (%d)\n", + pr_debug("mac80211_hwsim: ieee80211_register_hw failed (%d)\n", err); goto failed_hw; } - wiphy_debug(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr); + wiphy_dbg(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr); if (param->reg_alpha2) { data->alpha2[0] = param->reg_alpha2[0]; @@ -3067,7 +3067,7 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2, return 0; err: - printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__); + pr_debug("mac80211_hwsim: error occurred in %s\n", __func__); out: dev_kfree_skb(skb); return -EINVAL; @@ -3098,7 +3098,7 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2, hwsim_register_wmediumd(net, info->snd_portid); - printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, " + pr_debug("mac80211_hwsim: received a REGISTER, " "switching to wmediumd mode with pid %d\n", info->snd_portid); return 0; @@ -3387,7 +3387,7 @@ static int __init hwsim_init_netlink(void) return 0; failure: - printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__); + pr_debug("mac80211_hwsim: error occurred in %s\n", __func__); return -EINVAL; } @@ -3578,7 +3578,7 @@ module_init(init_mac80211_hwsim); static void __exit exit_mac80211_hwsim(void) { - printk(KERN_DEBUG "mac80211_hwsim: unregister radios\n"); + pr_debug("mac80211_hwsim: unregister radios\n"); hwsim_exit_netlink(); From a6bcda44843c6dfced0fb973e2607c2a98addfa9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Sep 2017 11:52:43 +0200 Subject: [PATCH 0139/2177] cfg80211: remove unused function ieee80211_data_from_8023() This function hasn't been used since the removal of iwmc3200wifi in 2012. It also appears to have a bug when qos=True, since then it'll copy uninitialized stack memory to the SKB. Just remove the function entirely. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- Documentation/driver-api/80211/cfg80211.rst | 3 - include/net/cfg80211.h | 13 --- net/wireless/util.c | 115 -------------------- 3 files changed, 131 deletions(-) diff --git a/Documentation/driver-api/80211/cfg80211.rst b/Documentation/driver-api/80211/cfg80211.rst index 8ffac57e1f5b..eeab91b59457 100644 --- a/Documentation/driver-api/80211/cfg80211.rst +++ b/Documentation/driver-api/80211/cfg80211.rst @@ -299,9 +299,6 @@ Data path helpers .. kernel-doc:: include/net/cfg80211.h :functions: ieee80211_data_to_8023 -.. kernel-doc:: include/net/cfg80211.h - :functions: ieee80211_data_from_8023 - .. kernel-doc:: include/net/cfg80211.h :functions: ieee80211_amsdu_to_8023s diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index aa9d993e519a..cc1996081463 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4346,19 +4346,6 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype); } -/** - * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 - * @skb: the 802.3 frame - * @addr: the device MAC address - * @iftype: the virtual interface type - * @bssid: the network bssid (used only for iftype STATION and ADHOC) - * @qos: build 802.11 QoS data frame - * Return: 0 on success, or a negative error code. 
- */ -int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, const u8 *bssid, - bool qos); - /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * diff --git a/net/wireless/util.c b/net/wireless/util.c index 4aab793c2f00..7dcdf67cba29 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -529,121 +529,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, } EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr); -int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, - const u8 *bssid, bool qos) -{ - struct ieee80211_hdr hdr; - u16 hdrlen, ethertype; - __le16 fc; - const u8 *encaps_data; - int encaps_len, skip_header_bytes; - int nh_pos, h_pos; - int head_need; - - if (unlikely(skb->len < ETH_HLEN)) - return -EINVAL; - - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - - /* convert Ethernet header to proper 802.11 header (based on - * operation mode) */ - ethertype = (skb->data[12] << 8) | skb->data[13]; - fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); - - switch (iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_GO: - fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); - /* DA BSSID SA */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 24; - break; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - fc |= cpu_to_le16(IEEE80211_FCTL_TODS); - /* BSSID SA DA */ - memcpy(hdr.addr1, bssid, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - hdrlen = 24; - break; - case NL80211_IFTYPE_OCB: - case NL80211_IFTYPE_ADHOC: - /* DA SA BSSID */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, bssid, ETH_ALEN); - hdrlen = 24; - break; - default: - return -EOPNOTSUPP; - } - - if (qos) { - fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); - hdrlen += 2; - } - - hdr.frame_control = fc; - hdr.duration_id = 0; - hdr.seq_ctrl = 0; - - skip_header_bytes = ETH_HLEN; - if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { - encaps_data = bridge_tunnel_header; - encaps_len = sizeof(bridge_tunnel_header); - skip_header_bytes -= 2; - } else if (ethertype >= ETH_P_802_3_MIN) { - encaps_data = rfc1042_header; - encaps_len = sizeof(rfc1042_header); - skip_header_bytes -= 2; - } else { - encaps_data = NULL; - encaps_len = 0; - } - - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - - head_need = hdrlen + encaps_len - skb_headroom(skb); - - if (head_need > 0 || skb_cloned(skb)) { - head_need = max(head_need, 0); - if (head_need) - skb_orphan(skb); - - if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) - return -ENOMEM; - } - - if (encaps_data) { - memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } - - memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - - nh_pos += hdrlen; - h_pos += hdrlen; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. 
*/ - skb_reset_mac_header(skb); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); - - return 0; -} -EXPORT_SYMBOL(ieee80211_data_from_8023); - static void __frame_add_frag(struct sk_buff *skb, struct page *page, void *ptr, int len, int size) From 65026002d69de006e273749bb799d3b01b757eb0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 18 Aug 2017 15:31:41 +0300 Subject: [PATCH 0140/2177] nl80211: add an option to allow MFP without requiring it The user space can now allow the kernel to associate to an AP that requires MFP or that doesn't have MFP enabled in the same NL80211_CMD_CONNECT command, by using a new NL80211_MFP_OPTIONAL flag. The driver / firmware will decide whether to use it or not. Include a feature bit to advertise support for NL80211_MFP_OPTIONAL. This allows new user space to run on old kernels and know that it cannot use the new attribute if it isn't supported. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++++-- net/wireless/nl80211.c | 8 +++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 76404d8a8863..59ba6ca66a0d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1407,8 +1407,12 @@ enum nl80211_commands { * * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is * used for the association (&enum nl80211_mfp, represented as a u32); - * this attribute can be used - * with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests + * this attribute can be used with %NL80211_CMD_ASSOCIATE and + * %NL80211_CMD_CONNECT requests. %NL80211_MFP_OPTIONAL is not allowed for + * %NL80211_CMD_ASSOCIATE since user space SME is expected and hence, it + * must have decided whether to use management frame protection or not. + * Setting %NL80211_MFP_OPTIONAL with a %NL80211_CMD_CONNECT request will + * let the driver (or the firmware) decide whether to use MFP or not. * * @NL80211_ATTR_STA_FLAGS2: Attribute containing a * &struct nl80211_sta_flag_update. @@ -3947,10 +3951,12 @@ enum nl80211_key_type { * enum nl80211_mfp - Management frame protection state * @NL80211_MFP_NO: Management frame protection not used * @NL80211_MFP_REQUIRED: Management frame protection required + * @NL80211_MFP_OPTIONAL: Management frame protection is optional */ enum nl80211_mfp { NL80211_MFP_NO, NL80211_MFP_REQUIRED, + NL80211_MFP_OPTIONAL, }; enum nl80211_wpa_versions { @@ -4923,6 +4929,8 @@ enum nl80211_feature_flags { * the first probe request in each channel at rate of at least 5.5Mbps. * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports * probe request tx deferral and suppression + * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL + * value in %NL80211_ATTR_USE_MFP. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
@@ -4949,6 +4957,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP, NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE, NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, + NL80211_EXT_FEATURE_MFP_OPTIONAL, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2e6f5f4065f9..1e39ba3cfd06 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8952,8 +8952,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_USE_MFP]) { connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp == NL80211_MFP_OPTIONAL && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_MFP_OPTIONAL)) + return -EOPNOTSUPP; + if (connect.mfp != NL80211_MFP_REQUIRED && - connect.mfp != NL80211_MFP_NO) + connect.mfp != NL80211_MFP_NO && + connect.mfp != NL80211_MFP_OPTIONAL) return -EINVAL; } else { connect.mfp = NL80211_MFP_NO; From d405fd8cc807c045b23bc2df4a5ab6b85df614f3 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Sat, 5 Aug 2017 11:44:36 +0300 Subject: [PATCH 0141/2177] mac80211: recalculate some sta parameters after insertion Sometimes a station is added already in ASSOC state. For example, in AP mode, when a client station didn't get assoc resp and sends an assoc req again. If a station is inserted when its state is ASSOC or higher, the min chandef and allow_p2p_go_ps should be recalculated again after the insertion. Before this patch the recalculation happened only in sta_info_move_state which occurs before the insertion of the sta and thus even though it calls ieee80211_recalc_min_chandef/_p2p_go_ps_allowed functions, since sdata->local->sta_list is still empty at this point, it doesn't do anything. 
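In other words, the problematic ordering is roughly the following (an illustrative sketch of the call sequence, not actual call sites):

	/*
	 * Sketch of the ordering the patch addresses:
	 *
	 *   sta = sta_info_alloc(sdata, addr, gfp);
	 *   sta_info_move_state(sta, IEEE80211_STA_AUTH);
	 *   sta_info_move_state(sta, IEEE80211_STA_ASSOC);
	 *       -> the recalculation walks local->sta_list, but sta is not
	 *          on it yet, so min chandef / allow_p2p_go_ps stay unchanged
	 *   sta_info_insert(sta);
	 *       -> sta is now on the list; the patch repeats the
	 *          recalculation here for stations inserted at >= ASSOC
	 */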
Signed-off-by: Gregory Greenman Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 57 +++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 69615016d5bf..ffcd25c4908c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -515,6 +515,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local, return err; } +static void +ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + bool allow_p2p_go_ps = sdata->vif.p2p; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata || + !test_sta_flag(sta, WLAN_STA_ASSOC)) + continue; + if (!sta->sta.support_p2p_ps) { + allow_p2p_go_ps = false; + break; + } + } + rcu_read_unlock(); + + if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { + sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS); + } +} + /* * should be called with sta_mtx locked * this function replaces the mutex lock @@ -561,6 +586,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) goto out_remove; set_sta_flag(sta, WLAN_STA_INSERTED); + + if (sta->sta_state >= IEEE80211_STA_ASSOC) { + ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); @@ -1788,31 +1820,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, } EXPORT_SYMBOL(ieee80211_sta_set_buffered); -static void -ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - bool allow_p2p_go_ps = sdata->vif.p2p; - struct sta_info *sta; - - rcu_read_lock(); - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sdata != sta->sdata || - !test_sta_flag(sta, WLAN_STA_ASSOC)) - continue; - if (!sta->sta.support_p2p_ps) { - allow_p2p_go_ps = false; - break; - } - } - rcu_read_unlock(); - - if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { - sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS); - } -} - int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { From 1bd773c077deeeb2d9ced1fdb6d846169b8e7e4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sch=C3=BCtz?= Date: Thu, 7 Sep 2017 17:47:43 +0200 Subject: [PATCH 0142/2177] wireless: set correct mandatory rate flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to IEEE Std 802.11-2016 (16.2.3.4 Long PHY SIGNAL field) all of the following rates are mandatory for a HR/DSSS PHY: 1 Mb/s, 2 Mb/s, 5.5 Mb/s and 11 Mb/s. Set IEEE80211_RATE_MANDATORY_B flag for all of these instead of just 1 Mb/s to correctly reflect this. 
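Note that struct ieee80211_rate encodes bitrate in units of 100 kb/s, so the HR/DSSS set is matched as 10/20/55/110. A standalone check equivalent to the new switch would look roughly like this (sketch only; the helper name is made up for illustration):

	/* Sketch: true for the 802.11b (HR/DSSS) mandatory rates.
	 * rate->bitrate is in units of 100 kb/s.
	 */
	static bool rate_is_11b_mandatory(const struct ieee80211_rate *rate)
	{
		switch (rate->bitrate) {
		case 10:	/* 1 Mb/s */
		case 20:	/* 2 Mb/s */
		case 55:	/* 5.5 Mb/s */
		case 110:	/* 11 Mb/s */
			return true;
		default:
			return false;
		}
	}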
Signed-off-by: Richard Schütz [johannes: use switch statement] Signed-off-by: Johannes Berg --- net/wireless/util.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 7dcdf67cba29..7a1fcc6ee060 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -157,32 +157,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) case NL80211_BAND_2GHZ: want = 7; for (i = 0; i < sband->n_bitrates; i++) { - if (sband->bitrates[i].bitrate == 10) { + switch (sband->bitrates[i].bitrate) { + case 10: + case 20: + case 55: + case 110: sband->bitrates[i].flags |= IEEE80211_RATE_MANDATORY_B | IEEE80211_RATE_MANDATORY_G; want--; - } - - if (sband->bitrates[i].bitrate == 20 || - sband->bitrates[i].bitrate == 55 || - sband->bitrates[i].bitrate == 110 || - sband->bitrates[i].bitrate == 60 || - sband->bitrates[i].bitrate == 120 || - sband->bitrates[i].bitrate == 240) { + break; + case 60: + case 120: + case 240: sband->bitrates[i].flags |= IEEE80211_RATE_MANDATORY_G; want--; - } - - if (sband->bitrates[i].bitrate != 10 && - sband->bitrates[i].bitrate != 20 && - sband->bitrates[i].bitrate != 55 && - sband->bitrates[i].bitrate != 110) + /* fall through */ + default: sband->bitrates[i].flags |= IEEE80211_RATE_ERP_G; + break; + } } - WARN_ON(want != 0 && want != 3 && want != 6); + WARN_ON(want != 0 && want != 3); break; case NL80211_BAND_60GHZ: /* check for mandatory HT MCS 1..4 */ From 4f88836d4f806d212361eb426bc8a6ce897dbef9 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:22 +0530 Subject: [PATCH 0143/2177] drivers: net: de4x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de4x5.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 0affee9c8aa2..299812e92db7 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -1147,9 +1147,8 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) lp->timeout = -1; lp->gendev = gendev; spin_lock_init(&lp->lock); - init_timer(&lp->timer); - lp->timer.function = (void (*)(unsigned long))de4x5_ast; - lp->timer.data = (unsigned long)dev; + setup_timer(&lp->timer, (void (*)(unsigned long))de4x5_ast, + (unsigned long)dev); de4x5_parse_params(dev); /* From cdc91b31b81abaa2cf491fd5e9007f4bcd45bc68 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:23 +0530 Subject: [PATCH 0144/2177] drivers: net: b44: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/b44.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index a1125d10c825..42e44fc03a18 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1474,10 +1474,8 @@ static int b44_open(struct net_device *dev) goto out; } - init_timer(&bp->timer); + setup_timer(&bp->timer, b44_timer, (unsigned long)bp); bp->timer.expires = jiffies + HZ; - bp->timer.data = (unsigned long) bp; - bp->timer.function = b44_timer; add_timer(&bp->timer); b44_enable_ints(bp); From 334e4a7d5505f59a741b0549f41082e29a914439 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:24 +0530 Subject: [PATCH 0145/2177] drivers: net: pcnet32: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/pcnet32.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 7f60d17819ce..e46153654016 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1970,9 +1970,8 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) lp->options |= PCNET32_PORT_MII; } - init_timer(&lp->watchdog_timer); - lp->watchdog_timer.data = (unsigned long)dev; - lp->watchdog_timer.function = (void *)&pcnet32_watchdog; + setup_timer(&lp->watchdog_timer, (void *)&pcnet32_watchdog, + (unsigned long)dev); /* The PCNET32-specific entries in the device structure. */ dev->netdev_ops = &pcnet32_netdev_ops; From 27dd0852644966c43079d3f812ec37e3e3fc864e Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:25 +0530 Subject: [PATCH 0146/2177] drivers: net: brcm80211: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index aaed4ab503ad..ab3f22353154 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3260,9 +3260,8 @@ static void brcmf_init_escan(struct brcmf_cfg80211_info *cfg) brcmf_cfg80211_escan_handler); cfg->escan_info.escan_state = WL_ESCAN_STATE_IDLE; /* Init scan_timeout timer */ - init_timer(&cfg->escan_timeout); - cfg->escan_timeout.data = (unsigned long) cfg; - cfg->escan_timeout.function = brcmf_escan_timeout; + setup_timer(&cfg->escan_timeout, brcmf_escan_timeout, + (unsigned long)cfg); INIT_WORK(&cfg->escan_timeout_work, brcmf_cfg80211_escan_timeout_worker); } From ba4cc08793a58a97cffc2769acaa53fff4433332 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:26 +0530 Subject: [PATCH 0147/2177] drivers : net: niu: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sun/niu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 6a4e8e1bbd90..bde19b307d0d 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6123,10 +6123,8 @@ static int niu_open(struct net_device *dev) err = niu_init_hw(np); if (!err) { - init_timer(&np->timer); + setup_timer(&np->timer, niu_timer, (unsigned long)np); np->timer.expires = jiffies + HZ; - np->timer.data = (unsigned long) np; - np->timer.function = niu_timer; err = niu_enable_interrupts(np, 1); if (err) From c3bd81cccbaa89da70047f1dcc73443f889735a8 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:27 +0530 Subject: [PATCH 0148/2177] drivers: net: bcm63xx: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 4f3845a58126..f8bbbbfca06e 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1857,9 +1857,8 @@ static int bcm_enet_probe(struct platform_device *pdev) spin_lock_init(&priv->rx_lock); /* init rx timeout (used for oom) */ - init_timer(&priv->rx_timeout); - priv->rx_timeout.function = bcm_enet_refill_rx_timer; - priv->rx_timeout.data = (unsigned long)dev; + setup_timer(&priv->rx_timeout, bcm_enet_refill_rx_timer, + (unsigned long)dev); /* init the mib update lock&work */ mutex_init(&priv->mib_update_lock); From b0b404bd9ba28edb795b999e3c35596e236cc5dd Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:28 +0530 Subject: [PATCH 0149/2177] drivers: net: declance: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/declance.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 82cc81385033..9bdf81c2cd00 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1246,9 +1246,9 @@ static int dec_lance_probe(struct device *bdev, const int type) * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - init_timer(&lp->multicast_timer); - lp->multicast_timer.data = (unsigned long) dev; - lp->multicast_timer.function = lance_set_multicast_retry; + setup_timer(&lp->multicast_timer, lance_set_multicast_retry, + (unsigned long)dev); + ret = register_netdev(dev); if (ret) { From aa0c72859972f209d7d07654037bd974c11f93ed Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:29 +0530 Subject: [PATCH 0150/2177] drivers: net: am79c961: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/am79c961a.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index b11e910850f7..0612dbee00d2 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -728,9 +728,7 @@ static int am79c961_probe(struct platform_device *pdev) am79c961_banner(); spin_lock_init(&priv->chip_lock); - init_timer(&priv->timer); - priv->timer.data = (unsigned long)dev; - priv->timer.function = am79c961_timer; + setup_timer(&priv->timer, am79c961_timer, (unsigned long)dev); if (am79c961_hw_init(dev)) goto release; From 07b6901f61813aa547c5a25e118f977022fec9eb Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:30 +0530 Subject: [PATCH 0151/2177] drivers: net: et131x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/agere/et131x.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 54eff90e2f02..658e92f79d36 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3624,11 +3624,10 @@ static int et131x_open(struct net_device *netdev) int result; /* Start the timer to track NIC errors */ - init_timer(&adapter->error_timer); + setup_timer(&adapter->error_timer, et131x_error_timer_handler, + (unsigned long)adapter); adapter->error_timer.expires = jiffies + msecs_to_jiffies(TX_ERROR_PERIOD); - adapter->error_timer.function = et131x_error_timer_handler; - adapter->error_timer.data = (unsigned long)adapter; add_timer(&adapter->error_timer); result = request_irq(irq, et131x_isr, From 1e153e554fc8a3744d1fa8a36d4cb3b4cfbdc3da Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:31 +0530 Subject: [PATCH 0152/2177] drivers: net: appletalk: cops: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/appletalk/cops.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index 486e1e6997fc..caf04284711a 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -424,9 +424,7 @@ static int cops_open(struct net_device *dev) */ if(lp->board==TANGENT) /* Poll 20 times per second */ { - init_timer(&cops_timer); - cops_timer.function = cops_poll; - cops_timer.data = (unsigned long)dev; + setup_timer(&cops_timer, cops_poll, (unsigned long)dev); cops_timer.expires = jiffies + HZ/20; add_timer(&cops_timer); } From 531f3ce953d4c74989eb1ece7ce37a3717532a13 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:32 +0530 Subject: [PATCH 0153/2177] drivers: net: rsi_91x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/wireless/rsi/rsi_91x_hal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 070dfd68bb83..7ad286d6e9a7 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -411,9 +411,8 @@ static void bl_cmd_timeout(unsigned long priv) static int bl_start_cmd_timer(struct rsi_hw *adapter, u32 timeout) { - init_timer(&adapter->bl_cmd_timer); - adapter->bl_cmd_timer.data = (unsigned long)adapter; - adapter->bl_cmd_timer.function = (void *)&bl_cmd_timeout; + setup_timer(&adapter->bl_cmd_timer, (void *)&bl_cmd_timeout, + (unsigned long)adapter); adapter->bl_cmd_timer.expires = (msecs_to_jiffies(timeout) + jiffies); adapter->blcmd_timer_expired = false; From e7bbad4487ae4005904d00a0a04622f07fadbc5b Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:33 +0530 Subject: [PATCH 0154/2177] drivers: net: atp: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/atp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index bed34684994f..bdc3833fab7e 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -438,10 +438,8 @@ static int net_open(struct net_device *dev) hardware_init(dev); - init_timer(&lp->timer); + setup_timer(&lp->timer, atp_timed_checker, (unsigned long)dev); lp->timer.expires = jiffies + TIMED_CHECKER; - lp->timer.data = (unsigned long)dev; - lp->timer.function = atp_timed_checker; /* timer handler */ add_timer(&lp->timer); netif_start_queue(dev); From f40c9d5aea0fdf5454464bc477898cc981ff9715 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:34 +0530 Subject: [PATCH 0155/2177] drivers: net: ns83820: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/ns83820.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 729095db3e08..99d3c7884a4a 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1652,9 +1652,7 @@ static int ns83820_open(struct net_device *ndev) writel(0, dev->base + TXDP_HI); writel(desc, dev->base + TXDP); - init_timer(&dev->tx_watchdog); - dev->tx_watchdog.data = (unsigned long)ndev; - dev->tx_watchdog.function = ns83820_tx_watch; + setup_timer(&dev->tx_watchdog, ns83820_tx_watch, (unsigned long)ndev); mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); netif_start_queue(ndev); /* FIXME: wait for phy to come up */ From 82a8c6745169ec932473658c28679069a7ded95a Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:35 +0530 Subject: [PATCH 0156/2177] drivers: net: ixgb: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 5a713199653c..1e6ec2277d54 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -508,9 +508,8 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw); - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = ixgb_watchdog; - adapter->watchdog_timer.data = (unsigned long)adapter; + setup_timer(&adapter->watchdog_timer, ixgb_watchdog, + (unsigned long)adapter); INIT_WORK(&adapter->tx_timeout_task, ixgb_tx_timeout_task); From 88e8aa172596d2eda971d3553d52a8e877805e90 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:36 +0530 Subject: [PATCH 0157/2177] drivers: net: sundance: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/sundance.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 2704bcf023be..6ca9e981ad57 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -913,10 +913,8 @@ static int netdev_open(struct net_device *dev) ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + setup_timer(&np->timer, netdev_timer, (unsigned long)dev); np->timer.expires = jiffies + 3*HZ; - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); /* Enable interrupts by setting the interrupt mask. */ From 4896ad68ec3803d01a0d9fead64451377ea1ec5f Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:37 +0530 Subject: [PATCH 0158/2177] drivers: net: tg3: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 656e6af70f0a..d8d5f207c759 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -11087,9 +11087,7 @@ static void tg3_timer_init(struct tg3 *tp) tp->asf_multiplier = (HZ / tp->timer_offset) * TG3_FW_UPDATE_FREQ_SEC; - init_timer(&tp->timer); - tp->timer.data = (unsigned long) tp; - tp->timer.function = tg3_timer; + setup_timer(&tp->timer, tg3_timer, (unsigned long)tp); } static void tg3_timer_start(struct tg3 *tp) From f347bd6b5f6599fe67d040758947dbf9bdd89195 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:38 +0530 Subject: [PATCH 0159/2177] drivers: net: sdla: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/wan/sdla.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 236c62538036..0cc48902dbb9 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -1617,10 +1617,8 @@ static void setup_sdla(struct net_device *dev) flp->deassoc = sdla_deassoc; flp->dlci_conf = sdla_dlci_conf; - init_timer(&flp->timer); + setup_timer(&flp->timer, sdla_poll, (unsigned long)dev); flp->timer.expires = 1; - flp->timer.data = (unsigned long) dev; - flp->timer.function = sdla_poll; } static struct net_device *sdla; From dffec39fb185837468486b6bbd00b18f35e38c82 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:39 +0530 Subject: [PATCH 0160/2177] drivers: net: cisco_hdlc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_cisco.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index a408abc25512..c696d42f4502 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -293,10 +293,8 @@ static void cisco_start(struct net_device *dev) st->up = st->txseq = st->rxseq = 0; spin_unlock_irqrestore(&st->lock, flags); - init_timer(&st->timer); + setup_timer(&st->timer, cisco_timer, (unsigned long)dev); st->timer.expires = jiffies + HZ; /* First poll after 1 s */ - st->timer.function = cisco_timer; - st->timer.data = (unsigned long)dev; add_timer(&st->timer); } From 18df06c2cafea69b19ab5f274865b89c3ab0f715 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:40 +0530 Subject: [PATCH 0161/2177] drivers: net: slip: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/slip/slip.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 436dd78c396a..eb8a18991d8c 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -763,12 +763,8 @@ static struct slip *sl_alloc(dev_t line) sl->mode = SL_MODE_DEFAULT; #ifdef CONFIG_SLIP_SMART /* initialize timer_list struct */ - init_timer(&sl->keepalive_timer); - sl->keepalive_timer.data = (unsigned long)sl; - sl->keepalive_timer.function = sl_keepalive; - init_timer(&sl->outfill_timer); - sl->outfill_timer.data = (unsigned long)sl; - sl->outfill_timer.function = sl_outfill; + setup_timer(&sl->keepalive_timer, sl_keepalive, (unsigned long)sl); + setup_timer(&sl->outfill_timer, sl_outfill, (unsigned long)sl); #endif slip_devs[i] = dev; return sl; From e998092f7b7c2ce3199db91177aa43397a3f76d7 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:41 +0530 Subject: [PATCH 0162/2177] drivers: net: spider_net: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/toshiba/spider_net.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index cec9e70ab995..a913538d3213 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2256,16 +2256,14 @@ spider_net_setup_netdev(struct spider_net_card *card) pci_set_drvdata(card->pdev, netdev); - init_timer(&card->tx_timer); - card->tx_timer.function = - (void (*)(unsigned long)) spider_net_cleanup_tx_ring; - card->tx_timer.data = (unsigned long) card; + setup_timer(&card->tx_timer, + (void(*)(unsigned long))spider_net_cleanup_tx_ring, + (unsigned long)card); netdev->irq = card->pdev->irq; card->aneg_count = 0; - init_timer(&card->aneg_timer); - card->aneg_timer.function = spider_net_link_phy; - card->aneg_timer.data = (unsigned long) card; + setup_timer(&card->aneg_timer, spider_net_link_phy, + (unsigned long)card); netif_napi_add(netdev, &card->napi, spider_net_poll, SPIDER_NET_NAPI_WEIGHT); From f891f36603dff38a15390e5c950e8ac66f73352b Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:42 +0530 Subject: [PATCH 0163/2177] drivers: net: sun: cassini: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/cassini.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 382993c1561c..a74d78f64af9 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -5039,10 +5039,7 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&cp->stat_lock[N_TX_RINGS]); mutex_init(&cp->pm_mutex); - init_timer(&cp->link_timer); - cp->link_timer.function = cas_link_timer; - cp->link_timer.data = (unsigned long) cp; - + setup_timer(&cp->link_timer, cas_link_timer, (unsigned long)cp); #if 1 /* Just in case the implementation of atomic operations * change so that an explicit initialization is necessary. From ba98e9e2eb32ba8f604f947fa656ef2071d22fa0 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:43 +0530 Subject: [PATCH 0164/2177] drivers: net: natsemi: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/natsemi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 18af2a23a933..dedeacd0bbca 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -1571,10 +1571,8 @@ static int netdev_open(struct net_device *dev) dev->name, (int)readl(ioaddr + ChipCmd)); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + setup_timer(&np->timer, netdev_timer, (unsigned long)dev); np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ); - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); return 0; From 3e436a25fcca449cbb044c7483116772873f3e28 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:44 +0530 Subject: [PATCH 0165/2177] drivers: net: winbond-840: use setup_timer() helper. 
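Every driver conversion in this series follows the same pattern. Below is a minimal sketch of the before/after, using a hypothetical my_priv structure and my_timer_fn callback rather than code from any specific driver in these patches:

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	/* Hypothetical private state; the timer_list is embedded in it. */
	struct my_priv {
		struct timer_list my_timer;
	};

	/* Pre-4.15 timer callbacks receive the .data value as an unsigned long
	 * and cast it back, which is why each hunk in this series passes
	 * (unsigned long)dev or (unsigned long)priv as the last argument.
	 */
	static void my_timer_fn(unsigned long data)
	{
		struct my_priv *priv = (struct my_priv *)data;

		/* ... periodic work ... */
		mod_timer(&priv->my_timer, jiffies + HZ);
	}

	static void my_timer_start(struct my_priv *priv)
	{
		/* Open-coded form removed by these patches:
		 *
		 *	init_timer(&priv->my_timer);
		 *	priv->my_timer.function = my_timer_fn;
		 *	priv->my_timer.data = (unsigned long)priv;
		 *
		 * Equivalent helper added by these patches:
		 */
		setup_timer(&priv->my_timer, my_timer_fn, (unsigned long)priv);

		priv->my_timer.expires = jiffies + HZ;
		add_timer(&priv->my_timer);
	}

setup_timer() is shorthand for exactly those three assignments, so the generated code is unchanged; the .expires assignments and the add_timer()/mod_timer() calls are left alone by the series.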
Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/winbond-840.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 32d7229544fa..6f88d687b6d2 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -655,10 +655,8 @@ static int netdev_open(struct net_device *dev) netdev_dbg(dev, "Done netdev_open()\n"); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + setup_timer(&np->timer, netdev_timer, (unsigned long)dev); np->timer.expires = jiffies + 1*HZ; - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); return 0; out_err: From 7afd516ff75e967873d7bdcb8f9b1180c2400b57 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:45 +0530 Subject: [PATCH 0166/2177] drivers: net: enic: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index d24ee1ad3be1..4a11baffe02d 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2846,9 +2846,8 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup notification timer, HW reset task, and wq locks */ - init_timer(&enic->notify_timer); - enic->notify_timer.function = enic_notify_timer; - enic->notify_timer.data = (unsigned long)enic; + setup_timer(&enic->notify_timer, enic_notify_timer, + (unsigned long)enic); enic_set_rx_coal_setting(enic); INIT_WORK(&enic->reset, enic_reset); From c41326fbb3a7d64c329267d20c58dd9cc8f22a47 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:46 +0530 Subject: [PATCH 0167/2177] drivers: net: bnx2: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index e3af1f3cb61f..b3055a76dfbf 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8462,10 +8462,8 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bnx2_set_default_link(bp); bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX; - init_timer(&bp->timer); + setup_timer(&bp->timer, bnx2_timer, (unsigned long)bp); bp->timer.expires = RUN_AT(BNX2_TIMER_INTERVAL); - bp->timer.data = (unsigned long) bp; - bp->timer.function = bnx2_timer; #ifdef BCM_CNIC if (bnx2_shmem_rd(bp, BNX2_ISCSI_INITIATOR) & BNX2_ISCSI_INITIATOR_EN) From f7c11175bdece128c0b4c4a43e6781b9216db4d9 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:47 +0530 Subject: [PATCH 0168/2177] drivers: net: xen-netback: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/interface.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index ee8ed9da00ad..dcfcb153918c 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -520,8 +520,7 @@ int xenvif_init_queue(struct xenvif_queue *queue) queue->credit_bytes = queue->remaining_credit = ~0UL; queue->credit_usec = 0UL; - init_timer(&queue->credit_timeout); - queue->credit_timeout.function = xenvif_tx_credit_callback; + setup_timer(&queue->credit_timeout, xenvif_tx_credit_callback, 0UL); queue->credit_window_start = get_jiffies_64(); queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES; From 55d3cef4ee68d7d54c457c7deeb0e393dd386fdc Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:48 +0530 Subject: [PATCH 0169/2177] drivers: net: atmel: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wireless/atmel/atmel.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c index b68436b23a63..e816d53c2c05 100644 --- a/drivers/net/wireless/atmel/atmel.c +++ b/drivers/net/wireless/atmel/atmel.c @@ -1579,11 +1579,10 @@ struct net_device *init_atmel_card(unsigned short irq, unsigned long port, priv->default_beacon_period = priv->beacon_period = 100; priv->listen_interval = 1; - init_timer(&priv->management_timer); + setup_timer(&priv->management_timer, atmel_management_timer, + (unsigned long)dev); spin_lock_init(&priv->irqlock); spin_lock_init(&priv->timerlock); - priv->management_timer.function = atmel_management_timer; - priv->management_timer.data = (unsigned long) dev; dev->netdev_ops = &atmel_netdev_ops; dev->wireless_handlers = &atmel_handler_def; From 0625d739cc61d186b65b8619ee8c41da2fd24894 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:49 +0530 Subject: [PATCH 0170/2177] drivers: net: hippi: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/hippi/rrunner.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 71ddadbf2368..76cc140774a2 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1229,10 +1229,8 @@ static int rr_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - init_timer(&rrpriv->timer); + setup_timer(&rrpriv->timer, rr_timer, (unsigned long)dev); rrpriv->timer.expires = RUN_AT(5*HZ); /* 5 sec. watchdog */ - rrpriv->timer.data = (unsigned long)dev; - rrpriv->timer.function = rr_timer; /* timer handler */ add_timer(&rrpriv->timer); netif_start_queue(dev); From 32db034501d37c60c433d24e9faa41c1fa3136e5 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:50 +0530 Subject: [PATCH 0171/2177] drivers: net: smsc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/smsc/epic100.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 6a0e1d4b597c..2a9724898fcf 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -739,10 +739,8 @@ static int epic_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - init_timer(&ep->timer); + setup_timer(&ep->timer, epic_timer, (unsigned long)dev); ep->timer.expires = jiffies + 3*HZ; - ep->timer.data = (unsigned long)dev; - ep->timer.function = epic_timer; /* timer handler */ add_timer(&ep->timer); return rc; From d4d8db71db1bf602623e859e6c3e700b604c2072 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:51 +0530 Subject: [PATCH 0172/2177] drivers: net: qlogic: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qla3xxx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2991179c2fd0..05479d435469 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3891,10 +3891,8 @@ static int ql3xxx_probe(struct pci_dev *pdev, INIT_DELAYED_WORK(&qdev->tx_timeout_work, ql_tx_timeout_work); INIT_DELAYED_WORK(&qdev->link_state_work, ql_link_state_machine_work); - init_timer(&qdev->adapter_timer); - qdev->adapter_timer.function = ql3xxx_timer; + setup_timer(&qdev->adapter_timer, ql3xxx_timer, (unsigned long)qdev); qdev->adapter_timer.expires = jiffies + HZ * 2; /* two second delay */ - qdev->adapter_timer.data = (unsigned long)qdev; if (!cards_found) { pr_alert("%s\n", DRV_STRING); From 4a9c07ed71c2b8d755ee585264f80dd2d82a8066 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:52 +0530 Subject: [PATCH 0173/2177] drivers: net: e1000e: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000e/netdev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 327dfe5bedc0..8436c5f2c3e8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7252,13 +7252,10 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = e1000_watchdog; - adapter->watchdog_timer.data = (unsigned long)adapter; - - init_timer(&adapter->phy_info_timer); - adapter->phy_info_timer.function = e1000_update_phy_info; - adapter->phy_info_timer.data = (unsigned long)adapter; + setup_timer(&adapter->watchdog_timer, e1000_watchdog, + (unsigned long)adapter); + setup_timer(&adapter->phy_info_timer, e1000_update_phy_info, + (unsigned long)adapter); INIT_WORK(&adapter->reset_task, e1000_reset_task); INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); From af25c31d4bf4bc7d4dea4c093af8b7704ef24f81 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:53 +0530 Subject: [PATCH 0174/2177] drivers: net: amd: use setup_timer() helper. 
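A few hunks in this series (pcnet32, rsi_91x and spider_net above; amd8111e and a2065 below) keep a function-pointer cast such as (void *)&pcnet32_watchdog because the handler was not declared with the timer prototype. A sketch of the cast-free alternative, again with hypothetical names and assuming the handler only needs the net_device:

	#include <linux/netdevice.h>
	#include <linux/timer.h>
	#include <linux/jiffies.h>

	/* Declaring the handler with the void (*)(unsigned long) prototype
	 * lets setup_timer() take it without any cast.
	 */
	static void my_watchdog(unsigned long data)
	{
		struct net_device *dev = (struct net_device *)data;

		if (!netif_carrier_ok(dev))
			netdev_dbg(dev, "link still down\n");
		/* ... check hardware state, rearm if needed ... */
	}

	static void my_watchdog_arm(struct timer_list *timer,
				    struct net_device *dev)
	{
		setup_timer(timer, my_watchdog, (unsigned long)dev);
		mod_timer(timer, jiffies + 5 * HZ);
	}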
Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/sunlance.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 291ca5187f12..0183ffb9d3ba 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1459,9 +1459,8 @@ no_link_test: * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - init_timer(&lp->multicast_timer); - lp->multicast_timer.data = (unsigned long) dev; - lp->multicast_timer.function = lance_set_multicast_retry; + setup_timer(&lp->multicast_timer, lance_set_multicast_retry, + (unsigned long)dev); if (register_netdev(dev)) { printk(KERN_ERR "SunLance: Cannot register device.\n"); From cec55a92a98fb3205cc4f127d9b70b5a965f3bd7 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:54 +0530 Subject: [PATCH 0175/2177] drivers: net: amd8111e: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/amd8111e.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 7b5df562f30f..7f22af6e37e0 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1883,9 +1883,8 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Initialize software ipg timer */ if(lp->options & OPTION_DYN_IPG_ENABLE){ - init_timer(&lp->ipg_data.ipg_timer); - lp->ipg_data.ipg_timer.data = (unsigned long) dev; - lp->ipg_data.ipg_timer.function = (void *)&amd8111e_config_ipg; + setup_timer(&lp->ipg_data.ipg_timer, + (void *)&amd8111e_config_ipg, (unsigned long)dev); lp->ipg_data.ipg_timer.expires = jiffies + IPG_CONVERGE_JIFFIES; lp->ipg_data.ipg = DEFAULT_IPG; From fb4de582a22282fa0f9665fd363dfd3e073c40f6 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:55 +0530 Subject: [PATCH 0176/2177] drivers: net: eql: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/eql.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/eql.c b/drivers/net/eql.c index fe13bfea30ac..fccce4b47778 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -178,10 +178,8 @@ static void __init eql_setup(struct net_device *dev) { equalizer_t *eql = netdev_priv(dev); - init_timer(&eql->timer); - eql->timer.data = (unsigned long) eql; + setup_timer(&eql->timer, eql_timer, (unsigned long)eql); eql->timer.expires = jiffies + EQL_DEFAULT_RESCHED_IVAL; - eql->timer.function = eql_timer; spin_lock_init(&eql->queue.lock); INIT_LIST_HEAD(&eql->queue.all_slaves); From 82f5d72da8755cf73387a007ad323a86ea0b5663 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:56 +0530 Subject: [PATCH 0177/2177] drivers: net: can: usb: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/can/usb/peak_usb/pcan_usb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 838545ce468d..7e10dbdded28 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -798,9 +798,8 @@ static int pcan_usb_init(struct peak_usb_device *dev) int err; /* initialize a timer needed to wait for hardware restart */ - init_timer(&pdev->restart_timer); - pdev->restart_timer.function = pcan_usb_restart; - pdev->restart_timer.data = (unsigned long)dev; + setup_timer(&pdev->restart_timer, pcan_usb_restart, + (unsigned long)dev); /* * explicit use of dev_xxx() instead of netdev_xxx() here: From 9f5ca8816b5ccc1fd13baf46e012fb5228e57763 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:57 +0530 Subject: [PATCH 0178/2177] drivers: net: can: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/can/grcan.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index a7be12d9a139..8570cfdaea75 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1626,13 +1626,11 @@ static int grcan_setup_netdev(struct platform_device *ofdev, spin_lock_init(&priv->lock); if (priv->need_txbug_workaround) { - init_timer(&priv->rr_timer); - priv->rr_timer.function = grcan_running_reset; - priv->rr_timer.data = (unsigned long)dev; + setup_timer(&priv->rr_timer, grcan_running_reset, + (unsigned long)dev); - init_timer(&priv->hang_timer); - priv->hang_timer.function = grcan_initiate_running_reset; - priv->hang_timer.data = (unsigned long)dev; + setup_timer(&priv->hang_timer, grcan_initiate_running_reset, + (unsigned long)dev); } netif_napi_add(dev, &priv->napi, grcan_poll, GRCAN_NAPI_WEIGHT); From fe9bfe207e200f901361681bf1497386068f1df2 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:58 +0530 Subject: [PATCH 0179/2177] drivers: net: arcnet: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/arcnet/arcnet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index fcfccbb3d9a2..13236b2cdf13 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -450,9 +450,7 @@ struct net_device *alloc_arcdev(const char *name) lp->dev = dev; spin_lock_init(&lp->lock); - init_timer(&lp->timer); - lp->timer.data = (unsigned long) dev; - lp->timer.function = arcnet_timer; + setup_timer(&lp->timer, arcnet_timer, (unsigned long)dev); } return dev; From b9496b6b9100d824033390312dd64244f959a156 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:59 +0530 Subject: [PATCH 0180/2177] drivers: net: ath6kl: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/wireless/ath/ath6kl/txrx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/txrx.c b/drivers/net/wireless/ath/ath6kl/txrx.c index e6b2517e6334..4e5cc2b7045a 100644 --- a/drivers/net/wireless/ath/ath6kl/txrx.c +++ b/drivers/net/wireless/ath/ath6kl/txrx.c @@ -1753,9 +1753,7 @@ void aggr_conn_init(struct ath6kl_vif *vif, struct aggr_info *aggr_info, aggr_conn->aggr_sz = AGGR_SZ_DEFAULT; aggr_conn->dev = vif->ndev; - init_timer(&aggr_conn->timer); - aggr_conn->timer.function = aggr_timeout; - aggr_conn->timer.data = (unsigned long) aggr_conn; + setup_timer(&aggr_conn->timer, aggr_timeout, (unsigned long)aggr_conn); aggr_conn->aggr_info = aggr_info; aggr_conn->timer_scheduled = false; From 6d2bcc14f5731e9357f15d41f7c5677a72e354f9 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:00 +0530 Subject: [PATCH 0181/2177] drivers: net: sun: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sungem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index fa607d062cb3..b75ab8f44968 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2910,9 +2910,7 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) gp->msg_enable = DEFAULT_MSG; - init_timer(&gp->link_timer); - gp->link_timer.function = gem_link_timer; - gp->link_timer.data = (unsigned long) gp; + setup_timer(&gp->link_timer, gem_link_timer, (unsigned long)gp); INIT_WORK(&gp->reset_task, gem_reset_task); From ac803d1c5f62937dc142a35dafd180f09b9f9c83 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:01 +0530 Subject: [PATCH 0182/2177] drivers: net: sis900: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 40bd88362e3d..cb61247b0526 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1065,10 +1065,8 @@ sis900_open(struct net_device *net_dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - init_timer(&sis_priv->timer); + setup_timer(&sis_priv->timer, sis900_timer, (unsigned long)net_dev); sis_priv->timer.expires = jiffies + HZ; - sis_priv->timer.data = (unsigned long)net_dev; - sis_priv->timer.function = sis900_timer; add_timer(&sis_priv->timer); return 0; From f1ce56ce5d2a18f5d61ec335aaf5aad978fa9cd6 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:02 +0530 Subject: [PATCH 0183/2177] drivers: net: packetengines: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/packetengines/yellowfin.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index fa7770da6ef8..33c241f52a71 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -632,10 +632,8 @@ static int yellowfin_open(struct net_device *dev) } /* Set the timer to check for link beat. */ - init_timer(&yp->timer); + setup_timer(&yp->timer, yellowfin_timer, (unsigned long)dev); yp->timer.expires = jiffies + 3*HZ; - yp->timer.data = (unsigned long)dev; - yp->timer.function = yellowfin_timer; /* timer handler */ add_timer(&yp->timer); out: return rc; From 590deff6e7a898fadf3f1fd6425937d481913fb1 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:03 +0530 Subject: [PATCH 0184/2177] drivers: net: mlx5: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 8aea0a065e56..a89a68ce53ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -320,15 +320,13 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - init_timer(&health->timer); + setup_timer(&health->timer, poll_health, (unsigned long)dev); health->sick = 0; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; - health->timer.data = (unsigned long)dev; - health->timer.function = poll_health; health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL); add_timer(&health->timer); } From d2a0012e7632a588683ad6320529659c4cd27131 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:04 +0530 Subject: [PATCH 0185/2177] drivers: net: mlx4: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 53daa6ca5d83..de0f9e5e42ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -277,7 +277,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) phys_addr_t addr; INIT_LIST_HEAD(&priv->catas_err.list); - init_timer(&priv->catas_err.timer); + setup_timer(&priv->catas_err.timer, poll_catas, (unsigned long)dev); priv->catas_err.map = NULL; if (!mlx4_is_slave(dev)) { @@ -293,8 +293,6 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) } } - priv->catas_err.timer.data = (unsigned long) dev; - priv->catas_err.timer.function = poll_catas; priv->catas_err.timer.expires = round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); add_timer(&priv->catas_err.timer); From 636873890c63c892fca5ccab8af3a9f3607eb1fc Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:05 +0530 Subject: [PATCH 0186/2177] drivers: net: pxa168: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/pxa168_eth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 993724959a7c..91b1c154fd29 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1496,9 +1496,8 @@ static int pxa168_eth_probe(struct platform_device *pdev) netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size); memset(&pep->timeout, 0, sizeof(struct timer_list)); - init_timer(&pep->timeout); - pep->timeout.function = rxq_refill_timer_wrapper; - pep->timeout.data = (unsigned long)pep; + setup_timer(&pep->timeout, rxq_refill_timer_wrapper, + (unsigned long)pep); pep->smi_bus = mdiobus_alloc(); if (!pep->smi_bus) { From 34b0cf069d174b2615769102d75b4ce687addeb9 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:06 +0530 Subject: [PATCH 0187/2177] drivers: net: fealnx: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/fealnx.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index e92859dab7ae..c8982313d850 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -909,17 +909,13 @@ static int netdev_open(struct net_device *dev) printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + setup_timer(&np->timer, netdev_timer, (unsigned long)dev); np->timer.expires = RUN_AT(3 * HZ); - np->timer.data = (unsigned long) dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); - init_timer(&np->reset_timer); - np->reset_timer.data = (unsigned long) dev; - np->reset_timer.function = reset_timer; + setup_timer(&np->reset_timer, reset_timer, (unsigned long)dev); np->reset_timer_armed = 0; return rc; } From a76aec2ac51f8a74659cff9b19f712e8fb984393 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:07 +0530 Subject: [PATCH 0188/2177] drivers: net: dmfe: use setup_timer() helper. 
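For context, and not part of the patches here: these setup_timer() conversions also make the later move to the timer_setup() API (merged for 4.15) largely mechanical. There the .data field disappears, the callback receives the timer pointer, and the enclosing structure is recovered with from_timer(). A sketch with the same hypothetical names as the earlier example:

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	/* Hypothetical private state, as in the earlier sketch. */
	struct my_priv {
		struct timer_list my_timer;
	};

	/* timer_setup() callbacks take the timer itself; no unsigned long
	 * casts are needed because from_timer() (a container_of() wrapper)
	 * recovers the private structure from the embedded timer.
	 */
	static void my_timer_fn(struct timer_list *t)
	{
		struct my_priv *priv = from_timer(priv, t, my_timer);

		/* ... periodic work ... */
		mod_timer(&priv->my_timer, jiffies + HZ);
	}

	static void my_timer_start(struct my_priv *priv)
	{
		timer_setup(&priv->my_timer, my_timer_fn, 0);
		mod_timer(&priv->my_timer, jiffies + HZ);
	}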
Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/dmfe.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 07e10a45beaa..6585f737d08b 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -596,10 +596,8 @@ static int dmfe_open(struct net_device *dev) netif_wake_queue(dev); /* set and active a timer process */ - init_timer(&db->timer); + setup_timer(&db->timer, dmfe_timer, (unsigned long)dev); db->timer.expires = DMFE_TIMER_WUT + HZ * 2; - db->timer.data = (unsigned long)dev; - db->timer.function = dmfe_timer; add_timer(&db->timer); return 0; From 6c43824477c2ac722325ba460c2ce683c48fb76b Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:08 +0530 Subject: [PATCH 0189/2177] drivers: net: bnxt: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index aacec8bc19d5..c25f5b555adf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7190,9 +7190,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; - init_timer(&bp->timer); - bp->timer.data = (unsigned long)bp; - bp->timer.function = bnxt_timer; + setup_timer(&bp->timer, bnxt_timer, (unsigned long)bp); bp->current_interval = BNXT_TIMER_INTERVAL; clear_bit(BNXT_STATE_OPEN, &bp->state); From cac40a458ae68b41955cc55a80fc9e0166e429b1 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:09 +0530 Subject: [PATCH 0190/2177] drivers: net: amd: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/a2065.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index e22f976a0d18..998d30e050a6 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -733,10 +733,9 @@ static int a2065_init_one(struct zorro_dev *z, dev->watchdog_timeo = 5*HZ; dev->dma = 0; - init_timer(&priv->multicast_timer); - priv->multicast_timer.data = (unsigned long) dev; - priv->multicast_timer.function = - (void (*)(unsigned long))lance_set_multicast; + setup_timer(&priv->multicast_timer, + (void(*)(unsigned long))lance_set_multicast, + (unsigned long)dev); err = register_netdev(dev); if (err) { From 7c214194de36217c06b0dc4ed64d9cf251b261df Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:10 +0530 Subject: [PATCH 0191/2177] drivers: net: adi: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/adi/bfin_mac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index a251de8d9a91..0658cde1586a 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1650,9 +1650,8 @@ static int bfin_mac_probe(struct platform_device *pdev) ndev->netdev_ops = &bfin_mac_netdev_ops; ndev->ethtool_ops = &bfin_mac_ethtool_ops; - init_timer(&lp->tx_reclaim_timer); - lp->tx_reclaim_timer.data = (unsigned long)lp; - lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout; + setup_timer(&lp->tx_reclaim_timer, tx_reclaim_skb_timeout, + (unsigned long)lp); lp->flags = 0; netif_napi_add(ndev, &lp->napi, bfin_mac_poll, CONFIG_BFIN_RX_DESC_NUM); From 13e96b93ff5f1093592419e1f84962d4a266bd3e Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:11 +0530 Subject: [PATCH 0192/2177] drivers: net: can: sja1000: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/can/sja1000/peak_pcmcia.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c index dd56133cc461..4b8758e10bd4 100644 --- a/drivers/net/can/sja1000/peak_pcmcia.c +++ b/drivers/net/can/sja1000/peak_pcmcia.c @@ -692,9 +692,7 @@ static int pcan_probe(struct pcmcia_device *pdev) } /* init the timer which controls the leds */ - init_timer(&card->led_timer); - card->led_timer.function = pcan_led_timer; - card->led_timer.data = (unsigned long)card; + setup_timer(&card->led_timer, pcan_led_timer, (unsigned long)card); /* request the given irq */ err = request_irq(pdev->irq, &pcan_isr, IRQF_SHARED, PCC_NAME, card); From 7890d5341999968b94dbdf9e30ad32f37f82ea10 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:12 +0530 Subject: [PATCH 0193/2177] drivers: net: caif: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 438966bf51c2..fed75e75207a 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1211,17 +1211,14 @@ static int cfhsi_open(struct net_device *ndev) init_waitqueue_head(&cfhsi->flush_fifo_wait); /* Setup the inactivity timer. */ - init_timer(&cfhsi->inactivity_timer); - cfhsi->inactivity_timer.data = (unsigned long)cfhsi; - cfhsi->inactivity_timer.function = cfhsi_inactivity_tout; + setup_timer(&cfhsi->inactivity_timer, cfhsi_inactivity_tout, + (unsigned long)cfhsi); /* Setup the slowpath RX timer. */ - init_timer(&cfhsi->rx_slowpath_timer); - cfhsi->rx_slowpath_timer.data = (unsigned long)cfhsi; - cfhsi->rx_slowpath_timer.function = cfhsi_rx_slowpath; + setup_timer(&cfhsi->rx_slowpath_timer, cfhsi_rx_slowpath, + (unsigned long)cfhsi); /* Setup the aggregation timer. */ - init_timer(&cfhsi->aggregation_timer); - cfhsi->aggregation_timer.data = (unsigned long)cfhsi; - cfhsi->aggregation_timer.function = cfhsi_aggregation_tout; + setup_timer(&cfhsi->aggregation_timer, cfhsi_aggregation_tout, + (unsigned long)cfhsi); /* Activate HSI interface. 
*/ res = cfhsi->ops->cfhsi_up(cfhsi->ops); From ba7400ed88adcd07af0dc004be1cf5ab2443cb44 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:13 +0530 Subject: [PATCH 0194/2177] drivers: net: appletalk: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/appletalk/ltpc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index ac755d2950a6..e4aa374caa4d 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -1165,9 +1165,7 @@ struct net_device * __init ltpc_probe(void) dev->irq = 0; /* polled mode -- 20 times per second */ /* this is really, really slow... should it poll more often? */ - init_timer(<pc_timer); - ltpc_timer.function=ltpc_poll; - ltpc_timer.data = (unsigned long) dev; + setup_timer(<pc_timer, ltpc_poll, (unsigned long)dev); ltpc_timer.expires = jiffies + HZ/20; add_timer(<pc_timer); From f2803332f2ef86ad29e247350f697bad4eb1aa93 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:14 +0530 Subject: [PATCH 0195/2177] drivers: net: dscc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/dscc4.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index a043fb1367bd..64f176496da4 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -1127,10 +1127,8 @@ static int dscc4_open(struct net_device *dev) done: netif_start_queue(dev); - init_timer(&dpriv->timer); + setup_timer(&dpriv->timer, dscc4_timer, (unsigned long)dev); dpriv->timer.expires = jiffies + 10*HZ; - dpriv->timer.data = (unsigned long)dev; - dpriv->timer.function = dscc4_timer; add_timer(&dpriv->timer); netif_carrier_on(dev); From 8d81fe753c1572573be6d529a44b1506e3b0425d Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:15 +0530 Subject: [PATCH 0196/2177] drivers: net: hdlc_ppp: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_ppp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 0d2e00ece804..c7721c729541 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -610,9 +610,7 @@ static void ppp_start(struct net_device *dev) for (i = 0; i < IDX_COUNT; i++) { struct proto *proto = &ppp->protos[i]; proto->dev = dev; - init_timer(&proto->timer); - proto->timer.function = ppp_timer; - proto->timer.data = (unsigned long)proto; + setup_timer(&proto->timer, ppp_timer, (unsigned long)proto); proto->state = CLOSED; } ppp->protos[IDX_LCP].pid = PID_LCP; From 7e47fc264e2837d971720ccf1f8b35648c2626ea Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:16 +0530 Subject: [PATCH 0197/2177] drivers: net: hamradio: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/hamradio/6pack.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 021a8ec411ab..97fe8dfb602d 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -623,9 +623,7 @@ static int sixpack_open(struct tty_struct *tty) netif_start_queue(dev); - init_timer(&sp->tx_t); - sp->tx_t.function = sp_xmit_on_air; - sp->tx_t.data = (unsigned long) sp; + setup_timer(&sp->tx_t, sp_xmit_on_air, (unsigned long)sp); init_timer(&sp->resync_t); From 9d90725f33ebd0f30790c26eb5e9e0a098567895 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:17 +0530 Subject: [PATCH 0198/2177] drivers: net: cpsw_ale: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_ale.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index ddd43e09111e..cd1185e66133 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -859,9 +859,7 @@ void cpsw_ale_start(struct cpsw_ale *ale) cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1); cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); - init_timer(&ale->timer); - ale->timer.data = (unsigned long)ale; - ale->timer.function = cpsw_ale_timer; + setup_timer(&ale->timer, cpsw_ale_timer, (unsigned long)ale); if (ale->ageout) { ale->timer.expires = jiffies + ale->ageout; add_timer(&ale->timer); From 997decfb6aeaa9be41ff557741845bb9fb4bf5bc Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:18 +0530 Subject: [PATCH 0199/2177] drivers: net: stmmac: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1763e48c84e2..f41661a04f23 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2217,10 +2217,8 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv) { priv->tx_coal_frames = STMMAC_TX_FRAMES; priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; - init_timer(&priv->txtimer); + setup_timer(&priv->txtimer, stmmac_tx_timer, (unsigned long)priv); priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer); - priv->txtimer.data = (unsigned long)priv; - priv->txtimer.function = stmmac_tx_timer; add_timer(&priv->txtimer); } From 9be5813a29e5b3379db30d00319682fe965febe5 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:19 +0530 Subject: [PATCH 0200/2177] drivers: net: packetengines: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/packetengines/hamachi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 482b85e4d665..77bc7cca8980 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -979,10 +979,8 @@ static int hamachi_open(struct net_device *dev) dev->name, readw(ioaddr + RxStatus), readw(ioaddr + TxStatus)); } /* Set the timer to check for link beat. */ - init_timer(&hmp->timer); + setup_timer(&hmp->timer, hamachi_timer, (unsigned long)dev); hmp->timer.expires = RUN_AT((24*HZ)/10); /* 2.4 sec. */ - hmp->timer.data = (unsigned long)dev; - hmp->timer.function = hamachi_timer; /* timer handler */ add_timer(&hmp->timer); return 0; From 7d8fb3a7742513bb5434be704e0b0bf785032f45 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:20 +0530 Subject: [PATCH 0201/2177] drivers: net: i40evf: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 1825d956bb00..c243f9da95ae 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -2686,9 +2686,8 @@ static void i40evf_init_task(struct work_struct *work) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = &i40evf_watchdog_timer; - adapter->watchdog_timer.data = (unsigned long)adapter; + setup_timer(&adapter->watchdog_timer, &i40evf_watchdog_timer, + (unsigned long)adapter); mod_timer(&adapter->watchdog_timer, jiffies + 1); adapter->tx_desc_count = I40EVF_DEFAULT_TXD; From 99e3aa1ea47d0804a69ea7948ddd1251bcd1a635 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:21 +0530 Subject: [PATCH 0202/2177] drivers: net: uli526x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/uli526x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index 7fc248efc4ba..5fbbc0caba99 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -491,10 +491,8 @@ static int uli526x_open(struct net_device *dev) netif_wake_queue(dev); /* set and active a timer process */ - init_timer(&db->timer); + setup_timer(&db->timer, uli526x_timer, (unsigned long)dev); db->timer.expires = ULI526X_TIMER_WUT + HZ * 2; - db->timer.data = (unsigned long)dev; - db->timer.function = uli526x_timer; add_timer(&db->timer); return 0; From 570ba3e82befbba7649e459fedc4aab27510ef44 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:22 +0530 Subject: [PATCH 0203/2177] drivers: net: enic: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cisco/enic/enic_clsf.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.h b/drivers/net/ethernet/cisco/enic/enic_clsf.h index 6aa9f89d073b..4bfbf25f9ddc 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.h +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.h @@ -19,9 +19,8 @@ void enic_flow_may_expire(unsigned long data); static inline void enic_rfs_timer_start(struct enic *enic) { - init_timer(&enic->rfs_h.rfs_may_expire); - enic->rfs_h.rfs_may_expire.function = enic_flow_may_expire; - enic->rfs_h.rfs_may_expire.data = (unsigned long)enic; + setup_timer(&enic->rfs_h.rfs_may_expire, enic_flow_may_expire, + (unsigned long)enic); mod_timer(&enic->rfs_h.rfs_may_expire, jiffies + HZ/4); } From 66f06890305eb2c8200cefbc3d6405ff6baef47e Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:23 +0530 Subject: [PATCH 0204/2177] drivers: net: cxgb: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb/sge.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 0f13a7f7c1d3..75e439918700 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -2075,9 +2075,8 @@ struct sge *t1_sge_create(struct adapter *adapter, struct sge_params *p) goto nomem_port; } - init_timer(&sge->tx_reclaim_timer); - sge->tx_reclaim_timer.data = (unsigned long)sge; - sge->tx_reclaim_timer.function = sge_tx_reclaim_cb; + setup_timer(&sge->tx_reclaim_timer, sge_tx_reclaim_cb, + (unsigned long)sge); if (is_T2(sge->adapter)) { init_timer(&sge->espibug_timer); From 804dea920b66fa5813278fc55eaa5b2ae39ab110 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:24 +0530 Subject: [PATCH 0205/2177] drivers: net: bnx2x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c12b4d3e946e..54d1571384a0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12414,10 +12414,8 @@ static int bnx2x_init_bp(struct bnx2x *bp) bp->current_interval = CHIP_REV_IS_SLOW(bp) ? 5*HZ : HZ; - init_timer(&bp->timer); + setup_timer(&bp->timer, bnx2x_timer, (unsigned long)bp); bp->timer.expires = jiffies + bp->current_interval; - bp->timer.data = (unsigned long) bp; - bp->timer.function = bnx2x_timer; if (SHMEM2_HAS(bp, dcbx_lldp_params_offset) && SHMEM2_HAS(bp, dcbx_lldp_dcbx_stat_offset) && From 19569c88b938145e90cc7d6ab45d7fa0edd07beb Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:25 +0530 Subject: [PATCH 0206/2177] drivers: net: lmc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/wan/lmc/lmc_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 4698450c77d1..ae69d65158e6 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1084,10 +1084,8 @@ static int lmc_open(struct net_device *dev) * Setup a timer for the watchdog on probe, and start it running. * Since lmc_ok == 0, it will be a NOP for now. */ - init_timer (&sc->timer); + setup_timer(&sc->timer, lmc_watchdog, (unsigned long)dev); sc->timer.expires = jiffies + HZ; - sc->timer.data = (unsigned long) dev; - sc->timer.function = lmc_watchdog; add_timer (&sc->timer); lmc_trace(dev, "lmc_open out"); From 95ec66968571bf0af0a22effdc1b9d9e62ea6630 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 20 Sep 2017 09:11:56 -0700 Subject: [PATCH 0207/2177] samples/bpf: Use getppid instead of getpgrp for array map stress When cross-compiling the bpf sample map_perf_test for aarch64, I find that __NR_getpgrp is undefined. This causes build errors. This syscall is deprecated and requires defining __ARCH_WANT_SYSCALL_DEPRECATED. To avoid having to define that, just use a different syscall (getppid) for the array map stress test. Acked-by: Alexei Starovoitov Signed-off-by: Joel Fernandes Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/map_perf_test_kern.c | 2 +- samples/bpf/map_perf_test_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 098c857f1eda..2b2ffb97018b 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -266,7 +266,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getpgrp") +SEC("kprobe/sys_getppid") int stress_array_map_lookup(struct pt_regs *ctx) { u32 key = 1, i; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index f388254896f6..a0310fc70057 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -282,7 +282,7 @@ static void test_array_lookup(int cpu) start_time = time_get_ns(); for (i = 0; i < max_cnt; i++) - syscall(__NR_getpgrp, 0); + syscall(__NR_getppid, 0); printf("%d:array_lookup %lld lookups per sec\n", cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); } From 876e88e3273e300895e308bd660c6cfaabb03cd5 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 20 Sep 2017 09:11:57 -0700 Subject: [PATCH 0208/2177] samples/bpf: Enable cross compiler support When cross compiling, bpf samples use HOSTCC for compiling the non-BPF part of the sample, however what we really want is to use the cross compiler to build for the cross target since that is what will load and run the BPF sample. Detect this and compile samples correctly. Acked-by: Alexei Starovoitov Signed-off-by: Joel Fernandes Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- samples/bpf/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index cf17c7932a6e..13f74b67ca44 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -177,6 +177,11 @@ HOSTLOADLIBES_syscall_tp += -lelf LLC ?= llc CLANG ?= clang +# Detect that we're cross compiling and use the cross compiler +ifdef CROSS_COMPILE +HOSTCC = $(CROSS_COMPILE)gcc +endif + # Trick to allow make to be run from this directory all: $(MAKE) -C ../../ $(CURDIR)/ From b655fc1c2ee1c2b2d07c0b6f432798b91202718c Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 20 Sep 2017 09:11:58 -0700 Subject: [PATCH 0209/2177] samples/bpf: Fix pt_regs issues when cross-compiling BPF samples fail to build when cross-compiling for ARM64 because of incorrect pt_regs param selection. This is because clang defines __x86_64__ and bpf_headers thinks we're building for x86. Since clang is building for the BPF target, it shouldn't make assumptions about what target the BPF program is going to run on. To fix this, lets pass ARCH so the header knows which target the BPF program is being compiled for and can use the correct pt_regs code. Acked-by: Alexei Starovoitov Signed-off-by: Joel Fernandes Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/Makefile | 2 +- tools/testing/selftests/bpf/bpf_helpers.h | 56 ++++++++++++++++++++--- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 13f74b67ca44..ebc2ad69b62c 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -230,7 +230,7 @@ $(obj)/%.o: $(src)/%.c $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -I$(srctree)/tools/testing/selftests/bpf/ \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ - -Wno-compare-distinct-pointer-types \ + -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option \ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 36fb9161b34a..4875395b0b52 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -109,7 +109,47 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = static int (*bpf_skb_change_head)(void *, int len, int flags) = (void *) BPF_FUNC_skb_change_head; +/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ +#if defined(__TARGET_ARCH_x86) + #define bpf_target_x86 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_s930x) + #define bpf_target_s930x + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm64) + #define bpf_target_arm64 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_mips) + #define bpf_target_mips + #define bpf_target_defined +#elif defined(__TARGET_ARCH_powerpc) + #define bpf_target_powerpc + #define bpf_target_defined +#elif defined(__TARGET_ARCH_sparc) + #define bpf_target_sparc + #define bpf_target_defined +#else + #undef bpf_target_defined +#endif + +/* Fall back to what the compiler says */ +#ifndef bpf_target_defined #if defined(__x86_64__) + #define bpf_target_x86 +#elif defined(__s390x__) + #define bpf_target_s930x +#elif defined(__aarch64__) + #define bpf_target_arm64 +#elif defined(__mips__) + #define bpf_target_mips +#elif defined(__powerpc__) + #define bpf_target_powerpc +#elif defined(__sparc__) + #define bpf_target_sparc +#endif +#endif + 
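(Editorial sketch, not part of the patch: how the __TARGET_ARCH_* selection above is meant to be consumed. The probe point, function name and format string are illustrative only; the build is assumed to go through the samples/bpf clang/llc pipeline with ARCH=arm64 exported, so that the -D__TARGET_ARCH_arm64 flag added to the Makefile earlier in this series reaches bpf_helpers.h.)

/* Illustrative BPF program: with -D__TARGET_ARCH_arm64 on the clang
 * command line, bpf_helpers.h defines bpf_target_arm64, so the
 * PT_REGS_* accessors below expand to the arm64 pt_regs layout
 * (regs[0], regs[1], ...) rather than whatever the host compiler's
 * own defines (e.g. __x86_64__) would have selected.
 */
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("kprobe/sys_write")
int trace_write_entry(struct pt_regs *ctx)
{
	char fmt[] = "write fd=%ld\n";
	long fd = (long)PT_REGS_PARM1(ctx);	/* ctx->regs[0] on arm64 */

	bpf_trace_printk(fmt, sizeof(fmt), fd);
	return 0;
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;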
+#if defined(bpf_target_x86) #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) @@ -122,7 +162,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) -#elif defined(__s390x__) +#elif defined(bpf_target_s390x) #define PT_REGS_PARM1(x) ((x)->gprs[2]) #define PT_REGS_PARM2(x) ((x)->gprs[3]) @@ -135,7 +175,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->gprs[15]) #define PT_REGS_IP(x) ((x)->psw.addr) -#elif defined(__aarch64__) +#elif defined(bpf_target_arm64) #define PT_REGS_PARM1(x) ((x)->regs[0]) #define PT_REGS_PARM2(x) ((x)->regs[1]) @@ -148,7 +188,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->pc) -#elif defined(__mips__) +#elif defined(bpf_target_mips) #define PT_REGS_PARM1(x) ((x)->regs[4]) #define PT_REGS_PARM2(x) ((x)->regs[5]) @@ -161,7 +201,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->regs[29]) #define PT_REGS_IP(x) ((x)->cp0_epc) -#elif defined(__powerpc__) +#elif defined(bpf_target_powerpc) #define PT_REGS_PARM1(x) ((x)->gpr[3]) #define PT_REGS_PARM2(x) ((x)->gpr[4]) @@ -172,7 +212,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) -#elif defined(__sparc__) +#elif defined(bpf_target_sparc) #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) @@ -182,6 +222,8 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) + +/* Should this also be a bpf_target check for the sparc case? */ #if defined(__arch64__) #define PT_REGS_IP(x) ((x)->tpc) #else @@ -190,10 +232,10 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #endif -#ifdef __powerpc__ +#ifdef bpf_target_powerpc #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(__sparc__) +#elif bpf_target_sparc #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else From 8bf2ac25a96c69985a1a9fbbad7da22ae4343a38 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 20 Sep 2017 09:11:59 -0700 Subject: [PATCH 0210/2177] samples/bpf: Add documentation on cross compilation Acked-by: Alexei Starovoitov Signed-off-by: Joel Fernandes Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/README.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst index 79f9a58f1872..5f27e4faca50 100644 --- a/samples/bpf/README.rst +++ b/samples/bpf/README.rst @@ -64,3 +64,13 @@ It is also possible to point make to the newly compiled 'llc' or 'clang' command via redefining LLC or CLANG on the make command line:: make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang + +Cross compiling samples +----------------------- +In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH +environment variables before calling make. This will direct make to build +samples for the cross target. 
+ +export ARCH=arm64 +export CROSS_COMPILE="aarch64-linux-gnu-" +make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang From 6e617de84e87d626d1e976fc30e1322239fd4d2d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Sep 2017 18:26:53 +0200 Subject: [PATCH 0211/2177] net: avoid a full fib lookup when rp_filter is disabled. Since commit 1dced6a85482 ("ipv4: Restore accept_local behaviour in fib_validate_source()") a full fib lookup is needed even if the rp_filter is disabled, if accept_local is false - which is the default. What we really need in the above scenario is just checking that the source IP address is not local, and in most case we can do that is a cheaper way looking up the ifaddr hash table. This commit adds a helper for such lookup, and uses it to validate the src address when rp_filter is disabled and no 'local' routes are created by the user space in the relevant namespace. A new ipv4 netns flag is added to account for such routes. We need that to preserve the same behavior we had before this patch. It also drops the checks to bail early from __fib_validate_source, added by the commit 1dced6a85482 ("ipv4: Restore accept_local behaviour in fib_validate_source()") they do not give any measurable performance improvement: if we do the lookup with are on a slower path. This improves UDP performances for unconnected sockets when rp_filter is disabled by 5% and also gives small but measurable performance improvement for TCP flood scenarios. v1 -> v2: - use the ifaddr lookup helper in __ip_dev_find(), as suggested by Eric - fall-back to full lookup if custom local routes are present Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/devinet.c | 30 ++++++++++++++++++------------ net/ipv4/fib_frontend.c | 22 +++++++++++++++++----- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index fb3f809e34e4..751d051f0bc7 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -179,6 +179,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); +struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr); static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 20d061c805e3..20720721da4b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,6 +49,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; + bool fib_has_custom_local_routes; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d7adc0616599..7ce22a2c07ce 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa) */ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { - u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { - if (ifa->ifa_local == addr) { - struct net_device *dev = ifa->ifa_dev->dev; - - if (!net_eq(dev_net(dev), net)) - continue; - result = dev; - 
break; - } - } - if (!result) { + ifa = inet_lookup_ifaddr_rcu(net, addr); + if (!ifa) { struct flowi4 fl4 = { .daddr = addr }; struct fib_result res = { 0 }; struct fib_table *local; @@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && res.type == RTN_LOCAL) result = FIB_RES_DEV(res); + } else { + result = ifa->ifa_dev->dev; } if (result && devref) dev_hold(result); @@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) } EXPORT_SYMBOL(__ip_dev_find); +/* called under RCU lock */ +struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr) +{ + u32 hash = inet_addr_hash(net, addr); + struct in_ifaddr *ifa; + + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) + if (ifa->ifa_local == addr && + net_eq(dev_net(ifa->ifa_dev->dev), net)) + return ifa; + + return NULL; +} + static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 37819ab4cc74..f02819134ba2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -345,9 +345,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) goto e_inval; - if (!rpf && !fib_num_tclassid_users(net) && - (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) - goto last_resort; fib_combine_itag(itag, &res); dev_match = false; @@ -402,13 +399,26 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); + struct net *net = dev_net(dev); - if (!r && !fib_num_tclassid_users(dev_net(dev)) && - IN_DEV_ACCEPT_LOCAL(idev) && + if (!r && !fib_num_tclassid_users(net) && (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { + if (IN_DEV_ACCEPT_LOCAL(idev)) + goto ok; + /* if no local routes are added from user space we can check + * for local addresses looking-up the ifaddr table + */ + if (net->ipv4.fib_has_custom_local_routes) + goto full_check; + if (inet_lookup_ifaddr_rcu(net, src)) + return -EINVAL; + +ok: *itag = 0; return 0; } + +full_check: return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); } @@ -759,6 +769,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, } err = fib_table_insert(net, tb, &cfg, extack); + if (!err && cfg.fc_type == RTN_LOCAL) + net->ipv4.fib_has_custom_local_routes = true; errout: return err; } From 0abfd494deefdbab66ac03c1181a614285e7d90c Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 20 Sep 2017 12:28:05 -0400 Subject: [PATCH 0212/2177] net: dsa: use dedicated CPU port Each port in DSA has its own dedicated CPU port currently available in its parent switch's ds->ports[port].cpu_dp. Use it instead of getting the unique tree CPU port, which will be deprecated soon. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 4 ++-- drivers/net/dsa/bcm_sf2.c | 6 +++--- drivers/net/dsa/mt7530.c | 4 ++-- drivers/net/dsa/mv88e6060.c | 2 +- drivers/net/dsa/qca8k.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a9f2a5b55a5e..d4ce092def83 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1336,7 +1336,7 @@ EXPORT_SYMBOL(b53_fdb_dump); int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br) { struct b53_device *dev = ds->priv; - s8 cpu_port = ds->dst->cpu_dp->index; + s8 cpu_port = ds->ports[port].cpu_dp->index; u16 pvlan, reg; unsigned int i; @@ -1382,7 +1382,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br) { struct b53_device *dev = ds->priv; struct b53_vlan *vl = &dev->vlans[0]; - s8 cpu_port = ds->dst->cpu_dp->index; + s8 cpu_port = ds->ports[port].cpu_dp->index; unsigned int i; u16 pvlan, reg, pvid; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0072a959db5b..898d5642b516 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -661,7 +661,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = ds->dst->cpu_dp->netdev; + struct net_device *p = ds->ports[port].cpu_dp->netdev; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_wolinfo pwol; @@ -684,9 +684,9 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = ds->dst->cpu_dp->netdev; + struct net_device *p = ds->ports[port].cpu_dp->netdev; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - s8 cpu_port = ds->dst->cpu_dp->index; + s8 cpu_port = ds->ports[port].cpu_dp->index; struct ethtool_wolinfo pwol; p->ethtool_ops->get_wol(p, &pwol); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c142b97add2c..faa3b88d2206 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -928,11 +928,11 @@ mt7530_setup(struct dsa_switch *ds) struct device_node *dn; struct mt7530_dummy_poll p; - /* The parent node of cpu_dp->netdev which holds the common system + /* The parent node of master netdev which holds the common system * controller also is the container for two GMACs nodes representing * as two netdev instances. */ - dn = ds->dst->cpu_dp->netdev->dev.of_node->parent; + dn = ds->ports[MT7530_CPU_PORT].netdev->dev.of_node->parent; priv->ethernet = syscon_node_to_regmap(dn); if (IS_ERR(priv->ethernet)) return PTR_ERR(priv->ethernet); diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index dce7fa57eb55..621cdc46ad81 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -176,7 +176,7 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) ((p & 0xf) << PORT_VLAN_MAP_DBNUM_SHIFT) | (dsa_is_cpu_port(ds, p) ? 
ds->enabled_port_mask : - BIT(ds->dst->cpu_dp->index))); + BIT(ds->ports[p].cpu_dp->index))); /* Port Association Vector: when learning source addresses * of packets, add the address to the address database using diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 5ada7a41449c..82f09711ac1a 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -506,7 +506,7 @@ qca8k_setup(struct dsa_switch *ds) pr_warn("regmap initialization failed"); /* Initialize CPU port pad mode (xMII type, delays...) */ - phy_mode = of_get_phy_mode(ds->dst->cpu_dp->dn); + phy_mode = of_get_phy_mode(ds->ports[QCA8K_CPU_PORT].dn); if (phy_mode < 0) { pr_err("Can't find phy-mode for master device\n"); return phy_mode; From 4fa7b718881a5358286903b796968cea80993820 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 20 Sep 2017 19:31:57 -0400 Subject: [PATCH 0213/2177] net: dsa: better scoping of slave functions A few DSA slave functions take a dsa_slave_priv pointer as first argument, whereas the scope of the slave.c functions is the slave net_device structure. Fix this and rename dsa_netpoll_send_skb to dsa_slave_netpoll_send_skb. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6fc9eb094267..4a98942a6135 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -385,10 +385,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev, return 0; } -static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p, - struct sk_buff *skb) +static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, + struct sk_buff *skb) { #ifdef CONFIG_NET_POLL_CONTROLLER + struct dsa_slave_priv *p = netdev_priv(dev); + if (p->netpoll) netpoll_send_skb(p->netpoll, skb); #else @@ -422,7 +424,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) * tag to be successfully transmitted */ if (unlikely(netpoll_tx_running(dev))) - return dsa_netpoll_send_skb(p, nskb); + return dsa_slave_netpoll_send_skb(dev, nskb); /* Queue the SKB for transmission on the parent interface, but * do not modify its EtherType @@ -736,9 +738,9 @@ static int dsa_slave_get_phys_port_name(struct net_device *dev, } static struct dsa_mall_tc_entry * -dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p, - unsigned long cookie) +dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) { + struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_mall_tc_entry *mall_tc_entry; list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) @@ -820,7 +822,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, if (!ds->ops->port_mirror_del) return; - mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie); + mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie); if (!mall_tc_entry) return; @@ -1027,10 +1029,9 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, } /* slave device setup *******************************************************/ -static int dsa_slave_phy_connect(struct dsa_slave_priv *p, - struct net_device *slave_dev, - int addr) +static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) { + struct dsa_slave_priv *p = netdev_priv(slave_dev); struct dsa_switch *ds = p->dp->ds; p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); @@ -1046,9 +1047,9 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, 
p->phy_interface); } -static int dsa_slave_phy_setup(struct dsa_slave_priv *p, - struct net_device *slave_dev) +static int dsa_slave_phy_setup(struct net_device *slave_dev) { + struct dsa_slave_priv *p = netdev_priv(slave_dev); struct dsa_switch *ds = p->dp->ds; struct device_node *phy_dn, *port_dn; bool phy_is_fixed = false; @@ -1088,7 +1089,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, */ if (!phy_is_fixed && phy_id >= 0 && (ds->phys_mii_mask & (1 << phy_id))) { - ret = dsa_slave_phy_connect(p, slave_dev, phy_id); + ret = dsa_slave_phy_connect(slave_dev, phy_id); if (ret) { netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret); of_node_put(phy_dn); @@ -1111,7 +1112,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, * MDIO bus instead */ if (!p->phy) { - ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index); + ret = dsa_slave_phy_connect(slave_dev, p->dp->index); if (ret) { netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->dp->index, ret); @@ -1233,7 +1234,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name) netif_carrier_off(slave_dev); - ret = dsa_slave_phy_setup(p, slave_dev); + ret = dsa_slave_phy_setup(slave_dev); if (ret) { netdev_err(master, "error %d setting up slave phy\n", ret); unregister_netdev(slave_dev); From de40fc5d210f2c31f2c25a9b920861276a71b70d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 20 Sep 2017 19:32:14 -0400 Subject: [PATCH 0214/2177] net: dsa: add port fdb dump Dumping a DSA port's FDB entries is not specific to a DSA slave, so add a dsa_port_fdb_dump function, similarly to dsa_port_fdb_add and dsa_port_fdb_del. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 1 + net/dsa/port.c | 11 +++++++++++ net/dsa/slave.c | 9 ++------- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f616b3444418..9803952a5b40 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -128,6 +128,7 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid); +int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); int dsa_port_mdb_add(struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans); diff --git a/net/dsa/port.c b/net/dsa/port.c index 659676ba3f8b..76d43a82d397 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -173,6 +173,17 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); } +int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->port_fdb_dump) + return -EOPNOTSUPP; + + return ds->ops->port_fdb_dump(ds, port, cb, data); +} + int dsa_port_mdb_add(struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 4a98942a6135..02ace7d462c4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -263,16 +263,11 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, }; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_port *dp = p->dp; - struct dsa_switch *ds = dp->ds; int err; - if (!ds->ops->port_fdb_dump) - return -EOPNOTSUPP; - - err = ds->ops->port_fdb_dump(ds, dp->index, - dsa_slave_port_fdb_do_dump, - &dump); + err = 
dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump); *idx = dump.idx; + return err; } From b6cd4b5895848968e8fee93fc5e3dc8babc40b9e Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 21 Sep 2017 08:15:26 +0200 Subject: [PATCH 0215/2177] e100: Cocci spatch "pool_zalloc-simple" Use *_pool_zalloc rather than *_pool_alloc followed by memset with 0. Found by coccinelle spatch "api/alloc/pool_zalloc-simple.cocci" Signed-off-by: Thomas Meyer Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e100.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 4d10270ddf8f..184f11242f56 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1910,11 +1910,10 @@ static int e100_alloc_cbs(struct nic *nic) nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL; nic->cbs_avail = 0; - nic->cbs = pci_pool_alloc(nic->cbs_pool, GFP_KERNEL, - &nic->cbs_dma_addr); + nic->cbs = pci_pool_zalloc(nic->cbs_pool, GFP_KERNEL, + &nic->cbs_dma_addr); if (!nic->cbs) return -ENOMEM; - memset(nic->cbs, 0, count * sizeof(struct cb)); for (cb = nic->cbs, i = 0; i < count; cb++, i++) { cb->next = (i + 1 < count) ? cb + 1 : nic->cbs; From 51957bc53aa7ca60d63f1ba0d9e3d887562b1723 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:17:34 +0200 Subject: [PATCH 0216/2177] net/smc: parameter cleanup in smc_cdc_get_free_slot() Use the smc_connection as first parameter with smc_cdc_get_free_slot(). This is just a small code cleanup, no functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 7 ++++--- net/smc/smc_cdc.h | 3 ++- net/smc/smc_tx.c | 6 ++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a7294edbc221..5ef97e5a5f78 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -62,10 +62,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, bh_unlock_sock(&smc->sk); } -int smc_cdc_get_free_slot(struct smc_link *link, +int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_cdc_tx_pend **pend) { + struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, (struct smc_wr_tx_pend_priv **)pend); } @@ -118,8 +120,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, - &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (rc) return rc; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 8e1d76f26007..56f883d1159c 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -206,7 +206,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, struct smc_cdc_tx_pend; -int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf, +int smc_cdc_get_free_slot(struct smc_connection *conn, + struct smc_wr_buf **wr_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 3c656beb8820..e2228f6d1c25 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -394,8 +394,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) int rc; spin_lock_bh(&conn->send_lock); - rc = 
smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, - &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = @@ -463,8 +462,7 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], - &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (!rc) rc = smc_cdc_msg_send(conn, wr_buf, pend); if (rc < 0) { From e1f6198e221f472c03b88e352432a01076ec8647 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 21 Sep 2017 12:50:47 +0530 Subject: [PATCH 0217/2177] cxgb4: avoid stall while shutting down the adapter do not wait for completion while deleting the filters when the adapter is shutting down because we may not get the response as interrupts will be disabled. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 7 ++++++- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ea72d2d2e1b4..c4e997fdff64 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -549,6 +549,7 @@ enum { /* adapter flags */ MASTER_PF = (1 << 7), FW_OFLD_CONN = (1 << 9), ROOT_NO_RELAXED_ORDERING = (1 << 10), + SHUTTING_DOWN = (1 << 11), }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 45b5853ca2f1..97ead2c66751 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -191,7 +191,8 @@ static int del_filter_wr(struct adapter *adapter, int fidx) return -ENOMEM; fwr = __skb_put(skb, len); - t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id); + t4_mk_filtdelwr(f->tid, fwr, (adapter->flags & SHUTTING_DOWN) ? -1 + : adapter->sge.fw_evtq.abs_id); /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. 
@@ -636,6 +637,10 @@ int cxgb4_del_filter(struct net_device *dev, int filter_id) struct filter_ctx ctx; int ret; + /* If we are shutting down the adapter do not wait for completion */ + if (netdev2adap(dev)->flags & SHUTTING_DOWN) + return __cxgb4_del_filter(dev, filter_id, NULL); + init_completion(&ctx.completion); ret = __cxgb4_del_filter(dev, filter_id, &ctx); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 92d9d795d874..5fe81a4e26a6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5254,6 +5254,8 @@ static void remove_one(struct pci_dev *pdev) return; } + adapter->flags |= SHUTTING_DOWN; + if (adapter->pf == 4) { int i; @@ -5339,6 +5341,8 @@ static void shutdown_one(struct pci_dev *pdev) return; } + adapter->flags |= SHUTTING_DOWN; + if (adapter->pf == 4) { int i; From 8701352b22a2f82c814283131587e970a56b69a8 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Thu, 21 Sep 2017 12:05:25 +0200 Subject: [PATCH 0218/2177] bridge: trigger RTM_NEWLINK when interface is modified by bridge ioctl Currently, there is a difference in netlink events received when an interface is modified through bridge ioctl() or through netlink. This patch generates additional events when an interface is added to or removed from a bridge via ioctl(). When adding then removing an interface from a bridge with netlink, we get: 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 Deleted 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff When using ioctl(): 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 Deleted 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff Without this patch, the last netlink notification is not sent. Signed-off-by: Vincent Bernat Reviewed-by: Stephen Hemminger Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/bridge/br_ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 7970f8540cbb..66cd98772051 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -102,6 +102,9 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) else ret = br_del_if(br, dev); + if (!ret) + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL); + return ret; } From 2ed343f98178ff232b504bbd006b34db07835082 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 16:29:33 +0530 Subject: [PATCH 0219/2177] net:nfc: use setup_timer Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- net/nfc/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/nfc/core.c b/net/nfc/core.c index 5cf33df888c3..e5e23c2cbe74 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1094,9 +1094,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->targets_generation = 1; if (ops->check_presence) { - init_timer(&dev->check_pres_timer); - dev->check_pres_timer.data = (unsigned long)dev; - dev->check_pres_timer.function = nfc_check_pres_timeout; + setup_timer(&dev->check_pres_timer, nfc_check_pres_timeout, + (unsigned long)dev); INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); } From 802be571348681a8ee052850e47b9652de7e05d4 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 18:17:55 +0530 Subject: [PATCH 0220/2177] net: wan : hdlc: use setup_timer() helper Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_fr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 78596e42a3f3..425a47ffed25 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1003,11 +1003,10 @@ static void fr_start(struct net_device *dev) state(hdlc)->n391cnt = 0; state(hdlc)->txseq = state(hdlc)->rxseq = 0; - init_timer(&state(hdlc)->timer); + setup_timer(&state(hdlc)->timer, fr_timer, + (unsigned long)dev); /* First poll after 1 s */ state(hdlc)->timer.expires = jiffies + HZ; - state(hdlc)->timer.function = fr_timer; - state(hdlc)->timer.data = (unsigned long)dev; add_timer(&state(hdlc)->timer); } else fr_set_link_state(1, dev); From e53a84b2b47119cbb895a4a83e6675ec50cf9fe5 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 18:24:15 +0530 Subject: [PATCH 0221/2177] net: usb: catc: use setup_timer() helper Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- drivers/net/usb/catc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index dbc90313f472..aeb62e17d19d 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -805,9 +805,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id spin_lock_init(&catc->tx_lock); spin_lock_init(&catc->ctrl_lock); - init_timer(&catc->timer); - catc->timer.data = (long) catc; - catc->timer.function = catc_stats_timer; + setup_timer(&catc->timer, catc_stats_timer, (long)catc); catc->ctrl_urb = usb_alloc_urb(0, GFP_KERNEL); catc->tx_urb = usb_alloc_urb(0, GFP_KERNEL); From 8447779637172809060e5064afdf52f16a09aa13 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 18:32:58 +0530 Subject: [PATCH 0222/2177] net: ti: netcp: use setup_timer Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_ethss.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 28cb38af1a34..4ad821655e51 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3616,9 +3616,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, } spin_unlock_bh(&gbe_dev->hw_stats_lock); - init_timer(&gbe_dev->timer); - gbe_dev->timer.data = (unsigned long)gbe_dev; - gbe_dev->timer.function = netcp_ethss_timer; + setup_timer(&gbe_dev->timer, netcp_ethss_timer, + (unsigned long)gbe_dev); gbe_dev->timer.expires = jiffies + GBE_TIMER_INTERVAL; add_timer(&gbe_dev->timer); *inst_priv = gbe_dev; From a1f3316dd7b5ce740c774697c664e2e60d095794 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 21 Sep 2017 18:18:23 -0700 Subject: [PATCH 0223/2177] ipv4: Move fib_has_custom_local_routes outside of IP_MULTIPLE_TABLES. > net/ipv4/fib_frontend.c: In function 'fib_validate_source': > net/ipv4/fib_frontend.c:411:16: error: 'struct netns_ipv4' has no member named 'fib_has_custom_local_routes' > if (net->ipv4.fib_has_custom_local_routes) > ^ > net/ipv4/fib_frontend.c: In function 'inet_rtm_newroute': > net/ipv4/fib_frontend.c:773:12: error: 'struct netns_ipv4' has no member named 'fib_has_custom_local_routes' > net->ipv4.fib_has_custom_local_routes = true; > ^ Fixes: 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled.") Reported-by: Stephen Rothwell Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 20720721da4b..8387f099115e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,10 +49,10 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; - bool fib_has_custom_local_routes; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif + bool fib_has_custom_local_routes; #ifdef CONFIG_IP_ROUTE_CLASSID int fib_num_tclassid_users; #endif From a424120765009df13957af20ba4d18e531cfe643 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2017 07:50:28 -0700 Subject: [PATCH 0224/2177] net: vrf: remove skb_dst_force() after skb_dst_set() skb_dst_set(skb, dst) installs a normal (refcounted) dst, there is no point using skb_dst_force(skb) Signed-off-by: Eric Dumazet Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9b243e6f3008..cc18b7b11612 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -132,7 +132,6 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, skb_orphan(skb); skb_dst_set(skb, dst); - skb_dst_force(skb); /* set pkt_type to avoid skb hitting packet taps twice - * once on Tx and again in Rx processing From 411d788a23f7e20b8fc51b548c7204fdecc9d22e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Sep 2017 17:36:08 +0200 Subject: [PATCH 0225/2177] test_rhashtable: remove initdata annotation kbuild test robot reported a section mismatch warning w. gcc 4.x: WARNING: lib/test_rhashtable.o(.text+0x139e): Section mismatch in reference from the function rhltable_insert.clone.3() to the variable .init.data:rhlt so remove this annotation. Fixes: cdd4de372ea06 ("test_rhashtable: add test case for rhl_table interface") Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index de4d0584631a..8e83cbdc049c 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -251,7 +251,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array, } static struct rhashtable ht; -static struct rhltable rhlt __initdata; +static struct rhltable rhlt; static int __init test_rhltable(unsigned int entries) { From dd5437974964c759570d68e50ce13c313808f79a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 22 Sep 2017 14:38:58 +0800 Subject: [PATCH 0226/2177] virtio-net: correctly set xdp_xmit for mergeable buffer We should set xdp_xmit only when xdp_do_redirect() succeed. Cc: John Fastabend Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f6c1f135a024..dd14a4547932 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -721,7 +721,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto xdp_xmit; case XDP_REDIRECT: err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (err) + if (!err) *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; From 242c1a28eb61cb34974e8aa05235d84355940a8a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 22 Sep 2017 10:25:22 +0800 Subject: [PATCH 0227/2177] net: Remove useless function skb_header_release There is no one which would invokes the function skb_header_release. So just remove it now. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 2 +- include/linux/skbuff.h | 19 ------------------- net/batman-adv/soft-interface.c | 2 +- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 522d2900cd1d..f4d7362eb325 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -245,7 +245,7 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, * - We are allowed to put 4 bytes at tail if skb_cloned() * is false (and if we have 4 bytes of tailroom) * - * TCP packets for example are cloned, but skb_header_release() + * TCP packets for example are cloned, but __skb_header_release() * was called in tcp stack, allowing us to use headroom for our needs. */ if (!skb_header_cloned(skb) && diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 492828801acb..f9db5539a6fb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1456,28 +1456,9 @@ static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/** - * skb_header_release - release reference to header - * @skb: buffer to operate on - * - * Drop a reference to the header part of the buffer. This is done - * by acquiring a payload reference. You must not read from the header - * part of skb->data after this. - * Note : Check if you can use __skb_header_release() instead. - */ -static inline void skb_header_release(struct sk_buff *skb) -{ - BUG_ON(skb->nohdr); - skb->nohdr = 1; - atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref); -} - /** * __skb_header_release - release reference to header * @skb: buffer to operate on - * - * Variant of skb_header_release() assuming skb is private to caller. - * We can avoid one atomic operation. */ static inline void __skb_header_release(struct sk_buff *skb) { diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 10f7edfb176e..c2c986746d0b 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -69,7 +69,7 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) int result; /* TODO: We must check if we can release all references to non-payload - * data using skb_header_release in our skbs to allow skb_cow_header to + * data using __skb_header_release in our skbs to allow skb_cow_header to * work optimally. 
This means that those skbs are not allowed to read * or write any data which is before the current position of skb->data * after that call and thus allow other skbs with the same data buffer From 52a59bd509e3dc458be99dcf333b778e6e3b3749 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:33:29 +0300 Subject: [PATCH 0228/2177] net: use 32-bit arithmetic while allocating net device Private part of allocation is never big enough to warrant size_t. Space savings: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-10 (-10) function old new delta alloc_netdev_mqs 1120 1110 -10 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index fb766d906148..37d6a3c59e69 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7989,7 +7989,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; - size_t alloc_size; + unsigned int alloc_size; struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); From 6a345b3dbd1ed83a7877993c6e23c977a84bb483 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 21 Sep 2017 23:41:13 +0530 Subject: [PATCH 0229/2177] cxgb4: add tc flower offload skeleton Add basic skeleton to prepare for offloading tc-flower flows. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 4 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 22 +++++++ .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 57 +++++++++++++++++++ .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 46 +++++++++++++++ 4 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 817212702f0a..fecd7aab673b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,9 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4_ptp.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ + cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ + cxgb4_ptp.o cxgb4_tc_flower.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5fe81a4e26a6..5079246aaf2c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -79,6 +79,7 @@ #include "l2t.h" #include "sched.h" #include "cxgb4_tc_u32.h" +#include "cxgb4_tc_flower.h" #include "cxgb4_ptp.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -2873,6 +2874,25 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static int cxgb_setup_tc_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) +{ + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return cxgb4_tc_flower_replace(dev, 
cls_flower); + case TC_CLSFLOWER_DESTROY: + return cxgb4_tc_flower_destroy(dev, cls_flower); + case TC_CLSFLOWER_STATS: + return cxgb4_tc_flower_stats(dev, cls_flower); + default: + return -EOPNOTSUPP; + } +} + static int cxgb_setup_tc_cls_u32(struct net_device *dev, struct tc_cls_u32_offload *cls_u32) { @@ -2907,6 +2927,8 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_CLSU32: return cxgb_setup_tc_cls_u32(dev, type_data); + case TC_SETUP_CLSFLOWER: + return cxgb_setup_tc_flower(dev, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c new file mode 100644 index 000000000000..16dff71e4d02 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -0,0 +1,57 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cxgb4.h" +#include "cxgb4_tc_flower.h" + +int cxgb4_tc_flower_replace(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + return -EOPNOTSUPP; +} + +int cxgb4_tc_flower_destroy(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + return -EOPNOTSUPP; +} + +int cxgb4_tc_flower_stats(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h new file mode 100644 index 000000000000..b321fc205b5a --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -0,0 +1,46 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_FLOWER_H +#define __CXGB4_TC_FLOWER_H + +#include + +int cxgb4_tc_flower_replace(struct net_device *dev, + struct tc_cls_flower_offload *cls); +int cxgb4_tc_flower_destroy(struct net_device *dev, + struct tc_cls_flower_offload *cls); +int cxgb4_tc_flower_stats(struct net_device *dev, + struct tc_cls_flower_offload *cls); +#endif /* __CXGB4_TC_FLOWER_H */ From 62488e4b53ae02d82ac000f91ec82b5cfb41d6f2 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 21 Sep 2017 23:41:14 +0530 Subject: [PATCH 0230/2177] cxgb4: add basic tc flower offload support Add support to add/remove flows for offload. Following match and action are supported for offloading a flow: Match: ether-protocol, IPv4/IPv6 addresses, L4 ports (TCP/UDP) Action: drop, redirect to another port on the device. The qualifying flows can have accompanying mask information. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 24 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 + .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 280 +++++++++++++++++- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 17 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 + 6 files changed, 325 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index c4e997fdff64..d05721b06178 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -905,6 +905,9 @@ struct adapter { /* TC u32 offload */ struct cxgb4_tc_u32_table *tc_u32; struct chcr_stats_debug chcr_stats; + + /* TC flower offload */ + DECLARE_HASHTABLE(flower_anymatch_tbl, 9); }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 97ead2c66751..f3de9cdd4181 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -148,6 +148,30 @@ static int get_filter_steerq(struct net_device *dev, return iq; } +int cxgb4_get_free_ftid(struct net_device *dev, int family) +{ + struct adapter *adap = netdev2adap(dev); + struct tid_info *t = &adap->tids; + int ftid; + + spin_lock_bh(&t->ftid_lock); + if (family == PF_INET) { + ftid = find_first_zero_bit(t->ftid_bmap, t->nftids); + if (ftid >= t->nftids) + ftid = -1; + } else { + ftid = bitmap_find_free_region(t->ftid_bmap, t->nftids, 2); + if (ftid < 0) + goto out_unlock; + + /* this is only a lookup, keep the found region unallocated */ + bitmap_release_region(t->ftid_bmap, ftid, 2); + } +out_unlock: + spin_unlock_bh(&t->ftid_lock); + return ftid; +} + static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) { spin_lock_bh(&t->ftid_lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5079246aaf2c..ce33c3addc2b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5105,6 +5105,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (!adapter->tc_u32) dev_warn(&pdev->dev, "could not offload tc u32, continuing\n"); + + cxgb4_init_tc_flower(adapter); } if (is_offload(adapter)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 16dff71e4d02..dda34d5a52fb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -38,16 +38,287 @@ #include "cxgb4.h" #include "cxgb4_tc_flower.h" +static struct ch_tc_flower_entry *allocate_flower_entry(void) +{ + struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); + return new; +} + +/* Must be called with either RTNL or rcu_read_lock */ +static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap, + unsigned long flower_cookie) +{ + struct ch_tc_flower_entry *flower_entry; + + hash_for_each_possible_rcu(adap->flower_anymatch_tbl, flower_entry, + link, flower_cookie) + if (flower_entry->tc_flower_cookie == flower_cookie) + return flower_entry; + return NULL; +} + +static void cxgb4_process_flow_match(struct net_device *dev, + struct tc_cls_flower_offload *cls, + struct ch_filter_specification *fs) +{ + u16 addr_type = 0; + + if 
(dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + cls->key); + + addr_type = key->addr_type; + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->mask); + u16 ethtype_key = ntohs(key->n_proto); + u16 ethtype_mask = ntohs(mask->n_proto); + + if (ethtype_key == ETH_P_ALL) { + ethtype_key = 0; + ethtype_mask = 0; + } + + fs->val.ethtype = ethtype_key; + fs->mask.ethtype = ethtype_mask; + fs->val.proto = key->ip_proto; + fs->mask.proto = mask->ip_proto; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + cls->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + cls->mask); + fs->type = 0; + memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst)); + memcpy(&fs->val.fip[0], &key->src, sizeof(key->src)); + memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst)); + memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + cls->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + cls->mask); + + fs->type = 1; + memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst)); + memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src)); + memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst)); + memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src)); + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key, *mask; + + key = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_PORTS, + cls->key); + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_PORTS, + cls->mask); + fs->val.lport = cpu_to_be16(key->dst); + fs->mask.lport = cpu_to_be16(mask->dst); + fs->val.fport = cpu_to_be16(key->src); + fs->mask.fport = cpu_to_be16(mask->src); + } + + /* Match only packets coming from the ingress port where this + * filter will be created. 
+ */ + fs->val.iport = netdev2pinfo(dev)->port_id; + fs->mask.iport = ~0; +} + +static int cxgb4_validate_flow_match(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + if (cls->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS))) { + netdev_warn(dev, "Unsupported key used: 0x%x\n", + cls->dissector->used_keys); + return -EOPNOTSUPP; + } + return 0; +} + +static void cxgb4_process_flow_actions(struct net_device *in, + struct tc_cls_flower_offload *cls, + struct ch_filter_specification *fs) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) { + fs->action = FILTER_DROP; + } else if (is_tcf_mirred_egress_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *out = __dev_get_by_index(dev_net(in), + ifindex); + struct port_info *pi = netdev_priv(out); + + fs->action = FILTER_SWITCH; + fs->eport = pi->port_id; + } + } +} + +static int cxgb4_validate_flow_actions(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) { + /* Do nothing */ + } else if (is_tcf_mirred_egress_redirect(a)) { + struct adapter *adap = netdev2adap(dev); + struct net_device *n_dev; + unsigned int i, ifindex; + bool found = false; + + ifindex = tcf_mirred_ifindex(a); + for_each_port(adap, i) { + n_dev = adap->port[i]; + if (ifindex == n_dev->ifindex) { + found = true; + break; + } + } + + /* If interface doesn't belong to our hw, then + * the provided output port is not valid + */ + if (!found) { + netdev_err(dev, "%s: Out port invalid\n", + __func__); + return -EINVAL; + } + } else { + netdev_err(dev, "%s: Unsupported action\n", __func__); + return -EOPNOTSUPP; + } + } + return 0; +} + int cxgb4_tc_flower_replace(struct net_device *dev, struct tc_cls_flower_offload *cls) { - return -EOPNOTSUPP; + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_entry *ch_flower; + struct ch_filter_specification *fs; + struct filter_ctx ctx; + int fidx; + int ret; + + if (cxgb4_validate_flow_actions(dev, cls)) + return -EOPNOTSUPP; + + if (cxgb4_validate_flow_match(dev, cls)) + return -EOPNOTSUPP; + + ch_flower = allocate_flower_entry(); + if (!ch_flower) { + netdev_err(dev, "%s: ch_flower alloc failed.\n", __func__); + return -ENOMEM; + } + + fs = &ch_flower->fs; + fs->hitcnts = 1; + cxgb4_process_flow_actions(dev, cls, fs); + cxgb4_process_flow_match(dev, cls, fs); + + fidx = cxgb4_get_free_ftid(dev, fs->type ? 
PF_INET6 : PF_INET); + if (fidx < 0) { + netdev_err(dev, "%s: No fidx for offload.\n", __func__); + ret = -ENOMEM; + goto free_entry; + } + + init_completion(&ctx.completion); + ret = __cxgb4_set_filter(dev, fidx, fs, &ctx); + if (ret) { + netdev_err(dev, "%s: filter creation err %d\n", + __func__, ret); + goto free_entry; + } + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) { + ret = -ETIMEDOUT; + goto free_entry; + } + + ret = ctx.result; + /* Check if hw returned error for filter creation */ + if (ret) { + netdev_err(dev, "%s: filter creation err %d\n", + __func__, ret); + goto free_entry; + } + + INIT_HLIST_NODE(&ch_flower->link); + ch_flower->tc_flower_cookie = cls->cookie; + ch_flower->filter_id = ctx.tid; + hash_add_rcu(adap->flower_anymatch_tbl, &ch_flower->link, cls->cookie); + + return ret; + +free_entry: + kfree(ch_flower); + return ret; } int cxgb4_tc_flower_destroy(struct net_device *dev, struct tc_cls_flower_offload *cls) { - return -EOPNOTSUPP; + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_entry *ch_flower; + int ret; + + ch_flower = ch_flower_lookup(adap, cls->cookie); + if (!ch_flower) + return -ENOENT; + + ret = cxgb4_del_filter(dev, ch_flower->filter_id); + if (ret) + goto err; + + hash_del_rcu(&ch_flower->link); + kfree_rcu(ch_flower, rcu); + +err: + return ret; } int cxgb4_tc_flower_stats(struct net_device *dev, @@ -55,3 +326,8 @@ int cxgb4_tc_flower_stats(struct net_device *dev, { return -EOPNOTSUPP; } + +void cxgb4_init_tc_flower(struct adapter *adap) +{ + hash_init(adap->flower_anymatch_tbl); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index b321fc205b5a..6145a9e056eb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -37,10 +37,27 @@ #include +struct ch_tc_flower_stats { + u64 packet_count; + u64 byte_count; + u64 last_used; +}; + +struct ch_tc_flower_entry { + struct ch_filter_specification fs; + struct ch_tc_flower_stats stats; + unsigned long tc_flower_cookie; + struct hlist_node link; + struct rcu_head rcu; + u32 filter_id; +}; + int cxgb4_tc_flower_replace(struct net_device *dev, struct tc_cls_flower_offload *cls); int cxgb4_tc_flower_destroy(struct net_device *dev, struct tc_cls_flower_offload *cls); int cxgb4_tc_flower_stats(struct net_device *dev, struct tc_cls_flower_offload *cls); + +void cxgb4_init_tc_flower(struct adapter *adap); #endif /* __CXGB4_TC_FLOWER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 84541fce94c5..88487095d14f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -212,6 +212,7 @@ struct filter_ctx { struct ch_filter_specification; +int cxgb4_get_free_ftid(struct net_device *dev, int family); int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs, struct filter_ctx *ctx); From cf2885a70fc71d5f6b434b86eedfc18ad66ba6f6 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 21 Sep 2017 23:41:15 +0530 Subject: [PATCH 0231/2177] cxgb4: add support to offload action vlan Add support for offloading tc-flower flows having vlan actions: pop, push and modify. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index dda34d5a52fb..e42d2efc9ea2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -34,6 +34,7 @@ #include #include +#include #include "cxgb4.h" #include "cxgb4_tc_flower.h" @@ -185,6 +186,27 @@ static void cxgb4_process_flow_actions(struct net_device *in, fs->action = FILTER_SWITCH; fs->eport = pi->port_id; + } else if (is_tcf_vlan(a)) { + u32 vlan_action = tcf_vlan_action(a); + u8 prio = tcf_vlan_push_prio(a); + u16 vid = tcf_vlan_push_vid(a); + u16 vlan_tci = (prio << VLAN_PRIO_SHIFT) | vid; + + switch (vlan_action) { + case TCA_VLAN_ACT_POP: + fs->newvlan |= VLAN_REMOVE; + break; + case TCA_VLAN_ACT_PUSH: + fs->newvlan |= VLAN_INSERT; + fs->vlan = vlan_tci; + break; + case TCA_VLAN_ACT_MODIFY: + fs->newvlan |= VLAN_REWRITE; + fs->vlan = vlan_tci; + break; + default: + break; + } } } } @@ -222,6 +244,26 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, __func__); return -EINVAL; } + } else if (is_tcf_vlan(a)) { + u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); + u32 vlan_action = tcf_vlan_action(a); + + switch (vlan_action) { + case TCA_VLAN_ACT_POP: + break; + case TCA_VLAN_ACT_PUSH: + case TCA_VLAN_ACT_MODIFY: + if (proto != ETH_P_8021Q) { + netdev_err(dev, "%s: Unsupported vlan proto\n", + __func__); + return -EOPNOTSUPP; + } + break; + default: + netdev_err(dev, "%s: Unsupported vlan action\n", + __func__); + return -EOPNOTSUPP; + } } else { netdev_err(dev, "%s: Unsupported action\n", __func__); return -EOPNOTSUPP; From e0f911c81e93fc23fe1a4fb0318ff1c3b1c9027f Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 21 Sep 2017 23:41:16 +0530 Subject: [PATCH 0232/2177] cxgb4: fetch stats for offloaded tc flower flows Add support to retrieve stats from hardware for offloaded tc flower flows. Also, poll for the stats of offloaded flows via timer callback. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 76 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1 + .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 79 ++++++++++++++++++- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 2 + 6 files changed, 161 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index d05721b06178..0db3ab6ad094 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -908,6 +908,7 @@ struct adapter { /* TC flower offload */ DECLARE_HASHTABLE(flower_anymatch_tbl, 9); + struct timer_list flower_stats_timer; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index f3de9cdd4181..15361ca2857c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -148,6 +148,82 @@ static int get_filter_steerq(struct net_device *dev, return iq; } +static int get_filter_count(struct adapter *adapter, unsigned int fidx, + u64 *pkts, u64 *bytes) +{ + unsigned int tcb_base, tcbaddr; + unsigned int word_offset; + struct filter_entry *f; + __be64 be64_byte_count; + int ret; + + tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A); + if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids - 1)) && + fidx >= adapter->tids.nftids) + return -E2BIG; + + f = &adapter->tids.ftid_tab[fidx]; + if (!f->valid) + return -EINVAL; + + tcbaddr = tcb_base + f->tid * TCB_SIZE; + + spin_lock(&adapter->win0_lock); + if (is_t4(adapter->params.chip)) { + __be64 be64_count; + + /* T4 doesn't maintain byte counts in hw */ + *bytes = 0; + + /* Get pkts */ + word_offset = 4; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be64_count), + (__be32 *)&be64_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *pkts = be64_to_cpu(be64_count); + } else { + __be32 be32_count; + + /* Get bytes */ + word_offset = 4; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be64_byte_count), + &be64_byte_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *bytes = be64_to_cpu(be64_byte_count); + + /* Get pkts */ + word_offset = 6; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be32_count), + &be32_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *pkts = (u64)be32_to_cpu(be32_count); + } + +out: + spin_unlock(&adapter->win0_lock); + return ret; +} + +int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, + u64 *hitcnt, u64 *bytecnt) +{ + struct adapter *adapter = netdev2adap(dev); + + return get_filter_count(adapter, fidx, hitcnt, bytecnt); +} + int cxgb4_get_free_ftid(struct net_device *dev, int family) { struct adapter *adap = netdev2adap(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index ce33c3addc2b..aa93ae95d3b9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4637,6 +4637,7 @@ static void free_some_resources(struct adapter *adapter) kvfree(adapter->l2t); t4_cleanup_sched(adapter); kvfree(adapter->tids.tid_tab); + cxgb4_cleanup_tc_flower(adapter); 
cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index e42d2efc9ea2..a36bd66d2834 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -39,9 +39,12 @@ #include "cxgb4.h" #include "cxgb4_tc_flower.h" +#define STATS_CHECK_PERIOD (HZ / 2) + static struct ch_tc_flower_entry *allocate_flower_entry(void) { struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); + spin_lock_init(&new->lock); return new; } @@ -363,13 +366,87 @@ err: return ret; } +void ch_flower_stats_cb(unsigned long data) +{ + struct adapter *adap = (struct adapter *)data; + struct ch_tc_flower_entry *flower_entry; + struct ch_tc_flower_stats *ofld_stats; + unsigned int i; + u64 packets; + u64 bytes; + int ret; + + rcu_read_lock(); + hash_for_each_rcu(adap->flower_anymatch_tbl, i, flower_entry, link) { + ret = cxgb4_get_filter_counters(adap->port[0], + flower_entry->filter_id, + &packets, &bytes); + if (!ret) { + spin_lock(&flower_entry->lock); + ofld_stats = &flower_entry->stats; + + if (ofld_stats->prev_packet_count != packets) { + ofld_stats->prev_packet_count = packets; + ofld_stats->last_used = jiffies; + } + spin_unlock(&flower_entry->lock); + } + } + rcu_read_unlock(); + mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); +} + int cxgb4_tc_flower_stats(struct net_device *dev, struct tc_cls_flower_offload *cls) { - return -EOPNOTSUPP; + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_stats *ofld_stats; + struct ch_tc_flower_entry *ch_flower; + u64 packets; + u64 bytes; + int ret; + + ch_flower = ch_flower_lookup(adap, cls->cookie); + if (!ch_flower) { + ret = -ENOENT; + goto err; + } + + ret = cxgb4_get_filter_counters(dev, ch_flower->filter_id, + &packets, &bytes); + if (ret < 0) + goto err; + + spin_lock_bh(&ch_flower->lock); + ofld_stats = &ch_flower->stats; + if (ofld_stats->packet_count != packets) { + if (ofld_stats->prev_packet_count != packets) + ofld_stats->last_used = jiffies; + tcf_exts_stats_update(cls->exts, bytes - ofld_stats->byte_count, + packets - ofld_stats->packet_count, + ofld_stats->last_used); + + ofld_stats->packet_count = packets; + ofld_stats->byte_count = bytes; + ofld_stats->prev_packet_count = packets; + } + spin_unlock_bh(&ch_flower->lock); + return 0; + +err: + return ret; } void cxgb4_init_tc_flower(struct adapter *adap) { hash_init(adap->flower_anymatch_tbl); + setup_timer(&adap->flower_stats_timer, ch_flower_stats_cb, + (unsigned long)adap); + mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); +} + +void cxgb4_cleanup_tc_flower(struct adapter *adap) +{ + if (adap->flower_stats_timer.function) + del_timer_sync(&adap->flower_stats_timer); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index 6145a9e056eb..604feffc752e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -38,6 +38,7 @@ #include struct ch_tc_flower_stats { + u64 prev_packet_count; u64 packet_count; u64 byte_count; u64 last_used; @@ -49,6 +50,7 @@ struct ch_tc_flower_entry { unsigned long tc_flower_cookie; struct hlist_node link; struct rcu_head rcu; + spinlock_t lock; /* lock for stats */ u32 filter_id; }; @@ -60,4 +62,5 @@ int cxgb4_tc_flower_stats(struct net_device *dev, struct 
tc_cls_flower_offload *cls); void cxgb4_init_tc_flower(struct adapter *adap); +void cxgb4_cleanup_tc_flower(struct adapter *adap); #endif /* __CXGB4_TC_FLOWER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 88487095d14f..52324c77a4fe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -221,6 +221,8 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, int cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs); int cxgb4_del_filter(struct net_device *dev, int filter_id); +int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, + u64 *hitcnt, u64 *bytecnt); static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { From 39e50d9637f9a31967ac9e956b829ee8b50a750f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 22 Sep 2017 10:20:21 -0400 Subject: [PATCH 0233/2177] forcedeth: optimize the xmit/rx with unlikely In the xmit/rx fastpath, the function dma_map_single rarely fails. Therefore, add an unlikely() optimization to this error check conditional. Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 26 +++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index b605b94f4567..a235e8881af9 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1817,8 +1817,8 @@ static int nv_alloc_rx(struct net_device *dev) skb->data, skb_tailroom(skb), DMA_FROM_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - np->put_rx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma))) { kfree_skb(skb); goto packet_dropped; } @@ -1858,8 +1858,8 @@ static int nv_alloc_rx_optimized(struct net_device *dev) skb->data, skb_tailroom(skb), DMA_FROM_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - np->put_rx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma))) { kfree_skb(skb); goto packet_dropped; } @@ -2227,8 +2227,8 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data + offset, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2268,7 +2268,8 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) frag, offset, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* Unwind the mapped fragments */ do { @@ -2377,8 +2378,8 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data + offset, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2419,7 +2420,8 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, bcnt, DMA_TO_DEVICE); - if 
(dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* Unwind the mapped fragments */ do { @@ -5075,8 +5077,8 @@ static int nv_loopback_test(struct net_device *dev) test_dma_addr = dma_map_single(&np->pci_dev->dev, tx_skb->data, skb_tailroom(tx_skb), DMA_FROM_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - test_dma_addr)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + test_dma_addr))) { dev_kfree_skb_any(tx_skb); goto out; } From 373b8eeb0c15d4ce58f62afb12f213b1b5bbc3d3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:45:43 +0300 Subject: [PATCH 0234/2177] xfrm: make aead_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f002a2c5e33c..0be4c547e383 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1764,7 +1764,7 @@ static inline int xfrm_acquire_is_on(struct net *net) } #endif -static inline int aead_len(struct xfrm_algo_aead *alg) +static inline unsigned int aead_len(struct xfrm_algo_aead *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2bfbd9121e3b..32c67b80c3ce 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -84,7 +84,7 @@ static int verify_aead(struct nlattr **attrs) return 0; algp = nla_data(rt); - if (nla_len(rt) < aead_len(algp)) + if (nla_len(rt) < (int)aead_len(algp)) return -EINVAL; algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; From 06cd22f830f28023b82455c82c7db65fc6cf9c16 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:46:30 +0300 Subject: [PATCH 0235/2177] xfrm: make xfrm_alg_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0be4c547e383..2abc0e117f11 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1769,7 +1769,7 @@ static inline unsigned int aead_len(struct xfrm_algo_aead *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_alg_len(const struct xfrm_algo *alg) +static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 32c67b80c3ce..09512d90e6a5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -42,7 +42,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; algp = nla_data(rt); - if (nla_len(rt) < xfrm_alg_len(algp)) + if (nla_len(rt) < (int)xfrm_alg_len(algp)) return -EINVAL; switch (type) { From 1bd963a72e859d194d87a5a2a8839efee7e23102 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:47:09 +0300 Subject: [PATCH 0236/2177] xfrm: make xfrm_alg_auth_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. 
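The reason the nla_len() comparisons keep an (int) cast is C's usual arithmetic conversions: comparing a signed int against an unsigned int converts the signed side to unsigned, so a negative attribute length would become a huge unsigned value and slip past the "too short" check. A minimal stand-alone sketch of the pitfall (not kernel code; check_len() and the sample values are invented for illustration):

#include <stdio.h>

/* 'have' stands in for a signed nla_len()-style length, 'need' for an
 * unsigned required length such as the helpers above now return.
 */
static int check_len(int have, unsigned int need)
{
	if (have < (int)need)	/* signed compare: a negative 'have' is rejected */
		return -1;
	return 0;
}

int main(void)
{
	int have = -4;		/* bogus negative length sneaking in */
	unsigned int need = 16;

	/* Without the cast, 'have' is converted to unsigned, so the
	 * "is it shorter than needed?" test is false and the bad length
	 * is accepted.
	 */
	printf("unsigned compare rejects bad length: %s\n",
	       (unsigned int)have < need ? "yes" : "no");	/* prints "no" */
	printf("signed compare rejects bad length:   %s\n",
	       check_len(have, need) < 0 ? "yes" : "no");	/* prints "yes" */
	return 0;
}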
Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2abc0e117f11..5d5e11b653eb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1774,7 +1774,7 @@ static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) +static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 09512d90e6a5..465c23d4ea78 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -68,7 +68,7 @@ static int verify_auth_trunc(struct nlattr **attrs) return 0; algp = nla_data(rt); - if (nla_len(rt) < xfrm_alg_auth_len(algp)) + if (nla_len(rt) < (int)xfrm_alg_auth_len(algp)) return -EINVAL; algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; From 5e708e47c44366453c33373940455a75fd33f635 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:47:50 +0300 Subject: [PATCH 0237/2177] xfrm: make xfrm_replay_state_esn_len() return unsigned int Replay detection bitmaps can't have negative length. Comparisons with nla_len() are left signed just in case negative value can sneak in there. Propagate unsignedness for code size savings: add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-38 (-38) function old new delta xfrm_state_construct 1802 1800 -2 xfrm_update_ae_params 295 289 -6 xfrm_state_migrate 1345 1339 -6 xfrm_replay_notify_esn 349 337 -12 xfrm_replay_notify_bmp 345 333 -12 Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5d5e11b653eb..3cb618bbcfa5 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1779,7 +1779,7 @@ static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn) +static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn) { return sizeof(*replay_esn) + replay_esn->bmp_len * sizeof(__u32); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 465c23d4ea78..83718db5ec9c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -130,7 +130,7 @@ static inline int verify_replay(struct xfrm_usersa_info *p, if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) return -EINVAL; - if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && nla_len(rt) != sizeof(*rs)) return -EINVAL; } @@ -404,7 +404,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; - int ulen; + unsigned int ulen; if (!replay_esn || !rp) return 0; @@ -414,7 +414,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es /* Check the overall length and the internal bitmap length to avoid * potential overflow. 
*/ - if (nla_len(rp) < ulen || + if (nla_len(rp) < (int)ulen || xfrm_replay_state_esn_len(replay_esn) != ulen || replay_esn->bmp_len != up->bmp_len) return -EINVAL; @@ -430,14 +430,14 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; - int klen, ulen; + unsigned int klen, ulen; if (!rta) return 0; up = nla_data(rta); klen = xfrm_replay_state_esn_len(up); - ulen = nla_len(rta) >= klen ? klen : sizeof(*up); + ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up); p = kzalloc(klen, GFP_KERNEL); if (!p) From a1b831f23a2b3306ecf275872a54abcf97b0b977 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:48:54 +0300 Subject: [PATCH 0238/2177] xfrm: eradicate size_t All netlink message sizes are a) unsigned, b) can't be >= 4GB in size because netlink doesn't support >= 64KB messages in the first place. All those size_t across the code are a scam especially across networking which likes to work with small numbers like 1500 or 65536. Propagate unsignedness and flip some "int" to "unsigned int" as well. This is preparation to switching nlmsg_new() to "unsigned int". Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 83718db5ec9c..f7a12aa15086 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -458,9 +458,9 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn return 0; } -static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) +static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { - int len = 0; + unsigned int len = 0; if (xfrm_ctx) { len += sizeof(struct xfrm_user_sec_ctx); @@ -1031,7 +1031,7 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, return -1; } -static inline size_t xfrm_spdinfo_msgsize(void) +static inline unsigned int xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) + nla_total_size(sizeof(struct xfrmu_spdinfo)) @@ -1157,7 +1157,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } -static inline size_t xfrm_sadinfo_msgsize(void) +static inline unsigned int xfrm_sadinfo_msgsize(void) { return NLMSG_ALIGN(4) + nla_total_size(sizeof(struct xfrmu_sadhinfo)) @@ -1633,7 +1633,7 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s return copy_sec_ctx(xp->security, skb); return 0; } -static inline size_t userpolicy_type_attrsize(void) +static inline unsigned int userpolicy_type_attrsize(void) { #ifdef CONFIG_XFRM_SUB_POLICY return nla_total_size(sizeof(struct xfrm_userpolicy_type)); @@ -1850,9 +1850,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } -static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) +static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x) { - size_t replay_size = x->replay_esn ? + unsigned int replay_size = x->replay_esn ? 
xfrm_replay_state_esn_len(x->replay_esn) : sizeof(struct xfrm_replay_state); @@ -2321,8 +2321,8 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } -static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma, - int with_encp) +static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma, + int with_encp) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) @@ -2566,7 +2566,7 @@ static void xfrm_netlink_rcv(struct sk_buff *skb) mutex_unlock(&net->xfrm.xfrm_cfg_mutex); } -static inline size_t xfrm_expire_msgsize(void) +static inline unsigned int xfrm_expire_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + nla_total_size(sizeof(struct xfrm_mark)); @@ -2654,9 +2654,9 @@ static int xfrm_notify_sa_flush(const struct km_event *c) return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); } -static inline size_t xfrm_sa_len(struct xfrm_state *x) +static inline unsigned int xfrm_sa_len(struct xfrm_state *x) { - size_t l = 0; + unsigned int l = 0; if (x->aead) l += nla_total_size(aead_len(x->aead)); if (x->aalg) { @@ -2701,8 +2701,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int len = xfrm_sa_len(x); - int headlen, err; + unsigned int len = xfrm_sa_len(x); + unsigned int headlen; + int err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { @@ -2776,8 +2777,8 @@ static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c } -static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, - struct xfrm_policy *xp) +static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x, + struct xfrm_policy *xp) { return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) @@ -2900,7 +2901,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, return xp; } -static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) +static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp) { return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) @@ -2957,13 +2958,14 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { - int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int headlen, err; + unsigned int headlen; + int err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { @@ -3070,7 +3072,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct } -static inline size_t xfrm_report_msgsize(void) +static inline unsigned int xfrm_report_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); } @@ -3115,7 +3117,7 @@ static int xfrm_send_report(struct net *net, u8 proto, return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); } -static inline size_t xfrm_mapping_msgsize(void) +static inline unsigned int xfrm_mapping_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); } From 
50c8cd44ed5fcd2cbbfe19e5b1eb680aa4440186 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Fri, 1 Sep 2017 12:13:34 +0530 Subject: [PATCH 0239/2177] ath9k: remove cast to void pointer casting to void pointer from any pointer type and vice-versa is done implicitly and therefore casting is not needed in such a case. Done using Coccinellle. Semantic Patch used : @r@ expression x; void* e; type T; identifier f; @@ ( *((T *)e) | ((T *)x)[...] | ((T *)x)->f | - (T *) e ) Signed-off-by: Himanshu Jha Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar9003_mac.c | 4 ++-- drivers/net/wireless/ath/ath9k/dfs.c | 2 +- drivers/net/wireless/ath/ath9k/hif_usb.c | 8 +++---- .../net/wireless/ath/ath9k/htc_drv_beacon.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 24 +++++++++---------- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 6 ++--- drivers/net/wireless/ath/ath9k/init.c | 8 +++---- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/ath9k/mci.c | 2 +- drivers/net/wireless/ath/ath9k/wmi.c | 4 ++-- 11 files changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c index b3f20b3c0210..e1fe7a7c3ad8 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c @@ -480,7 +480,7 @@ EXPORT_SYMBOL(ath9k_hw_addrxbuf_edma); int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah, struct ath_rx_status *rxs, void *buf_addr) { - struct ar9003_rxs *rxsp = (struct ar9003_rxs *) buf_addr; + struct ar9003_rxs *rxsp = buf_addr; unsigned int phyerr; if ((rxsp->status11 & AR_RxDone) == 0) @@ -610,7 +610,7 @@ void ath9k_hw_setup_statusring(struct ath_hw *ah, void *ts_start, ah->ts_paddr_start = ts_paddr_start; ah->ts_paddr_end = ts_paddr_start + (size * sizeof(struct ar9003_txs)); ah->ts_size = size; - ah->ts_ring = (struct ar9003_txs *) ts_start; + ah->ts_ring = ts_start; ath9k_hw_reset_txstatus_ring(ah); } diff --git a/drivers/net/wireless/ath/ath9k/dfs.c b/drivers/net/wireless/ath/ath9k/dfs.c index 1ece42c2443d..40a397fd0e0e 100644 --- a/drivers/net/wireless/ath/ath9k/dfs.c +++ b/drivers/net/wireless/ath/ath9k/dfs.c @@ -326,7 +326,7 @@ void ath9k_dfs_process_phyerr(struct ath_softc *sc, void *data, if (ard.ext_rssi & 0x80) ard.ext_rssi = 0; - vdata_end = (char *)data + datalen; + vdata_end = data + datalen; ard.pulse_bw_info = vdata_end[-1]; ard.pulse_length_ext = vdata_end[-2]; ard.pulse_length_pri = vdata_end[-3]; diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index c5f4dd808745..56676eaff24c 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -424,7 +424,7 @@ static int hif_usb_send_tx(struct hif_device_usb *hif_dev, struct sk_buff *skb) static void hif_usb_start(void *hif_handle) { - struct hif_device_usb *hif_dev = (struct hif_device_usb *)hif_handle; + struct hif_device_usb *hif_dev = hif_handle; unsigned long flags; hif_dev->flags |= HIF_USB_START; @@ -436,7 +436,7 @@ static void hif_usb_start(void *hif_handle) static void hif_usb_stop(void *hif_handle) { - struct hif_device_usb *hif_dev = (struct hif_device_usb *)hif_handle; + struct hif_device_usb *hif_dev = hif_handle; struct tx_buf *tx_buf = NULL, *tx_buf_tmp = NULL; unsigned long flags; @@ -457,7 +457,7 @@ static void hif_usb_stop(void *hif_handle) static int hif_usb_send(void *hif_handle, u8 pipe_id, struct sk_buff *skb) { - struct 
hif_device_usb *hif_dev = (struct hif_device_usb *)hif_handle; + struct hif_device_usb *hif_dev = hif_handle; int ret = 0; switch (pipe_id) { @@ -492,7 +492,7 @@ static inline bool check_index(struct sk_buff *skb, u8 idx) static void hif_usb_sta_drain(void *hif_handle, u8 idx) { - struct hif_device_usb *hif_dev = (struct hif_device_usb *)hif_handle; + struct hif_device_usb *hif_dev = hif_handle; struct sk_buff *skb, *tmp; unsigned long flags; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c index 2c0e4d26e8f9..f20c839aeda2 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c @@ -384,7 +384,7 @@ void ath9k_htc_set_tsfadjust(struct ath9k_htc_priv *priv, static void ath9k_htc_beacon_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { - bool *beacon_configured = (bool *)data; + bool *beacon_configured = data; struct ath9k_htc_vif *avp = (struct ath9k_htc_vif *) vif->drv_priv; if (vif->type == NL80211_IFTYPE_STATION && diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index da2164b0cccc..57de6fa6ff03 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -233,7 +233,7 @@ static void ath9k_reg_notifier(struct wiphy *wiphy, static unsigned int ath9k_regread(void *hw_priv, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; __be32 val, reg = cpu_to_be32(reg_offset); @@ -255,7 +255,7 @@ static unsigned int ath9k_regread(void *hw_priv, u32 reg_offset) static void ath9k_multi_regread(void *hw_priv, u32 *addr, u32 *val, u16 count) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; __be32 tmpaddr[8]; @@ -301,7 +301,7 @@ static void ath9k_regwrite_multi(struct ath_common *common) static void ath9k_regwrite_single(void *hw_priv, u32 val, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; const __be32 buf[2] = { @@ -322,7 +322,7 @@ static void ath9k_regwrite_single(void *hw_priv, u32 val, u32 reg_offset) static void ath9k_regwrite_buffer(void *hw_priv, u32 val, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -345,7 +345,7 @@ static void ath9k_regwrite_buffer(void *hw_priv, u32 val, u32 reg_offset) static void ath9k_regwrite(void *hw_priv, u32 val, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -357,7 +357,7 @@ static void ath9k_regwrite(void *hw_priv, u32 val, u32 reg_offset) static void ath9k_enable_regwrite_buffer(void *hw_priv) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -366,7 +366,7 @@ static 
void ath9k_enable_regwrite_buffer(void *hw_priv) static void ath9k_regwrite_flush(void *hw_priv) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -383,7 +383,7 @@ static void ath9k_regwrite_flush(void *hw_priv) static void ath9k_reg_rmw_buffer(void *hw_priv, u32 reg_offset, u32 set, u32 clr) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; u32 rsp_status; @@ -421,7 +421,7 @@ static void ath9k_reg_rmw_buffer(void *hw_priv, static void ath9k_reg_rmw_flush(void *hw_priv) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; u32 rsp_status; @@ -453,7 +453,7 @@ static void ath9k_reg_rmw_flush(void *hw_priv) static void ath9k_enable_rmw_buffer(void *hw_priv) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -466,7 +466,7 @@ static void ath9k_enable_rmw_buffer(void *hw_priv) static u32 ath9k_reg_rmw_single(void *hw_priv, u32 reg_offset, u32 set, u32 clr) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; struct register_rmw buf, buf_ret; @@ -490,7 +490,7 @@ static u32 ath9k_reg_rmw_single(void *hw_priv, static u32 ath9k_reg_rmw(void *hw_priv, u32 reg_offset, u32 set, u32 clr) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index a553c91d41a1..f808e5833d7e 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1483,7 +1483,7 @@ static void ath9k_htc_set_bssid(struct ath9k_htc_priv *priv) static void ath9k_htc_bss_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { - struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *)data; + struct ath9k_htc_priv *priv = data; struct ath_common *common = ath9k_hw_common(priv->ah); struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index b38a586ea59a..2682da02da54 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -641,7 +641,7 @@ static struct sk_buff* ath9k_htc_tx_get_packet(struct ath9k_htc_priv *priv, void ath9k_htc_txstatus(struct ath9k_htc_priv *priv, void *wmi_event) { - struct wmi_event_txstatus *txs = (struct wmi_event_txstatus *)wmi_event; + struct wmi_event_txstatus *txs = wmi_event; struct __wmi_event_txstatus *__txs; struct sk_buff *skb; struct ath9k_htc_tx_event *tx_pend; @@ -684,7 +684,7 @@ void ath9k_htc_txstatus(struct ath9k_htc_priv *priv, void *wmi_event) void ath9k_htc_txep(void *drv_priv, struct sk_buff *skb, enum htc_endpoint_id ep_id, bool txok) { - struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) 
drv_priv; + struct ath9k_htc_priv *priv = drv_priv; struct ath9k_htc_tx_ctl *tx_ctl; struct sk_buff_head *epid_queue; @@ -1103,7 +1103,7 @@ requeue: void ath9k_htc_rxep(void *drv_priv, struct sk_buff *skb, enum htc_endpoint_id ep_id) { - struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *)drv_priv; + struct ath9k_htc_priv *priv = drv_priv; struct ath_hw *ah = priv->ah; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_rxbuf *rxbuf = NULL, *tmp_buf = NULL; diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index bb7936090b91..4bc403620cf9 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -117,7 +117,7 @@ static const struct ath_ps_ops ath9k_ps_ops = { static void ath9k_iowrite32(void *hw_priv, u32 val, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath_softc *sc = (struct ath_softc *) common->priv; @@ -132,7 +132,7 @@ static void ath9k_iowrite32(void *hw_priv, u32 val, u32 reg_offset) static unsigned int ath9k_ioread32(void *hw_priv, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath_softc *sc = (struct ath_softc *) common->priv; u32 val; @@ -172,7 +172,7 @@ static unsigned int __ath9k_reg_rmw(struct ath_softc *sc, u32 reg_offset, static unsigned int ath9k_reg_rmw(void *hw_priv, u32 reg_offset, u32 set, u32 clr) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath_softc *sc = (struct ath_softc *) common->priv; unsigned long uninitialized_var(flags); @@ -275,7 +275,7 @@ int ath_descdma_setup(struct ath_softc *sc, struct ath_descdma *dd, if (!dd->dd_desc) return -ENOMEM; - ds = (u8 *) dd->dd_desc; + ds = dd->dd_desc; ath_dbg(common, CONFIG, "%s DMA map: %p (%u) -> %llx (%u)\n", name, ds, (u32) dd->dd_desc_len, ito64(dd->dd_desc_paddr), /*XXX*/(u32) dd->dd_desc_len); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 8b4ac7f0a09b..9c24bc077e6b 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1193,7 +1193,7 @@ void ath9k_calculate_summary_state(struct ath_softc *sc, static void ath9k_tpc_vif_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { - int *power = (int *)data; + int *power = data; if (*power < vif->bss_conf.txpower) *power = vif->bss_conf.txpower; diff --git a/drivers/net/wireless/ath/ath9k/mci.c b/drivers/net/wireless/ath/ath9k/mci.c index cf23fd815211..39d46c203f6b 100644 --- a/drivers/net/wireless/ath/ath9k/mci.c +++ b/drivers/net/wireless/ath/ath9k/mci.c @@ -453,7 +453,7 @@ int ath_mci_setup(struct ath_softc *sc) mci->sched_buf.bf_len = ATH_MCI_SCHED_BUF_SIZE; mci->gpm_buf.bf_len = ATH_MCI_GPM_BUF_SIZE; - mci->gpm_buf.bf_addr = (u8 *)mci->sched_buf.bf_addr + mci->sched_buf.bf_len; + mci->gpm_buf.bf_addr = mci->sched_buf.bf_addr + mci->sched_buf.bf_len; mci->gpm_buf.bf_paddr = mci->sched_buf.bf_paddr + mci->sched_buf.bf_len; ret = ar9003_mci_setup(sc->sc_ah, mci->gpm_buf.bf_paddr, diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c index 64a354fa78ab..b0b5579b7560 100644 --- a/drivers/net/wireless/ath/ath9k/wmi.c +++ b/drivers/net/wireless/ath/ath9k/wmi.c @@ -159,7 +159,7 @@ void ath9k_wmi_event_tasklet(unsigned long data) switch (cmd_id) { case 
WMI_SWBA_EVENTID: - swba = (struct wmi_event_swba *) wmi_event; + swba = wmi_event; ath9k_htc_swba(priv, swba); break; case WMI_FATAL_EVENTID: @@ -207,7 +207,7 @@ static void ath9k_wmi_rsp_callback(struct wmi *wmi, struct sk_buff *skb) static void ath9k_wmi_ctrl_rx(void *priv, struct sk_buff *skb, enum htc_endpoint_id epid) { - struct wmi *wmi = (struct wmi *) priv; + struct wmi *wmi = priv; struct wmi_cmd_hdr *hdr; u16 cmd_id; From 896cbefadf62b431cba469089cc3bd5ecfe69ed5 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sun, 3 Sep 2017 14:19:31 +0200 Subject: [PATCH 0240/2177] ath9k: Use ARRAY_SIZE macro Use ARRAY_SIZE macro, rather than explicitly coding some variant of it yourself. Found with: find -type f -name "*.c" -o -name "*.h" | xargs perl -p -i -e 's/\bsizeof\s*\(\s*(\w+)\s*\)\s*\ /\s*sizeof\s*\(\s*\1\s*\[\s*0\s*\]\s*\) /ARRAY_SIZE(\1)/g' and manual check/verification. Signed-off-by: Thomas Meyer Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 3dbfd86ebe36..c2e210c0a770 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -15,6 +15,7 @@ */ #include +#include #include "hw.h" #include "ar9003_phy.h" #include "ar9003_eeprom.h" @@ -2946,14 +2947,12 @@ static const struct ar9300_eeprom *ar9300_eep_templates[] = { static const struct ar9300_eeprom *ar9003_eeprom_struct_find_by_id(int id) { -#define N_LOOP (sizeof(ar9300_eep_templates) / sizeof(ar9300_eep_templates[0])) int it; - for (it = 0; it < N_LOOP; it++) + for (it = 0; it < ARRAY_SIZE(ar9300_eep_templates); it++) if (ar9300_eep_templates[it]->templateVersion == id) return ar9300_eep_templates[it]; return NULL; -#undef N_LOOP } static int ath9k_hw_ar9300_check_eeprom(struct ath_hw *ah) From 496cbf3ebb6b37f89a8db84b3ed021eea2205fba Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Thu, 14 Sep 2017 00:31:52 +0530 Subject: [PATCH 0241/2177] ath10k: make ath10k_hw_ce_regs const Make them const as they are not modified in the file referencing them. They are only stored in the const field 'hw_ce_reg' of an ath10k structure. Also, make the declarations in the header const. 
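A stand-alone sketch of the const pattern applied here (not the ath10k code; struct example_ce_regs and the instance name are invented): const-qualifying the definition lets the object live in read-only data and turns stray writes into compile errors, and the extern declaration in the header must carry the same const qualifier or the two declarations have conflicting types.

#include <stdio.h>

/* Header side: the declaration must be const as well, otherwise it
 * conflicts with the const definition below.
 */
struct example_ce_regs {
	unsigned int sr_base_addr;
	unsigned int sr_size_addr;
};

extern const struct example_ce_regs example_regs;

/* Definition side: initialised once, never written at run time. */
const struct example_ce_regs example_regs = {
	.sr_base_addr = 0x00000000,
	.sr_size_addr = 0x00000008,
};

static void dump_regs(const struct example_ce_regs *regs)
{
	printf("sr_base=0x%08x sr_size=0x%08x\n",
	       regs->sr_base_addr, regs->sr_size_addr);
}

int main(void)
{
	dump_regs(&example_regs);
	/* example_regs.sr_base_addr = 1; */	/* would not compile: object is read-only */
	return 0;
}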
Signed-off-by: Bhumika Goyal Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/hw.c | 4 ++-- drivers/net/wireless/ath/ath10k/hw.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index a860691d635d..07df7c6bc05b 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -310,7 +310,7 @@ static struct ath10k_hw_ce_dst_src_wm_regs wcn3990_wm_dst_ring = { .wm_high = &wcn3990_dst_wm_high, }; -struct ath10k_hw_ce_regs wcn3990_ce_regs = { +const struct ath10k_hw_ce_regs wcn3990_ce_regs = { .sr_base_addr = 0x00000000, .sr_size_addr = 0x00000008, .dr_base_addr = 0x0000000c, @@ -457,7 +457,7 @@ static struct ath10k_hw_ce_dst_src_wm_regs qcax_wm_dst_ring = { .wm_high = &qcax_dst_wm_high, }; -struct ath10k_hw_ce_regs qcax_ce_regs = { +const struct ath10k_hw_ce_regs qcax_ce_regs = { .sr_base_addr = 0x00000000, .sr_size_addr = 0x00000004, .dr_base_addr = 0x00000008, diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 0c089f6dd3d9..f80840fae517 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -369,8 +369,8 @@ extern const struct ath10k_hw_values qca99x0_values; extern const struct ath10k_hw_values qca9888_values; extern const struct ath10k_hw_values qca4019_values; extern const struct ath10k_hw_values wcn3990_values; -extern struct ath10k_hw_ce_regs wcn3990_ce_regs; -extern struct ath10k_hw_ce_regs qcax_ce_regs; +extern const struct ath10k_hw_ce_regs wcn3990_ce_regs; +extern const struct ath10k_hw_ce_regs qcax_ce_regs; void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey, u32 cc, u32 rcc, u32 cc_prev, u32 rcc_prev); From ba24d63dd3748bae134365e3bcfd9c13b4e3c3e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 18 Sep 2017 22:59:19 +0300 Subject: [PATCH 0242/2177] ath9k: Avoid a potential deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lockdep warns us that sc_pm_lock and cc_lock can cause a deadlock when cc_lock is acquired by itself with interrupts enabled. Disable irqs whenever taking cc_lock to avoid this. [ 19.094524] kworker/u2:0/5 just changed the state of lock: [ 19.094578] (&(&sc->sc_pm_lock)->rlock){-.-...}, at: [] ath_isr+0x15e/0x200 [ath9k] [ 19.094674] but this lock took another, HARDIRQ-unsafe lock in the past: [ 19.094731] (&(&common->cc_lock)->rlock){+.-...} [ 19.094741] and interrupts could create inverse lock ordering between them. 
[ 19.094866] other info that might help us debug this: [ 19.094926] Possible interrupt unsafe locking scenario: [ 19.094985] CPU0 CPU1 [ 19.095036] ---- ---- [ 19.095086] lock(&(&common->cc_lock)->rlock); [ 19.095197] local_irq_disable(); [ 19.095305] lock(&(&sc->sc_pm_lock)->rlock); [ 19.095423] lock(&(&common->cc_lock)->rlock); [ 19.095539] [ 19.095636] lock(&(&sc->sc_pm_lock)->rlock); [ 19.095745] *** DEADLOCK *** [ 19.095965] 3 locks held by kworker/u2:0/5: [ 19.096067] #0: ("%s"wiphy_name(local->hw.wiphy)){.+.+.+}, at: [] process_one_work+0x127/0x580 [ 19.096260] #1: ((&local->dynamic_ps_enable_work)){+.+...}, at: [] process_one_work+0x127/0x580 [ 19.096447] #2: (&sc->mutex){+.+...}, at: [] ath9k_config+0x30/0x1d0 [ath9k] [ 19.096639] the shortest dependencies between 2nd lock and 1st lock: [ 19.096813] -> (&(&common->cc_lock)->rlock){+.-...} ops: 38 { [ 19.096816] HARDIRQ-ON-W at: [ 19.096816] __lock_acquire+0x57e/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_bh+0x3f/0x50 [ 19.096816] ath_chanctx_set_channel+0xb6/0x2c0 [ath9k] [ 19.096816] ath9k_config+0xa8/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ieee80211_do_open+0x67a/0x920 [mac80211] [ 19.096816] ieee80211_open+0x41/0x50 [mac80211] [ 19.096816] __dev_open+0xab/0x140 [ 19.096816] __dev_change_flags+0x89/0x150 [ 19.096816] dev_change_flags+0x28/0x60 [ 19.096816] do_setlink+0x290/0x890 [ 19.096816] rtnl_newlink+0x7cf/0x8e0 [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0 [ 19.096816] netlink_rcv_skb+0xb9/0xe0 [ 19.096816] rtnetlink_rcv+0x1e/0x30 [ 19.096816] netlink_unicast+0x13a/0x2c0 [ 19.096816] netlink_sendmsg+0x290/0x380 [ 19.096816] ___sys_sendmsg+0x1e2/0x280 [ 19.096816] __sys_sendmsg+0x3f/0x80 [ 19.096816] SyS_socketcall+0x58c/0x6b0 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] IN-SOFTIRQ-W at: [ 19.096816] __lock_acquire+0x55a/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock+0x3c/0x50 [ 19.096816] ath_ps_full_sleep+0x24/0x70 [ath9k] [ 19.096816] call_timer_fn+0xa4/0x300 [ 19.096816] run_timer_softirq+0x1b1/0x560 [ 19.096816] __do_softirq+0xb0/0x430 [ 19.096816] do_softirq_own_stack+0x33/0x40 [ 19.096816] irq_exit+0xad/0xc0 [ 19.096816] smp_apic_timer_interrupt+0x31/0x40 [ 19.096816] apic_timer_interrupt+0x37/0x3c [ 19.096816] wp_page_copy+0xb8/0x580 [ 19.096816] do_wp_page+0x64/0x420 [ 19.096816] handle_mm_fault+0x430/0x990 [ 19.096816] __do_page_fault+0x18b/0x430 [ 19.096816] do_page_fault+0xb/0x10 [ 19.096816] common_exception+0x62/0x6a [ 19.096816] INITIAL USE at: [ 19.096816] __lock_acquire+0x204/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_bh+0x3f/0x50 [ 19.096816] ath_chanctx_set_channel+0xb6/0x2c0 [ath9k] [ 19.096816] ath9k_config+0xa8/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ieee80211_do_open+0x67a/0x920 [mac80211] [ 19.096816] ieee80211_open+0x41/0x50 [mac80211] [ 19.096816] __dev_open+0xab/0x140 [ 19.096816] __dev_change_flags+0x89/0x150 [ 19.096816] dev_change_flags+0x28/0x60 [ 19.096816] do_setlink+0x290/0x890 [ 19.096816] rtnl_newlink+0x7cf/0x8e0 [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0 [ 19.096816] netlink_rcv_skb+0xb9/0xe0 [ 19.096816] rtnetlink_rcv+0x1e/0x30 [ 19.096816] netlink_unicast+0x13a/0x2c0 [ 19.096816] netlink_sendmsg+0x290/0x380 [ 19.096816] ___sys_sendmsg+0x1e2/0x280 [ 19.096816] __sys_sendmsg+0x3f/0x80 [ 19.096816] SyS_socketcall+0x58c/0x6b0 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] 
entry_SYSENTER_32+0x4c/0x7b [ 19.096816] } [ 19.096816] ... key at: [] __key.61991+0x0/0xffffc96c [ath9k] [ 19.096816] ... acquired at: [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock+0x3c/0x50 [ 19.096816] ath9k_ps_wakeup+0x85/0xe0 [ath9k] [ 19.096816] ath9k_bss_info_changed+0x2a/0x1b0 [ath9k] [ 19.096816] ieee80211_bss_info_change_notify+0xf3/0x360 [mac80211] [ 19.096816] ieee80211_recalc_txpower+0x33/0x40 [mac80211] [ 19.096816] ieee80211_set_tx_power+0x45/0x1d0 [mac80211] [ 19.096816] cfg80211_wext_siwtxpower+0xd3/0x350 [cfg80211] [ 19.096816] ioctl_standard_call+0x4e/0x400 [ 19.096816] wext_handle_ioctl+0xf4/0x190 [ 19.096816] dev_ioctl+0xb7/0x630 [ 19.096816] sock_ioctl+0x13e/0x2d0 [ 19.096816] do_vfs_ioctl+0x84/0x750 [ 19.096816] SyS_ioctl+0x34/0x60 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] -> (&(&sc->sc_pm_lock)->rlock){-.-...} ops: 597 { [ 19.096816] IN-HARDIRQ-W at: [ 19.096816] __lock_acquire+0x6ae/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ath_isr+0x15e/0x200 [ath9k] [ 19.096816] __handle_irq_event_percpu+0x44/0x340 [ 19.096816] handle_irq_event_percpu+0x1d/0x50 [ 19.096816] handle_irq_event+0x32/0x60 [ 19.096816] handle_level_irq+0x81/0x100 [ 19.096816] handle_irq+0x9c/0xd0 [ 19.096816] do_IRQ+0x5c/0x120 [ 19.096816] common_interrupt+0x36/0x3c [ 19.096816] _raw_spin_unlock_irqrestore+0x57/0x70 [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211] [ 19.096816] process_one_work+0x1d1/0x580 [ 19.096816] worker_thread+0x31/0x380 [ 19.096816] kthread+0xd9/0x110 [ 19.096816] ret_from_fork+0x19/0x24 [ 19.096816] IN-SOFTIRQ-W at: [ 19.096816] __lock_acquire+0x55a/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ath9k_ps_wakeup+0x24/0xe0 [ath9k] [ 19.096816] ath9k_tasklet+0x42/0x260 [ath9k] [ 19.096816] tasklet_action+0x196/0x1e0 [ 19.096816] __do_softirq+0xb0/0x430 [ 19.096816] do_softirq_own_stack+0x33/0x40 [ 19.096816] irq_exit+0xad/0xc0 [ 19.096816] do_IRQ+0x65/0x120 [ 19.096816] common_interrupt+0x36/0x3c [ 19.096816] get_page_from_freelist+0x20a/0x970 [ 19.096816] __alloc_pages_nodemask+0xca/0xed0 [ 19.096816] __get_free_pages+0x14/0x30 [ 19.096816] pgd_alloc+0x1d/0x160 [ 19.096816] mm_init.isra.47+0x13a/0x1b0 [ 19.096816] copy_process.part.54+0xb55/0x1700 [ 19.096816] _do_fork+0xd4/0x6a0 [ 19.096816] SyS_clone+0x27/0x30 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] INITIAL USE at: [ 19.096816] __lock_acquire+0x204/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ath9k_ps_wakeup+0x24/0xe0 [ath9k] [ 19.096816] ath9k_start+0x29/0x1f0 [ath9k] [ 19.096816] drv_start+0x71/0x270 [mac80211] [ 19.096816] ieee80211_do_open+0x31f/0x920 [mac80211] [ 19.096816] ieee80211_open+0x41/0x50 [mac80211] [ 19.096816] __dev_open+0xab/0x140 [ 19.096816] __dev_change_flags+0x89/0x150 [ 19.096816] dev_change_flags+0x28/0x60 [ 19.096816] do_setlink+0x290/0x890 [ 19.096816] rtnl_newlink+0x7cf/0x8e0 [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0 [ 19.096816] netlink_rcv_skb+0xb9/0xe0 [ 19.096816] rtnetlink_rcv+0x1e/0x30 [ 19.096816] netlink_unicast+0x13a/0x2c0 [ 19.096816] netlink_sendmsg+0x290/0x380 [ 19.096816] ___sys_sendmsg+0x1e2/0x280 [ 19.096816] __sys_sendmsg+0x3f/0x80 [ 19.096816] 
SyS_socketcall+0x58c/0x6b0 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] } [ 19.096816] ... key at: [] __key.61994+0x0/0xffffc984 [ath9k] [ 19.096816] ... acquired at: [ 19.096816] check_usage_forwards+0x118/0x120 [ 19.096816] mark_lock+0x2e4/0x590 [ 19.096816] __lock_acquire+0x6ae/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ath_isr+0x15e/0x200 [ath9k] [ 19.096816] __handle_irq_event_percpu+0x44/0x340 [ 19.096816] handle_irq_event_percpu+0x1d/0x50 [ 19.096816] handle_irq_event+0x32/0x60 [ 19.096816] handle_level_irq+0x81/0x100 [ 19.096816] handle_irq+0x9c/0xd0 [ 19.096816] do_IRQ+0x5c/0x120 [ 19.096816] common_interrupt+0x36/0x3c [ 19.096816] _raw_spin_unlock_irqrestore+0x57/0x70 [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211] [ 19.096816] process_one_work+0x1d1/0x580 [ 19.096816] worker_thread+0x31/0x380 [ 19.096816] kthread+0xd9/0x110 [ 19.096816] ret_from_fork+0x19/0x24 [ 19.096816] stack backtrace: [ 19.096816] CPU: 0 PID: 5 Comm: kworker/u2:0 Not tainted 4.13.0-mgm-ovl+ #51 [ 19.096816] Hardware name: FUJITSU SIEMENS LIFEBOOK S6120/FJNB16C, BIOS Version 1.26 05/10/2004 [ 19.096816] Workqueue: phy0 ieee80211_dynamic_ps_enable_work [mac80211] [ 19.096816] Call Trace: [ 19.096816] [ 19.096816] dump_stack+0x16/0x19 [ 19.096816] print_irq_inversion_bug.part.37+0x16c/0x179 [ 19.096816] check_usage_forwards+0x118/0x120 [ 19.096816] ? ret_from_fork+0x19/0x24 [ 19.096816] ? print_shortest_lock_dependencies+0x1a0/0x1a0 [ 19.096816] mark_lock+0x2e4/0x590 [ 19.096816] ? print_shortest_lock_dependencies+0x1a0/0x1a0 [ 19.096816] __lock_acquire+0x6ae/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] ? ath_isr+0x15e/0x200 [ath9k] [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ? ath_isr+0x15e/0x200 [ath9k] [ 19.096816] ath_isr+0x15e/0x200 [ath9k] [ 19.096816] __handle_irq_event_percpu+0x44/0x340 [ 19.096816] handle_irq_event_percpu+0x1d/0x50 [ 19.096816] handle_irq_event+0x32/0x60 [ 19.096816] ? handle_nested_irq+0x100/0x100 [ 19.096816] handle_level_irq+0x81/0x100 [ 19.096816] handle_irq+0x9c/0xd0 [ 19.096816] [ 19.096816] do_IRQ+0x5c/0x120 [ 19.096816] common_interrupt+0x36/0x3c [ 19.096816] EIP: _raw_spin_unlock_irqrestore+0x57/0x70 [ 19.096816] EFLAGS: 00000286 CPU: 0 [ 19.096816] EAX: f60a3600 EBX: 00000286 ECX: 00000006 EDX: 00000001 [ 19.096816] ESI: f46c9e68 EDI: f46c8620 EBP: f60b5e8c ESP: f60b5e84 [ 19.096816] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ? ieee80211_hw_config+0x1db/0x5f0 [mac80211] [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211] [ 19.096816] ? process_one_work+0x127/0x580 [ 19.096816] ? process_one_work+0x127/0x580 [ 19.096816] process_one_work+0x1d1/0x580 [ 19.096816] ? process_one_work+0x127/0x580 [ 19.096816] worker_thread+0x31/0x380 [ 19.096816] kthread+0xd9/0x110 [ 19.096816] ? process_one_work+0x580/0x580 [ 19.096816] ? 
kthread_create_on_node+0x30/0x30 [ 19.096816] ret_from_fork+0x19/0x24 Cc: QCA ath9k Development Cc: Kalle Valo Cc: netdev@vger.kernel.org Signed-off-by: Ville Syrjälä Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/channel.c | 5 +++-- drivers/net/wireless/ath/ath9k/link.c | 4 ++-- drivers/net/wireless/ath/ath9k/main.c | 16 +++++++++------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index f0439f2d566b..fad020aa222e 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -29,6 +29,7 @@ static int ath_set_channel(struct ath_softc *sc) struct cfg80211_chan_def *chandef = &sc->cur_chan->chandef; struct ieee80211_channel *chan = chandef->chan; int pos = chan->hw_value; + unsigned long flags; int old_pos = -1; int r; @@ -42,9 +43,9 @@ static int ath_set_channel(struct ath_softc *sc) chan->center_freq, chandef->width); /* update survey stats for the old channel before switching */ - spin_lock_bh(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); ath_update_survey_stats(sc); - spin_unlock_bh(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); ath9k_cmn_get_channel(hw, ah, chandef); diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index 27c50562dc47..3f4f01c829f0 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -367,10 +367,10 @@ void ath_ani_calibrate(unsigned long data) /* Call ANI routine if necessary */ if (aniflag) { - spin_lock(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); ath9k_hw_ani_monitor(ah, ah->curchan); ath_update_survey_stats(sc); - spin_unlock(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); } /* Perform calibration if necessary */ diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 9c24bc077e6b..918773a9231b 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -97,11 +97,12 @@ void ath_ps_full_sleep(unsigned long data) { struct ath_softc *sc = (struct ath_softc *) data; struct ath_common *common = ath9k_hw_common(sc->sc_ah); + unsigned long flags; bool reset; - spin_lock(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); ath_hw_cycle_counters_update(common); - spin_unlock(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); ath9k_hw_setrxabort(sc->sc_ah, 1); ath9k_hw_stopdmarecv(sc->sc_ah, &reset); @@ -394,10 +395,10 @@ void ath9k_tasklet(unsigned long data) if ((ah->config.hw_hang_checks & HW_BB_WATCHDOG) && (status & ATH9K_INT_BB_WATCHDOG)) { - spin_lock(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); ath_hw_cycle_counters_update(common); ar9003_hw_bb_watchdog_dbg_info(ah); - spin_unlock(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); if (ar9003_hw_bb_watchdog_check(ah)) { type = RESET_TYPE_BB_WATCHDOG; @@ -1955,12 +1956,13 @@ static int ath9k_get_survey(struct ieee80211_hw *hw, int idx, struct ath_common *common = ath9k_hw_common(sc->sc_ah); struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; + unsigned long flags; int pos; if (IS_ENABLED(CONFIG_ATH9K_TX99)) return -EOPNOTSUPP; - spin_lock_bh(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); if (idx == 0) ath_update_survey_stats(sc); @@ -1974,7 +1976,7 @@ static int ath9k_get_survey(struct ieee80211_hw *hw, int idx, 
sband = hw->wiphy->bands[NL80211_BAND_5GHZ]; if (!sband || idx >= sband->n_channels) { - spin_unlock_bh(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); return -ENOENT; } @@ -1982,7 +1984,7 @@ static int ath9k_get_survey(struct ieee80211_hw *hw, int idx, pos = chan->hw_value; memcpy(survey, &sc->survey[pos], sizeof(*survey)); survey->channel = chan; - spin_unlock_bh(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); return 0; } From fd52bdae9ab06b4f444b77d833f403ad07b9e72e Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 19 Sep 2017 14:33:23 +0200 Subject: [PATCH 0243/2177] wcn36xx: Disable 5GHz for wcn3620 wcn3620 can only operate on 2.4GHz band due to RF limitation. If wcn36xx digital block is associated with an external IRIS RF module, retrieve the id and disable 5GHz band in case of wcn3620 id. Signed-off-by: Loic Poulain Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wcn36xx/main.c | 12 +++++++++++- drivers/net/wireless/ath/wcn36xx/wcn36xx.h | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 35bd50bcbbd5..fc4130829489 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1136,7 +1136,8 @@ static int wcn36xx_init_ieee80211(struct wcn36xx *wcn) BIT(NL80211_IFTYPE_MESH_POINT); wcn->hw->wiphy->bands[NL80211_BAND_2GHZ] = &wcn_band_2ghz; - wcn->hw->wiphy->bands[NL80211_BAND_5GHZ] = &wcn_band_5ghz; + if (wcn->rf_id != RF_IRIS_WCN3620) + wcn->hw->wiphy->bands[NL80211_BAND_5GHZ] = &wcn_band_5ghz; wcn->hw->wiphy->max_scan_ssids = WCN36XX_MAX_SCAN_SSIDS; wcn->hw->wiphy->max_scan_ie_len = WCN36XX_MAX_SCAN_IE_LEN; @@ -1169,6 +1170,7 @@ static int wcn36xx_platform_get_resources(struct wcn36xx *wcn, struct platform_device *pdev) { struct device_node *mmio_node; + struct device_node *iris_node; struct resource *res; int index; int ret; @@ -1231,6 +1233,14 @@ static int wcn36xx_platform_get_resources(struct wcn36xx *wcn, goto unmap_ccu; } + /* External RF module */ + iris_node = of_find_node_by_name(mmio_node, "iris"); + if (iris_node) { + if (of_device_is_compatible(iris_node, "qcom,wcn3620")) + wcn->rf_id = RF_IRIS_WCN3620; + of_node_put(iris_node); + } + of_node_put(mmio_node); return 0; diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h index 6aefba4c0cda..81017e6703b4 100644 --- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h +++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h @@ -94,6 +94,9 @@ enum wcn36xx_ampdu_state { #define WCN36XX_FLAGS(__wcn) (__wcn->hw->flags) #define WCN36XX_MAX_POWER(__wcn) (__wcn->hw->conf.chandef.chan->max_power) +#define RF_UNKNOWN 0x0000 +#define RF_IRIS_WCN3620 0x3620 + static inline void buff_to_be(u32 *buf, size_t len) { int i; @@ -241,6 +244,9 @@ struct wcn36xx { struct sk_buff *tx_ack_skb; + /* RF module */ + unsigned rf_id; + #ifdef CONFIG_WCN36XX_DEBUGFS /* Debug file system entry */ struct wcn36xx_dfs_entry dfs; From 30ac407639390fab45b003003619e430c6f9f728 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 18:13:06 +0530 Subject: [PATCH 0244/2177] brcmfmac: use setup_timer() helper Use setup_timer function instead of initializing timer with the function and data fields. 
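For context, the conversion that follows is the stock setup_timer() pattern of this kernel generation: instead of calling init_timer() and then hand-assigning the .function and .data fields, a single call performs all three steps. A small sketch with made-up names (struct my_bus, my_watchdog), not the brcmfmac ones:

    #include <linux/timer.h>

    struct my_bus {
            struct timer_list timer;
    };

    static void my_watchdog(unsigned long data)
    {
            struct my_bus *bus = (struct my_bus *)data;

            /* ... service the watchdog for 'bus', mod_timer() to re-arm ... */
    }

    static void my_bus_timer_init(struct my_bus *bus)
    {
            /* open-coded form being replaced:
             *   init_timer(&bus->timer);
             *   bus->timer.data = (unsigned long)bus;
             *   bus->timer.function = my_watchdog;
             * equivalent single call used by the patch:
             */
            setup_timer(&bus->timer, my_watchdog, (unsigned long)bus);
    }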
Signed-off-by: Allen Pais Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 613caca7dc02..5adce0e36fe5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4144,10 +4144,8 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) init_waitqueue_head(&bus->dcmd_resp_wait); /* Set up the watchdog timer */ - init_timer(&bus->timer); - bus->timer.data = (unsigned long)bus; - bus->timer.function = brcmf_sdio_watchdog; - + setup_timer(&bus->timer, brcmf_sdio_watchdog, + (unsigned long)bus); /* Initialize watchdog thread */ init_completion(&bus->watchdog_wait); bus->watchdog_tsk = kthread_run(brcmf_sdio_watchdog_thread, From d5633bb2c62a95be5f049ee1e7b9ae0869a93c41 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 15:03:16 +0100 Subject: [PATCH 0245/2177] brcmsmac: make const array ucode_ofdm_rates static, reduces object code size Don't populate const array ucode_ofdm_rates on the stack, instead make it static. Makes the object code smaller by 100 bytes: Before: text data bss dec hex filename 39482 564 0 40046 9c6e phy_cmn.o After text data bss dec hex filename 39326 620 0 39946 9c0a phy_cmn.o (gcc 6.3.0, x86-64) Signed-off-by: Colin Ian King Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c index 1c4e9dd57960..3a13d176b221 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c @@ -1916,7 +1916,7 @@ void wlc_phy_txpower_update_shm(struct brcms_phy *pi) pi->hwpwr_txcur); for (j = TXP_FIRST_OFDM; j <= TXP_LAST_OFDM; j++) { - const u8 ucode_ofdm_rates[] = { + static const u8 ucode_ofdm_rates[] = { 0x0c, 0x12, 0x18, 0x24, 0x30, 0x48, 0x60, 0x6c }; offset = wlapi_bmac_rate_shm_offset( From e0a576d7478230aaf51c479db57a2c5ce68e2575 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Fri, 8 Sep 2017 12:30:00 +0200 Subject: [PATCH 0246/2177] rtl8xxxu: Don't printk raw binary if serial number is not burned in. I assume that a blank efuse comes with all ones, thus I did not bother recognizing other possible junk values. This matches 100% of dongles I've seen (a single Gembird 8192eu). 
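The test added in the diff that follows leans on memchr_inv(): it returns a pointer to the first byte that differs from the given value, or NULL when the whole area matches, which makes it a one-line check for an erased (all 0xff) efuse field. A hedged sketch of the idiom with a made-up buffer:

    u8 serial[11];

    /* ... serial[] filled from the efuse map ... */

    if (memchr_inv(serial, 0xff, sizeof(serial)))
            pr_info("Serial: %.11s\n", serial);     /* something was programmed */
    else
            pr_info("Serial not available.\n");     /* still blank, don't dump raw 0xff bytes */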
Signed-off-by: Adam Borowski Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 80fee699f58a..38b2ba1ac6f8 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -614,7 +614,10 @@ static int rtl8192eu_parse_efuse(struct rtl8xxxu_priv *priv) dev_info(&priv->udev->dev, "Vendor: %.7s\n", efuse->vendor_name); dev_info(&priv->udev->dev, "Product: %.11s\n", efuse->device_name); - dev_info(&priv->udev->dev, "Serial: %.11s\n", efuse->serial); + if (memchr_inv(efuse->serial, 0xff, 11)) + dev_info(&priv->udev->dev, "Serial: %.11s\n", efuse->serial); + else + dev_info(&priv->udev->dev, "Serial not available.\n"); if (rtl8xxxu_debug & RTL8XXXU_DEBUG_EFUSE) { unsigned char *raw = priv->efuse_wifi.raw; From 7dfb0ebd022b15dcafd513ae9270599799764e50 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 22:05:00 +0100 Subject: [PATCH 0247/2177] mwifiex: make const array tos_to_ac static, reduces object code size Don't populate the read-only const array tos_to_ac on the stack, instead make it static. Makes the object code smaller by 250 bytes: Before: text data bss dec hex filename 26104 2720 128 28952 7118 wmm.o After: text data bss dec hex filename 25758 2816 128 28702 701e wmm.o Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/wmm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c index 0edd26881321..936a0a841af8 100644 --- a/drivers/net/wireless/marvell/mwifiex/wmm.c +++ b/drivers/net/wireless/marvell/mwifiex/wmm.c @@ -359,7 +359,8 @@ static enum mwifiex_wmm_ac_e mwifiex_wmm_convert_tos_to_ac(struct mwifiex_adapter *adapter, u32 tos) { /* Map of TOS UP values to WMM AC */ - const enum mwifiex_wmm_ac_e tos_to_ac[] = { WMM_AC_BE, + static const enum mwifiex_wmm_ac_e tos_to_ac[] = { + WMM_AC_BE, WMM_AC_BK, WMM_AC_BK, WMM_AC_BE, From 192524a4992a8e638a05147f02d6e42cb2d485e1 Mon Sep 17 00:00:00 2001 From: Pavani Muthyala Date: Thu, 21 Sep 2017 18:20:34 +0530 Subject: [PATCH 0248/2177] rsi: add version information We will dump information about firmware version, firmware file name and operating mode during initialization. 
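For what it's worth, the version numbers printed by this patch are read from fixed byte offsets inside the firmware image (LMAC_VER_OFFSET in the hunks below). The general shape of such a parse, sketched with a hypothetical offset and structure that are not the rsi ones:

    #define MY_FW_VER_OFFSET 0x100          /* placeholder offset, not the rsi value */

    struct my_fw_version {
            u8 major;
            u8 minor;
            u8 release;
    };

    static void my_parse_fw_version(const u8 *fw, struct my_fw_version *v)
    {
            v->major   = fw[MY_FW_VER_OFFSET];
            v->minor   = fw[MY_FW_VER_OFFSET + 1];
            v->release = fw[MY_FW_VER_OFFSET + 2];
    }

The caller is assumed to have checked that the image is at least MY_FW_VER_OFFSET + 3 bytes long before calling this.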
Signed-off-by: Pavani Muthyala Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_debugfs.c | 19 ++++++---------- drivers/net/wireless/rsi/rsi_91x_hal.c | 13 +++++++++++ drivers/net/wireless/rsi/rsi_91x_main.c | 25 ++++++++++++++++++++++ drivers/net/wireless/rsi/rsi_hal.h | 3 +++ drivers/net/wireless/rsi/rsi_main.h | 14 ++++++++---- 5 files changed, 57 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_debugfs.c b/drivers/net/wireless/rsi/rsi_91x_debugfs.c index e98eb55c26cc..8c6ca8e689e4 100644 --- a/drivers/net/wireless/rsi/rsi_91x_debugfs.c +++ b/drivers/net/wireless/rsi/rsi_91x_debugfs.c @@ -83,19 +83,12 @@ static int rsi_version_read(struct seq_file *seq, void *data) { struct rsi_common *common = seq->private; - common->driver_ver.major = 0; - common->driver_ver.minor = 1; - common->driver_ver.release_num = 0; - common->driver_ver.patch_num = 0; - seq_printf(seq, "Driver : %x.%d.%d.%d\nLMAC : %d.%d.%d.%d\n", - common->driver_ver.major, - common->driver_ver.minor, - common->driver_ver.release_num, - common->driver_ver.patch_num, - common->fw_ver.major, - common->fw_ver.minor, - common->fw_ver.release_num, - common->fw_ver.patch_num); + seq_printf(seq, "LMAC : %d.%d.%d.%d\n", + common->lmac_ver.major, + common->lmac_ver.minor, + common->lmac_ver.release_num, + common->lmac_ver.patch_num); + return 0; } diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 7e8e5d4f5f3d..71b02add81c8 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -769,6 +769,7 @@ static int auto_fw_upgrade(struct rsi_hw *adapter, u8 *flash_content, static int rsi_load_firmware(struct rsi_hw *adapter) { + struct rsi_common *common = adapter->priv; struct rsi_host_intf_ops *hif_ops = adapter->host_intf_ops; const struct firmware *fw_entry = NULL; u32 regout_val = 0, content_size; @@ -844,6 +845,18 @@ static int rsi_load_firmware(struct rsi_hw *adapter) content_size = fw_entry->size; rsi_dbg(INFO_ZONE, "FW Length = %d bytes\n", content_size); + /* Get the firmware version */ + common->lmac_ver.ver.info.fw_ver[0] = + flash_content[LMAC_VER_OFFSET] & 0xFF; + common->lmac_ver.ver.info.fw_ver[1] = + flash_content[LMAC_VER_OFFSET + 1] & 0xFF; + common->lmac_ver.major = flash_content[LMAC_VER_OFFSET + 2] & 0xFF; + common->lmac_ver.release_num = + flash_content[LMAC_VER_OFFSET + 3] & 0xFF; + common->lmac_ver.minor = flash_content[LMAC_VER_OFFSET + 4] & 0xFF; + common->lmac_ver.patch_num = 0; + rsi_print_version(common); + status = bl_write_header(adapter, flash_content, content_size); if (status) { rsi_dbg(ERR_ZONE, diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index b57bfdcf3549..71b8cfb8252e 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -20,6 +20,7 @@ #include #include "rsi_mgmt.h" #include "rsi_common.h" +#include "rsi_hal.h" u32 rsi_zone_enabled = /* INFO_ZONE | INIT_ZONE | @@ -56,6 +57,30 @@ void rsi_dbg(u32 zone, const char *fmt, ...) 
} EXPORT_SYMBOL_GPL(rsi_dbg); +static char *opmode_str(int oper_mode) +{ + switch (oper_mode) { + case RSI_DEV_OPMODE_WIFI_ALONE: + return "Wi-Fi alone"; + } + + return "Unknown"; +} + +void rsi_print_version(struct rsi_common *common) +{ + rsi_dbg(ERR_ZONE, "================================================\n"); + rsi_dbg(ERR_ZONE, "================ RSI Version Info ==============\n"); + rsi_dbg(ERR_ZONE, "================================================\n"); + rsi_dbg(ERR_ZONE, "FW Version\t: %d.%d.%d\n", + common->lmac_ver.major, common->lmac_ver.minor, + common->lmac_ver.release_num); + rsi_dbg(ERR_ZONE, "Operating mode\t: %d [%s]", + common->oper_mode, opmode_str(common->oper_mode)); + rsi_dbg(ERR_ZONE, "Firmware file\t: %s", common->priv->fw_file_name); + rsi_dbg(ERR_ZONE, "================================================\n"); +} + /** * rsi_prepare_skb() - This function prepares the skb. * @common: Pointer to the driver private structure. diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h index ad0d6537a678..a09d36b6b765 100644 --- a/drivers/net/wireless/rsi/rsi_hal.h +++ b/drivers/net/wireless/rsi/rsi_hal.h @@ -101,6 +101,9 @@ #define BBP_INFO_40MHZ 0x6 +#define FW_FLASH_OFFSET 0x820 +#define LMAC_VER_OFFSET (FW_FLASH_OFFSET + 0x200) + struct bl_header { __le32 flags; __le32 image_no; diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index 34089ab111aa..a118b7aaeca0 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -113,8 +113,13 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...); struct version_info { u16 major; u16 minor; - u16 release_num; - u16 patch_num; + u8 release_num; + u8 patch_num; + union { + struct { + u8 fw_ver[8]; + } info; + } ver; } __packed; struct skb_info { @@ -199,8 +204,7 @@ struct rsi_common { struct vif_priv vif_info[RSI_MAX_VIFS]; bool mgmt_q_block; - struct version_info driver_ver; - struct version_info fw_ver; + struct version_info lmac_ver; struct rsi_thread tx_thread; struct sk_buff_head tx_queue[NUM_EDCA_QUEUES + 2]; @@ -334,6 +338,8 @@ struct rsi_hw { int (*determine_event_timeout)(struct rsi_hw *adapter); }; +void rsi_print_version(struct rsi_common *common); + struct rsi_host_intf_ops { int (*read_pkt)(struct rsi_hw *adapter, u8 *pkt, u32 len); int (*write_pkt)(struct rsi_hw *adapter, u8 *pkt, u32 len); From 20db073327365f41e9b14feacb450df06758b520 Mon Sep 17 00:00:00 2001 From: Karun Eagalapati Date: Thu, 21 Sep 2017 18:21:28 +0530 Subject: [PATCH 0249/2177] rsi: sdio suspend and resume support SDIO suspend and resume handlers are implemented and verified that device works after suspend/resume cycle. 
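For context, SDIO suspend support of this kind has two pieces: a dev_pm_ops table hooked into the sdio_driver, and a suspend handler that requests MMC_PM_KEEP_POWER from the host controller so the card (and its firmware state) stays powered across suspend. A minimal skeleton of that wiring, with placeholder names rather than the rsi ones:

    #include <linux/mmc/sdio_func.h>
    #include <linux/pm.h>

    static int my_suspend(struct device *dev)
    {
            struct sdio_func *func = dev_to_sdio_func(dev);

            /* quiesce the device (mask interrupts etc.), then ask the host
             * controller to keep the card powered while suspended */
            return sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER);
    }

    static int my_resume(struct device *dev)
    {
            /* unmask interrupts and restore driver state */
            return 0;
    }

    static const struct dev_pm_ops my_pm_ops = {
            .suspend = my_suspend,
            .resume  = my_resume,
    };

    /* referenced from the sdio_driver: .drv = { .pm = &my_pm_ops } */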
Signed-off-by: Karun Eagalapati Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_sdio.c | 128 +++++++++++++++++++++++- drivers/net/wireless/rsi/rsi_sdio.h | 2 + 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 8d3a4839b6ef..b3f8006c0e9b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -1059,16 +1059,136 @@ static void rsi_disconnect(struct sdio_func *pfunction) } #ifdef CONFIG_PM +static int rsi_set_sdio_pm_caps(struct rsi_hw *adapter) +{ + struct rsi_91x_sdiodev *dev = + (struct rsi_91x_sdiodev *)adapter->rsi_dev; + struct sdio_func *func = dev->pfunction; + int ret; + + ret = sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER); + if (ret) + rsi_dbg(ERR_ZONE, "Set sdio keep pwr flag failed: %d\n", ret); + + return ret; +} + +static int rsi_sdio_disable_interrupts(struct sdio_func *pfunc) +{ + struct rsi_hw *adapter = sdio_get_drvdata(pfunc); + u8 isr_status = 0, data = 0; + int ret; + unsigned long t1; + + rsi_dbg(INFO_ZONE, "Waiting for interrupts to be cleared.."); + t1 = jiffies; + do { + rsi_sdio_read_register(adapter, RSI_FN1_INT_REGISTER, + &isr_status); + rsi_dbg(INFO_ZONE, "."); + } while ((isr_status) && (jiffies_to_msecs(jiffies - t1) < 20)); + rsi_dbg(INFO_ZONE, "Interrupts cleared\n"); + + sdio_claim_host(pfunc); + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to read int enable register\n", + __func__); + goto done; + } + + data &= RSI_INT_ENABLE_MASK; + ret = rsi_cmd52writebyte(pfunc->card, RSI_INT_ENABLE_REGISTER, data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to write to int enable register\n", + __func__); + goto done; + } + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to read int enable register\n", + __func__); + goto done; + } + rsi_dbg(INFO_ZONE, "int enable reg content = %x\n", data); + +done: + sdio_release_host(pfunc); + return ret; +} + +static int rsi_sdio_enable_interrupts(struct sdio_func *pfunc) +{ + u8 data; + int ret; + + sdio_claim_host(pfunc); + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to read int enable register\n", __func__); + goto done; + } + + data |= ~RSI_INT_ENABLE_MASK & 0xff; + + ret = rsi_cmd52writebyte(pfunc->card, RSI_INT_ENABLE_REGISTER, data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to write to int enable register\n", + __func__); + goto done; + } + + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to read int enable register\n", __func__); + goto done; + } + rsi_dbg(INFO_ZONE, "int enable reg content = %x\n", data); + +done: + sdio_release_host(pfunc); + return ret; +} + static int rsi_suspend(struct device *dev) { - /* Not yet implemented */ - return -ENOSYS; + int ret; + struct sdio_func *pfunction = dev_to_sdio_func(dev); + struct rsi_hw *adapter = sdio_get_drvdata(pfunction); + struct rsi_common *common; + + if (!adapter) { + rsi_dbg(ERR_ZONE, "Device is not ready\n"); + return -ENODEV; + } + common = adapter->priv; + rsi_sdio_disable_interrupts(pfunction); + + ret = rsi_set_sdio_pm_caps(adapter); + if (ret) + rsi_dbg(INFO_ZONE, + "Setting power management caps failed\n"); + common->fsm_state = FSM_CARD_NOT_READY; + + 
return 0; } static int rsi_resume(struct device *dev) { - /* Not yet implemented */ - return -ENOSYS; + struct sdio_func *pfunction = dev_to_sdio_func(dev); + struct rsi_hw *adapter = sdio_get_drvdata(pfunction); + struct rsi_common *common = adapter->priv; + + common->fsm_state = FSM_MAC_INIT_DONE; + rsi_sdio_enable_interrupts(pfunction); + + return 0; } static const struct dev_pm_ops rsi_pm_ops = { diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h index 95e4bed57baf..49c549ba6682 100644 --- a/drivers/net/wireless/rsi/rsi_sdio.h +++ b/drivers/net/wireless/rsi/rsi_sdio.h @@ -48,6 +48,8 @@ enum sdio_interrupt_type { #define RSI_DEVICE_BUFFER_STATUS_REGISTER 0xf3 #define RSI_FN1_INT_REGISTER 0xf9 +#define RSI_INT_ENABLE_REGISTER 0x04 +#define RSI_INT_ENABLE_MASK 0xfc #define RSI_SD_REQUEST_MASTER 0x10000 /* FOR SD CARD ONLY */ From 77d68147745b98c25c9400f3880906333845f230 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:29 -0700 Subject: [PATCH 0250/2177] qtnfmac: convert channel width from bitfiled to simple enum This will allow to use qlink channel width values to specify BW setting corresponding to enum nl80211_chan_width. Current user is converted to apply BIT() macro manually to each individual qlink_channel_width enumeration value. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/qlink.h | 16 ++++++++-------- .../net/wireless/quantenna/qtnfmac/qlink_util.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index a8242f678496..a69fd470c115 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -108,14 +108,14 @@ enum qlink_sta_flags { }; enum qlink_channel_width { - QLINK_CHAN_WIDTH_5 = BIT(0), - QLINK_CHAN_WIDTH_10 = BIT(1), - QLINK_CHAN_WIDTH_20_NOHT = BIT(2), - QLINK_CHAN_WIDTH_20 = BIT(3), - QLINK_CHAN_WIDTH_40 = BIT(4), - QLINK_CHAN_WIDTH_80 = BIT(5), - QLINK_CHAN_WIDTH_80P80 = BIT(6), - QLINK_CHAN_WIDTH_160 = BIT(7), + QLINK_CHAN_WIDTH_5 = 0, + QLINK_CHAN_WIDTH_10, + QLINK_CHAN_WIDTH_20_NOHT, + QLINK_CHAN_WIDTH_20, + QLINK_CHAN_WIDTH_40, + QLINK_CHAN_WIDTH_80, + QLINK_CHAN_WIDTH_80P80, + QLINK_CHAN_WIDTH_160, }; /* QLINK Command messages related definitions diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c index cf024c995fd6..369b77d7864e 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c @@ -49,28 +49,28 @@ u8 qlink_chan_width_mask_to_nl(u16 qlink_mask) { u8 result = 0; - if (qlink_mask & QLINK_CHAN_WIDTH_5) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_5)) result |= BIT(NL80211_CHAN_WIDTH_5); - if (qlink_mask & QLINK_CHAN_WIDTH_10) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_10)) result |= BIT(NL80211_CHAN_WIDTH_10); - if (qlink_mask & QLINK_CHAN_WIDTH_20_NOHT) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_20_NOHT)) result |= BIT(NL80211_CHAN_WIDTH_20_NOHT); - if (qlink_mask & QLINK_CHAN_WIDTH_20) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_20)) result |= BIT(NL80211_CHAN_WIDTH_20); - if (qlink_mask & QLINK_CHAN_WIDTH_40) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_40)) result |= BIT(NL80211_CHAN_WIDTH_40); - if (qlink_mask & QLINK_CHAN_WIDTH_80) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_80)) result |= BIT(NL80211_CHAN_WIDTH_80); - if (qlink_mask & 
QLINK_CHAN_WIDTH_80P80) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_80P80)) result |= BIT(NL80211_CHAN_WIDTH_80P80); - if (qlink_mask & QLINK_CHAN_WIDTH_160) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_160)) result |= BIT(NL80211_CHAN_WIDTH_160); return result; From fac7f9bf14814fd2c722eaeec18ca78be2177d84 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:30 -0700 Subject: [PATCH 0251/2177] qtnfmac: make "Channel change" event report full channel info Specifically, it has to report center frequency, secondary center frequency (for 80+80) and BW. Introduce channel definition structure to qlink and modify channel change event processing function accordingly. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/event.c | 29 +++++------ .../net/wireless/quantenna/qtnfmac/qlink.h | 18 ++++++- .../wireless/quantenna/qtnfmac/qlink_util.c | 52 +++++++++++++++++++ .../wireless/quantenna/qtnfmac/qlink_util.h | 4 ++ 4 files changed, 85 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 0fc2814eafad..df58e83fbbb8 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -25,6 +25,7 @@ #include "trans.h" #include "util.h" #include "event.h" +#include "qlink_util.h" static int qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif, @@ -359,39 +360,35 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, { struct wiphy *wiphy = priv_to_wiphy(mac); struct cfg80211_chan_def chandef; - struct ieee80211_channel *chan; struct qtnf_vif *vif; - int freq; int i; if (len < sizeof(*data)) { - pr_err("payload is too short\n"); + pr_err("MAC%u: payload is too short\n", mac->macid); return -EINVAL; } - freq = le32_to_cpu(data->freq); - chan = ieee80211_get_channel(wiphy, freq); - if (!chan) { - pr_err("channel at %d MHz not found\n", freq); + qlink_chandef_q2cfg(wiphy, &data->chan, &chandef); + + if (!cfg80211_chandef_valid(&chandef)) { + pr_err("MAC%u: bad channel f1=%u f2=%u bw=%u\n", mac->macid, + chandef.center_freq1, chandef.center_freq2, + chandef.width); return -EINVAL; } - pr_debug("MAC%d switch to new channel %u MHz\n", mac->macid, freq); + pr_debug("MAC%d: new channel ieee=%u freq1=%u freq2=%u bw=%u\n", + mac->macid, chandef.chan->hw_value, chandef.center_freq1, + chandef.center_freq2, chandef.width); if (mac->status & QTNF_MAC_CSA_ACTIVE) { mac->status &= ~QTNF_MAC_CSA_ACTIVE; - if (chan->hw_value != mac->csa_chandef.chan->hw_value) + if (chandef.chan->hw_value != mac->csa_chandef.chan->hw_value) pr_warn("unexpected switch to %u during CSA to %u\n", - chan->hw_value, + chandef.chan->hw_value, mac->csa_chandef.chan->hw_value); } - /* FIXME: need to figure out proper nl80211_channel_type value */ - cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20); - /* fall-back to minimal safe chandef description */ - if (!cfg80211_chandef_valid(&chandef)) - cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20); - memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); for (i = 0; i < QTNF_MAX_INTF; i++) { diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index a69fd470c115..59368549c086 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -118,6 +118,20 @@ enum qlink_channel_width { QLINK_CHAN_WIDTH_160, }; +/** + * struct qlink_chandef - qlink channel definition 
+ * + * @center_freq1: center frequency of first segment + * @center_freq2: center frequency of second segment (80+80 only) + * @width: channel width, one of @enum qlink_channel_width + */ +struct qlink_chandef { + __le16 center_freq1; + __le16 center_freq2; + u8 width; + u8 rsvd[3]; +} __packed; + /* QLINK Command messages related definitions */ @@ -764,11 +778,11 @@ struct qlink_event_bss_leave { /** * struct qlink_event_freq_change - data for QLINK_EVENT_FREQ_CHANGE event * - * @freq: new operating frequency in MHz + * @chan: new operating channel definition */ struct qlink_event_freq_change { struct qlink_event ehdr; - __le32 freq; + struct qlink_chandef chan; } __packed; enum qlink_rxmgmt_flags { diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c index 369b77d7864e..3c1db5bd6393 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c @@ -75,3 +75,55 @@ u8 qlink_chan_width_mask_to_nl(u16 qlink_mask) return result; } + +static enum nl80211_chan_width qlink_chanwidth_to_nl(u8 qlw) +{ + switch (qlw) { + case QLINK_CHAN_WIDTH_20_NOHT: + return NL80211_CHAN_WIDTH_20_NOHT; + case QLINK_CHAN_WIDTH_20: + return NL80211_CHAN_WIDTH_20; + case QLINK_CHAN_WIDTH_40: + return NL80211_CHAN_WIDTH_40; + case QLINK_CHAN_WIDTH_80: + return NL80211_CHAN_WIDTH_80; + case QLINK_CHAN_WIDTH_80P80: + return NL80211_CHAN_WIDTH_80P80; + case QLINK_CHAN_WIDTH_160: + return NL80211_CHAN_WIDTH_160; + case QLINK_CHAN_WIDTH_5: + return NL80211_CHAN_WIDTH_5; + case QLINK_CHAN_WIDTH_10: + return NL80211_CHAN_WIDTH_10; + default: + return -1; + } +} + +void qlink_chandef_q2cfg(struct wiphy *wiphy, + const struct qlink_chandef *qch, + struct cfg80211_chan_def *chdef) +{ + chdef->center_freq1 = le16_to_cpu(qch->center_freq1); + chdef->center_freq2 = le16_to_cpu(qch->center_freq2); + chdef->width = qlink_chanwidth_to_nl(qch->width); + + switch (chdef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + chdef->chan = ieee80211_get_channel(wiphy, chdef->center_freq1); + break; + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + chdef->chan = ieee80211_get_channel(wiphy, + chdef->center_freq1 - 10); + break; + default: + chdef->chan = NULL; + break; + } +} diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h index de06c1e20b5b..5e49a8a09977 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h @@ -19,6 +19,7 @@ #include #include +#include #include "qlink.h" @@ -62,5 +63,8 @@ static inline void qtnf_cmd_skb_put_tlv_u16(struct sk_buff *skb, u16 qlink_iface_type_to_nl_mask(u16 qlink_type); u8 qlink_chan_width_mask_to_nl(u16 qlink_mask); +void qlink_chandef_q2cfg(struct wiphy *wiphy, + const struct qlink_chandef *qch, + struct cfg80211_chan_def *chdef); #endif /* _QTN_FMAC_QLINK_UTIL_H_ */ From 9e5478b608b5853dc877ea9c80be24349bcb453f Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:31 -0700 Subject: [PATCH 0252/2177] qtnfmac: retrieve current channel info from EP Do not try to cache current operational channel info in driver, this is a potential source of synchronization issues + driver does not really need that info. Introduce GET_CHANNEL command and process it appropriately. 
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 35 +++++++---------- .../net/wireless/quantenna/qtnfmac/commands.c | 38 +++++++++++++++++++ .../net/wireless/quantenna/qtnfmac/commands.h | 1 + .../net/wireless/quantenna/qtnfmac/qlink.h | 11 ++++++ 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 856fa6e8327e..0ef1285e7b93 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -793,37 +793,30 @@ qtnf_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct qtnf_wmac *mac = wiphy_priv(wiphy); struct net_device *ndev = wdev->netdev; struct qtnf_vif *vif; + int ret; if (!ndev) return -ENODEV; vif = qtnf_netdev_get_priv(wdev->netdev); - switch (vif->wdev.iftype) { - case NL80211_IFTYPE_STATION: - if (vif->sta_state == QTNF_STA_DISCONNECTED) { - pr_warn("%s: STA disconnected\n", ndev->name); - return -ENODATA; - } - break; - case NL80211_IFTYPE_AP: - if (!(vif->bss_status & QTNF_STATE_AP_START)) { - pr_warn("%s: AP not started\n", ndev->name); - return -ENODATA; - } - break; - default: - pr_err("unsupported vif type (%d)\n", vif->wdev.iftype); - return -ENODATA; + ret = qtnf_cmd_get_channel(vif, chandef); + if (ret) { + pr_err("%s: failed to get channel: %d\n", ndev->name, ret); + goto out; } - if (!cfg80211_chandef_valid(&mac->chandef)) { - pr_err("invalid channel settings on %s\n", ndev->name); - return -ENODATA; + if (!cfg80211_chandef_valid(chandef)) { + pr_err("%s: bad chan freq1=%u freq2=%u bw=%u\n", ndev->name, + chandef->center_freq1, chandef->center_freq2, + chandef->width); + ret = -ENODATA; } - memcpy(chandef, &mac->chandef, sizeof(*chandef)); - return 0; + memcpy(&mac->chandef, chandef, sizeof(mac->chandef)); + +out: + return ret; } static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 4206886b110c..806b88b964b0 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2358,3 +2358,41 @@ out: qtnf_bus_unlock(mac->bus); return ret; } + +int qtnf_cmd_get_channel(struct qtnf_vif *vif, struct cfg80211_chan_def *chdef) +{ + struct qtnf_bus *bus = vif->mac->bus; + const struct qlink_resp_channel_get *resp; + struct sk_buff *cmd_skb; + struct sk_buff *resp_skb = NULL; + u16 res_code = QLINK_CMD_RESULT_OK; + int ret; + + cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid, + QLINK_CMD_CHAN_GET, + sizeof(struct qlink_cmd)); + if (unlikely(!cmd_skb)) + return -ENOMEM; + + qtnf_bus_lock(bus); + + ret = qtnf_cmd_send_with_reply(bus, cmd_skb, &resp_skb, &res_code, + sizeof(*resp), NULL); + + qtnf_bus_unlock(bus); + + if (unlikely(ret)) + goto out; + + if (unlikely(res_code != QLINK_CMD_RESULT_OK)) { + ret = -ENODATA; + goto out; + } + + resp = (const struct qlink_resp_channel_get *)resp_skb->data; + qlink_chandef_q2cfg(priv_to_wiphy(vif->mac), &resp->chan, chdef); + +out: + consume_skb(resp_skb); + return ret; +} diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index 783b20364296..e1bcb83d7705 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -75,5 +75,6 @@ int qtnf_cmd_get_chan_stats(struct 
qtnf_wmac *mac, u16 channel, struct qtnf_chan_stats *stats); int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac, struct cfg80211_csa_settings *params); +int qtnf_cmd_get_channel(struct qtnf_vif *vif, struct cfg80211_chan_def *chdef); #endif /* QLINK_COMMANDS_H_ */ diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 59368549c086..fb88f3ebf083 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -169,6 +169,7 @@ enum qlink_cmd_type { QLINK_CMD_REG_NOTIFY = 0x0019, QLINK_CMD_CHANS_INFO_GET = 0x001A, QLINK_CMD_CHAN_SWITCH = 0x001B, + QLINK_CMD_CHAN_GET = 0x001C, QLINK_CMD_CONFIG_AP = 0x0020, QLINK_CMD_START_AP = 0x0021, QLINK_CMD_STOP_AP = 0x0022, @@ -694,6 +695,16 @@ struct qlink_resp_get_chan_stats { u8 info[0]; } __packed; +/** + * struct qlink_resp_channel_get - response for QLINK_CMD_CHAN_GET command + * + * @chan: definition of current operating channel. + */ +struct qlink_resp_channel_get { + struct qlink_resp rhdr; + struct qlink_chandef chan; +} __packed; + /* QLINK Events messages related definitions */ From 96d4eaf20fb8d34a475c4086de3ed9bd483993d6 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:32 -0700 Subject: [PATCH 0253/2177] qtnfmac: do not cache channel info from "connect" command This makes no sense because real operational channel is choosen based on AP operation, not on what STA is configured to. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 15 +-------------- drivers/net/wireless/quantenna/qtnfmac/commands.c | 6 ++++-- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 0ef1285e7b93..17b323e2e83e 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -613,8 +613,6 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); - struct qtnf_wmac *mac = wiphy_priv(wiphy); - struct cfg80211_chan_def chandef; struct qtnf_bss_config *bss_cfg; int ret; @@ -627,18 +625,6 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev, bss_cfg = &vif->bss_cfg; memset(bss_cfg, 0, sizeof(*bss_cfg)); - if (sme->channel) { - /* FIXME: need to set proper nl80211_channel_type value */ - cfg80211_chandef_create(&chandef, sme->channel, - NL80211_CHAN_HT20); - /* fall-back to minimal safe chandef description */ - if (!cfg80211_chandef_valid(&chandef)) - cfg80211_chandef_create(&chandef, sme->channel, - NL80211_CHAN_HT20); - - memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); - } - bss_cfg->ssid_len = sme->ssid_len; memcpy(&bss_cfg->ssid, sme->ssid, bss_cfg->ssid_len); bss_cfg->auth_type = sme->auth_type; @@ -663,6 +649,7 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev, bss_cfg->connect_flags |= QLINK_STA_CONNECT_USE_RRM; memcpy(&bss_cfg->crypto, &sme->crypto, sizeof(bss_cfg->crypto)); + if (sme->bssid) ether_addr_copy(bss_cfg->bssid, sme->bssid); else diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 806b88b964b0..c55bae156b3c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2055,8 +2055,10 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, 
ether_addr_copy(cmd->bssid, bss_cfg->bssid); - if (vif->mac->chandef.chan) - cmd->channel = cpu_to_le16(vif->mac->chandef.chan->hw_value); + if (sme->channel) + cmd->channel = cpu_to_le16(sme->channel->hw_value); + else + cmd->channel = 0; cmd->bg_scan_period = cpu_to_le16(bss_cfg->bg_scan_period); From 3656ab0fef5b29e95d251e3f0a0bfb7da311337a Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:33 -0700 Subject: [PATCH 0254/2177] qtnfmac: let wifi card handle channel switch request to the same chan No reason to verify channel switch request in driver, it can simply be forwarded to wireless device. Device can perform required checks and return appropriate error code, and driver may not even have information on current operational channel. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 17b323e2e83e..4590f3038120 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -846,11 +846,6 @@ static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } - if (cfg80211_chandef_identical(¶ms->chandef, &mac->chandef)) { - pr_err("%s: switch request to the same channel\n", dev->name); - return -EALREADY; - } - ret = qtnf_cmd_send_chan_switch(mac, params); if (ret) pr_warn("%s: failed to switch to channel (%u)\n", From 8c015b9067d608e59a1486b8618eac9e0bd2952f Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:34 -0700 Subject: [PATCH 0255/2177] qtnfmac: pass VIF info to SendChannel command Do not assume whether wireless device can or can not handle switching several interfaces on a single radio to different channels. Device will handle it itself and will return appropriate error code. 
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 2 +- drivers/net/wireless/quantenna/qtnfmac/commands.c | 5 +++-- drivers/net/wireless/quantenna/qtnfmac/commands.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 4590f3038120..30f8be554406 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -846,7 +846,7 @@ static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } - ret = qtnf_cmd_send_chan_switch(mac, params); + ret = qtnf_cmd_send_chan_switch(vif, params); if (ret) pr_warn("%s: failed to switch to channel (%u)\n", dev->name, params->chandef.chan->hw_value); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index c55bae156b3c..0138dadf93b1 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2306,15 +2306,16 @@ out: return ret; } -int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac, +int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif, struct cfg80211_csa_settings *params) { + struct qtnf_wmac *mac = vif->mac; struct qlink_cmd_chan_switch *cmd; struct sk_buff *cmd_skb; u16 res_code = QLINK_CMD_RESULT_OK; int ret; - cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, 0x0, + cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, vif->vifid, QLINK_CMD_CHAN_SWITCH, sizeof(*cmd)); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index e1bcb83d7705..8a5a82ce82cd 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -73,7 +73,7 @@ int qtnf_cmd_send_updown_intf(struct qtnf_vif *vif, int qtnf_cmd_reg_notify(struct qtnf_bus *bus, struct regulatory_request *req); int qtnf_cmd_get_chan_stats(struct qtnf_wmac *mac, u16 channel, struct qtnf_chan_stats *stats); -int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac, +int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif, struct cfg80211_csa_settings *params); int qtnf_cmd_get_channel(struct qtnf_vif *vif, struct cfg80211_chan_def *chdef); From 97397633108a34ec5f5f316ffd1d3709e1c0479d Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:35 -0700 Subject: [PATCH 0256/2177] qtnfmac: do not cache CSA chandef info It is never used for anything useful, and all logic is handled by either WiFi card or higher layers. 
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 12 ------------ drivers/net/wireless/quantenna/qtnfmac/commands.c | 2 -- drivers/net/wireless/quantenna/qtnfmac/core.h | 1 - drivers/net/wireless/quantenna/qtnfmac/event.c | 8 +------- 4 files changed, 1 insertion(+), 22 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 30f8be554406..262e8cfd8f8d 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -809,7 +809,6 @@ out: static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { - struct qtnf_wmac *mac = wiphy_priv(wiphy); struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; @@ -830,17 +829,6 @@ static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, return -EOPNOTSUPP; } - if (vif->vifid != 0) { - if (!(mac->status & QTNF_MAC_CSA_ACTIVE)) - return -EOPNOTSUPP; - - if (!cfg80211_chandef_identical(¶ms->chandef, - &mac->csa_chandef)) - return -EINVAL; - - return 0; - } - if (!cfg80211_chandef_valid(¶ms->chandef)) { pr_err("%s: invalid channel\n", dev->name); return -EINVAL; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 0138dadf93b1..42f7e1dadd29 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2337,8 +2337,6 @@ int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif, switch (res_code) { case QLINK_CMD_RESULT_OK: - memcpy(&mac->csa_chandef, ¶ms->chandef, - sizeof(mac->csa_chandef)); mac->status |= QTNF_MAC_CSA_ACTIVE; ret = 0; break; diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 066fcd1095a0..521ce094f2a6 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -147,7 +147,6 @@ struct qtnf_wmac { struct qtnf_vif iflist[QTNF_MAX_INTF]; struct cfg80211_scan_request *scan_req; struct cfg80211_chan_def chandef; - struct cfg80211_chan_def csa_chandef; struct mutex mac_lock; /* lock during wmac speicific ops */ struct timer_list scan_timeout; }; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index df58e83fbbb8..77563b0b1a7a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -381,13 +381,7 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, mac->macid, chandef.chan->hw_value, chandef.center_freq1, chandef.center_freq2, chandef.width); - if (mac->status & QTNF_MAC_CSA_ACTIVE) { - mac->status &= ~QTNF_MAC_CSA_ACTIVE; - if (chandef.chan->hw_value != mac->csa_chandef.chan->hw_value) - pr_warn("unexpected switch to %u during CSA to %u\n", - chandef.chan->hw_value, - mac->csa_chandef.chan->hw_value); - } + mac->status &= ~QTNF_MAC_CSA_ACTIVE; memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); From 6bfe61d697cb24b0838227c40bff8603addae652 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:36 -0700 Subject: [PATCH 0257/2177] qtnfmac: remove unused mac::status field There are no users of this field and it can safely be removed. 
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 1 - drivers/net/wireless/quantenna/qtnfmac/core.h | 5 ----- drivers/net/wireless/quantenna/qtnfmac/event.c | 2 -- 3 files changed, 8 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 42f7e1dadd29..8f95f9842f49 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2337,7 +2337,6 @@ int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif, switch (res_code) { case QLINK_CMD_RESULT_OK: - mac->status |= QTNF_MAC_CSA_ACTIVE; ret = 0; break; case QLINK_CMD_RESULT_ENOTFOUND: diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 521ce094f2a6..2cd015048598 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -89,10 +89,6 @@ enum qtnf_sta_state { QTNF_STA_CONNECTED }; -enum qtnf_mac_status { - QTNF_MAC_CSA_ACTIVE = BIT(0) -}; - struct qtnf_vif { struct wireless_dev wdev; u8 vifid; @@ -141,7 +137,6 @@ struct qtnf_wmac { u8 macid; u8 wiphy_registered; u8 macaddr[ETH_ALEN]; - u32 status; struct qtnf_bus *bus; struct qtnf_mac_info macinfo; struct qtnf_vif iflist[QTNF_MAX_INTF]; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 77563b0b1a7a..b1acc24bdc39 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -381,8 +381,6 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, mac->macid, chandef.chan->hw_value, chandef.center_freq1, chandef.center_freq2, chandef.width); - mac->status &= ~QTNF_MAC_CSA_ACTIVE; - memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); for (i = 0; i < QTNF_MAX_INTF; i++) { From 115af851234fc2690753f71685a0007fa4c7f973 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:37 -0700 Subject: [PATCH 0258/2177] qtnfmac: do not report channel changes until wiphy is registered Wireless device may send "channel changed" event before driver registered this device with wireless core, which will result in warnings. Once device is registered, higher layer will query channel info manually using .get_channel callback. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index b1acc24bdc39..7481d5bdf647 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -368,6 +368,9 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, return -EINVAL; } + if (!wiphy->registered) + return 0; + qlink_chandef_q2cfg(wiphy, &data->chan, &chandef); if (!cfg80211_chandef_valid(&chandef)) { From 9b029e178ea174acd96a5ddf97b04c70f4e57885 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Sep 2017 23:56:30 +0100 Subject: [PATCH 0259/2177] iwlegacy: make const array static to shink object code size Don't populate const array ac_to_fifo on the stack in an inlined function, instead make it static. 
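Why this shrinks the object code: a const array declared inside a function without "static" has automatic storage, so the compiler typically copies its initializer onto the stack on every call, and those copy instructions live in .text; with "static const" the table is emitted once into .rodata and merely indexed. The same pattern is applied again in the b43 and hv_netvsc patches further below. A minimal standalone sketch of the idea (function names and values are made up for illustration, not code from the iwlegacy driver; build with "gcc -O2 -c" and compare the two with size(1)):

	/* Automatic storage: the initializer is typically rebuilt on the
	 * stack at each call, costing extra instructions in .text.
	 */
	int fifo_from_tid_stack(unsigned int tid)
	{
		const unsigned char ac_to_fifo[] = { 3, 2, 1, 0 }; /* placeholder values */

		return (tid < sizeof(ac_to_fifo)) ? ac_to_fifo[tid] : -1;
	}

	/* Static storage: the table is emitted once into .rodata and only
	 * indexed here, so .text shrinks.
	 */
	int fifo_from_tid_static(unsigned int tid)
	{
		static const unsigned char ac_to_fifo[] = { 3, 2, 1, 0 }; /* placeholder values */

		return (tid < sizeof(ac_to_fifo)) ? ac_to_fifo[tid] : -1;
	}

The size(1) output quoted in these commit messages measures exactly this shift of bytes out of text (and, for the static version, into data/rodata).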
Makes the object code smaller by over 800 bytes: text data bss dec hex filename 159029 33154 1216 193399 2f377 4965-mac.o text data bss dec hex filename 158122 33250 1216 192588 2f04c 4965-mac.o (gcc version 7.2.0 x86_64) Signed-off-by: Colin Ian King Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index de9b6522c43f..65eba2c24292 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -1480,7 +1480,7 @@ il4965_get_ac_from_tid(u16 tid) static inline int il4965_get_fifo_from_tid(u16 tid) { - const u8 ac_to_fifo[] = { + static const u8 ac_to_fifo[] = { IL_TX_FIFO_VO, IL_TX_FIFO_VI, IL_TX_FIFO_BE, From 96cbe3d638e4287db6482b6223367d3e6cf5871e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 16:39:02 +0100 Subject: [PATCH 0260/2177] b43: make const arrays static, reduces object code size Don't populate const arrays on the stack, instead make them static. Makes the object code smaller by over 60 bytes: Before: text data bss dec hex filename 14816 1296 0 16112 3ef0 b43/phy_ht.o After: text data bss dec hex filename 14551 1496 0 16047 3eaf b43/phy_ht.o (gcc 6.3.0, x86-64) Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43/phy_ht.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/phy_ht.c b/drivers/net/wireless/broadcom/b43/phy_ht.c index 718c90e81696..c3158d085c2b 100644 --- a/drivers/net/wireless/broadcom/b43/phy_ht.c +++ b/drivers/net/wireless/broadcom/b43/phy_ht.c @@ -119,7 +119,7 @@ static void b43_radio_2059_rcal(struct b43_wldev *dev) /* Calibrate the internal RC oscillator? */ static void b43_radio_2057_rccal(struct b43_wldev *dev) { - const u16 radio_values[3][2] = { + static const u16 radio_values[3][2] = { { 0x61, 0xE9 }, { 0x69, 0xD5 }, { 0x73, 0x99 }, }; int i; @@ -154,7 +154,7 @@ static void b43_radio_2059_init_pre(struct b43_wldev *dev) static void b43_radio_2059_init(struct b43_wldev *dev) { - const u16 routing[] = { R2059_C1, R2059_C2, R2059_C3 }; + static const u16 routing[] = { R2059_C1, R2059_C2, R2059_C3 }; int i; /* Prepare (reset?) radio */ @@ -263,7 +263,7 @@ static void b43_phy_ht_reset_cca(struct b43_wldev *dev) static void b43_phy_ht_zero_extg(struct b43_wldev *dev) { u8 i, j; - u16 base[] = { 0x40, 0x60, 0x80 }; + static const u16 base[] = { 0x40, 0x60, 0x80 }; for (i = 0; i < ARRAY_SIZE(base); i++) { for (j = 0; j < 4; j++) From 2d04cfcfa2e86cac751979b584e5420dd470903b Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Sep 2017 13:00:02 +0530 Subject: [PATCH 0261/2177] net: af_packet: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d288f52c53f7..b0f221885dfd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -544,9 +544,7 @@ static void prb_init_blk_timer(struct packet_sock *po, struct tpacket_kbdq_core *pkc, void (*func) (unsigned long)) { - init_timer(&pkc->retire_blk_timer); - pkc->retire_blk_timer.data = (long)po; - pkc->retire_blk_timer.function = func; + setup_timer(&pkc->retire_blk_timer, func, (long)po); pkc->retire_blk_timer.expires = jiffies; } From b1e07c5486f0df26caf93234bd7db5545a37aba8 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Sep 2017 13:00:03 +0530 Subject: [PATCH 0262/2177] net: nfc: hci: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- net/nfc/hci/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index b740fef0acc5..a8a6e7814e09 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -1004,9 +1004,8 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev) INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work); - init_timer(&hdev->cmd_timer); - hdev->cmd_timer.data = (unsigned long)hdev; - hdev->cmd_timer.function = nfc_hci_cmd_timeout; + setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout, + (unsigned long)hdev); skb_queue_head_init(&hdev->rx_hcp_frags); From 22d387e13ac357279ddac55694883fd3e30a5639 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Sep 2017 13:00:04 +0530 Subject: [PATCH 0263/2177] net: nfc: hci: llc_shdlc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- net/nfc/hci/llc_shdlc.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 17e59a009ce6..58df37eae1e8 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -763,17 +763,14 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, mutex_init(&shdlc->state_mutex); shdlc->state = SHDLC_DISCONNECTED; - init_timer(&shdlc->connect_timer); - shdlc->connect_timer.data = (unsigned long)shdlc; - shdlc->connect_timer.function = llc_shdlc_connect_timeout; + setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout, + (unsigned long)shdlc); - init_timer(&shdlc->t1_timer); - shdlc->t1_timer.data = (unsigned long)shdlc; - shdlc->t1_timer.function = llc_shdlc_t1_timeout; + setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout, + (unsigned long)shdlc); - init_timer(&shdlc->t2_timer); - shdlc->t2_timer.data = (unsigned long)shdlc; - shdlc->t2_timer.function = llc_shdlc_t2_timeout; + setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout, + (unsigned long)shdlc); shdlc->w = SHDLC_MAX_WINDOW; shdlc->srej_support = SHDLC_SREJ_SUPPORT; From d835b63cc4ee67e59eed9d1957f729c0a30b7331 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Sep 2017 13:00:05 +0530 Subject: [PATCH 0264/2177] net: nfc: llcp_core: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- net/nfc/llcp_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 02eef5cf3cce..7988185072e5 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1573,9 +1573,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) INIT_LIST_HEAD(&local->list); kref_init(&local->ref); mutex_init(&local->sdp_lock); - init_timer(&local->link_timer); - local->link_timer.data = (unsigned long) local; - local->link_timer.function = nfc_llcp_symm_timer; + setup_timer(&local->link_timer, nfc_llcp_symm_timer, + (unsigned long)local); skb_queue_head_init(&local->tx_queue); INIT_WORK(&local->tx_work, nfc_llcp_tx_work); @@ -1601,9 +1600,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) mutex_init(&local->sdreq_lock); INIT_HLIST_HEAD(&local->pending_sdreqs); - init_timer(&local->sdreq_timer); - local->sdreq_timer.data = (unsigned long) local; - local->sdreq_timer.function = nfc_llcp_sdreq_timer; + setup_timer(&local->sdreq_timer, nfc_llcp_sdreq_timer, + (unsigned long)local); INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); list_add(&local->list, &llcp_devices); From b5d7388f9db78f19e6af7b56a469ca8d1860329d Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 21 Sep 2017 18:43:29 -0400 Subject: [PATCH 0265/2177] bpf: Optimize lpm trie delete Before the delete operator was added, this datastructure maintained an invariant that intermediate nodes were only present when necessary to build the tree. This patch updates the delete operation to reinstate that invariant by removing unnecessary intermediate nodes after a node is removed and thus keeping the tree structure at a minimal size. Suggested-by: Daniel Mack Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- kernel/bpf/lpm_trie.c | 71 ++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 9d58a576b2ae..34d8a690ea05 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -394,8 +394,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct bpf_lpm_trie_key *key = _key; - struct lpm_trie_node __rcu **trim; - struct lpm_trie_node *node; + struct lpm_trie_node __rcu **trim, **trim2; + struct lpm_trie_node *node, *parent; unsigned long irq_flags; unsigned int next_bit; size_t matchlen = 0; @@ -407,31 +407,26 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) raw_spin_lock_irqsave(&trie->lock, irq_flags); /* Walk the tree looking for an exact key/length match and keeping - * track of where we could begin trimming the tree. The trim-point - * is the sub-tree along the walk consisting of only single-child - * intermediate nodes and ending at a leaf node that we want to - * remove. + * track of the path we traverse. We will need to know the node + * we wish to delete, and the slot that points to the node we want + * to delete. We may also need to know the nodes parent and the + * slot that contains it. 
*/ trim = &trie->root; - node = rcu_dereference_protected( - trie->root, lockdep_is_held(&trie->lock)); - while (node) { + trim2 = trim; + parent = NULL; + while ((node = rcu_dereference_protected( + *trim, lockdep_is_held(&trie->lock)))) { matchlen = longest_prefix_match(trie, node, key); if (node->prefixlen != matchlen || node->prefixlen == key->prefixlen) break; + parent = node; + trim2 = trim; next_bit = extract_bit(key->data, node->prefixlen); - /* If we hit a node that has more than one child or is a valid - * prefix itself, do not remove it. Reset the root of the trim - * path to its descendant on our path. - */ - if (!(node->flags & LPM_TREE_NODE_FLAG_IM) || - (node->child[0] && node->child[1])) - trim = &node->child[next_bit]; - node = rcu_dereference_protected( - node->child[next_bit], lockdep_is_held(&trie->lock)); + trim = &node->child[next_bit]; } if (!node || node->prefixlen != key->prefixlen || @@ -442,27 +437,47 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) trie->n_entries--; - /* If the node we are removing is not a leaf node, simply mark it + /* If the node we are removing has two children, simply mark it * as intermediate and we are done. */ - if (rcu_access_pointer(node->child[0]) || + if (rcu_access_pointer(node->child[0]) && rcu_access_pointer(node->child[1])) { node->flags |= LPM_TREE_NODE_FLAG_IM; goto out; } - /* trim should now point to the slot holding the start of a path from - * zero or more intermediate nodes to our leaf node for deletion. + /* If the parent of the node we are about to delete is an intermediate + * node, and the deleted node doesn't have any children, we can delete + * the intermediate parent as well and promote its other child + * up the tree. Doing this maintains the invariant that all + * intermediate nodes have exactly 2 children and that there are no + * unnecessary intermediate nodes in the tree. */ - while ((node = rcu_dereference_protected( - *trim, lockdep_is_held(&trie->lock)))) { - RCU_INIT_POINTER(*trim, NULL); - trim = rcu_access_pointer(node->child[0]) ? - &node->child[0] : - &node->child[1]; + if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) && + !node->child[0] && !node->child[1]) { + if (node == rcu_access_pointer(parent->child[0])) + rcu_assign_pointer( + *trim2, rcu_access_pointer(parent->child[1])); + else + rcu_assign_pointer( + *trim2, rcu_access_pointer(parent->child[0])); + kfree_rcu(parent, rcu); kfree_rcu(node, rcu); + goto out; } + /* The node we are removing has either zero or one child. If there + * is a child, move it into the removed node's slot then delete + * the node. Otherwise just clear the slot and delete the node. + */ + if (node->child[0]) + rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0])); + else if (node->child[1]) + rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1])); + else + RCU_INIT_POINTER(*trim, NULL); + kfree_rcu(node, rcu); + out: raw_spin_unlock_irqrestore(&trie->lock, irq_flags); From 1b17ca044a82342fb96529e1bdff6da7af90bc53 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 16:50:23 +0100 Subject: [PATCH 0266/2177] hv_netvsc: make const array ver_list static, reduces object code size Don't populate const array ver_list on the stack, instead make it static. 
Makes the object code smaller by over 400 bytes: Before: text data bss dec hex filename 18444 3168 320 21932 55ac drivers/net/hyperv/netvsc.o After: text data bss dec hex filename 17950 3224 320 21494 53f6 drivers/net/hyperv/netvsc.o (gcc 6.3.0, x86-64) Signed-off-by: Colin Ian King Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 8d5077fb0492..b0d323e24978 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -484,7 +484,7 @@ static int netvsc_connect_vsp(struct hv_device *device, struct netvsc_device *net_device, const struct netvsc_device_info *device_info) { - const u32 ver_list[] = { + static const u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; From e94cd8113ce63bca34040aae52d0603baf6ec07c Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Fri, 22 Sep 2017 23:57:49 +0800 Subject: [PATCH 0267/2177] net: remove MTU limits for dummy and ifb device These two drivers (dummy and ifb) call ether_setup(), after commit 61e84623ace3 ("net: centralize net_device min/max MTU checking"), the range of mtu is [min_mtu, max_mtu], which is [68, 1500] by default. These two devices should not have limits on MTU. This patch set their min_mtu/max_mtu to 0. So that dev_set_mtu() will not check the mtu range, and can be set with any value. CC: Eric Dumazet CC: Sabrina Dubroca Signed-off-by: Zhang Shengju Signed-off-by: David S. Miller --- drivers/net/dummy.c | 2 +- drivers/net/ifb.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index e31ab3b94c6f..58483af80bdb 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -353,7 +353,7 @@ static void dummy_setup(struct net_device *dev) eth_hw_addr_random(dev); dev->min_mtu = 0; - dev->max_mtu = ETH_MAX_MTU; + dev->max_mtu = 0; } static int dummy_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 8870bd2a2e8a..0008da7e9d4c 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -231,6 +231,9 @@ static void ifb_setup(struct net_device *dev) eth_hw_addr_random(dev); dev->needs_free_netdev = true; dev->priv_destructor = ifb_dev_free; + + dev->min_mtu = 0; + dev->max_mtu = 0; } static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) From 943170998b200190f99d3fe7e771437e2c51f319 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 22 Sep 2017 13:49:14 -0700 Subject: [PATCH 0268/2177] tun: enable NAPI for TUN/TAP driver Changes TUN driver to use napi_gro_receive() upon receiving packets rather than netif_rx_ni(). Adds flag IFF_NAPI that enables these changes and operation is not affected if the flag is disabled. SKBs are constructed upon packet arrival and are queued to be processed later. The new path was evaluated with a benchmark with the following setup: Open two tap devices and a receiver thread that reads in a loop for each device. Start one sender thread and pin all threads to different CPUs. Send 1M minimum UDP packets to each device and measure sending time for each of the sending methods: napi_gro_receive(): 4.90s netif_rx_ni(): 4.90s netif_receive_skb(): 7.20s Signed-off-by: Petar Penkov Cc: Eric Dumazet Cc: Mahesh Bandewar Cc: Willem de Bruijn Cc: davem@davemloft.net Cc: ppenkov@stanford.edu Acked-by: Mahesh Bandewar Signed-off-by: David S. 
Miller --- drivers/net/tun.c | 133 ++++++++++++++++++++++++++++++++---- include/uapi/linux/if_tun.h | 1 + 2 files changed, 119 insertions(+), 15 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3c9985f29950..f16407242b18 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -121,7 +121,7 @@ do { \ #define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ - IFF_MULTI_QUEUE) + IFF_MULTI_QUEUE | IFF_NAPI) #define GOODCOPY_LEN 128 #define FLT_EXACT_COUNT 8 @@ -172,6 +172,7 @@ struct tun_file { u16 queue_index; unsigned int ifindex; }; + struct napi_struct napi; struct list_head next; struct tun_struct *detached; struct skb_array tx_array; @@ -229,6 +230,68 @@ struct tun_struct { struct bpf_prog __rcu *xdp_prog; }; +static int tun_napi_receive(struct napi_struct *napi, int budget) +{ + struct tun_file *tfile = container_of(napi, struct tun_file, napi); + struct sk_buff_head *queue = &tfile->sk.sk_write_queue; + struct sk_buff_head process_queue; + struct sk_buff *skb; + int received = 0; + + __skb_queue_head_init(&process_queue); + + spin_lock(&queue->lock); + skb_queue_splice_tail_init(queue, &process_queue); + spin_unlock(&queue->lock); + + while (received < budget && (skb = __skb_dequeue(&process_queue))) { + napi_gro_receive(napi, skb); + ++received; + } + + if (!skb_queue_empty(&process_queue)) { + spin_lock(&queue->lock); + skb_queue_splice(&process_queue, queue); + spin_unlock(&queue->lock); + } + + return received; +} + +static int tun_napi_poll(struct napi_struct *napi, int budget) +{ + unsigned int received; + + received = tun_napi_receive(napi, budget); + + if (received < budget) + napi_complete_done(napi, received); + + return received; +} + +static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, + bool napi_en) +{ + if (napi_en) { + netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, + NAPI_POLL_WEIGHT); + napi_enable(&tfile->napi); + } +} + +static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile) +{ + if (tun->flags & IFF_NAPI) + napi_disable(&tfile->napi); +} + +static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) +{ + if (tun->flags & IFF_NAPI) + netif_napi_del(&tfile->napi); +} + #ifdef CONFIG_TUN_VNET_CROSS_LE static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) { @@ -541,6 +604,11 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); + if (tun && clean) { + tun_napi_disable(tun, tfile); + tun_napi_del(tun, tfile); + } + if (tun && !tfile->detached) { u16 index = tfile->queue_index; BUG_ON(index >= tun->numqueues); @@ -598,6 +666,7 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); + tun_napi_disable(tun, tfile); tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); @@ -613,6 +682,7 @@ static void tun_detach_all(struct net_device *dev) synchronize_net(); for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); + tun_napi_del(tun, tfile); /* Drop read queue */ tun_queue_purge(tfile); sock_put(&tfile->sk); @@ -631,7 +701,8 @@ static void tun_detach_all(struct net_device *dev) module_put(THIS_MODULE); } -static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) +static int tun_attach(struct tun_struct *tun, struct file *file, + bool skip_filter, bool napi) { struct tun_file *tfile = 
file->private_data; struct net_device *dev = tun->dev; @@ -677,10 +748,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; - if (tfile->detached) + if (tfile->detached) { tun_enable_queue(tfile); - else + } else { sock_hold(&tfile->sk); + tun_napi_init(tun, tfile, napi); + } tun_set_real_num_queues(tun); @@ -956,13 +1029,28 @@ static void tun_poll_controller(struct net_device *dev) * Tun only receives frames when: * 1) the char device endpoint gets data from user space * 2) the tun socket gets a sendmsg call from user space - * Since both of those are synchronous operations, we are guaranteed - * never to have pending data when we poll for it - * so there is nothing to do here but return. + * If NAPI is not enabled, since both of those are synchronous + * operations, we are guaranteed never to have pending data when we poll + * for it so there is nothing to do here but return. * We need this though so netpoll recognizes us as an interface that * supports polling, which enables bridge devices in virt setups to * still use netconsole + * If NAPI is enabled, however, we need to schedule polling for all + * queues. */ + struct tun_struct *tun = netdev_priv(dev); + + if (tun->flags & IFF_NAPI) { + struct tun_file *tfile; + int i; + + rcu_read_lock(); + for (i = 0; i < tun->numqueues; i++) { + tfile = rcu_dereference(tun->tfiles[i]); + napi_schedule(&tfile->napi); + } + rcu_read_unlock(); + } return; } #endif @@ -1549,11 +1637,25 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } rxhash = __skb_get_hash_symmetric(skb); -#ifndef CONFIG_4KSTACKS - tun_rx_batched(tun, tfile, skb, more); -#else - netif_rx_ni(skb); -#endif + + if (tun->flags & IFF_NAPI) { + struct sk_buff_head *queue = &tfile->sk.sk_write_queue; + int queue_len; + + spin_lock_bh(&queue->lock); + __skb_queue_tail(queue, skb); + queue_len = skb_queue_len(queue); + spin_unlock(&queue->lock); + + if (!more || queue_len > NAPI_POLL_WEIGHT) + napi_schedule(&tfile->napi); + + local_bh_enable(); + } else if (!IS_ENABLED(CONFIG_4KSTACKS)) { + tun_rx_batched(tun, tfile, skb, more); + } else { + netif_rx_ni(skb); + } stats = get_cpu_ptr(tun->pcpu_stats); u64_stats_update_begin(&stats->syncp); @@ -1980,7 +2082,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (err < 0) return err; - err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER); + err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, + ifr->ifr_flags & IFF_NAPI); if (err < 0) return err; @@ -2066,7 +2169,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) NETIF_F_HW_VLAN_STAG_TX); INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false); + err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI); if (err < 0) goto err_free_flow; @@ -2216,7 +2319,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) ret = security_tun_dev_attach_queue(tun->security); if (ret < 0) goto unlock; - ret = tun_attach(tun, file, false); + ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 3cb5e1d85ddd..30b6184884eb 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -60,6 +60,7 @@ /* TUNSETIFF ifr flags */ #define 
IFF_TUN 0x0001 #define IFF_TAP 0x0002 +#define IFF_NAPI 0x0010 #define IFF_NO_PI 0x1000 /* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 From 90e33d45940793def6f773b2d528e9f3c84ffdc7 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 22 Sep 2017 13:49:15 -0700 Subject: [PATCH 0269/2177] tun: enable napi_gro_frags() for TUN/TAP driver Add a TUN/TAP receive mode that exercises the napi_gro_frags() interface. This mode is available only in TAP mode, as the interface expects packets with Ethernet headers. Furthermore, packets follow the layout of the iovec_iter that was received. The first iovec is the linear data, and every one after the first is a fragment. If there are more fragments than the max number, drop the packet. Additionally, invoke eth_get_headlen() to exercise flow dissector code and to verify that the header resides in the linear data. The napi_gro_frags() mode requires setting the IFF_NAPI_FRAGS option. This is imposed because this mode is intended for testing via tools like syzkaller and packetdrill, and the increased flexibility it provides can introduce security vulnerabilities. This flag is accepted only if the device is in TAP mode and has the IFF_NAPI flag set as well. This is done because both of these are explicit requirements for correct operation in this mode. Signed-off-by: Petar Penkov Cc: Eric Dumazet Cc: Mahesh Bandewar Cc: Willem de Bruijn Cc: davem@davemloft.net Cc: ppenkov@stanford.edu Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/tun.c | 134 ++++++++++++++++++++++++++++++++++-- include/uapi/linux/if_tun.h | 1 + 2 files changed, 129 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f16407242b18..9880b3bc8fa5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -75,6 +75,7 @@ #include #include #include +#include #include @@ -121,7 +122,8 @@ do { \ #define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ - IFF_MULTI_QUEUE | IFF_NAPI) + IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) + #define GOODCOPY_LEN 128 #define FLT_EXACT_COUNT 8 @@ -173,6 +175,7 @@ struct tun_file { unsigned int ifindex; }; struct napi_struct napi; + struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; struct skb_array tx_array; @@ -277,6 +280,7 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, NAPI_POLL_WEIGHT); napi_enable(&tfile->napi); + mutex_init(&tfile->napi_mutex); } } @@ -292,6 +296,11 @@ static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) netif_napi_del(&tfile->napi); } +static bool tun_napi_frags_enabled(const struct tun_struct *tun) +{ + return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS; +} + #ifdef CONFIG_TUN_VNET_CROSS_LE static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) { @@ -1036,7 +1045,8 @@ static void tun_poll_controller(struct net_device *dev) * supports polling, which enables bridge devices in virt setups to * still use netconsole * If NAPI is enabled, however, we need to schedule polling for all - * queues. + * queues unless we are using napi_gro_frags(), which we call in + * process context and not in NAPI context. 
*/ struct tun_struct *tun = netdev_priv(dev); @@ -1044,6 +1054,9 @@ static void tun_poll_controller(struct net_device *dev) struct tun_file *tfile; int i; + if (tun_napi_frags_enabled(tun)) + return; + rcu_read_lock(); for (i = 0; i < tun->numqueues; i++) { tfile = rcu_dereference(tun->tfiles[i]); @@ -1266,6 +1279,64 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) return mask; } +static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, + size_t len, + const struct iov_iter *it) +{ + struct sk_buff *skb; + size_t linear; + int err; + int i; + + if (it->nr_segs > MAX_SKB_FRAGS + 1) + return ERR_PTR(-ENOMEM); + + local_bh_disable(); + skb = napi_get_frags(&tfile->napi); + local_bh_enable(); + if (!skb) + return ERR_PTR(-ENOMEM); + + linear = iov_iter_single_seg_count(it); + err = __skb_grow(skb, linear); + if (err) + goto free; + + skb->len = len; + skb->data_len = len - linear; + skb->truesize += skb->data_len; + + for (i = 1; i < it->nr_segs; i++) { + size_t fragsz = it->iov[i].iov_len; + unsigned long offset; + struct page *page; + void *data; + + if (fragsz == 0 || fragsz > PAGE_SIZE) { + err = -EINVAL; + goto free; + } + + local_bh_disable(); + data = napi_alloc_frag(fragsz); + local_bh_enable(); + if (!data) { + err = -ENOMEM; + goto free; + } + + page = virt_to_head_page(data); + offset = data - page_address(page); + skb_fill_page_desc(skb, i - 1, page, offset, fragsz); + } + + return skb; +free: + /* frees skb and all frags allocated with napi_alloc_frag() */ + napi_free_frags(&tfile->napi); + return ERR_PTR(err); +} + /* prepad is the amount to reserve at front. len is length after that. * linear is a hint as to how much to copy (usually headers). */ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, @@ -1478,6 +1549,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, int err; u32 rxhash; int skb_xdp = 1; + bool frags = tun_napi_frags_enabled(tun); if (!(tun->dev->flags & IFF_UP)) return -EIO; @@ -1535,7 +1607,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, zerocopy = true; } - if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) { + if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) { /* For the packet that is not easy to be processed * (e.g gso or jumbo packet), we will do it at after * skb was created with generic XDP routine. @@ -1556,10 +1628,24 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, linear = tun16_to_cpu(tun, gso.hdr_len); } - skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); + if (frags) { + mutex_lock(&tfile->napi_mutex); + skb = tun_napi_alloc_frags(tfile, copylen, from); + /* tun_napi_alloc_frags() enforces a layout for the skb. + * If zerocopy is enabled, then this layout will be + * overwritten by zerocopy_sg_from_iter(). 
+ */ + zerocopy = false; + } else { + skb = tun_alloc_skb(tfile, align, copylen, linear, + noblock); + } + if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) this_cpu_inc(tun->pcpu_stats->rx_dropped); + if (frags) + mutex_unlock(&tfile->napi_mutex); return PTR_ERR(skb); } @@ -1571,6 +1657,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (err) { this_cpu_inc(tun->pcpu_stats->rx_dropped); kfree_skb(skb); + if (frags) { + tfile->napi.skb = NULL; + mutex_unlock(&tfile->napi_mutex); + } + return -EFAULT; } } @@ -1578,6 +1669,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) { this_cpu_inc(tun->pcpu_stats->rx_frame_errors); kfree_skb(skb); + if (frags) { + tfile->napi.skb = NULL; + mutex_unlock(&tfile->napi_mutex); + } + return -EINVAL; } @@ -1603,7 +1699,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb->dev = tun->dev; break; case IFF_TAP: - skb->protocol = eth_type_trans(skb, tun->dev); + if (!frags) + skb->protocol = eth_type_trans(skb, tun->dev); break; } @@ -1638,7 +1735,23 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, rxhash = __skb_get_hash_symmetric(skb); - if (tun->flags & IFF_NAPI) { + if (frags) { + /* Exercise flow dissector code path. */ + u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); + + if (headlen > skb_headlen(skb) || headlen < ETH_HLEN) { + this_cpu_inc(tun->pcpu_stats->rx_dropped); + napi_free_frags(&tfile->napi); + mutex_unlock(&tfile->napi_mutex); + WARN_ON(1); + return -ENOMEM; + } + + local_bh_disable(); + napi_gro_frags(&tfile->napi); + local_bh_enable(); + mutex_unlock(&tfile->napi_mutex); + } else if (tun->flags & IFF_NAPI) { struct sk_buff_head *queue = &tfile->sk.sk_write_queue; int queue_len; @@ -2061,6 +2174,15 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (tfile->detached) return -EINVAL; + if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) { + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!(ifr->ifr_flags & IFF_NAPI) || + (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP) + return -EINVAL; + } + dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { if (ifr->ifr_flags & IFF_TUN_EXCL) diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 30b6184884eb..365ade5685c9 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -61,6 +61,7 @@ #define IFF_TUN 0x0001 #define IFF_TAP 0x0002 #define IFF_NAPI 0x0010 +#define IFF_NAPI_FRAGS 0x0020 #define IFF_NO_PI 0x1000 /* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 From 6450f8f269a9271985e4a8c13920b7e4cf21c0f3 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 22 Sep 2017 15:31:38 -0700 Subject: [PATCH 0270/2177] hv_netvsc: Fix the real number of queues of non-vRSS cases For older hosts without multi-channel (vRSS) support, and some error cases, we still need to set the real number of queues to one. This patch adds this missing setting. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Signed-off-by: Haiyang Zhang Reviewed-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/hyperv/netvsc_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index a32ae02e1b6c..e9d54c9ee78c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1935,6 +1935,12 @@ static int netvsc_probe(struct hv_device *dev, /* We always need headroom for rndis header */ net->needed_headroom = RNDIS_AND_PPI_SIZE; + /* Initialize the number of queues to be 1, we may change it if more + * channels are offered later. + */ + netif_set_real_num_tx_queues(net, 1); + netif_set_real_num_rx_queues(net, 1); + /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; From 6457edfe7344ac1da334b9f24a42aacea084a451 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 22 Sep 2017 19:01:55 -0400 Subject: [PATCH 0271/2177] net: dsa: make slave close symmetrical to open The DSA slave open function configures the unicast MAC addresses on the master device, enable the switch port, change its STP state, then start the PHY device. Make the close function symmetric, by first stopping the PHY device, then changing the STP state, disabling the switch port and restore the master device. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 02ace7d462c4..c2bb48579032 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -133,6 +133,11 @@ static int dsa_slave_close(struct net_device *dev) if (p->phy) phy_stop(p->phy); + dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); + + if (ds->ops->port_disable) + ds->ops->port_disable(ds, p->dp->index, p->phy); + dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); if (dev->flags & IFF_ALLMULTI) @@ -143,11 +148,6 @@ static int dsa_slave_close(struct net_device *dev) if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); - if (ds->ops->port_disable) - ds->ops->port_disable(ds, p->dp->index, p->phy); - - dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); - return 0; } From fb8a6a2b8b7cfee8a0cf2cd431e1d0f45dd1c9e0 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 22 Sep 2017 19:01:56 -0400 Subject: [PATCH 0272/2177] net: dsa: add port enable and disable helpers Provide dsa_port_enable and dsa_port_disable helpers to respectively enable and disable a switch port. This makes the dsa_port_set_state_now helper static. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/dsa_priv.h | 3 ++- net/dsa/port.c | 31 ++++++++++++++++++++++++++++++- net/dsa/slave.c | 19 +++++-------------- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 9803952a5b40..0298a0f6a349 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -117,7 +117,8 @@ void dsa_master_ethtool_restore(struct net_device *dev); /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); -void dsa_port_set_state_now(struct dsa_port *dp, u8 state); +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); +void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, diff --git a/net/dsa/port.c b/net/dsa/port.c index 76d43a82d397..72c8dbd3d3f2 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -56,7 +56,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, return 0; } -void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) { int err; @@ -65,6 +65,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) +{ + u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING; + struct dsa_switch *ds = dp->ds; + int port = dp->index; + int err; + + if (ds->ops->port_enable) { + err = ds->ops->port_enable(ds, port, phy); + if (err) + return err; + } + + dsa_port_set_state_now(dp, stp_state); + + return 0; +} + +void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + dsa_port_set_state_now(dp, BR_STATE_DISABLED); + + if (ds->ops->port_disable) + ds->ops->port_disable(ds, port, phy); +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index c2bb48579032..bd51ef56ec5b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -73,9 +73,7 @@ static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_port *dp = p->dp; - struct dsa_switch *ds = dp->ds; struct net_device *master = dsa_master_netdev(p); - u8 stp_state = dp->bridge_dev ? 
BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; if (!(master->flags & IFF_UP)) @@ -98,13 +96,9 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - if (ds->ops->port_enable) { - err = ds->ops->port_enable(ds, p->dp->index, p->phy); - if (err) - goto clear_promisc; - } - - dsa_port_set_state_now(p->dp, stp_state); + err = dsa_port_enable(dp, p->phy); + if (err) + goto clear_promisc; if (p->phy) phy_start(p->phy); @@ -128,15 +122,12 @@ static int dsa_slave_close(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = dsa_master_netdev(p); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = p->dp; if (p->phy) phy_stop(p->phy); - dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); - - if (ds->ops->port_disable) - ds->ops->port_disable(ds, p->dp->index, p->phy); + dsa_port_disable(dp, p->phy); dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); From 088b8749da1e35b0dd9cb0e6500ca1c94c9bf547 Mon Sep 17 00:00:00 2001 From: Rick Farrington Date: Fri, 22 Sep 2017 17:12:43 -0700 Subject: [PATCH 0273/2177] liquidio: allow override of firmware present in flash Signed-off-by: Rick Farrington Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 68 ++++++++++++------- .../ethernet/cavium/liquidio/liquidio_image.h | 1 + .../ethernet/cavium/liquidio/octeon_device.c | 11 ++- .../ethernet/cavium/liquidio/octeon_device.h | 10 +++ 4 files changed, 64 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index e7f54948173f..ce08f710de0b 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -59,9 +59,9 @@ static int debug = -1; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "NETIF_MSG debug bits"); -static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_NIC; +static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_AUTO; module_param_string(fw_type, fw_type, sizeof(fw_type), 0444); -MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\". Use \"none\" to load firmware from flash."); +MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded (default is \"auto\"), which uses firmware in flash, if present, else loads \"nic\"."); static u32 console_bitmask; module_param(console_bitmask, int, 0644); @@ -1115,10 +1115,10 @@ liquidio_probe(struct pci_dev *pdev, return 0; } -static bool fw_type_is_none(void) +static bool fw_type_is_auto(void) { - return strncmp(fw_type, LIO_FW_NAME_TYPE_NONE, - sizeof(LIO_FW_NAME_TYPE_NONE)) == 0; + return strncmp(fw_type, LIO_FW_NAME_TYPE_AUTO, + sizeof(LIO_FW_NAME_TYPE_AUTO)) == 0; } /** @@ -1302,7 +1302,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) * Implementation note: only soft-reset the device * if it is a CN6XXX OR the LAST CN23XX device. 
*/ - if (fw_type_is_none()) + if (atomic_read(oct->adapter_fw_state) == FW_IS_PRELOADED) octeon_pci_flr(oct); else if (OCTEON_CN6XXX(oct) || !refcount) oct->fn_list.soft_reset(oct); @@ -1934,7 +1934,7 @@ static int load_firmware(struct octeon_device *oct) char fw_name[LIO_MAX_FW_FILENAME_LEN]; char *tmp_fw_type; - if (fw_type[0] == '\0') + if (fw_type_is_auto()) tmp_fw_type = LIO_FW_NAME_TYPE_NIC; else tmp_fw_type = fw_type; @@ -3882,9 +3882,9 @@ octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf) static int octeon_device_init(struct octeon_device *octeon_dev) { int j, ret; - int fw_loaded = 0; char bootcmd[] = "\n"; char *dbg_enb = NULL; + enum lio_fw_state fw_state; struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)octeon_dev->priv; atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE); @@ -3916,24 +3916,40 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_dev->app_mode = CVM_DRV_INVALID_APP; - if (OCTEON_CN23XX_PF(octeon_dev)) { - if (!cn23xx_fw_loaded(octeon_dev) && !fw_type_is_none()) { - fw_loaded = 0; - /* Do a soft reset of the Octeon device. */ - if (octeon_dev->fn_list.soft_reset(octeon_dev)) - return 1; - /* things might have changed */ - if (!cn23xx_fw_loaded(octeon_dev)) - fw_loaded = 0; - else - fw_loaded = 1; - } else { - fw_loaded = 1; - } - } else if (octeon_dev->fn_list.soft_reset(octeon_dev)) { - return 1; + /* CN23XX supports preloaded firmware if the following is true: + * + * The adapter indicates that firmware is currently running AND + * 'fw_type' is 'auto'. + * + * (default state is NEEDS_TO_BE_LOADED, override it if appropriate). + */ + if (OCTEON_CN23XX_PF(octeon_dev) && + cn23xx_fw_loaded(octeon_dev) && fw_type_is_auto()) { + atomic_cmpxchg(octeon_dev->adapter_fw_state, + FW_NEEDS_TO_BE_LOADED, FW_IS_PRELOADED); } + /* If loading firmware, only first device of adapter needs to do so. */ + fw_state = atomic_cmpxchg(octeon_dev->adapter_fw_state, + FW_NEEDS_TO_BE_LOADED, + FW_IS_BEING_LOADED); + + /* Here, [local variable] 'fw_state' is set to one of: + * + * FW_IS_PRELOADED: No firmware is to be loaded (see above) + * FW_NEEDS_TO_BE_LOADED: The driver's first instance will load + * firmware to the adapter. + * FW_IS_BEING_LOADED: The driver's second instance will not load + * firmware to the adapter. + */ + + /* Prior to f/w load, perform a soft reset of the Octeon device; + * if error resetting, return w/error. + */ + if (fw_state == FW_NEEDS_TO_BE_LOADED) + if (octeon_dev->fn_list.soft_reset(octeon_dev)) + return 1; + /* Initialize the dispatch mechanism used to push packets arriving on * Octeon Output queues. 
*/ @@ -4063,7 +4079,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev) atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE); - if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) { + if (fw_state == FW_NEEDS_TO_BE_LOADED) { dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n"); if (!ddr_timeout) { dev_info(&octeon_dev->pci_dev->dev, @@ -4125,6 +4141,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev) dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n"); return 1; } + + atomic_set(octeon_dev->adapter_fw_state, FW_HAS_BEEN_LOADED); } handshake[octeon_dev->octeon_id].init_ok = 1; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h index 78a3685f6fe0..5bf5e8791dfb 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h @@ -24,6 +24,7 @@ #define LIO_FW_BASE_NAME "lio_" #define LIO_FW_NAME_SUFFIX ".bin" #define LIO_FW_NAME_TYPE_NIC "nic" +#define LIO_FW_NAME_TYPE_AUTO "auto" #define LIO_FW_NAME_TYPE_NONE "none" #define LIO_MAX_FIRMWARE_VERSION_LEN 16 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 29d53b1763a7..e4aa3395a578 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -541,6 +541,7 @@ static char oct_dev_app_str[CVM_DRV_APP_COUNT + 1][32] = { static struct octeon_device *octeon_device[MAX_OCTEON_DEVICES]; static atomic_t adapter_refcounts[MAX_OCTEON_DEVICES]; +static atomic_t adapter_fw_states[MAX_OCTEON_DEVICES]; static u32 octeon_device_count; /* locks device array (i.e. octeon_device[]) */ @@ -770,6 +771,10 @@ int octeon_register_device(struct octeon_device *oct, oct->adapter_refcount = &adapter_refcounts[oct->octeon_id]; atomic_set(oct->adapter_refcount, 0); + /* Like the reference count, the f/w state is shared 'per-adapter' */ + oct->adapter_fw_state = &adapter_fw_states[oct->octeon_id]; + atomic_set(oct->adapter_fw_state, FW_NEEDS_TO_BE_LOADED); + spin_lock(&octeon_devices_lock); for (idx = (int)oct->octeon_id - 1; idx >= 0; idx--) { if (!octeon_device[idx]) { @@ -780,11 +785,15 @@ int octeon_register_device(struct octeon_device *oct, atomic_inc(oct->adapter_refcount); return 1; /* here, refcount is guaranteed to be 1 */ } - /* if another device is at same bus/dev, use its refcounter */ + /* If another device is at same bus/dev, use its refcounter + * (and f/w state variable). 
+ */ if ((octeon_device[idx]->loc.bus == bus) && (octeon_device[idx]->loc.dev == dev)) { oct->adapter_refcount = octeon_device[idx]->adapter_refcount; + oct->adapter_fw_state = + octeon_device[idx]->adapter_fw_state; break; } } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 894af199ddef..33d19c4509bc 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -50,6 +50,13 @@ enum octeon_pci_swap_mode { OCTEON_PCI_32BIT_LW_SWAP = 3 }; +enum lio_fw_state { + FW_IS_PRELOADED = 0, + FW_NEEDS_TO_BE_LOADED = 1, + FW_IS_BEING_LOADED = 2, + FW_HAS_BEEN_LOADED = 3, +}; + enum { OCTEON_CONFIG_TYPE_DEFAULT = 0, NUM_OCTEON_CONFS, @@ -557,6 +564,9 @@ struct octeon_device { } loc; atomic_t *adapter_refcount; /* reference count of adapter */ + + atomic_t *adapter_fw_state; /* per-adapter, lio_fw_state */ + bool ptp_enable; }; From b36e48209157fdd98a5589a3dd60ff3fbf51e16d Mon Sep 17 00:00:00 2001 From: Rick Farrington Date: Fri, 22 Sep 2017 17:12:47 -0700 Subject: [PATCH 0274/2177] liquidio: verify firmware version when auto-loaded from flash. Signed-off-by: Rick Farrington Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index ce08f710de0b..a3c9867c0340 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3303,7 +3303,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) { struct lio *lio = NULL; struct net_device *netdev; - u8 mac[6], i, j; + u8 mac[6], i, j, *fw_ver; struct octeon_soft_command *sc; struct liquidio_if_cfg_context *ctx; struct liquidio_if_cfg_resp *resp; @@ -3414,6 +3414,22 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) goto setup_nic_dev_fail; } + /* Verify f/w version (in case of 'auto' loading from flash) */ + fw_ver = octeon_dev->fw_info.liquidio_firmware_version; + if (memcmp(LIQUIDIO_BASE_VERSION, + fw_ver, + strlen(LIQUIDIO_BASE_VERSION))) { + dev_err(&octeon_dev->pci_dev->dev, + "Unmatched firmware version. Expected %s.x, got %s.\n", + LIQUIDIO_BASE_VERSION, fw_ver); + goto setup_nic_dev_fail; + } else if (atomic_read(octeon_dev->adapter_fw_state) == + FW_IS_PRELOADED) { + dev_info(&octeon_dev->pci_dev->dev, + "Using auto-loaded firmware version %s.\n", + fw_ver); + } + octeon_swap_8B_data((u64 *)(&resp->cfg_info), (sizeof(struct liquidio_if_cfg_info)) >> 3); From 429cbf6bde1adff108171ad4b2387e62f851d609 Mon Sep 17 00:00:00 2001 From: Rick Farrington Date: Fri, 22 Sep 2017 17:12:51 -0700 Subject: [PATCH 0275/2177] liquidio: update module parameter fw_type to reflect firmware type loaded Signed-off-by: Rick Farrington Signed-off-by: Felix Manlunas Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a3c9867c0340..963803bc6633 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1934,10 +1934,12 @@ static int load_firmware(struct octeon_device *oct) char fw_name[LIO_MAX_FW_FILENAME_LEN]; char *tmp_fw_type; - if (fw_type_is_auto()) + if (fw_type_is_auto()) { tmp_fw_type = LIO_FW_NAME_TYPE_NIC; - else + strncpy(fw_type, tmp_fw_type, sizeof(fw_type)); + } else { tmp_fw_type = fw_type; + } sprintf(fw_name, "%s%s%s_%s%s", LIO_FW_DIR, LIO_FW_BASE_NAME, octeon_get_conf(oct)->card_name, tmp_fw_type, From ba581f77df23c8ee70b372966e69cf10bc5453d8 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Sat, 23 Sep 2017 16:07:28 +0530 Subject: [PATCH 0276/2177] cxgb4: do DCB state reset in couple of places reset the driver's DCB state in couple of places where it was missing. Signed-off-by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 15 +++++++++++---- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 10 ++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 6ee2ed30626b..4e7f72b17e82 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -40,8 +40,7 @@ static inline bool cxgb4_dcb_state_synced(enum cxgb4_dcb_state state) return false; } -/* Initialize a port's Data Center Bridging state. Typically used after a - * Link Down event. +/* Initialize a port's Data Center Bridging state. */ void cxgb4_dcb_state_init(struct net_device *dev) { @@ -106,6 +105,15 @@ static void cxgb4_dcb_cleanup_apps(struct net_device *dev) } } +/* Reset a port's Data Center Bridging state. Typically used after a + * Link Down event. + */ +void cxgb4_dcb_reset(struct net_device *dev) +{ + cxgb4_dcb_cleanup_apps(dev); + cxgb4_dcb_state_init(dev); +} + /* Finite State machine for Data Center Bridging. */ void cxgb4_dcb_state_fsm(struct net_device *dev, @@ -194,8 +202,7 @@ void cxgb4_dcb_state_fsm(struct net_device *dev, * state. We need to reset back to a ground state * of incomplete. 
*/ - cxgb4_dcb_cleanup_apps(dev); - cxgb4_dcb_state_init(dev); + cxgb4_dcb_reset(dev); dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE; dcb->supported = CXGB4_DCBX_FW_SUPPORT; linkwatch_fire_event(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h index ccf24d3dc982..02040b99c78a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h @@ -131,6 +131,7 @@ struct port_dcb_info { void cxgb4_dcb_state_init(struct net_device *); void cxgb4_dcb_version_init(struct net_device *); +void cxgb4_dcb_reset(struct net_device *dev); void cxgb4_dcb_state_fsm(struct net_device *, enum cxgb4_dcb_state_input); void cxgb4_dcb_handle_fw_update(struct adapter *, const struct fw_port_cmd *); void cxgb4_dcb_set_caps(struct adapter *, const struct fw_port_cmd *); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index aa93ae95d3b9..13b636b0af5f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -281,7 +281,7 @@ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat) else { #ifdef CONFIG_CHELSIO_T4_DCB if (cxgb4_dcb_enabled(dev)) { - cxgb4_dcb_state_init(dev); + cxgb4_dcb_reset(dev); dcb_tx_queue_prio_enable(dev, false); } #endif /* CONFIG_CHELSIO_T4_DCB */ @@ -2304,10 +2304,16 @@ static int cxgb_close(struct net_device *dev) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; + int ret; netif_tx_stop_all_queues(dev); netif_carrier_off(dev); - return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); + ret = t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); +#ifdef CONFIG_CHELSIO_T4_DCB + cxgb4_dcb_reset(dev); + dcb_tx_queue_prio_enable(dev, false); +#endif + return ret; } int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, From 9484dc74fcf0750cd6726c9aa27edf97223916a8 Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Sat, 23 Sep 2017 22:36:52 +0800 Subject: [PATCH 0277/2177] tun: delete original tun_get() and rename __tun_get() to tun_get() it seems no need to keep tun_get() and __tun_get() at same time. Signed-off-by: yuan linyu Signed-off-by: David S. 
Miller --- drivers/net/tun.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9880b3bc8fa5..2c36f6ebad79 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -774,7 +774,7 @@ out: return err; } -static struct tun_struct *__tun_get(struct tun_file *tfile) +static struct tun_struct *tun_get(struct tun_file *tfile) { struct tun_struct *tun; @@ -787,11 +787,6 @@ static struct tun_struct *__tun_get(struct tun_file *tfile) return tun; } -static struct tun_struct *tun_get(struct file *file) -{ - return __tun_get(file->private_data); -} - static void tun_put(struct tun_struct *tun) { dev_put(tun->dev); @@ -1250,7 +1245,7 @@ static void tun_net_init(struct net_device *dev) static unsigned int tun_chr_poll(struct file *file, poll_table *wait) { struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); + struct tun_struct *tun = tun_get(tfile); struct sock *sk; unsigned int mask = 0; @@ -1784,8 +1779,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct tun_struct *tun = tun_get(file); struct tun_file *tfile = file->private_data; + struct tun_struct *tun = tun_get(tfile); ssize_t result; if (!tun) @@ -1969,7 +1964,7 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); + struct tun_struct *tun = tun_get(tfile); ssize_t len = iov_iter_count(to), ret; if (!tun) @@ -2046,7 +2041,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { int ret; struct tun_file *tfile = container_of(sock, struct tun_file, socket); - struct tun_struct *tun = __tun_get(tfile); + struct tun_struct *tun = tun_get(tfile); if (!tun) return -EBADFD; @@ -2062,7 +2057,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); - struct tun_struct *tun = __tun_get(tfile); + struct tun_struct *tun = tun_get(tfile); int ret; if (!tun) @@ -2094,7 +2089,7 @@ static int tun_peek_len(struct socket *sock) struct tun_struct *tun; int ret = 0; - tun = __tun_get(tfile); + tun = tun_get(tfile); if (!tun) return 0; @@ -2490,7 +2485,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = 0; rtnl_lock(); - tun = __tun_get(tfile); + tun = tun_get(tfile); if (cmd == TUNSETIFF) { ret = -EEXIST; if (tun) @@ -2837,15 +2832,16 @@ static int tun_chr_close(struct inode *inode, struct file *file) } #ifdef CONFIG_PROC_FS -static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f) +static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file) { + struct tun_file *tfile = file->private_data; struct tun_struct *tun; struct ifreq ifr; memset(&ifr, 0, sizeof(ifr)); rtnl_lock(); - tun = tun_get(f); + tun = tun_get(tfile); if (tun) tun_get_iff(current->nsproxy->net_ns, tun, &ifr); rtnl_unlock(); From 3aa605f28b0d004a640a826380b39c7dcf70195d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Sep 2017 11:07:28 -0700 Subject: [PATCH 0278/2177] sch_netem: faster rb tree removal While running TCP tests involving netem storing millions of packets, I had the idea to speed up tfifo_reset() and did experiments. 
I tried the rbtree_postorder_for_each_entry_safe() method that is used in skb_rbtree_purge() but discovered it was slower than the current tfifo_reset() method. I measured time taken to release skbs with three occupation levels : 10^4, 10^5 and 10^6 skbs with three methods : 1) (current 'naive' method) while ((p = rb_first(&q->t_root))) { struct sk_buff *skb = netem_rb_to_skb(p); rb_erase(p, &q->t_root); rtnl_kfree_skbs(skb, skb); } 2) Use rb_next() instead of rb_first() in the loop : p = rb_first(&q->t_root); while (p) { struct sk_buff *skb = netem_rb_to_skb(p); p = rb_next(p); rb_erase(&skb->rbnode, &q->t_root); rtnl_kfree_skbs(skb, skb); } 3) "optimized" method using rbtree_postorder_for_each_entry_safe() struct sk_buff *skb, *next; rbtree_postorder_for_each_entry_safe(skb, next, &q->t_root, rbnode) { rtnl_kfree_skbs(skb, skb); } q->t_root = RB_ROOT; Results : method_1:while (rb_first()) rb_erase() 10000 skbs in 690378 ns (69 ns per skb) method_2:rb_first; while (p) { p = rb_next(p); ...} 10000 skbs in 541846 ns (54 ns per skb) method_3:rbtree_postorder_for_each_entry_safe() 10000 skbs in 868307 ns (86 ns per skb) method_1:while (rb_first()) rb_erase() 99996 skbs in 7804021 ns (78 ns per skb) method_2:rb_first; while (p) { p = rb_next(p); ...} 100000 skbs in 5942456 ns (59 ns per skb) method_3:rbtree_postorder_for_each_entry_safe() 100000 skbs in 11584940 ns (115 ns per skb) method_1:while (rb_first()) rb_erase() 1000000 skbs in 108577838 ns (108 ns per skb) method_2:rb_first; while (p) { p = rb_next(p); ...} 1000000 skbs in 82619635 ns (82 ns per skb) method_3:rbtree_postorder_for_each_entry_safe() 1000000 skbs in 127328743 ns (127 ns per skb) Method 2) is simply faster, probably because it maintains a smaller working size set. Note that this is the method we use in tcp_ofo_queue() already. I will also change skb_rbtree_purge() in a second patch. Signed-off-by: Eric Dumazet Acked-by: David Ahern Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 063a4bdb9ee6..5a4f10080290 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -361,12 +361,13 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche static void tfifo_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - struct rb_node *p; + struct rb_node *p = rb_first(&q->t_root); - while ((p = rb_first(&q->t_root))) { + while (p) { struct sk_buff *skb = netem_rb_to_skb(p); - rb_erase(p, &q->t_root); + p = rb_next(p); + rb_erase(&skb->rbnode, &q->t_root); rtnl_kfree_skbs(skb, skb); } } From 7c90584c66cc4b033a3b684b0e0950f79e7b7166 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Sep 2017 12:39:12 -0700 Subject: [PATCH 0279/2177] net: speed up skb_rbtree_purge() As measured in my prior patch ("sch_netem: faster rb tree removal"), rbtree_postorder_for_each_entry_safe() is nice looking but much slower than using rb_next() directly, except when tree is small enough to fit in CPU caches (then the cost is the same) Also note that there is not even an increase of text size : $ size net/core/skbuff.o.before net/core/skbuff.o text data bss dec hex filename 40711 1298 0 42009 a419 net/core/skbuff.o.before 40711 1298 0 42009 a419 net/core/skbuff.o From: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 16982de649b9..000ce735fa8d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2848,12 +2848,15 @@ EXPORT_SYMBOL(skb_queue_purge); */ void skb_rbtree_purge(struct rb_root *root) { - struct sk_buff *skb, *next; + struct rb_node *p = rb_first(root); - rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); + + p = rb_next(p); + rb_erase(&skb->rbnode, root); kfree_skb(skb); - - *root = RB_ROOT; + } } /** From e451ae8e4f6b3f6bd3b83a5595657b5421b3bf69 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Sep 2017 23:01:06 +0300 Subject: [PATCH 0280/2177] neigh: make struct neigh_table::entry_size unsigned int Neigh entry size can't be negative. Space savings: add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-7 (-7) function old new delta lowpan_neigh_construct 25 24 -1 clip_seq_sub_iter 152 151 -1 clip_ioctl 1475 1474 -1 clip_constructor 93 92 -1 __neigh_create 2455 2452 -3 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9816df225af3..9a25512e0a6e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -190,7 +190,7 @@ struct neigh_hash_table { struct neigh_table { int family; - int entry_size; + unsigned int entry_size; int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, From 01ccdf126ca5f9d4fe0889f65ee67afac910f19c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Sep 2017 23:03:04 +0300 Subject: [PATCH 0281/2177] neigh: make struct neigh_table::key_len unsigned int Key length can't be negative. Leave comparisons against nla_len() signed just in case truncated attribute can sneak in there. Space savings: add/remove: 0/0 grow/shrink: 0/7 up/down: 0/-7 (-7) function old new delta pneigh_delete 273 272 -1 mlx5e_rep_netevent_event 1415 1414 -1 mlx5e_create_encap_header_ipv6 1194 1193 -1 mlx5e_create_encap_header_ipv4 1071 1070 -1 cxgb4_l2t_get 1104 1103 -1 __pneigh_lookup 69 68 -1 __neigh_create 2452 2451 -1 Signed-off-by: Alexey Dobriyan Signed-off-by: David S.
Miller --- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 4 ++-- include/net/neighbour.h | 2 +- net/core/neighbour.c | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index f7ef8871dd0b..1817a0307d26 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -422,7 +422,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, u8 lport; u16 vlan; struct l2t_entry *e; - int addr_len = neigh->tbl->key_len; + unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *)neigh->primary_key; int ifidx = neigh->dev->ifindex; int hash = addr_hash(d, addr, addr_len, ifidx); @@ -536,7 +536,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) struct l2t_entry *e; struct sk_buff_head *arpq = NULL; struct l2t_data *d = adap->l2t; - int addr_len = neigh->tbl->key_len; + unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *) neigh->primary_key; int ifidx = neigh->dev->ifindex; int hash = addr_hash(d, addr, addr_len, ifidx); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9a25512e0a6e..2492000e1035 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -191,7 +191,7 @@ struct neigh_hash_table { struct neigh_table { int family; unsigned int entry_size; - int key_len; + unsigned int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, const struct net_device *dev, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 16a1a4c4eb57..6ea3a1a7f36a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey) { struct neighbour *n; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val; struct neigh_hash_table *nht; @@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { u32 hash_val; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; int error; struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); struct neigh_hash_table *nht; @@ -572,7 +572,7 @@ out_neigh_release: } EXPORT_SYMBOL(__neigh_create); -static u32 pneigh_hash(const void *pkey, int key_len) +static u32 pneigh_hash(const void *pkey, unsigned int key_len) { u32 hash_val = *(u32 *)(pkey + key_len - 4); hash_val ^= (hash_val >> 16); @@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len) static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct net *net, const void *pkey, - int key_len, + unsigned int key_len, struct net_device *dev) { while (n) { @@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); return __pneigh_lookup_1(tbl->phash_buckets[hash_val], @@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net_device *dev, int creat) { struct pneigh_entry *n; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); read_lock_bh(&tbl->lock); @@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { 
struct pneigh_entry *n, **np; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); write_lock_bh(&tbl->lock); @@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, if (tbl == NULL) return -EAFNOSUPPORT; - if (nla_len(dst_attr) < tbl->key_len) + if (nla_len(dst_attr) < (int)tbl->key_len) goto out; if (ndm->ndm_flags & NTF_PROXY) { @@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tbl == NULL) return -EAFNOSUPPORT; - if (nla_len(tb[NDA_DST]) < tbl->key_len) + if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) goto out; dst = nla_data(tb[NDA_DST]); lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; From 763556980dde78166886734ae470664c67067dd4 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 10:44:57 +0200 Subject: [PATCH 0282/2177] ieee802154: atusb: fix firmware version check to enable frame retries Geert reported: as fw_ver_maj is unsigned char, gcc 4.1.2 complains: warning: comparison is always true due to limited range of data type Besides the warning the old check would also fail for firmware versions like 1.x with x < 3. These would support frame retries, but the driver would not enable the feature. Reported-by: Geert Uytterhoeven Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 115fa3f37a86..6b3cdf4bd667 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -1060,7 +1060,7 @@ static int atusb_probe(struct usb_interface *interface, atusb_get_and_show_build(atusb); atusb_set_extended_addr(atusb); - if (atusb->fw_ver_maj >= 0 && atusb->fw_ver_min >= 3) + if ((atusb->fw_ver_maj == 0 && atusb->fw_ver_min >= 3) || atusb->fw_ver_maj > 0) hw->flags |= IEEE802154_HW_FRAME_RETRIES; ret = atusb_get_and_clear_error(atusb); From 06ff5dad46feee9ebc0b9f14f0ae3fe3d232392e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 26 Sep 2017 14:18:36 +0100 Subject: [PATCH 0283/2177] ieee802154: atusb: make two structures static, fixes warnings The arrays atusb_chip_data and hulusb_chip_data are local to the source and do not need to be in global scope, so make them static. Also remove unnecessary forward declaration of atusb_chip_data. Cleans up sparse warnings: symbol 'atusb_chip_data' was not declared. Should it be static? symbol 'hulusb_chip_data' was not declared. Should it be static? 
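The firmware version check fixed in the previous atusb patch is a classic unsigned-comparison pitfall: fw_ver_maj is a u8, so "fw_ver_maj >= 0" is always true and the old condition silently degenerated into "fw_ver_min >= 3", rejecting firmware such as 1.2 even though anything from 0.3 onwards supports frame retries. Below is a minimal user-space sketch of the same mistake and of the corrected ordered comparison; the function names and the standalone framing are illustrative only, not taken from the driver.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Frame retries are available from firmware version 0.3 onwards. */
static bool retries_supported_buggy(uint8_t maj, uint8_t min)
{
        /* maj >= 0 is always true for an unsigned type, so this really
         * only tests min >= 3 and wrongly rejects versions such as 1.2.
         */
        return maj >= 0 && min >= 3;
}

static bool retries_supported_fixed(uint8_t maj, uint8_t min)
{
        /* Ordered comparison: any major > 0, or major 0 with minor >= 3. */
        return (maj == 0 && min >= 3) || maj > 0;
}

int main(void)
{
        printf("fw 1.2: buggy=%d fixed=%d\n",
               retries_supported_buggy(1, 2),
               retries_supported_fixed(1, 2));
        return 0;
}

Built with -Wextra, the buggy variant should reproduce the "comparison is always true due to limited range of data type" warning quoted in the report.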
Signed-off-by: Colin Ian King Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 6b3cdf4bd667..d5584063049f 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -45,8 +45,6 @@ #define ATUSB_ALLOC_DELAY_MS 100 /* delay after failed allocation */ #define ATUSB_TX_TIMEOUT_MS 200 /* on the air timeout */ -struct atusb_chip_data; - struct atusb { struct ieee802154_hw *hw; struct usb_device *usb_dev; @@ -767,14 +765,14 @@ atusb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) return 0; } -struct atusb_chip_data atusb_chip_data = { +static struct atusb_chip_data atusb_chip_data = { .t_channel_switch = 1, .rssi_base_val = -91, .set_txpower = atusb_set_txpower, .set_channel = atusb_set_channel, }; -struct atusb_chip_data hulusb_chip_data = { +static struct atusb_chip_data hulusb_chip_data = { .t_channel_switch = 11, .rssi_base_val = -100, .set_txpower = hulusb_set_txpower, From 1f4cf93b133ba6596ae69cfd09b48aa7b47cca41 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 11:04:23 +0200 Subject: [PATCH 0284/2177] net: ena: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 52beba8c7a39..ded29af648c9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -315,7 +315,7 @@ static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue cmd_size_in_bytes, comp, comp_size_in_bytes); - if (unlikely(IS_ERR(comp_ctx))) + if (IS_ERR(comp_ctx)) admin_queue->running_state = false; spin_unlock_irqrestore(&admin_queue->q_lock, flags); @@ -1130,7 +1130,7 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size, comp, comp_size); - if (unlikely(IS_ERR(comp_ctx))) { + if (IS_ERR(comp_ctx)) { if (comp_ctx == ERR_PTR(-ENODEV)) pr_debug("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx)); From 98e4fcff3e755c6c3de2d4f63c080b4009a599bd Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 11:21:42 +0200 Subject: [PATCH 0285/2177] datagram: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index f7fb7e3f2acf..0b7b4c22719e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, } if (!skb->len) { skb = skb_set_peeked(skb); - if (unlikely(IS_ERR(skb))) { + if (IS_ERR(skb)) { *err = PTR_ERR(skb); return NULL; } From 63a4e80be4f523dbde6412decfa8244ff73cc4b9 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 11:22:31 +0200 Subject: [PATCH 0286/2177] ipv6: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 96861c702c06..13c3b697f8c0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3297,7 +3297,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev, struct rt6_info *rt, *prev; rt = addrconf_dst_alloc(idev, &ifp->addr, false); - if (unlikely(IS_ERR(rt))) + if (IS_ERR(rt)) return PTR_ERR(rt); /* ifp->rt can be accessed outside of rtnl */ From d9db5e3680c87fdbdf86fcb1c42caea4bf98680c Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 11:22:58 +0200 Subject: [PATCH 0287/2177] kcm: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index af4e76ac88ff..0b750a22c4b9 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1650,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info, } newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); - if (unlikely(IS_ERR(newfile))) { + if (IS_ERR(newfile)) { err = PTR_ERR(newfile); goto out_sock_alloc_fail; } From 1fac4b2fdbccab69cb781aae68f540be94d5549e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 15:12:26 +0200 Subject: [PATCH 0288/2177] bnxt_en: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c25f5b555adf..5ba49938ba55 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1491,7 +1491,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, (struct rx_tpa_end_cmp *)rxcmp, (struct rx_tpa_end_cmp_ext *)rxcmp1, event); - if (unlikely(IS_ERR(skb))) + if (IS_ERR(skb)) return -EBUSY; rc = -ENOMEM; From 92978ee801844b16180f2168ffffd05647da551a Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 15:13:23 +0200 Subject: [PATCH 0289/2177] net/mlx5: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 4614ddfa91eb..6a7c8b04447e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -256,7 +256,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, goto drop; } mdata = mlx5e_ipsec_add_metadata(skb); - if (unlikely(IS_ERR(mdata))) { + if (IS_ERR(mdata)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); goto drop; } From 2091c227fa855776aafffad7ecd25ac0734df1a0 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 15:14:15 +0200 Subject: [PATCH 0290/2177] ldmvsw: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sun/ldmvsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 5b56c24b6ed2..5feeaa9f0a9e 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -307,7 +307,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) /* Get (or create) the vnet associated with this port */ vp = vsw_get_vnet(hp, vdev->mp, &handle); - if (unlikely(IS_ERR(vp))) { + if (IS_ERR(vp)) { err = PTR_ERR(vp); pr_err("Failed to get vnet for vsw-port\n"); mdesc_release(hp); From b92af5a72ca982f0aa3df22f57a178aa5b0f4357 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 10 Aug 2017 15:29:12 +0300 Subject: [PATCH 0291/2177] net/mlx5: Fix creating a new FTE when an existing but full FTE exists Currently, when a flow steering rule is added, we look for a FTE with an identical value. If we find a match, we try to merge the required destinations with the existing ones. In a case where the existing destination list is full, the code should return an error to its consumer. However, the current code just tries to create another FTE. Fixing that by returning an error in this special scenario. Fixes: f478be79a22e ("net/mlx5: Add hash table for flow groups in flow table") Signed-off-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5a7bea688ec8..6ffe9251bf62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1449,7 +1449,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, int dest_num) { struct mlx5_flow_group *g; - struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); + struct mlx5_flow_handle *rule; struct rhlist_head *tmp, *list; struct match_list { struct list_head list; @@ -1513,6 +1513,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, unlock_ref_node(&g->node); } + rule = ERR_PTR(-ENOENT); + free_list: if (!list_empty(&match_head)) { struct match_list *match_tmp; @@ -1553,7 +1555,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); - if (!IS_ERR(rule)) + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) goto unlock; g = create_autogroup(ft, spec->match_criteria_enable, From 800350a3f145010c353bd7425428c05ac5cfc26a Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 23 Aug 2017 17:50:03 +0300 Subject: [PATCH 0292/2177] net/mlx5: Avoid NULL pointer dereference on steering cleanup On cleanup, when the node is the last child of parent then it calls to tree_put_node on the parent, if the parent's reference count is decremented to 0 (for e.g. when deleting last destination of FTE) then we free the parent as well and vice versa. In such a case we will try to free the parent node again. Increment the parent reference count before cleaning it's children will prevent implicit release of the parent object. 
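The rule applied here is general: code that walks a node's children and may drop the last child reference must hold its own reference on the node for the whole walk, because each child pins its parent and releasing the last child can free the parent implicitly. The following is a self-contained kref-based sketch of that pattern, with toy_node and toy_clean_tree as made-up names rather than the fs_core API; the final two puts roughly mirror the tree_put_node()/tree_remove_node() pair in the fixed clean_tree().

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>

struct toy_node {
        struct kref ref;
        struct toy_node *parent;
        struct list_head children;
        struct list_head list;          /* entry in parent->children */
};

static void toy_node_release(struct kref *ref)
{
        struct toy_node *node = container_of(ref, struct toy_node, ref);
        struct toy_node *parent = node->parent;

        list_del(&node->list);
        kfree(node);
        /* Each child holds a reference on its parent; dropping it here
         * means freeing the final child can cascade up the tree.
         */
        if (parent)
                kref_put(&parent->ref, toy_node_release);
}

static struct toy_node *toy_node_add(struct toy_node *parent)
{
        struct toy_node *node = kzalloc(sizeof(*node), GFP_KERNEL);

        if (!node)
                return NULL;
        kref_init(&node->ref);
        INIT_LIST_HEAD(&node->children);
        INIT_LIST_HEAD(&node->list);
        node->parent = parent;
        if (parent) {
                kref_get(&parent->ref); /* child pins its parent */
                list_add_tail(&node->list, &parent->children);
        }
        return node;
}

static void toy_clean_tree(struct toy_node *node)
{
        struct toy_node *iter, *tmp;

        /* Pin the node: releasing its last child below drops the
         * reference that child held on it, and without this extra get
         * the node could be freed while the loop is still running.
         */
        kref_get(&node->ref);
        list_for_each_entry_safe(iter, tmp, &node->children, list)
                toy_clean_tree(iter);
        kref_put(&node->ref, toy_node_release); /* drop the pin */
        kref_put(&node->ref, toy_node_release); /* drop the initial ref */
}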
Fixes: 0da2d66666d3 ('net/mlx5: Properly remove all steering objects') signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6ffe9251bf62..f390828d8728 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2068,8 +2068,10 @@ static void clean_tree(struct fs_node *node) struct fs_node *iter; struct fs_node *temp; + tree_get_node(node); list_for_each_entry_safe(iter, temp, &node->children, list) clean_tree(iter); + tree_put_node(node); tree_remove_node(node); } } From 75d1d187b2ac86d1af2f1fd125ec21f104ca34b0 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 16 Jul 2017 15:18:45 +0300 Subject: [PATCH 0293/2177] net/mlx5: Move the entry index allocator to flow group When new flow table entry is added, we search for free index in the flow group and not in the flow table, therefore we can move the allocator from flow table to flow group. In downstream patches it will enable us to lock smaller part of the steering tree. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 18 +++++++++--------- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f390828d8728..2a0b5560a8b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -384,7 +384,6 @@ static void del_flow_table(struct fs_node *node) err = mlx5_cmd_destroy_flow_table(dev, ft); if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); - ida_destroy(&ft->fte_allocator); rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; @@ -445,7 +444,7 @@ static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) WARN_ON(ret); fte->status = 0; fs_get_obj(ft, fg->node.parent); - ida_simple_remove(&ft->fte_allocator, fte->index); + ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); } static void del_fte(struct fs_node *node) @@ -488,6 +487,7 @@ static void del_flow_group(struct fs_node *node) ft->autogroup.num_groups--; rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); err = rhltable_remove(&ft->fgs_hash, &fg->hash, rhash_fg); @@ -537,6 +537,7 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) kfree(fg); return ERR_PTR(ret); } + ida_init(&fg->fte_allocator); fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); @@ -575,7 +576,6 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); - ida_init(&ft->fte_allocator); return ft; } @@ -892,7 +892,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa destroy_ft: mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: - ida_destroy(&ft->fte_allocator); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); @@ -1003,6 +1002,7 @@ err_remove_fg: rhash_fg)); err_free_fg: rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); kfree(fg); return ERR_PTR(err); @@ -1181,18 +1181,18 @@ static struct fs_fte 
*create_fte(struct mlx5_flow_group *fg, u32 *match_value, struct mlx5_flow_act *flow_act) { - struct mlx5_flow_table *ft; struct fs_fte *fte; int index; int ret; - fs_get_obj(ft, fg->node.parent); - index = ida_simple_get(&ft->fte_allocator, fg->start_index, - fg->start_index + fg->max_ftes, + index = ida_simple_get(&fg->fte_allocator, 0, + fg->max_ftes, GFP_KERNEL); if (index < 0) return ERR_PTR(index); + index += fg->start_index; + fte = alloc_fte(flow_act, match_value, index); if (IS_ERR(fte)) { ret = PTR_ERR(fte); @@ -1207,7 +1207,7 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, err_hash: kfree(fte); err_alloc: - ida_simple_remove(&ft->fte_allocator, index); + ida_simple_remove(&fg->fte_allocator, index - fg->start_index); return ERR_PTR(ret); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 5509a752f98e..02c969c3d333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -119,7 +119,6 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; - struct ida fte_allocator; struct rhltable fgs_hash; }; @@ -199,6 +198,7 @@ struct mlx5_flow_group { struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; + struct ida fte_allocator; u32 id; struct rhashtable ftes_hash; struct rhlist_head hash; From 46719d77d5f38b8ef04aa5a5cd91263b11d741d7 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 25 Jul 2017 19:20:49 +0300 Subject: [PATCH 0294/2177] net/mlx5: Export building of matched flow groups list Refactor the code and export the build of the matched flow groups list to separate function. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 112 +++++++++++------- 1 file changed, 70 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2a0b5560a8b5..33bcaca70a69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1441,6 +1441,67 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; } +struct match_list { + struct list_head list; + struct mlx5_flow_group *g; +}; + +struct match_list_head { + struct list_head list; + struct match_list first; +}; + +static void free_match_list(struct match_list_head *head) +{ + if (!list_empty(&head->list)) { + struct match_list *iter, *match_tmp; + + list_del(&head->first.list); + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + list_del(&iter->list); + kfree(iter); + } + } +} + +static int build_match_list(struct match_list_head *match_head, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) +{ + struct rhlist_head *tmp, *list; + struct mlx5_flow_group *g; + int err = 0; + + rcu_read_lock(); + INIT_LIST_HEAD(&match_head->list); + /* Collect all fgs which has a matching match_criteria */ + list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + /* RCU is atomic, we can't execute FW commands here */ + rhl_for_each_entry_rcu(g, tmp, list, hash) { + struct match_list *curr_match; + + if (likely(list_empty(&match_head->list))) { + match_head->first.g = g; + list_add_tail(&match_head->first.list, + &match_head->list); + continue; + } + + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + if (!curr_match) { + free_match_list(match_head); + err = -ENOMEM; + goto out; + 
} + curr_match->g = g; + list_add_tail(&curr_match->list, &match_head->list); + } +out: + rcu_read_unlock(); + return err; +} + static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, @@ -1450,38 +1511,17 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; - struct rhlist_head *tmp, *list; - struct match_list { - struct list_head list; - struct mlx5_flow_group *g; - } match_list, *iter; - LIST_HEAD(match_head); + struct match_list_head match_head; + struct match_list *iter; + int err; - rcu_read_lock(); /* Collect all fgs which has a matching match_criteria */ - list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); - rhl_for_each_entry_rcu(g, tmp, list, hash) { - struct match_list *curr_match; - - if (likely(list_empty(&match_head))) { - match_list.g = g; - list_add_tail(&match_list.list, &match_head); - continue; - } - curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); - - if (!curr_match) { - rcu_read_unlock(); - rule = ERR_PTR(-ENOMEM); - goto free_list; - } - curr_match->g = g; - list_add_tail(&curr_match->list, &match_head); - } - rcu_read_unlock(); + err = build_match_list(&match_head, ft, spec); + if (err) + return ERR_PTR(err); /* Try to find a fg that already contains a matching fte */ - list_for_each_entry(iter, &match_head, list) { + list_for_each_entry(iter, &match_head.list, list) { struct fs_fte *fte; g = iter->g; @@ -1500,7 +1540,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, /* No group with matching fte found. Try to add a new fte to any * matching fg. */ - list_for_each_entry(iter, &match_head, list) { + list_for_each_entry(iter, &match_head.list, list) { g = iter->g; nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); @@ -1516,19 +1556,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, rule = ERR_PTR(-ENOENT); free_list: - if (!list_empty(&match_head)) { - struct match_list *match_tmp; - - /* The most common case is having one FG. Since we want to - * optimize this case, we save the first on the stack. - * Therefore, no need to free it. - */ - list_del(&list_first_entry(&match_head, typeof(*iter), list)->list); - list_for_each_entry_safe(iter, match_tmp, &match_head, list) { - list_del(&iter->list); - kfree(iter); - } - } + free_match_list(&match_head); return rule; } From 19f100fef4ad46f21cfdfb1eeeb63fc38c2e57f1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 26 Jul 2017 16:28:26 +0300 Subject: [PATCH 0295/2177] net/mlx5: Refactor FTE and FG creation code Split the creation code to two parts: 1) Object allocation - allocate the steering node and initialize its resources. 2) The firmware command execution. Adding active flag to each node - this flag indicates if the object exists in the hardware or not, if not we don't free the hardware resource in error flow. This change will give us the ability to take write lock on the parent node (e.g. FG for FTE creationg) only on the first part. 
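Splitting object creation into a software-only step and a firmware step, together with an active flag consulted by the generic release path, keeps the error unwinding in one place: if the firmware command fails, the usual release path still runs but skips the hardware destroy because the object never reached the device. Below is a rough sketch of that shape; mydev_ctx, mydev_create_ctx() and the hw_cmd_*() stubs are invented for illustration, and the real code additionally takes the parent's write lock only around the software step, which is the point of the refactoring.

#include <linux/err.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/types.h>

struct mydev_ctx {
        struct list_head list;          /* linked under a parent object */
        bool active;                    /* true once it exists in hardware */
        u32 hw_id;
};

/* Stand-ins for the real firmware commands, stubbed so the sketch is
 * self-contained.
 */
static int hw_cmd_create_ctx(struct mydev_ctx *ctx)
{
        ctx->hw_id = 1;
        return 0;
}

static void hw_cmd_destroy_ctx(struct mydev_ctx *ctx)
{
        ctx->hw_id = 0;
}

static void mydev_ctx_release(struct mydev_ctx *ctx)
{
        /* Common release path: only tear down what actually made it to
         * the hardware.
         */
        if (ctx->active)
                hw_cmd_destroy_ctx(ctx);
        list_del(&ctx->list);
        kfree(ctx);
}

static struct mydev_ctx *mydev_create_ctx(struct list_head *parent_list)
{
        struct mydev_ctx *ctx;
        int err;

        /* Step 1: allocate and link the software object (the part that
         * would be done under the parent's write lock).
         */
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return ERR_PTR(-ENOMEM);
        list_add_tail(&ctx->list, parent_list);

        /* Step 2: the slow firmware command, done afterwards; mark the
         * object active only on success.
         */
        err = hw_cmd_create_ctx(ctx);
        if (err) {
                mydev_ctx_release(ctx);
                return ERR_PTR(err);
        }
        ctx->active = true;
        return ctx;
}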
Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 356 ++++++++++-------- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + 2 files changed, 190 insertions(+), 167 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 33bcaca70a69..41f26f440099 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -179,14 +179,14 @@ find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest); static void tree_init_node(struct fs_node *node, - unsigned int refcount, void (*remove_func)(struct fs_node *)) { - atomic_set(&node->refcount, refcount); + atomic_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); mutex_init(&node->lock); node->remove_func = remove_func; + node->active = false; } static void tree_add_node(struct fs_node *node, struct fs_node *parent) @@ -381,9 +381,11 @@ static void del_flow_table(struct fs_node *node) fs_get_obj(ft, node); dev = get_dev(&ft->node); - err = mlx5_cmd_destroy_flow_table(dev, ft); - if (err) - mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + if (node->active) { + err = mlx5_cmd_destroy_flow_table(dev, ft); + if (err) + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + } rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; @@ -435,18 +437,6 @@ out: } } -static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) -{ - struct mlx5_flow_table *ft; - int ret; - - ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - WARN_ON(ret); - fte->status = 0; - fs_get_obj(ft, fg->node.parent); - ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); -} - static void del_fte(struct fs_node *node) { struct mlx5_flow_table *ft; @@ -461,14 +451,20 @@ static void del_fte(struct fs_node *node) trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); - err = mlx5_cmd_delete_fte(dev, ft, - fte->index); - if (err) - mlx5_core_warn(dev, - "flow steering can't delete fte in index %d of flow group id %d\n", - fte->index, fg->id); + if (node->active) { + err = mlx5_cmd_delete_fte(dev, ft, + fte->index); + if (err) + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + } - destroy_fte(fte, fg); + err = rhashtable_remove_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + WARN_ON(err); + ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); } static void del_flow_group(struct fs_node *node) @@ -492,7 +488,7 @@ static void del_flow_group(struct fs_node *node) &fg->hash, rhash_fg); WARN_ON(err); - if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); } @@ -518,14 +514,57 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, return fte; } -static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) +static struct fs_fte *alloc_insert_fte(struct mlx5_flow_group *fg, + u32 *match_value, + struct mlx5_flow_act *flow_act) +{ + struct fs_fte *fte; + int index; + int ret; + + index = ida_simple_get(&fg->fte_allocator, 0, + fg->max_ftes, + GFP_KERNEL); + if (index < 0) + return ERR_PTR(index); + + fte = alloc_fte(flow_act, match_value, index + fg->start_index); + if (IS_ERR(fte)) { + ret = PTR_ERR(fte); + goto err_ida_remove; 
+ } + + ret = rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_free; + + tree_init_node(&fte->node, del_fte); + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + + return fte; + +err_free: + kfree(fte); +err_ida_remove: + ida_simple_remove(&fg->fte_allocator, index); + return ERR_PTR(ret); +} + +static void dealloc_flow_group(struct mlx5_flow_group *fg) +{ + rhashtable_destroy(&fg->ftes_hash); + kfree(fg); +} + +static struct mlx5_flow_group *alloc_flow_group(u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index) { struct mlx5_flow_group *fg; - void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, - create_fg_in, match_criteria); - u8 match_criteria_enable = MLX5_GET(create_flow_group_in, - create_fg_in, - match_criteria_enable); int ret; fg = kzalloc(sizeof(*fg), GFP_KERNEL); @@ -536,16 +575,47 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) if (ret) { kfree(fg); return ERR_PTR(ret); - } +} ida_init(&fg->fte_allocator); fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); fg->node.type = FS_TYPE_FLOW_GROUP; - fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in, - start_flow_index); - fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in, - end_flow_index) - fg->start_index + 1; + fg->start_index = start_index; + fg->max_ftes = end_index - start_index + 1; + + return fg; +} + +static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index, + struct list_head *prev) +{ + struct mlx5_flow_group *fg; + int ret; + + fg = alloc_flow_group(match_criteria_enable, match_criteria, + start_index, end_index); + if (IS_ERR(fg)) + return fg; + + /* initialize refcnt, add to parent list */ + ret = rhltable_insert(&ft->fgs_hash, + &fg->hash, + rhash_fg); + if (ret) { + dealloc_flow_group(fg); + return ERR_PTR(ret); + } + + tree_init_node(&fg->node, del_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, prev); + return fg; } @@ -870,7 +940,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa goto unlock_root; } - tree_init_node(&ft->node, 1, del_flow_table); + tree_init_node(&ft->node, del_flow_table); log_table_sz = ft->max_fte ? 
ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, @@ -882,6 +952,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa err = connect_flow_table(root->dev, ft, fs_prio); if (err) goto destroy_ft; + ft->node.active = true; lock_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); @@ -959,55 +1030,6 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, } EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); -/* Flow table should be locked */ -static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft, - u32 *fg_in, - struct list_head - *prev_fg, - bool is_auto_fg) -{ - struct mlx5_flow_group *fg; - struct mlx5_core_dev *dev = get_dev(&ft->node); - int err; - - if (!dev) - return ERR_PTR(-ENODEV); - - fg = alloc_flow_group(fg_in); - if (IS_ERR(fg)) - return fg; - - err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg); - if (err) - goto err_free_fg; - - err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); - if (err) - goto err_remove_fg; - - if (ft->autogroup.active) - ft->autogroup.num_groups++; - /* Add node to tree */ - tree_init_node(&fg->node, !is_auto_fg, del_flow_group); - tree_add_node(&fg->node, &ft->node); - /* Add node to group list */ - list_add(&fg->node.list, prev_fg); - - trace_mlx5_fs_add_fg(fg); - return fg; - -err_remove_fg: - WARN_ON(rhltable_remove(&ft->fgs_hash, - &fg->hash, - rhash_fg)); -err_free_fg: - rhashtable_destroy(&fg->ftes_hash); - ida_destroy(&fg->fte_allocator); - kfree(fg); - - return ERR_PTR(err); -} - struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { @@ -1016,7 +1038,13 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable = MLX5_GET(create_flow_group_in, fg_in, match_criteria_enable); + int start_index = MLX5_GET(create_flow_group_in, fg_in, + start_flow_index); + int end_index = MLX5_GET(create_flow_group_in, fg_in, + end_flow_index); + struct mlx5_core_dev *dev = get_dev(&ft->node); struct mlx5_flow_group *fg; + int err; if (!check_valid_mask(match_criteria_enable, match_criteria)) return ERR_PTR(-EINVAL); @@ -1025,8 +1053,20 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, return ERR_PTR(-EPERM); lock_ref_node(&ft->node); - fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false); + fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, + start_index, end_index, + ft->node.children.prev); unlock_ref_node(&ft->node); + if (IS_ERR(fg)) + return fg; + + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + if (err) { + tree_put_node(&fg->node); + return ERR_PTR(err); + } + trace_mlx5_fs_add_fg(fg); + fg->node.active = true; return fg; } @@ -1111,7 +1151,7 @@ create_flow_handle(struct fs_fte *fte, /* Add dest to dests list- we need flow tables to be in the * end of the list for forward to next prio rules. 
*/ - tree_init_node(&rule->node, 1, del_rule); + tree_init_node(&rule->node, del_rule); if (dest && dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) list_add(&rule->node.list, &fte->node.children); @@ -1167,6 +1207,7 @@ add_rule_fte(struct fs_fte *fte, if (err) goto free_handle; + fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; out: @@ -1177,59 +1218,17 @@ free_handle: return ERR_PTR(err); } -static struct fs_fte *create_fte(struct mlx5_flow_group *fg, - u32 *match_value, - struct mlx5_flow_act *flow_act) +static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) { - struct fs_fte *fte; - int index; - int ret; - - index = ida_simple_get(&fg->fte_allocator, 0, - fg->max_ftes, - GFP_KERNEL); - if (index < 0) - return ERR_PTR(index); - - index += fg->start_index; - - fte = alloc_fte(flow_act, match_value, index); - if (IS_ERR(fte)) { - ret = PTR_ERR(fte); - goto err_alloc; - } - ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - if (ret) - goto err_hash; - - return fte; - -err_hash: - kfree(fte); -err_alloc: - ida_simple_remove(&fg->fte_allocator, index - fg->start_index); - return ERR_PTR(ret); -} - -static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct list_head *prev = &ft->node.children; - unsigned int candidate_index = 0; struct mlx5_flow_group *fg; - void *match_criteria_addr; + unsigned int candidate_index = 0; unsigned int group_size = 0; - u32 *in; if (!ft->autogroup.active) return ERR_PTR(-ENOENT); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return ERR_PTR(-ENOMEM); - if (ft->autogroup.num_groups < ft->autogroup.required_groups) /* We save place for flow groups in addition to max types */ group_size = ft->max_fte / (ft->autogroup.required_groups + 1); @@ -1247,25 +1246,55 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, prev = &fg->node.list; } - if (candidate_index + group_size > ft->max_fte) { - fg = ERR_PTR(-ENOSPC); + if (candidate_index + group_size > ft->max_fte) + return ERR_PTR(-ENOSPC); + + fg = alloc_insert_flow_group(ft, + spec->match_criteria_enable, + spec->match_criteria, + candidate_index, + candidate_index + group_size - 1, + prev); + if (IS_ERR(fg)) goto out; - } + + ft->autogroup.num_groups++; + +out: + return fg; +} + +static int create_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *match_criteria_addr; + int err; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; MLX5_SET(create_flow_group_in, in, match_criteria_enable, - match_criteria_enable); - MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); - MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + - group_size - 1); + fg->mask.match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index + + fg->max_ftes - 1); match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - memcpy(match_criteria_addr, match_criteria, - MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(match_criteria_addr, fg->mask.match_criteria, + sizeof(fg->mask.match_criteria)); + + err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id); + if (!err) { + 
fg->node.active = true; + trace_mlx5_fs_add_fg(fg); + } - fg = create_flow_group_common(ft, in, prev, true); -out: kvfree(in); - return fg; + return err; } static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, @@ -1368,23 +1397,17 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } fs_get_obj(ft, fg->node.parent); - fte = create_fte(fg, match_value, flow_act); + fte = alloc_insert_fte(fg, match_value, flow_act); if (IS_ERR(fte)) return (void *)fte; - tree_init_node(&fte->node, 0, del_fte); nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { unlock_ref_node(&fte->node); - destroy_fte(fte, fg); - kfree(fte); + tree_put_node(&fte->node); return handle; } - tree_add_node(&fte->node, &fg->node); - /* fte list isn't sorted */ - list_add_tail(&fte->node.list, &fg->node.children); - trace_mlx5_fs_set_fte(fte, true); add_rules: for (i = 0; i < handle->num_rules; i++) { if (atomic_read(&handle->rule[i]->node.refcount) == 1) { @@ -1571,6 +1594,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; + int err; int i; if (!check_valid_spec(spec)) @@ -1586,24 +1610,22 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) goto unlock; - g = create_autogroup(ft, spec->match_criteria_enable, - spec->match_criteria); + g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = (void *)g; goto unlock; } + err = create_auto_flow_group(ft, g); + if (err) { + rule = ERR_PTR(err); + goto put_fg; + } + rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, NULL); - if (IS_ERR(rule)) { - /* Remove assumes refcount > 0 and autogroup creates a group - * with a refcount = 0. 
- */ - unlock_ref_node(&ft->node); - tree_get_node(&g->node); - tree_remove_node(&g->node); - return rule; - } +put_fg: + tree_put_node(&g->node); unlock: unlock_ref_node(&ft->node); return rule; @@ -1847,7 +1869,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENOMEM); fs_prio->node.type = FS_TYPE_PRIO; - tree_init_node(&fs_prio->node, 1, NULL); + tree_init_node(&fs_prio->node, NULL); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; fs_prio->prio = prio; @@ -1873,7 +1895,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); - tree_init_node(&ns->node, 1, NULL); + tree_init_node(&ns->node, NULL); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); @@ -1998,7 +2020,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering ns = &root_ns->ns; fs_init_namespace(ns); mutex_init(&root_ns->chain_lock); - tree_init_node(&ns->node, 1, NULL); + tree_init_node(&ns->node, NULL); tree_add_node(&ns->node, NULL); return root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 02c969c3d333..6e5d25b4f8de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -82,6 +82,7 @@ struct fs_node { /* lock the node for writing and traversing */ struct mutex lock; atomic_t refcount; + bool active; void (*remove_func)(struct fs_node *); }; From c7784b1c8ab3f44dc2e643a8feb77584792c9108 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Aug 2017 09:07:44 +0300 Subject: [PATCH 0296/2177] net/mlx5: Replace fs_node mutex with reader/writer semaphore Currently, steering object is protected by mutex lock, replace the mutex lock with reader/writer semaphore . In this patch we still use only write semaphore. In downstream patches we will switch part of the write locks to read locks. 
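The conversion itself is a mechanical mapping from the mutex API to the rw_semaphore API; the benefit only materialises in the follow-up patch, when lookups start taking the shared side so they can run concurrently. A small sketch of the calls involved, on a made-up stats_map structure rather than fs_node, with the old mutex call noted next to each rwsem call:

#include <linux/rwsem.h>

struct stats_map {
        struct rw_semaphore lock;       /* was: struct mutex lock */
        unsigned long hits;
        unsigned long misses;
};

static void stats_map_init(struct stats_map *m)
{
        init_rwsem(&m->lock);           /* was: mutex_init(&m->lock) */
        m->hits = 0;
        m->misses = 0;
}

/* Readers may now run in parallel with each other. */
static unsigned long stats_map_total(struct stats_map *m)
{
        unsigned long total;

        down_read(&m->lock);            /* was: mutex_lock(&m->lock) */
        total = m->hits + m->misses;
        up_read(&m->lock);              /* was: mutex_unlock(&m->lock) */
        return total;
}

/* Writers still exclude everyone, exactly like the old mutex did. */
static void stats_map_record_hit(struct stats_map *m)
{
        down_write(&m->lock);
        m->hits++;
        up_write(&m->lock);
}

Where the original code used mutex_lock_nested() with the FS_LOCK_GRANDPARENT/PARENT/CHILD classes, down_write_nested() (and later down_read_nested()) provides the equivalent lockdep annotation for the semaphore.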
Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 28 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 41f26f440099..9406e7272807 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -145,10 +145,10 @@ static struct init_tree_node { } }; -enum fs_i_mutex_lock_class { - FS_MUTEX_GRANDPARENT, - FS_MUTEX_PARENT, - FS_MUTEX_CHILD +enum fs_i_lock_class { + FS_LOCK_GRANDPARENT, + FS_LOCK_PARENT, + FS_LOCK_CHILD }; static const struct rhashtable_params rhash_fte = { @@ -184,7 +184,7 @@ static void tree_init_node(struct fs_node *node, atomic_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); - mutex_init(&node->lock); + init_rwsem(&node->lock); node->remove_func = remove_func; node->active = false; } @@ -208,10 +208,10 @@ static void tree_get_node(struct fs_node *node) } static void nested_lock_ref_node(struct fs_node *node, - enum fs_i_mutex_lock_class class) + enum fs_i_lock_class class) { if (node) { - mutex_lock_nested(&node->lock, class); + down_write_nested(&node->lock, class); atomic_inc(&node->refcount); } } @@ -219,7 +219,7 @@ static void nested_lock_ref_node(struct fs_node *node, static void lock_ref_node(struct fs_node *node) { if (node) { - mutex_lock(&node->lock); + down_write(&node->lock); atomic_inc(&node->refcount); } } @@ -228,7 +228,7 @@ static void unlock_ref_node(struct fs_node *node) { if (node) { atomic_dec(&node->refcount); - mutex_unlock(&node->lock); + up_write(&node->lock); } } @@ -1376,7 +1376,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int old_action; int ret; - nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); + nested_lock_ref_node(&fte->node, FS_LOCK_CHILD); ret = check_conflicting_ftes(fte, flow_act); if (ret) { handle = ERR_PTR(ret); @@ -1400,7 +1400,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, fte = alloc_insert_fte(fg, match_value, flow_act); if (IS_ERR(fte)) return (void *)fte; - nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); + nested_lock_ref_node(&fte->node, FS_LOCK_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { unlock_ref_node(&fte->node); @@ -1548,7 +1548,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct fs_fte *fte; g = iter->g; - nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + nested_lock_ref_node(&g->node, FS_LOCK_PARENT); fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, rhash_fte); if (fte) { @@ -1566,7 +1566,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, list_for_each_entry(iter, &match_head.list, list) { g = iter->g; - nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + nested_lock_ref_node(&g->node, FS_LOCK_PARENT); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, NULL); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { @@ -1605,7 +1605,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, return ERR_PTR(-EINVAL); } - nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); + nested_lock_ref_node(&ft->node, FS_LOCK_GRANDPARENT); rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) goto unlock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 6e5d25b4f8de..b5c079f35051 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -80,7 +80,7 @@ struct fs_node { struct fs_node *parent; struct fs_node *root; /* lock the node for writing and traversing */ - struct mutex lock; + struct rw_semaphore lock; atomic_t refcount; bool active; void (*remove_func)(struct fs_node *); From bd71b08ec2ee4504bcc3b37a9283ce15e93dfacd Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Aug 2017 09:19:11 +0300 Subject: [PATCH 0297/2177] net/mlx5: Support multiple updates of steering rules in parallel Most of the time spent on adding new flow steering rule is executing the firmware command. The most common action is adding a new flow steering entry. In order to enhance the update rate we parallelize the commands by doing the following: 1) Replace the mutex lock with readers-writers semaphore and take the write lock only when necessary (e.g. allocating a new flow table entry index or adding a node to the parent's children list). When we try to find a suitable child in the parent's children list (e.g. search for flow group with the same match_criteria of the rule) then we only take the read lock. 2) Add versioning mechanism - each steering entity (FT, FG, FTE, DST) will have an incremental version. The version is increased when the entity is changed (e.g. when a new FTE was added to FG - the FG's version is increased). Versioning is used in order to determine if the last traverse of an entity's children is valid or a rescan under write lock is required. This support improves the insertion rate of steering rules from ~5k/sec to ~40k/sec. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 388 ++++++++++++------ .../net/ethernet/mellanox/mlx5/core/fs_core.h | 4 +- 2 files changed, 265 insertions(+), 127 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9406e7272807..e7301cf747c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -168,10 +168,16 @@ static const struct rhashtable_params rhash_fg = { }; -static void del_rule(struct fs_node *node); -static void del_flow_table(struct fs_node *node); -static void del_flow_group(struct fs_node *node); -static void del_fte(struct fs_node *node); +static void del_hw_flow_table(struct fs_node *node); +static void del_hw_flow_group(struct fs_node *node); +static void del_hw_fte(struct fs_node *node); +static void del_sw_flow_table(struct fs_node *node); +static void del_sw_flow_group(struct fs_node *node); +static void del_sw_fte(struct fs_node *node); +/* Delete rule (destination) is special case that + * requires to lock the FTE for all the deletion process. 
+ */ +static void del_sw_hw_rule(struct fs_node *node); static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2); static struct mlx5_flow_rule * @@ -179,13 +185,15 @@ find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest); static void tree_init_node(struct fs_node *node, - void (*remove_func)(struct fs_node *)) + void (*del_hw_func)(struct fs_node *), + void (*del_sw_func)(struct fs_node *)) { atomic_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); init_rwsem(&node->lock); - node->remove_func = remove_func; + node->del_hw_func = del_hw_func; + node->del_sw_func = del_sw_func; node->active = false; } @@ -202,13 +210,22 @@ static void tree_add_node(struct fs_node *node, struct fs_node *parent) node->root = parent->root; } -static void tree_get_node(struct fs_node *node) +static int tree_get_node(struct fs_node *node) { - atomic_inc(&node->refcount); + return atomic_add_unless(&node->refcount, 1, 0); } -static void nested_lock_ref_node(struct fs_node *node, - enum fs_i_lock_class class) +static void nested_down_read_ref_node(struct fs_node *node, + enum fs_i_lock_class class) +{ + if (node) { + down_read_nested(&node->lock, class); + atomic_inc(&node->refcount); + } +} + +static void nested_down_write_ref_node(struct fs_node *node, + enum fs_i_lock_class class) { if (node) { down_write_nested(&node->lock, class); @@ -216,7 +233,7 @@ static void nested_lock_ref_node(struct fs_node *node, } } -static void lock_ref_node(struct fs_node *node) +static void down_write_ref_node(struct fs_node *node) { if (node) { down_write(&node->lock); @@ -224,28 +241,38 @@ static void lock_ref_node(struct fs_node *node) } } -static void unlock_ref_node(struct fs_node *node) +static void up_read_ref_node(struct fs_node *node) { - if (node) { - atomic_dec(&node->refcount); - up_write(&node->lock); - } + atomic_dec(&node->refcount); + up_read(&node->lock); +} + +static void up_write_ref_node(struct fs_node *node) +{ + atomic_dec(&node->refcount); + up_write(&node->lock); } static void tree_put_node(struct fs_node *node) { struct fs_node *parent_node = node->parent; - lock_ref_node(parent_node); if (atomic_dec_and_test(&node->refcount)) { - if (parent_node) + if (node->del_hw_func) + node->del_hw_func(node); + if (parent_node) { + /* Only root namespace doesn't have parent and we just + * need to free its node. 
+ */ + down_write_ref_node(parent_node); list_del_init(&node->list); - if (node->remove_func) - node->remove_func(node); + if (node->del_sw_func) + node->del_sw_func(node); + up_write_ref_node(parent_node); + } kfree(node); node = NULL; } - unlock_ref_node(parent_node); if (!node && parent_node) tree_put_node(parent_node); } @@ -371,11 +398,10 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node) return NULL; } -static void del_flow_table(struct fs_node *node) +static void del_hw_flow_table(struct fs_node *node) { struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; - struct fs_prio *prio; int err; fs_get_obj(ft, node); @@ -386,12 +412,21 @@ static void del_flow_table(struct fs_node *node) if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); } +} + +static void del_sw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct fs_prio *prio; + + fs_get_obj(ft, node); + rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } -static void del_rule(struct fs_node *node) +static void del_sw_hw_rule(struct fs_node *node) { struct mlx5_flow_rule *rule; struct mlx5_flow_table *ft; @@ -407,7 +442,6 @@ static void del_rule(struct fs_node *node) fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); trace_mlx5_fs_del_rule(rule); - list_del(&rule->node.list); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); list_del(&rule->next_ft); @@ -437,7 +471,7 @@ out: } } -static void del_fte(struct fs_node *node) +static void del_hw_fte(struct fs_node *node) { struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; @@ -448,8 +482,8 @@ static void del_fte(struct fs_node *node) fs_get_obj(fte, node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); - trace_mlx5_fs_del_fte(fte); + trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); if (node->active) { err = mlx5_cmd_delete_fte(dev, ft, @@ -459,6 +493,16 @@ static void del_fte(struct fs_node *node) "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); } +} + +static void del_sw_fte(struct fs_node *node) +{ + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); err = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, @@ -467,30 +511,39 @@ static void del_fte(struct fs_node *node) ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); } -static void del_flow_group(struct fs_node *node) +static void del_hw_flow_group(struct fs_node *node) { struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; - int err; fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); trace_mlx5_fs_del_fg(fg); - if (ft->autogroup.active) - ft->autogroup.num_groups--; + if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static void del_sw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + int err; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); rhashtable_destroy(&fg->ftes_hash); ida_destroy(&fg->fte_allocator); + if (ft->autogroup.active) + ft->autogroup.num_groups--; err = rhltable_remove(&ft->fgs_hash, &fg->hash, rhash_fg); WARN_ON(err); - if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) - mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", - fg->id, 
ft->id); } static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, @@ -540,7 +593,7 @@ static struct fs_fte *alloc_insert_fte(struct mlx5_flow_group *fg, if (ret) goto err_free; - tree_init_node(&fte->node, del_fte); + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); tree_add_node(&fte->node, &fg->node); list_add_tail(&fte->node.list, &fg->node.children); @@ -611,10 +664,11 @@ static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *f return ERR_PTR(ret); } - tree_init_node(&fg->node, del_flow_group); + tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group); tree_add_node(&fg->node, &ft->node); /* Add node to group list */ list_add(&fg->node.list, prev); + atomic_inc(&ft->node.version); return fg; } @@ -794,7 +848,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, fs_get_obj(fte, rule->node.parent); if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return -EINVAL; - lock_ref_node(&fte->node); + down_write_ref_node(&fte->node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); @@ -803,7 +857,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, ft, fg->id, modify_mask, fte); - unlock_ref_node(&fte->node); + up_write_ref_node(&fte->node); return err; } @@ -940,7 +994,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa goto unlock_root; } - tree_init_node(&ft->node, del_flow_table); + tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, @@ -953,11 +1007,11 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa if (err) goto destroy_ft; ft->node.active = true; - lock_ref_node(&fs_prio->node); + down_write_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; - unlock_ref_node(&fs_prio->node); + up_write_ref_node(&fs_prio->node); mutex_unlock(&root->chain_lock); return ft; destroy_ft: @@ -1052,11 +1106,11 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (ft->autogroup.active) return ERR_PTR(-EPERM); - lock_ref_node(&ft->node); + down_write_ref_node(&ft->node); fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, start_index, end_index, ft->node.children.prev); - unlock_ref_node(&ft->node); + up_write_ref_node(&ft->node); if (IS_ERR(fg)) return fg; @@ -1151,7 +1205,7 @@ create_flow_handle(struct fs_fte *fte, /* Add dest to dests list- we need flow tables to be in the * end of the list for forward to next prio rules. 
*/ - tree_init_node(&rule->node, del_rule); + tree_init_node(&rule->node, NULL, del_sw_hw_rule); if (dest && dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) list_add(&rule->node.list, &fte->node.children); @@ -1209,6 +1263,7 @@ add_rule_fte(struct fs_fte *fte, fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; + atomic_inc(&fte->node.version); out: return handle; @@ -1369,54 +1424,30 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, struct fs_fte *fte) { struct mlx5_flow_handle *handle; - struct mlx5_flow_table *ft; + int old_action; int i; + int ret; - if (fte) { - int old_action; - int ret; + ret = check_conflicting_ftes(fte, flow_act); + if (ret) + return ERR_PTR(ret); - nested_lock_ref_node(&fte->node, FS_LOCK_CHILD); - ret = check_conflicting_ftes(fte, flow_act); - if (ret) { - handle = ERR_PTR(ret); - goto unlock_fte; - } - - old_action = fte->action; - fte->action |= flow_act->action; - handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != flow_act->action); - if (IS_ERR(handle)) { - fte->action = old_action; - goto unlock_fte; - } else { - trace_mlx5_fs_set_fte(fte, false); - goto add_rules; - } - } - fs_get_obj(ft, fg->node.parent); - - fte = alloc_insert_fte(fg, match_value, flow_act); - if (IS_ERR(fte)) - return (void *)fte; - nested_lock_ref_node(&fte->node, FS_LOCK_CHILD); - handle = add_rule_fte(fte, fg, dest, dest_num, false); + old_action = fte->action; + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); if (IS_ERR(handle)) { - unlock_ref_node(&fte->node); - tree_put_node(&fte->node); + fte->action = old_action; return handle; } + trace_mlx5_fs_set_fte(fte, false); -add_rules: for (i = 0; i < handle->num_rules; i++) { if (atomic_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } } -unlock_fte: - unlock_ref_node(&fte->node); return handle; } @@ -1480,8 +1511,10 @@ static void free_match_list(struct match_list_head *head) struct match_list *iter, *match_tmp; list_del(&head->first.list); + tree_put_node(&head->first.g->node); list_for_each_entry_safe(iter, match_tmp, &head->list, list) { + tree_put_node(&iter->g->node); list_del(&iter->list); kfree(iter); } @@ -1505,6 +1538,8 @@ static int build_match_list(struct match_list_head *match_head, struct match_list *curr_match; if (likely(list_empty(&match_head->list))) { + if (!tree_get_node(&g->node)) + continue; match_head->first.g = g; list_add_tail(&match_head->first.list, &match_head->list); @@ -1517,6 +1552,10 @@ static int build_match_list(struct match_list_head *match_head, err = -ENOMEM; goto out; } + if (!tree_get_node(&g->node)) { + kfree(curr_match); + continue; + } curr_match->g = g; list_add_tail(&curr_match->list, &match_head->list); } @@ -1525,62 +1564,119 @@ out: return err; } +static u64 matched_fgs_get_version(struct list_head *match_head) +{ + struct match_list *iter; + u64 version = 0; + + list_for_each_entry(iter, match_head, list) + version += (u64)atomic_read(&iter->g->node.version); + return version; +} + static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct list_head *match_head, struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, - int dest_num) + int dest_num, + int ft_version) { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; - struct match_list_head match_head; struct match_list *iter; - int err; + bool 
take_write = false; + struct fs_fte *fte; + u64 version; - /* Collect all fgs which has a matching match_criteria */ - err = build_match_list(&match_head, ft, spec); - if (err) - return ERR_PTR(err); + list_for_each_entry(iter, match_head, list) { + nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT); + ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL); + } +search_again_locked: + version = matched_fgs_get_version(match_head); /* Try to find a fg that already contains a matching fte */ - list_for_each_entry(iter, &match_head.list, list) { - struct fs_fte *fte; + list_for_each_entry(iter, match_head, list) { + struct fs_fte *fte_tmp; g = iter->g; - nested_lock_ref_node(&g->node, FS_LOCK_PARENT); - fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, - rhash_fte); - if (fte) { - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); - unlock_ref_node(&g->node); - goto free_list; + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, + rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) + continue; + + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); + if (!take_write) { + list_for_each_entry(iter, match_head, list) + up_read_ref_node(&iter->g->node); + } else { + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); } - unlock_ref_node(&g->node); + + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte_tmp); + up_write_ref_node(&fte_tmp->node); + tree_put_node(&fte_tmp->node); + return rule; } /* No group with matching fte found. Try to add a new fte to any * matching fg. */ - list_for_each_entry(iter, &match_head.list, list) { - g = iter->g; - nested_lock_ref_node(&g->node, FS_LOCK_PARENT); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, NULL); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { - unlock_ref_node(&g->node); - goto free_list; - } - unlock_ref_node(&g->node); + if (!take_write) { + list_for_each_entry(iter, match_head, list) + up_read_ref_node(&iter->g->node); + list_for_each_entry(iter, match_head, list) + nested_down_write_ref_node(&iter->g->node, + FS_LOCK_PARENT); + take_write = true; } + /* Check the ft version, for case that new flow group + * was added while the fgs weren't locked + */ + if (atomic_read(&ft->node.version) != ft_version) { + rule = ERR_PTR(-EAGAIN); + goto out; + } + + /* Check the fgs version, for case the new FTE with the + * same values was added while the fgs weren't locked + */ + if (version != matched_fgs_get_version(match_head)) + goto search_again_locked; + + list_for_each_entry(iter, match_head, list) { + g = iter->g; + + if (!g->node.active) + continue; + fte = alloc_insert_fte(g, spec->match_value, flow_act); + if (IS_ERR(fte)) { + if (PTR_ERR(fte) == -ENOSPC) + continue; + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + return (void *)fte; + } + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); + return rule; + } rule = ERR_PTR(-ENOENT); - -free_list: - free_match_list(&match_head); - +out: + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); return rule; } @@ -1594,6 +1690,10 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; + struct match_list_head 
match_head; + bool take_write = false; + struct fs_fte *fte; + int version; int err; int i; @@ -1604,31 +1704,67 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } + nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); +search_again_locked: + version = atomic_read(&ft->node.version); - nested_lock_ref_node(&ft->node, FS_LOCK_GRANDPARENT); - rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) - goto unlock; + /* Collect all fgs which has a matching match_criteria */ + err = build_match_list(&match_head, ft, spec); + if (err) + return ERR_PTR(err); + + if (!take_write) + up_read_ref_node(&ft->node); + + rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, + dest_num, version); + free_match_list(&match_head); + if (!IS_ERR(rule) || + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) + return rule; + + if (!take_write) { + nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); + take_write = true; + } + + if (PTR_ERR(rule) == -EAGAIN || + version != atomic_read(&ft->node.version)) + goto search_again_locked; g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = (void *)g; - goto unlock; + up_write_ref_node(&ft->node); + return rule; } + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + up_write_ref_node(&ft->node); + err = create_auto_flow_group(ft, g); - if (err) { - rule = ERR_PTR(err); - goto put_fg; + if (err) + goto err_release_fg; + + fte = alloc_insert_fte(g, spec->match_value, flow_act); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + goto err_release_fg; } + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node); rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, NULL); -put_fg: + dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); tree_put_node(&g->node); -unlock: - unlock_ref_node(&ft->node); return rule; + +err_release_fg: + up_write_ref_node(&g->node); + tree_put_node(&g->node); + return ERR_PTR(err); } static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) @@ -1869,7 +2005,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENOMEM); fs_prio->node.type = FS_TYPE_PRIO; - tree_init_node(&fs_prio->node, NULL); + tree_init_node(&fs_prio->node, NULL, NULL); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; fs_prio->prio = prio; @@ -1895,7 +2031,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); - tree_init_node(&ns->node, NULL); + tree_init_node(&ns->node, NULL, NULL); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); @@ -2020,7 +2156,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering ns = &root_ns->ns; fs_init_namespace(ns); mutex_init(&root_ns->chain_lock); - tree_init_node(&ns->node, NULL); + tree_init_node(&ns->node, NULL, NULL); tree_add_node(&ns->node, NULL); return root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b5c079f35051..875b753862b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -83,7 +83,9 @@ struct fs_node { struct rw_semaphore lock; atomic_t refcount; bool active; - void (*remove_func)(struct fs_node *); + void 
(*del_hw_func)(struct fs_node *); + void (*del_sw_func)(struct fs_node *); + atomic_t version; }; struct mlx5_flow_rule { From f5c2ff179f51101893e42e78683b23a487929d6c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 29 Aug 2017 19:17:12 +0300 Subject: [PATCH 0298/2177] net/mlx5: Allocate FTE object without lock Allocation of new FTE is a massive operation, part of it could be done without taking the flow group write lock. Split the FTE allocation to two functions of actions which need to be under lock and action which don't have. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e7301cf747c5..bc4bbb72fa86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -546,9 +546,33 @@ static void del_sw_flow_group(struct fs_node *node) WARN_ON(err); } -static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, - u32 *match_value, - unsigned int index) +static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) +{ + int index; + int ret; + + index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL); + if (index < 0) + return index; + + fte->index = index + fg->start_index; + ret = rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_ida_remove; + + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + return 0; + +err_ida_remove: + ida_simple_remove(&fg->fte_allocator, index); + return ret; +} + +static struct fs_fte *alloc_fte(u32 *match_value, + struct mlx5_flow_act *flow_act) { struct fs_fte *fte; @@ -559,51 +583,13 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, memcpy(fte->val, match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->flow_tag = flow_act->flow_tag; - fte->index = index; fte->action = flow_act->action; fte->encap_id = flow_act->encap_id; fte->modify_id = flow_act->modify_id; - return fte; -} - -static struct fs_fte *alloc_insert_fte(struct mlx5_flow_group *fg, - u32 *match_value, - struct mlx5_flow_act *flow_act) -{ - struct fs_fte *fte; - int index; - int ret; - - index = ida_simple_get(&fg->fte_allocator, 0, - fg->max_ftes, - GFP_KERNEL); - if (index < 0) - return ERR_PTR(index); - - fte = alloc_fte(flow_act, match_value, index + fg->start_index); - if (IS_ERR(fte)) { - ret = PTR_ERR(fte); - goto err_ida_remove; - } - - ret = rhashtable_insert_fast(&fg->ftes_hash, - &fte->hash, - rhash_fte); - if (ret) - goto err_free; - tree_init_node(&fte->node, del_hw_fte, del_sw_fte); - tree_add_node(&fte->node, &fg->node); - list_add_tail(&fte->node.list, &fg->node.children); return fte; - -err_free: - kfree(fte); -err_ida_remove: - ida_simple_remove(&fg->fte_allocator, index); - return ERR_PTR(ret); } static void dealloc_flow_group(struct mlx5_flow_group *fg) @@ -1589,6 +1575,11 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, bool take_write = false; struct fs_fte *fte; u64 version; + int err; + + fte = alloc_fte(spec->match_value, flow_act); + if (IS_ERR(fte)) + return ERR_PTR(-ENOMEM); list_for_each_entry(iter, match_head, list) { nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT); @@ -1620,6 +1611,7 @@ search_again_locked: flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node); 
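The split described in this changelog follows a common pattern: do the expensive, sleeping allocation with no lock held, publish the object under the write lock, and throw it away if a concurrent writer got there first. A minimal sketch with invented helper names (the patch's own halves are alloc_fte(), which runs unlocked, and insert_fte(), whose ida_simple_get() plus rhashtable_insert_fast() still run under the group's write lock):

static struct fs_fte *get_or_create_fte(struct mlx5_flow_group *fg,
                                        const u32 *match_value)
{
        struct fs_fte *existing, *fte;

        /* Expensive part: may sleep, may fail, needs no lock. */
        fte = alloc_entry(match_value);                 /* hypothetical */
        if (IS_ERR(fte))
                return fte;

        down_write(&fg->node.lock);
        existing = find_entry(fg, match_value);         /* hypothetical */
        if (existing) {
                /* Lost the race: keep the published entry, drop ours. */
                up_write(&fg->node.lock);
                kfree(fte);
                return existing;
        }
        link_entry(fg, fte);            /* cheap, done under the lock */
        up_write(&fg->node.lock);
        return fte;
}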
tree_put_node(&fte_tmp->node); + kfree(fte); return rule; } @@ -1655,13 +1647,14 @@ search_again_locked: if (!g->node.active) continue; - fte = alloc_insert_fte(g, spec->match_value, flow_act); - if (IS_ERR(fte)) { - if (PTR_ERR(fte) == -ENOSPC) + err = insert_fte(g, fte); + if (err) { + if (err == -ENOSPC) continue; list_for_each_entry(iter, match_head, list) up_write_ref_node(&iter->g->node); - return (void *)fte; + kfree(fte); + return ERR_PTR(err); } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); @@ -1677,6 +1670,7 @@ search_again_locked: out: list_for_each_entry(iter, match_head, list) up_write_ref_node(&iter->g->node); + kfree(fte); return rule; } @@ -1746,12 +1740,18 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_insert_fte(g, spec->match_value, flow_act); + fte = alloc_fte(spec->match_value, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; } + err = insert_fte(g, fte); + if (err) { + kfree(fte); + goto err_release_fg; + } + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node); rule = add_rule_fg(g, spec->match_value, flow_act, dest, From a369d4ac4dff92129ea0dfa3d66f45a830e29098 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Aug 2017 13:18:40 +0300 Subject: [PATCH 0299/2177] net/mlx5: Add FGs and FTEs memory pool Add memory pool allocation for flow groups and flow table entry. It is useful because these objects are not small and could be allocated/deallocated many times. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 67 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 2 + 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index bc4bbb72fa86..7a136ae2547a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -269,8 +269,9 @@ static void tree_put_node(struct fs_node *node) if (node->del_sw_func) node->del_sw_func(node); up_write_ref_node(parent_node); + } else { + kfree(node); } - kfree(node); node = NULL; } if (!node && parent_node) @@ -389,6 +390,15 @@ static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) return container_of(ns, struct mlx5_flow_root_namespace, ns); } +static inline struct mlx5_flow_steering *get_steering(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev->priv.steering; + return NULL; +} + static inline struct mlx5_core_dev *get_dev(struct fs_node *node) { struct mlx5_flow_root_namespace *root = find_root(node); @@ -424,6 +434,7 @@ static void del_sw_flow_table(struct fs_node *node) rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; + kfree(ft); } static void del_sw_hw_rule(struct fs_node *node) @@ -469,6 +480,7 @@ out: "%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); } + kfree(rule); } static void del_hw_fte(struct fs_node *node) @@ -497,6 +509,7 @@ static void del_hw_fte(struct fs_node *node) static void del_sw_fte(struct fs_node *node) { + struct mlx5_flow_steering *steering = get_steering(node); struct mlx5_flow_group *fg; struct fs_fte *fte; int err; @@ -509,6 +522,7 @@ static void del_sw_fte(struct fs_node *node) rhash_fte); WARN_ON(err); ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); + kmem_cache_free(steering->ftes_cache, fte); } static void 
del_hw_flow_group(struct fs_node *node) @@ -529,6 +543,7 @@ static void del_hw_flow_group(struct fs_node *node) static void del_sw_flow_group(struct fs_node *node) { + struct mlx5_flow_steering *steering = get_steering(node); struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; int err; @@ -544,6 +559,7 @@ static void del_sw_flow_group(struct fs_node *node) &fg->hash, rhash_fg); WARN_ON(err); + kmem_cache_free(steering->fgs_cache, fg); } static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) @@ -571,12 +587,14 @@ err_ida_remove: return ret; } -static struct fs_fte *alloc_fte(u32 *match_value, +static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, + u32 *match_value, struct mlx5_flow_act *flow_act) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct fs_fte *fte; - fte = kzalloc(sizeof(*fte), GFP_KERNEL); + fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL); if (!fte) return ERR_PTR(-ENOMEM); @@ -592,13 +610,15 @@ static struct fs_fte *alloc_fte(u32 *match_value, return fte; } -static void dealloc_flow_group(struct mlx5_flow_group *fg) +static void dealloc_flow_group(struct mlx5_flow_steering *steering, + struct mlx5_flow_group *fg) { rhashtable_destroy(&fg->ftes_hash); - kfree(fg); + kmem_cache_free(steering->fgs_cache, fg); } -static struct mlx5_flow_group *alloc_flow_group(u8 match_criteria_enable, +static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, + u8 match_criteria_enable, void *match_criteria, int start_index, int end_index) @@ -606,13 +626,13 @@ static struct mlx5_flow_group *alloc_flow_group(u8 match_criteria_enable, struct mlx5_flow_group *fg; int ret; - fg = kzalloc(sizeof(*fg), GFP_KERNEL); + fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL); if (!fg) return ERR_PTR(-ENOMEM); ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); if (ret) { - kfree(fg); + kmem_cache_free(steering->fgs_cache, fg); return ERR_PTR(ret); } ida_init(&fg->fte_allocator); @@ -633,10 +653,11 @@ static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *f int end_index, struct list_head *prev) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct mlx5_flow_group *fg; int ret; - fg = alloc_flow_group(match_criteria_enable, match_criteria, + fg = alloc_flow_group(steering, match_criteria_enable, match_criteria, start_index, end_index); if (IS_ERR(fg)) return fg; @@ -646,7 +667,7 @@ static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *f &fg->hash, rhash_fg); if (ret) { - dealloc_flow_group(fg); + dealloc_flow_group(steering, fg); return ERR_PTR(ret); } @@ -1569,6 +1590,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, int dest_num, int ft_version) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; struct match_list *iter; @@ -1577,7 +1599,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(spec->match_value, flow_act); + fte = alloc_fte(ft, spec->match_value, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1611,7 +1633,7 @@ search_again_locked: flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node); tree_put_node(&fte_tmp->node); - kfree(fte); + kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1653,7 +1675,7 @@ search_again_locked: continue; list_for_each_entry(iter, match_head, list) up_write_ref_node(&iter->g->node); - kfree(fte); + kmem_cache_free(steering->ftes_cache, fte); return 
ERR_PTR(err); } @@ -1670,7 +1692,7 @@ search_again_locked: out: list_for_each_entry(iter, match_head, list) up_write_ref_node(&iter->g->node); - kfree(fte); + kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1682,6 +1704,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, int dest_num) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; struct match_list_head match_head; @@ -1740,7 +1763,7 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(spec->match_value, flow_act); + fte = alloc_fte(ft, spec->match_value, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1748,7 +1771,7 @@ search_again_locked: err = insert_fte(g, fte); if (err) { - kfree(fte); + kmem_cache_free(steering->ftes_cache, fte); goto err_release_fg; } @@ -2281,6 +2304,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); mlx5_cleanup_fc_stats(dev); + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); kfree(steering); } @@ -2386,6 +2411,16 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) steering->dev = dev; dev->priv.steering = steering; + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && (MLX5_CAP_GEN(dev, nic_flow_table))) || ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 875b753862b0..ebe184515433 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -65,6 +65,8 @@ enum fs_fte_status { struct mlx5_flow_steering { struct mlx5_core_dev *dev; + struct kmem_cache *fgs_cache; + struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; From e0a8f9de16fce34fc2957eca4c71d3ff2ac286d5 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:42 +0300 Subject: [PATCH 0300/2177] qed: Add iWARP enablement support This patch is the last of the initial iWARP patch series. It adds the possiblity to actually detect iWARP from the device and enable it in the critical locations which basically make iWARP available. It wasn't submitted until now as iWARP hadn't been accepted into the rdma tree. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 6 ++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 10 +++++----- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 5 ++++- drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 1 + 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index af106be8cc08..afd07ad91631 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2069,6 +2069,12 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs); + if (p_hwfn->mcp_info->func_info.protocol == QED_PCI_ETH_RDMA) { + DP_NOTICE(p_hwfn, + "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n"); + p_hwfn->hw_info.personality = QED_PCI_ETH_ROCE; + } + switch (p_hwfn->hw_info.personality) { case QED_PCI_ETH_IWARP: /* Each QP requires one connection */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 376485d99357..8b99c7d26f34 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1691,12 +1691,12 @@ qed_mcp_get_shmem_proto_mfw(struct qed_hwfn *p_hwfn, case FW_MB_PARAM_GET_PF_RDMA_ROCE: *p_proto = QED_PCI_ETH_ROCE; break; - case FW_MB_PARAM_GET_PF_RDMA_BOTH: - DP_NOTICE(p_hwfn, - "Current day drivers don't support RoCE & iWARP. Default to RoCE-only\n"); - *p_proto = QED_PCI_ETH_ROCE; - break; case FW_MB_PARAM_GET_PF_RDMA_IWARP: + *p_proto = QED_PCI_ETH_IWARP; + break; + case FW_MB_PARAM_GET_PF_RDMA_BOTH: + *p_proto = QED_PCI_ETH_RDMA; + break; default: DP_NOTICE(p_hwfn, "MFW answers GET_PF_RDMA_PROTOCOL but param is %08x\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 6fb99518a61f..06715f7403ef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -156,7 +156,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, return rc; p_hwfn->p_rdma_info = p_rdma_info; - p_rdma_info->proto = PROTOCOLID_ROCE; + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + p_rdma_info->proto = PROTOCOLID_IWARP; + else + p_rdma_info->proto = PROTOCOLID_ROCE; num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, NULL); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 46d0c3cb83a5..a1d33f35aad3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -377,6 +377,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, p_ramrod->personality = PERSONALITY_ISCSI; break; case QED_PCI_ETH_ROCE: + case QED_PCI_ETH_IWARP: p_ramrod->personality = PERSONALITY_RDMA_AND_ETH; break; default: From d1abfd0b4ee2b83af88098a0c7105622c3d66e73 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:43 +0300 Subject: [PATCH 0301/2177] qed: Add iWARP out of order support iWARP requires OOO support which is already provided by the ll2 interface (until now was used only for iSCSI offload). The changes mostly include opening a ll2 dedicated connection for OOO and notifiying the FW about the handle id. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 44 +++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 11 +++++- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 7 +++- 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 9d989c96278c..568e9853cc8d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -41,6 +41,7 @@ #include "qed_rdma.h" #include "qed_reg_addr.h" #include "qed_sp.h" +#include "qed_ooo.h" #define QED_IWARP_ORD_DEFAULT 32 #define QED_IWARP_IRD_DEFAULT 32 @@ -119,6 +120,13 @@ static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } +void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, + struct iwarp_init_func_params *p_ramrod) +{ + p_ramrod->ll2_ooo_q_index = RESC_START(p_hwfn, QED_LL2_QUEUE) + + p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle; +} + static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid) { int rc; @@ -1876,6 +1884,16 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; } + if (iwarp_info->ll2_ooo_handle != QED_IWARP_HANDLE_INVAL) { + rc = qed_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_ooo_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate ooo connection\n"); + + qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_ooo_handle); + iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL; + } + qed_llh_remove_mac_filter(p_hwfn, p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr); return rc; @@ -1927,10 +1945,12 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, struct qed_iwarp_info *iwarp_info; struct qed_ll2_acquire_data data; struct qed_ll2_cbs cbs; + u16 n_ooo_bufs; int rc = 0; iwarp_info = &p_hwfn->p_rdma_info->iwarp; iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; + iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL; iwarp_info->max_mtu = params->max_mtu; @@ -1978,6 +1998,29 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, if (rc) goto err; + /* Start OOO connection */ + data.input.conn_type = QED_LL2_TYPE_OOO; + data.input.mtu = params->max_mtu; + + n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) / + iwarp_info->max_mtu; + n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE); + + data.input.rx_num_desc = n_ooo_bufs; + data.input.rx_num_ooo_buffers = n_ooo_bufs; + + data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ + data.input.tx_num_desc = QED_IWARP_LL2_OOO_DEF_TX_SIZE; + data.p_connection_handle = &iwarp_info->ll2_ooo_handle; + + rc = qed_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_ooo_handle); + if (rc) + goto err; + return rc; err: qed_iwarp_ll2_stop(p_hwfn, p_ptt); @@ -2014,6 +2057,7 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP, qed_iwarp_async_event); + qed_ooo_setup(p_hwfn); return qed_iwarp_ll2_start(p_hwfn, params, p_ptt); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 148ef3c33a5d..9e2bfde894df 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -47,7 +47,12 @@ enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state); #define QED_IWARP_LL2_SYN_TX_SIZE (128) #define 
QED_IWARP_LL2_SYN_RX_SIZE (256) #define QED_IWARP_MAX_SYN_PKT_SIZE (128) -#define QED_IWARP_HANDLE_INVAL (0xff) + +#define QED_IWARP_LL2_OOO_DEF_TX_SIZE (256) +#define QED_IWARP_MAX_OOO (16) +#define QED_IWARP_LL2_OOO_MAX_RX_SIZE (16384) + +#define QED_IWARP_HANDLE_INVAL (0xff) struct qed_iwarp_ll2_buff { void *data; @@ -67,6 +72,7 @@ struct qed_iwarp_info { u8 crc_needed; u8 tcp_flags; u8 ll2_syn_handle; + u8 ll2_ooo_handle; u8 peer2peer; enum mpa_negotiation_mode mpa_rev; enum mpa_rtr_type rtr_type; @@ -147,6 +153,9 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn); int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_rdma_start_in_params *params); +void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, + struct iwarp_init_func_params *p_ramrod); + int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 06715f7403ef..4f46f2851780 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -551,10 +551,13 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, if (rc) return rc; - if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { + qed_iwarp_init_fw_ramrod(p_hwfn, + &p_ent->ramrod.iwarp_init_func.iwarp); p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma; - else + } else { p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + } p_params_header = &p_ramrod->params_header; p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn, From 471115ab9804f45cb8e091e426c9c67fe75e41b0 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:44 +0300 Subject: [PATCH 0302/2177] qed: Fix maximum number of CQs for iWARP The maximum number of CQs supported is bound to the number of connections supported, which differs between RoCE and iWARP. This fixes a crash that occurred in iWARP when running 1000 sessions using perftest. Fixes: 67b40dccc45 ("qed: Implement iWARP initialization, teardown and qp operations") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 4f46f2851780..c8c4b3940564 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -209,11 +209,11 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, goto free_pd_map; } - /* Allocate bitmap for cq's. The maximum number of CQs is bounded to - * twice the number of QPs. + /* Allocate bitmap for cq's. The maximum number of CQs is bound to + * the number of connections we support. (num_qps in iWARP or + * num_qps/2 in RoCE). */ - rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, - p_rdma_info->num_qps * 2, "CQ"); + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, num_cons, "CQ"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate cq bitmap, rc = %d\n", rc); @@ -222,10 +222,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, /* Allocate bitmap for toggle bit for cq icids * We toggle the bit every time we create or resize cq for a given icid. - * The maximum number of CQs is bounded to twice the number of QPs. + * Size needs to equal the size of the cq bmap. 
*/ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, - p_rdma_info->num_qps * 2, "Toggle"); + num_cons, "Toggle"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate toogle bits, rc = %d\n", rc); From 1e99c497012cd8647972876f1bd18545bc907aea Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:45 +0300 Subject: [PATCH 0303/2177] qed: iWARP - Add check for errors on a SYN packet A SYN packet which arrives with errors from FW should be dropped. This required adding an additional field to the ll2 rx completion data. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 8 ++++++++ drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + include/linux/qed/qed_ll2_if.h | 1 + 3 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 568e9853cc8d..8fc9c811f6e3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1733,6 +1733,14 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) memset(&cm_info, 0, sizeof(cm_info)); ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle; + + /* Check if packet was received with errors... */ + if (data->err_flags) { + DP_NOTICE(p_hwfn, "Error received on SYN packet: 0x%x\n", + data->err_flags); + goto err; + } + if (GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) && GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index c06ad4f0758e..250afa5486cf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -413,6 +413,7 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_comp_rx_data *data) { data->parse_flags = le16_to_cpu(p_cqe->rx_cqe_fp.parse_flags.flags); + data->err_flags = le16_to_cpu(p_cqe->rx_cqe_fp.err_flags.flags); data->length.packet_length = le16_to_cpu(p_cqe->rx_cqe_fp.packet_length); data->vlan = le16_to_cpu(p_cqe->rx_cqe_fp.vlan); diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index dd7a3b86bb9e..89fa0bbd54f3 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -101,6 +101,7 @@ struct qed_ll2_comp_rx_data { void *cookie; dma_addr_t rx_buf_addr; u16 parse_flags; + u16 err_flags; u16 vlan; bool b_last_packet; u8 connection_handle; From 3bd3b9ed1b602c065aa0b1ba109b9622afa6ff98 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Sun, 24 Sep 2017 17:41:24 +0530 Subject: [PATCH 0304/2177] net: bcm63xx_enet: Use setup_timer and mod_timer Use setup_timer and mod_timer API instead of structure assignments. This is done using Coccinelle and semantic patch used for this as follows: @@ expression x,y,z,a,b; @@ -init_timer (&x); +setup_timer (&x, y, z); +mod_timer (&a, b); -x.function = y; -x.data = z; -x.expires = b; -add_timer(&a); Signed-off-by: Himanshu Jha Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index f8bbbbfca06e..c6221f04a748 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2331,11 +2331,8 @@ static int bcm_enetsw_open(struct net_device *dev) } /* start phy polling timer */ - init_timer(&priv->swphy_poll); - priv->swphy_poll.function = swphy_poll_timer; - priv->swphy_poll.data = (unsigned long)priv; - priv->swphy_poll.expires = jiffies; - add_timer(&priv->swphy_poll); + setup_timer(&priv->swphy_poll, swphy_poll_timer, (unsigned long)priv); + mod_timer(&priv->swphy_poll, jiffies); return 0; out: From 6aaae2b6c4330a46204bca042f1d2f41e8e18dea Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:50 +0200 Subject: [PATCH 0305/2177] bpf: rename bpf_compute_data_end into bpf_compute_data_pointers Just do the rename into bpf_compute_data_pointers() as we'll add one more pointer here to recompute. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/filter.h | 9 ++++++--- kernel/bpf/sockmap.c | 4 ++-- net/bpf/test_run.c | 2 +- net/core/filter.c | 14 +++++++------- net/core/lwt_bpf.c | 2 +- net/sched/act_bpf.c | 4 ++-- net/sched/cls_bpf.c | 4 ++-- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..052bab3d62e7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -496,10 +496,13 @@ struct xdp_buff { void *data_hard_start; }; -/* compute the linear packet data range [data, data_end) which - * will be accessed by cls_bpf, act_bpf and lwt programs +/* Compute the linear packet data range [data, data_end) which + * will be accessed by various program types (cls_bpf, act_bpf, + * lwt, ...). Subsystems allowing direct data access must (!) + * ensure that cb[] area can be written to when BPF program is + * invoked (otherwise cb[] save/restore is necessary). */ -static inline void bpf_compute_data_end(struct sk_buff *skb) +static inline void bpf_compute_data_pointers(struct sk_buff *skb) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6424ce0e4969..a298d6666698 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -102,7 +102,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) skb_orphan(skb); skb->sk = psock->sock; - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); rc = (*prog->bpf_func)(skb, prog->insnsi); skb->sk = NULL; @@ -369,7 +369,7 @@ static int smap_parse_func_strparser(struct strparser *strp, * any socket yet. 
*/ skb->sk = psock->sock; - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); rc = (*prog->bpf_func)(skb, prog->insnsi); skb->sk = NULL; rcu_read_unlock(); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6be41a44d688..df672517b4fd 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (is_l2) __skb_push(skb, ETH_HLEN); if (is_direct_pkt_access) - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); retval = bpf_test_run(prog, skb, repeat, &duration); if (!is_l2) __skb_push(skb, ETH_HLEN); diff --git a/net/core/filter.c b/net/core/filter.c index 82edad58d066..c468e7cfad19 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1402,7 +1402,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, { int err = __bpf_try_make_writable(skb, write_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return err; } @@ -1962,7 +1962,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, ret = skb_vlan_push(skb, vlan_proto, vlan_tci); bpf_pull_mac_rcsum(skb); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -1984,7 +1984,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) ret = skb_vlan_pop(skb); bpf_pull_mac_rcsum(skb); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2178,7 +2178,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, * need to be verified first. */ ret = bpf_skb_proto_xlat(skb, proto); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2303,7 +2303,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : bpf_skb_net_grow(skb, len_diff_abs); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2394,7 +2394,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, skb_gso_reset(skb); } - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2434,7 +2434,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, skb_reset_mac_header(skb); } - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return 0; } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 1307731ddfe4..e7e626fb87bb 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, */ preempt_disable(); rcu_read_lock(); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); rcu_read_unlock(); diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c0c707eb2c96..5ef8ce8c83d4 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -49,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); } rcu_read_unlock(); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 520c5027646a..36671b0fb125 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -99,11 +99,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } else if 
(at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); } From de8f3a83b0a0fddb2cf56e7a718127e9619ea3da Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:51 +0200 Subject: [PATCH 0306/2177] bpf: add meta pointer for direct access This work enables generic transfer of metadata from XDP into skb. The basic idea is that we can make use of the fact that the resulting skb must be linear and already comes with a larger headroom for supporting bpf_xdp_adjust_head(), which mangles xdp->data. Here, we base our work on a similar principle and introduce a small helper bpf_xdp_adjust_meta() for adjusting a new pointer called xdp->data_meta. Thus, the packet has a flexible and programmable room for meta data, followed by the actual packet data. struct xdp_buff is therefore laid out that we first point to data_hard_start, then data_meta directly prepended to data followed by data_end marking the end of packet. bpf_xdp_adjust_head() takes into account whether we have meta data already prepended and if so, memmove()s this along with the given offset provided there's enough room. xdp->data_meta is optional and programs are not required to use it. The rationale is that when we process the packet in XDP (e.g. as DoS filter), we can push further meta data along with it for the XDP_PASS case, and give the guarantee that a clsact ingress BPF program on the same device can pick this up for further post-processing. Since we work with skb there, we can also set skb->mark, skb->priority or other skb meta data out of BPF, thus having this scratch space generic and programmable allows for more flexibility than defining a direct 1:1 transfer of potentially new XDP members into skb (it's also more efficient as we don't need to initialize/handle each of such new members). The facility also works together with GRO aggregation. The scratch space at the head of the packet can be multiple of 4 byte up to 32 byte large. Drivers not yet supporting xdp->data_meta can simply be set up with xdp->data_meta as xdp->data + 1 as bpf_xdp_adjust_meta() will detect this and bail out, such that the subsequent match against xdp->data for later access is guaranteed to fail. The verifier treats xdp->data_meta/xdp->data the same way as we treat xdp->data/xdp->data_end pointer comparisons. The requirement for doing the compare against xdp->data is that it hasn't been modified from it's original address we got from ctx access. It may have a range marking already from prior successful xdp->data/xdp->data_end pointer comparisons though. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 1 + .../net/ethernet/cavium/thunder/nicvf_main.c | 1 + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 1 + .../net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + .../ethernet/netronome/nfp/nfp_net_common.c | 1 + drivers/net/ethernet/qlogic/qede/qede_fp.c | 1 + drivers/net/tun.c | 1 + drivers/net/virtio_net.c | 2 + include/linux/bpf.h | 1 + include/linux/filter.h | 21 +++- include/linux/skbuff.h | 68 ++++++++++- include/uapi/linux/bpf.h | 13 +- kernel/bpf/verifier.c | 114 +++++++++++++----- net/bpf/test_run.c | 1 + net/core/dev.c | 33 ++++- net/core/filter.c | 77 +++++++++++- net/core/skbuff.c | 2 + 19 files changed, 298 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index d8f0c837b72c..06ce63c00821 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -94,6 +94,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, xdp.data_hard_start = *data_ptr - offset; xdp.data = *data_ptr; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = *data_ptr + *len; orig_data = xdp.data; mapping = rx_buf->mapping - bp->rx_dma_offset; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 49b80da51ba7..d68478afccbf 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -523,6 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, xdp.data_hard_start = page_address(page); xdp.data = (void *)cpu_addr; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; orig_data = xdp.data; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1519dfb851d0..f426762bd83a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2107,6 +2107,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_hard_start = xdp.data - i40e_rx_offset(rx_ring); xdp.data_end = xdp.data + size; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d962368d08d0..04bb03bda1cd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2326,6 +2326,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_hard_start = xdp.data - ixgbe_rx_offset(rx_ring); xdp.data_end = xdp.data + size; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b97a55c827eb..8f9cb8abc497 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -762,6 +762,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud xdp.data_hard_start = va - frags[0].page_offset; xdp.data = va; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + length; orig_data = xdp.data; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f1dd638384d3..30b3f3fbd719 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -794,6 +794,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, return false; xdp.data = va + *rx_headroom; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 1c0187f0af51..e3a38be3600a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1583,6 +1583,7 @@ static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start, xdp.data_hard_start = hard_start; xdp.data = data + *off; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = data + *off + *len; orig_data = xdp.data; diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 6fc854b120b0..48ec4c56cddf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1004,6 +1004,7 @@ static bool qede_rx_xdp(struct qede_dev *edev, xdp.data_hard_start = page_address(bd->data); xdp.data = xdp.data_hard_start + *data_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; /* Queues always have a full reset currently, so for the time diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2c36f6ebad79..a6e0bffe3d29 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1468,6 +1468,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, xdp.data_hard_start = buf; xdp.data = buf + pad; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index dd14a4547932..fc059f193e7d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -554,6 +554,7 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; xdp.data = xdp.data_hard_start + xdp_headroom; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -686,6 +687,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, data = page_address(xdp_page) + offset; xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; xdp.data = data + vi->hdr_len; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + (len - vi->hdr_len); act = bpf_prog_run_xdp(xdp_prog, &xdp); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8390859e79e7..2b672c50f160 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -137,6 +137,7 @@ enum bpf_reg_type { PTR_TO_MAP_VALUE, /* reg points to map element value */ PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ PTR_TO_STACK, /* reg == frame_pointer + offset */ + PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 052bab3d62e7..911d454af107 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -487,12 +487,14 @@ struct sk_filter { struct bpf_skb_data_end { struct qdisc_skb_cb qdisc_cb; + void *data_meta; void *data_end; }; struct xdp_buff { void *data; void *data_end; + void *data_meta; void 
*data_hard_start; }; @@ -507,7 +509,8 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); - cb->data_end = skb->data + skb_headlen(skb); + cb->data_meta = skb->data - skb_metadata_len(skb); + cb->data_end = skb->data + skb_headlen(skb); } static inline u8 *bpf_skb_cb(struct sk_buff *skb) @@ -728,8 +731,22 @@ int xdp_do_redirect(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush_map(void); +/* Drivers not supporting XDP metadata can use this helper, which + * rejects any room expansion for metadata as a result. + */ +static __always_inline void +xdp_set_data_meta_invalid(struct xdp_buff *xdp) +{ + xdp->data_meta = xdp->data + 1; +} + +static __always_inline bool +xdp_data_meta_unsupported(const struct xdp_buff *xdp) +{ + return unlikely(xdp->data_meta > xdp->data); +} + void bpf_warn_invalid_xdp_action(u32 act); -void bpf_warn_invalid_xdp_redirect(u32 ifindex); struct sock *do_sk_redirect_map(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f9db5539a6fb..19e64bfb1a66 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -489,8 +489,9 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, * the end of the header data, ie. at skb->end. */ struct skb_shared_info { - unsigned short _unused; - unsigned char nr_frags; + __u8 __unused; + __u8 meta_len; + __u8 nr_frags; __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ @@ -3400,6 +3401,69 @@ static inline ktime_t net_invalid_timestamp(void) return 0; } +static inline u8 skb_metadata_len(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->meta_len; +} + +static inline void *skb_metadata_end(const struct sk_buff *skb) +{ + return skb_mac_header(skb); +} + +static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, + const struct sk_buff *skb_b, + u8 meta_len) +{ + const void *a = skb_metadata_end(skb_a); + const void *b = skb_metadata_end(skb_b); + /* Using more efficient varaiant than plain call to memcmp(). */ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + u64 diffs = 0; + + switch (meta_len) { +#define __it(x, op) (x -= sizeof(u##op)) +#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) + case 32: diffs |= __it_diff(a, b, 64); + case 24: diffs |= __it_diff(a, b, 64); + case 16: diffs |= __it_diff(a, b, 64); + case 8: diffs |= __it_diff(a, b, 64); + break; + case 28: diffs |= __it_diff(a, b, 64); + case 20: diffs |= __it_diff(a, b, 64); + case 12: diffs |= __it_diff(a, b, 64); + case 4: diffs |= __it_diff(a, b, 32); + break; + } + return diffs; +#else + return memcmp(a - meta_len, b - meta_len, meta_len); +#endif +} + +static inline bool skb_metadata_differs(const struct sk_buff *skb_a, + const struct sk_buff *skb_b) +{ + u8 len_a = skb_metadata_len(skb_a); + u8 len_b = skb_metadata_len(skb_b); + + if (!(len_a | len_b)) + return false; + + return len_a != len_b ? 
+ true : __skb_metadata_differs(skb_a, skb_b, len_a); +} + +static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len) +{ + skb_shinfo(skb)->meta_len = meta_len; +} + +static inline void skb_metadata_clear(struct sk_buff *skb) +{ + skb_metadata_set(skb, 0); +} + struct sk_buff *skb_clone_sk(struct sk_buff *skb); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ab5c402f98..e43491ac4823 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -582,6 +582,12 @@ union bpf_attr { * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem + * + * int bpf_xdp_adjust_meta(xdp_md, delta) + * Adjust the xdp_md.data_meta by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data_meta + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -638,6 +644,7 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ + FN(xdp_adjust_meta), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -715,7 +722,7 @@ struct __sk_buff { __u32 data_end; __u32 napi_id; - /* accessed by BPF_PROG_TYPE_sk_skb types */ + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ __u32 family; __u32 remote_ip4; /* Stored in network byte order */ __u32 local_ip4; /* Stored in network byte order */ @@ -723,6 +730,9 @@ struct __sk_buff { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; }; struct bpf_tunnel_key { @@ -783,6 +793,7 @@ enum xdp_action { struct xdp_md { __u32 data; __u32 data_end; + __u32 data_meta; }; enum sk_action { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b914fbe1383e..f849eca36052 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -177,6 +177,12 @@ static __printf(1, 2) void verbose(const char *fmt, ...) va_end(args); } +static bool type_is_pkt_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_PACKET || + type == PTR_TO_PACKET_META; +} + /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", @@ -187,6 +193,7 @@ static const char * const reg_type_str[] = { [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", [PTR_TO_STACK] = "fp", [PTR_TO_PACKET] = "pkt", + [PTR_TO_PACKET_META] = "pkt_meta", [PTR_TO_PACKET_END] = "pkt_end", }; @@ -226,7 +233,7 @@ static void print_verifier_state(struct bpf_verifier_state *state) verbose("(id=%d", reg->id); if (t != SCALAR_VALUE) verbose(",off=%d", reg->off); - if (t == PTR_TO_PACKET) + if (type_is_pkt_pointer(t)) verbose(",r=%d", reg->range); else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || @@ -519,6 +526,31 @@ static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno) __mark_reg_known_zero(regs + regno); } +static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) +{ + return type_is_pkt_pointer(reg->type); +} + +static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) +{ + return reg_is_pkt_pointer(reg) || + reg->type == PTR_TO_PACKET_END; +} + +/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. 
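To make the new xdp_md ctx field and the bpf_xdp_adjust_meta() helper concrete, here is a minimal producer-side sketch in the spirit of the selftest added later in this series. It assumes the usual selftest-style declarations (SEC(), the bpf_xdp_adjust_meta binding from bpf_helpers.h, <linux/bpf.h>); the section name and the 0xcafe scratch value are illustrative only, not part of the patches:

    SEC("xdp")
    int xdp_store_meta(struct xdp_md *ctx)
    {
            void *data, *data_meta;
            __u32 *val;

            /* Grow the metadata area by 4 bytes in front of xdp->data.
             * The reserved room must be a multiple of 4 and at most
             * 32 bytes, otherwise the helper rejects the request.
             */
            if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(__u32)) < 0)
                    return XDP_PASS;

            data_meta = (void *)(long)ctx->data_meta;
            data      = (void *)(long)ctx->data;
            val       = data_meta;

            /* The verifier only permits the access after data_meta has
             * been bounds-checked against the unmodified xdp->data.
             */
            if ((void *)(val + 1) > data)
                    return XDP_PASS;

            *val = 0xcafe;  /* scratch word for a clsact program to read */
            return XDP_PASS;
    }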
*/ +static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, + enum bpf_reg_type which) +{ + /* The register can already have a range from prior markings. + * This is fine as long as it hasn't been advanced from its + * origin. + */ + return reg->type == which && + reg->id == 0 && + reg->off == 0 && + tnum_equals_const(reg->var_off, 0); +} + /* Attempts to improve min/max values based on var_off information */ static void __update_reg_bounds(struct bpf_reg_state *reg) { @@ -702,6 +734,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_STACK: case PTR_TO_CTX: case PTR_TO_PACKET: + case PTR_TO_PACKET_META: case PTR_TO_PACKET_END: case CONST_PTR_TO_MAP: return true; @@ -1047,7 +1080,10 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, switch (reg->type) { case PTR_TO_PACKET: - /* special case, because of NET_IP_ALIGN */ + case PTR_TO_PACKET_META: + /* Special case, because of NET_IP_ALIGN. Given metadata sits + * right in front, treat it the very same way. + */ return check_pkt_ptr_alignment(reg, off, size, strict); case PTR_TO_MAP_VALUE: pointer_desc = "value "; @@ -1124,8 +1160,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a - * PTR_TO_PACKET[_END]. In the latter case, we know - * the offset is zero. + * PTR_TO_PACKET[_META,_END]. In the latter + * case, we know the offset is zero. */ if (reg_type == SCALAR_VALUE) mark_reg_unknown(state->regs, value_regno); @@ -1170,7 +1206,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else { err = check_stack_read(state, off, size, value_regno); } - } else if (reg->type == PTR_TO_PACKET) { + } else if (reg_is_pkt_pointer(reg)) { if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { verbose("cannot write into packet\n"); return -EACCES; @@ -1310,6 +1346,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, switch (reg->type) { case PTR_TO_PACKET: + case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size); case PTR_TO_MAP_VALUE: return check_map_access(env, regno, reg->off, access_size); @@ -1342,7 +1379,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, return 0; } - if (type == PTR_TO_PACKET && + if (type_is_pkt_pointer(type) && !may_access_direct_pkt_data(env, meta, BPF_READ)) { verbose("helper access to the packet is not allowed\n"); return -EACCES; @@ -1351,7 +1388,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; - if (type != PTR_TO_PACKET && type != expected_type) + if (!type_is_pkt_pointer(type) && + type != expected_type) goto err_type; } else if (arg_type == ARG_CONST_SIZE || arg_type == ARG_CONST_SIZE_OR_ZERO) { @@ -1375,7 +1413,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, */ if (register_is_null(*reg)) /* final test in check_stack_boundary() */; - else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE && + else if (!type_is_pkt_pointer(type) && + type != PTR_TO_MAP_VALUE && type != expected_type) goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; @@ -1401,7 +1440,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - if 
(type == PTR_TO_PACKET) + if (type_is_pkt_pointer(type)) err = check_packet_access(env, regno, reg->off, meta->map_ptr->key_size); else @@ -1417,7 +1456,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - if (type == PTR_TO_PACKET) + if (type_is_pkt_pointer(type)) err = check_packet_access(env, regno, reg->off, meta->map_ptr->value_size); else @@ -1590,8 +1629,8 @@ static int check_raw_mode(const struct bpf_func_proto *fn) return count > 1 ? -EINVAL : 0; } -/* Packet data might have moved, any old PTR_TO_PACKET[_END] are now invalid, - * so turn them into unknown SCALAR_VALUE. +/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] + * are now invalid, so turn them into unknown SCALAR_VALUE. */ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { @@ -1600,18 +1639,15 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) int i; for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].type == PTR_TO_PACKET || - regs[i].type == PTR_TO_PACKET_END) + if (reg_is_pkt_pointer_any(®s[i])) mark_reg_unknown(regs, i); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; - if (reg->type != PTR_TO_PACKET && - reg->type != PTR_TO_PACKET_END) - continue; - __mark_reg_unknown(reg); + if (reg_is_pkt_pointer_any(reg)) + __mark_reg_unknown(reg); } } @@ -1871,7 +1907,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; - if (ptr_reg->type == PTR_TO_PACKET) { + if (reg_is_pkt_pointer(ptr_reg)) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ dst_reg->range = 0; @@ -1931,7 +1967,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; - if (ptr_reg->type == PTR_TO_PACKET) { + if (reg_is_pkt_pointer(ptr_reg)) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ if (smin_val < 0) @@ -2421,7 +2457,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } static void find_good_pkt_pointers(struct bpf_verifier_state *state, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + enum bpf_reg_type type) { struct bpf_reg_state *regs = state->regs, *reg; int i; @@ -2483,7 +2520,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. 
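The skb-side counterpart that these verifier changes are meant to accept uses the same compare-before-access pattern on __sk_buff's new data_meta field. A rough sketch of a clsact ingress program (again with selftest-style declarations plus <linux/pkt_cls.h>; it reads back the hypothetical 0xcafe word stored by the XDP sketch above):

    SEC("classifier")
    int cls_read_meta(struct __sk_buff *skb)
    {
            void *data_meta = (void *)(long)skb->data_meta;
            void *data      = (void *)(long)skb->data;
            __u32 *val      = data_meta;

            /* data_meta must be checked against data before it may be
             * dereferenced; skb->data_meta == skb->data means no
             * metadata was set by XDP.
             */
            if ((void *)(val + 1) > data)
                    return TC_ACT_OK;

            return *val == 0xcafe ? TC_ACT_OK : TC_ACT_SHOT;
    }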
*/ for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) + if (regs[i].type == type && regs[i].id == dst_reg->id) /* keep the maximum range already checked */ regs[i].range = max_t(u16, regs[i].range, dst_reg->off); @@ -2491,7 +2528,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; - if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) + if (reg->type == type && reg->id == dst_reg->id) reg->range = max_t(u16, reg->range, dst_reg->off); } } @@ -2856,19 +2893,39 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(this_branch, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg, PTR_TO_PACKET); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(other_branch, dst_reg); + find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(other_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], + PTR_TO_PACKET); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(this_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], + PTR_TO_PACKET); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && + dst_reg->type == PTR_TO_PACKET_META && + reg_is_init_pkt_pointer(®s[insn->src_reg], PTR_TO_PACKET)) { + find_good_pkt_pointers(this_branch, dst_reg, PTR_TO_PACKET_META); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && + dst_reg->type == PTR_TO_PACKET_META && + reg_is_init_pkt_pointer(®s[insn->src_reg], PTR_TO_PACKET)) { + find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET_META); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && + regs[insn->src_reg].type == PTR_TO_PACKET_META) { + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], + PTR_TO_PACKET_META); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && + reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && + regs[insn->src_reg].type == PTR_TO_PACKET_META) { + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], + PTR_TO_PACKET_META); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; @@ -3298,8 +3355,9 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; /* Check our ids match any regs they're supposed to */ return check_ids(rold->id, rcur->id, idmap); + case PTR_TO_PACKET_META: case PTR_TO_PACKET: - if (rcur->type != PTR_TO_PACKET) + if (rcur->type != rold->type) return false; /* We must have at least as much range as the old ptr * did, so that any accesses which were safe before are diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index df672517b4fd..a86e6687026e 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, 
const union bpf_attr *kattr, xdp.data_hard_start = data; xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN; + xdp.data_meta = xdp.data; xdp.data_end = xdp.data + size; retval = bpf_test_run(prog, &xdp, repeat, &duration); diff --git a/net/core/dev.c b/net/core/dev.c index 97abddd9039a..e350c768d4b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3864,8 +3864,8 @@ drop: static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { + u32 metalen, act = XDP_DROP; struct xdp_buff xdp; - u32 act = XDP_DROP; void *orig_data; int hlen, off; u32 mac_len; @@ -3876,8 +3876,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, if (skb_cloned(skb)) return XDP_PASS; - if (skb_linearize(skb)) - goto do_drop; + /* XDP packets must be linear and must have sufficient headroom + * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also + * native XDP provides, thus we need to do it here as well. + */ + if (skb_is_nonlinear(skb) || + skb_headroom(skb) < XDP_PACKET_HEADROOM) { + int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); + int troom = skb->tail + skb->data_len - skb->end; + + /* In case we have to go down the path and also linearize, + * then lets do the pskb_expand_head() work just once here. + */ + if (pskb_expand_head(skb, + hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, + troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) + goto do_drop; + if (troom > 0 && __skb_linearize(skb)) + goto do_drop; + } /* The XDP program wants to see the packet starting at the MAC * header. @@ -3885,6 +3902,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, mac_len = skb->data - skb_mac_header(skb); hlen = skb_headlen(skb) + mac_len; xdp.data = skb->data - mac_len; + xdp.data_meta = xdp.data; xdp.data_end = xdp.data + hlen; xdp.data_hard_start = skb->data - skb_headroom(skb); orig_data = xdp.data; @@ -3902,10 +3920,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: __skb_push(skb, mac_len); - /* fall through */ - case XDP_PASS: break; - + case XDP_PASS: + metalen = xdp.data - xdp.data_meta; + if (metalen) + skb_metadata_set(skb, metalen); + break; default: bpf_warn_invalid_xdp_action(act); /* fall through */ @@ -4695,6 +4715,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; diffs |= skb_metadata_dst_cmp(p, skb); + diffs |= skb_metadata_differs(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), skb_mac_header(skb)); diff --git a/net/core/filter.c b/net/core/filter.c index c468e7cfad19..9b6e7e84aafd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2447,14 +2447,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; +static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) +{ + return xdp_data_meta_unsupported(xdp) ? 
0 : + xdp->data - xdp->data_meta; +} + BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) { + unsigned long metalen = xdp_get_metalen(xdp); + void *data_start = xdp->data_hard_start + metalen; void *data = xdp->data + offset; - if (unlikely(data < xdp->data_hard_start || + if (unlikely(data < data_start || data > xdp->data_end - ETH_HLEN)) return -EINVAL; + if (metalen) + memmove(xdp->data_meta + offset, + xdp->data_meta, metalen); + xdp->data_meta += offset; xdp->data = data; return 0; @@ -2468,6 +2480,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) +{ + void *meta = xdp->data_meta + offset; + unsigned long metalen = xdp->data - meta; + + if (xdp_data_meta_unsupported(xdp)) + return -ENOTSUPP; + if (unlikely(meta < xdp->data_hard_start || + meta > xdp->data)) + return -EINVAL; + if (unlikely((metalen & (sizeof(__u32) - 1)) || + (metalen > 32))) + return -EACCES; + + xdp->data_meta = meta; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { + .func = bpf_xdp_adjust_meta, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static int __bpf_tx_xdp(struct net_device *dev, struct bpf_map *map, struct xdp_buff *xdp, @@ -2692,7 +2731,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || - func == bpf_xdp_adjust_head) + func == bpf_xdp_adjust_head || + func == bpf_xdp_adjust_meta) return true; return false; @@ -3288,6 +3328,8 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; + case BPF_FUNC_xdp_adjust_meta: + return &bpf_xdp_adjust_meta_proto; case BPF_FUNC_redirect: return &bpf_xdp_redirect_proto; case BPF_FUNC_redirect_map: @@ -3418,6 +3460,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): if (size != size_default) return false; @@ -3444,6 +3487,7 @@ static bool sk_filter_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; @@ -3468,6 +3512,7 @@ static bool lwt_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, data_meta): return false; } @@ -3586,6 +3631,9 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; + case bpf_ctx_range(struct __sk_buff, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3619,6 +3667,9 @@ static bool xdp_is_valid_access(int off, int size, case offsetof(struct xdp_md, data): info->reg_type = PTR_TO_PACKET; break; + case offsetof(struct xdp_md, 
data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case offsetof(struct xdp_md, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3677,6 +3728,12 @@ static bool sk_skb_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + return false; + } + if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): @@ -3689,8 +3746,6 @@ static bool sk_skb_is_valid_access(int off, int size, } switch (off) { - case bpf_ctx_range(struct __sk_buff, tc_classid): - return false; case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; @@ -3847,6 +3902,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, offsetof(struct sk_buff, data)); break; + case offsetof(struct __sk_buff, data_meta): + off = si->off; + off -= offsetof(struct __sk_buff, data_meta); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct bpf_skb_data_end, data_meta); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + break; + case offsetof(struct __sk_buff, data_end): off = si->off; off -= offsetof(struct __sk_buff, data_end); @@ -4095,6 +4159,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data)); break; + case offsetof(struct xdp_md, data_meta): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta), + si->dst_reg, si->src_reg, + offsetof(struct xdp_buff, data_meta)); + break; case offsetof(struct xdp_md, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), si->dst_reg, si->src_reg, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 000ce735fa8d..d98c2e3ce2bf 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1509,6 +1509,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); + skb_metadata_clear(skb); + /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). From ac29991ba137cc0e3b0f647fb41e79300230f15c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:52 +0200 Subject: [PATCH 0307/2177] bpf: update bpf.h uapi header for tools Looks like a couple of updates missed to get carried into tools/include/uapi/, so copy the bpf.h header as usual to pull in latest updates. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- tools/include/uapi/linux/bpf.h | 45 ++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 461811e57140..e43491ac4823 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -143,12 +143,6 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -enum bpf_sockmap_flags { - BPF_SOCKMAP_UNSPEC, - BPF_SOCKMAP_STRPARSER, - __MAX_BPF_SOCKMAP_FLAG -}; - /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command * to the given target_fd cgroup the descendent cgroup will be able to * override effective bpf program that was inherited from this cgroup @@ -368,9 +362,20 @@ union bpf_attr { * int bpf_redirect(ifindex, flags) * redirect to another netdev * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT + * @flags: + * cls_bpf: + * bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * xdp_bpf: + * all bits - reserved + * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error + * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error + * int bpf_redirect_map(map, key, flags) + * redirect to endpoint in map + * @map: pointer to dev map + * @key: index in map to lookup + * @flags: -- + * Return: XDP_REDIRECT on success or XDP_ABORT on error * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid @@ -577,6 +582,12 @@ union bpf_attr { * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem + * + * int bpf_xdp_adjust_meta(xdp_md, delta) + * Adjust the xdp_md.data_meta by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data_meta + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -632,7 +643,8 @@ union bpf_attr { FN(skb_adjust_room), \ FN(redirect_map), \ FN(sk_redirect_map), \ - FN(sock_map_update), + FN(sock_map_update), \ + FN(xdp_adjust_meta), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -710,7 +722,7 @@ struct __sk_buff { __u32 data_end; __u32 napi_id; - /* accessed by BPF_PROG_TYPE_sk_skb types */ + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ __u32 family; __u32 remote_ip4; /* Stored in network byte order */ __u32 local_ip4; /* Stored in network byte order */ @@ -718,6 +730,9 @@ struct __sk_buff { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; }; struct bpf_tunnel_key { @@ -753,20 +768,23 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other - * return codes are reserved for future use. Unknown return codes will result - * in packet drop. + * return codes are reserved for future use. Unknown return codes will + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). 
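Beyond the uapi, the driver-side contract these patches establish is small: point xdp.data_meta at xdp.data before running the program (or mark it invalid when metadata is unsupported), and on XDP_PASS propagate whatever room the program reserved into the skb. A rough sketch with placeholder names (rx_run_xdp and its arguments are illustrative, not a real driver function; handling of data/head movement from bpf_xdp_adjust_head() is omitted):

    /* Placeholder names; a sketch of the pattern, not a real driver. */
    static u32 rx_run_xdp(struct bpf_prog *prog, struct sk_buff *skb,
                          void *hard_start, void *data, unsigned int len)
    {
            struct xdp_buff xdp;
            u32 metalen, act;

            xdp.data_hard_start = hard_start;
            xdp.data = data;
            xdp.data_meta = xdp.data;          /* metadata supported */
            /* xdp_set_data_meta_invalid(&xdp);   ...when not supported */
            xdp.data_end = xdp.data + len;

            act = bpf_prog_run_xdp(prog, &xdp);
            if (act == XDP_PASS) {
                    metalen = xdp.data - xdp.data_meta;
                    if (metalen)
                            skb_metadata_set(skb, metalen);
            }
            return act;
    }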
*/ enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, XDP_TX, + XDP_REDIRECT, }; /* user accessible metadata for XDP packet hook @@ -775,6 +793,7 @@ enum xdp_action { struct xdp_md { __u32 data; __u32 data_end; + __u32 data_meta; }; enum sk_action { From 22c8852624fc90a90709d1237625ca57999c5092 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:53 +0200 Subject: [PATCH 0308/2177] bpf: improve selftests and add tests for meta pointer Add various test_verifier selftests, and a simple xdp/tc functional test that is being attached to veths. Also let new versions of clang use the recently added -mcpu=probe support [1] for the BPF target, so that it can probe the underlying kernel for BPF insn set extensions. We could also just set this options always, where older versions just ignore it and give a note to the user that the -mcpu value is not supported, but given emitting the note cannot be turned off from clang side lets not confuse users running selftests with it, thus fallback to the default generic one when we see that clang doesn't support it. Also allow CPU option to be overridden in the Makefile from command line. [1] https://github.com/llvm-mirror/llvm/commit/d7276a40d87b89aed89978dec6457a5b8b3a0db5 Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/Makefile | 21 +- tools/testing/selftests/bpf/bpf_helpers.h | 2 + tools/testing/selftests/bpf/test_verifier.c | 247 +++++++++++++++++++ tools/testing/selftests/bpf/test_xdp_meta.c | 53 ++++ tools/testing/selftests/bpf/test_xdp_meta.sh | 51 ++++ 5 files changed, 370 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_xdp_meta.c create mode 100755 tools/testing/selftests/bpf/test_xdp_meta.sh diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f4b23d697448..924af8d79bde 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -15,9 +15,10 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ - test_pkt_md_access.o test_xdp_redirect.o sockmap_parse_prog.o sockmap_verdict_prog.o + test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ + sockmap_verdict_prog.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh +TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh include ../lib.mk @@ -34,8 +35,20 @@ $(BPFOBJ): force $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ CLANG ?= clang +LLC ?= llc + +PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) + +# Let newer LLVM versions transparently probe the kernel for availability +# of full BPF instruction set. +ifeq ($(PROBE),) + CPU ?= probe +else + CPU ?= generic +endif %.o: %.c $(CLANG) -I. 
-I./include/uapi -I../../../include/uapi \ - -Wno-compare-distinct-pointer-types \ - -O2 -target bpf -c $< -o $@ + -Wno-compare-distinct-pointer-types \ + -O2 -target bpf -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 4875395b0b52..a56053db26f5 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -62,6 +62,8 @@ static unsigned long long (*bpf_get_prandom_u32)(void) = (void *) BPF_FUNC_get_prandom_u32; static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_head; +static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = + (void *) BPF_FUNC_xdp_adjust_meta; static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 26f3250bdcd2..a0426147523d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6645,6 +6645,253 @@ static struct bpf_test tests[] = { .errstr = "BPF_END uses reserved fields", .result = REJECT, }, + { + "meta access, test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet, off=-8", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test5", + 
.insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_meta), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R3 !read_ok", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test10", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_IMM(BPF_REG_5, 42), + BPF_MOV64_IMM(BPF_REG_6, 24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + 
BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test11", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_IMM(BPF_REG_5, 42), + BPF_MOV64_IMM(BPF_REG_6, 24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test12", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, }; static int probe_filter_length(const struct bpf_insn *fp) diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/test_xdp_meta.c new file mode 100644 index 000000000000..8d0182650653 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_meta.c @@ -0,0 +1,53 @@ +#include +#include +#include + +#include "bpf_helpers.h" + +#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) +#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem + +SEC("t") +int ing_cls(struct __sk_buff *ctx) +{ + __u8 *data, *data_meta, *data_end; + __u32 diff = 0; + + data_meta = ctx_ptr(ctx, data_meta); + data_end = ctx_ptr(ctx, data_end); + data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || + data_meta + round_up(ETH_ALEN, 4) > data) + return TC_ACT_SHOT; + + diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0]; + diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2]; + + return diff ? 
TC_ACT_SHOT : TC_ACT_OK; +} + +SEC("x") +int ing_xdp(struct xdp_md *ctx) +{ + __u8 *data, *data_meta, *data_end; + int ret; + + ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4)); + if (ret < 0) + return XDP_DROP; + + data_meta = ctx_ptr(ctx, data_meta); + data_end = ctx_ptr(ctx, data_end); + data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || + data_meta + round_up(ETH_ALEN, 4) > data) + return XDP_DROP; + + __builtin_memcpy(data_meta, data, ETH_ALEN); + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh new file mode 100755 index 000000000000..307aa856cee3 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +cleanup() +{ + if [ "$?" = "0" ]; then + echo "selftests: test_xdp_meta [PASS]"; + else + echo "selftests: test_xdp_meta [FAILED]"; + fi + + set +e + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +} + +ip link set dev lo xdp off 2>/dev/null > /dev/null +if [ $? -ne 0 ];then + echo "selftests: [SKIP] Could not run test without the ip xdp support" + exit 0 +fi +set -e + +ip netns add ns1 +ip netns add ns2 + +trap cleanup 0 2 3 6 9 + +ip link add veth1 type veth peer name veth2 + +ip link set veth1 netns ns1 +ip link set veth2 netns ns2 + +ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1 +ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2 + +ip netns exec ns1 tc qdisc add dev veth1 clsact +ip netns exec ns2 tc qdisc add dev veth2 clsact + +ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t + +ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x +ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x + +ip netns exec ns1 ip link set dev veth1 up +ip netns exec ns2 ip link set dev veth2 up + +ip netns exec ns1 ping -c 1 10.1.1.22 +ip netns exec ns2 ping -c 1 10.1.1.11 + +exit 0 From 65d88fd0baaa5c9def9383ac696097911d4ceb73 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:54 +0200 Subject: [PATCH 0309/2177] bpf, nfp: add meta data support Implement support for transferring XDP meta data into skb for nfp driver; before calling into the program, xdp.data_meta points to xdp.data, where on program return with pass verdict, we call into skb_metadata_set(). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../ethernet/netronome/nfp/nfp_net_common.c | 40 +++++++------------ 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index e3a38be3600a..d2f73feb8497 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1574,27 +1574,6 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, return true; } -static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start, - unsigned int *off, unsigned int *len) -{ - struct xdp_buff xdp; - void *orig_data; - int ret; - - xdp.data_hard_start = hard_start; - xdp.data = data + *off; - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = data + *off + *len; - - orig_data = xdp.data; - ret = bpf_prog_run_xdp(prog, &xdp); - - *len -= xdp.data - orig_data; - *off += xdp.data - orig_data; - - return ret; -} - /** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from @@ -1630,6 +1609,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) struct nfp_meta_parsed meta; struct net_device *netdev; dma_addr_t new_dma_addr; + u32 meta_len_xdp = 0; void *new_frag; idx = D_IDX(rx_ring, rx_ring->rd_p); @@ -1708,16 +1688,24 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF && dp->bpf_offload_xdp) && !meta.portid) { + void *orig_data = rxbuf->frag + pkt_off; unsigned int dma_off; - void *hard_start; + struct xdp_buff xdp; int act; - hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; + xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; + xdp.data = orig_data; + xdp.data_meta = orig_data; + xdp.data_end = orig_data + pkt_len; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + pkt_len -= xdp.data - orig_data; + pkt_off += xdp.data - orig_data; - act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start, - &pkt_off, &pkt_len); switch (act) { case XDP_PASS: + meta_len_xdp = xdp.data - xdp.data_meta; break; case XDP_TX: dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; @@ -1785,6 +1773,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(rxd->rxd.vlan)); + if (meta_len_xdp) + skb_metadata_set(skb, meta_len_xdp); napi_gro_receive(&rx_ring->r_vec->napi, skb); } From 366a88fe2f40d6772985ec78cdd34df7f109bb88 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:55 +0200 Subject: [PATCH 0310/2177] bpf, ixgbe: add meta data support Implement support for transferring XDP meta data into skb for ixgbe driver; before calling into the program, xdp.data_meta points to xdp.data, where on program return with pass verdict, we call into skb_metadata_set(). We implement this for the default ixgbe_build_skb() variant. For the ixgbe_construct_skb() that is used when legacy-rx buffer mananagement mode is turned on via ethtool, I found that XDP gets 0 headroom, so neither xdp_adjust_head() nor xdp_adjust_meta() can be used with this. Just add a comment with explanation for this operating mode. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 04bb03bda1cd..3942c6208745 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2133,6 +2133,21 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, #if L1_CACHE_BYTES < 128 prefetch(xdp->data + L1_CACHE_BYTES); #endif + /* Note, we get here by enabling legacy-rx via: + * + * ethtool --set-priv-flags legacy-rx on + * + * In this mode, we currently get 0 extra XDP headroom as + * opposed to having legacy-rx off, where we process XDP + * packets going to stack via ixgbe_build_skb(). The latter + * provides us currently with 192 bytes of headroom. + * + * For ixgbe_construct_skb() mode it means that the + * xdp->data_meta will always point to xdp->data, since + * the helper cannot expand the head. Should this ever + * change in future for legacy-rx mode on, then lets also + * add xdp->data_meta handling here. + */ /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE); @@ -2165,6 +2180,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, struct xdp_buff *xdp, union ixgbe_adv_rx_desc *rx_desc) { + unsigned int metasize = xdp->data - xdp->data_meta; #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; #else @@ -2174,10 +2190,14 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, #endif struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(xdp->data); + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points extactly as xdp->data, otherwise we + * likely have a consumer accessing first few bytes of meta + * data, and then actual data. + */ + prefetch(xdp->data_meta); #if L1_CACHE_BYTES < 128 - prefetch(xdp->data + L1_CACHE_BYTES); + prefetch(xdp->data_meta + L1_CACHE_BYTES); #endif /* build an skb to around the page buffer */ @@ -2188,6 +2208,8 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, /* update pointers within the skb to store the data */ skb_reserve(skb, xdp->data - xdp->data_hard_start); __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); /* record DMA address if this is the start of a chain of buffers */ if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) @@ -2326,7 +2348,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; - xdp_set_data_meta_invalid(&xdp); + xdp.data_meta = xdp.data; xdp.data_hard_start = xdp.data - ixgbe_rx_offset(rx_ring); xdp.data_end = xdp.data + size; From be2336ebfd7a1aec597a26f086fc4235ab87dd2c Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:21 +0200 Subject: [PATCH 0311/2177] mlxsw: spectrum_dpipe: Fix indentation in header description Fix indentation in mlxsw_meta header's description. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 51e6846da72b..91648094ab4c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -46,18 +46,21 @@ enum mlxsw_sp_field_metadata_id { }; static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { - { .name = "erif_port", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, - .bitwidth = 32, - .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, + { + .name = "erif_port", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + .bitwidth = 32, + .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, }, - { .name = "l3_forward", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, - .bitwidth = 1, + { + .name = "l3_forward", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + .bitwidth = 1, }, - { .name = "l3_drop", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, - .bitwidth = 1, + { + .name = "l3_drop", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + .bitwidth = 1, }, }; From c0859d697c258f7c864e81bc1f83d1c274e7cf4c Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:22 +0200 Subject: [PATCH 0312/2177] mlxsw: Add fields for mlxsw's meta header for adjacency table This patch adds field for mlxsw's meta header which will be used to describe the match/action behavior of the adjacency table. The fields are: 1. Adj_index - The global index of the nexthop group in the adjacency table. 2. Adj_hash_index - Local index offset which is based on packets hash mod the nexthop group size. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 91648094ab4c..9253273a5c03 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -43,6 +43,8 @@ enum mlxsw_sp_field_metadata_id { MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, }; static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { @@ -62,6 +64,16 @@ static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, .bitwidth = 1, }, + { + .name = "adj_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + .bitwidth = 32, + }, + { + .name = "adj_hash_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, + .bitwidth = 32, + }, }; enum mlxsw_sp_dpipe_header_id { From dbe4598c1e929a24dc352a7dc523a3cc22a093f2 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:23 +0200 Subject: [PATCH 0313/2177] mlxsw: spectrum_router: Keep nexthops in a linked list Keep nexthops in a linked list for easy access. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0bd93dc88ffa..0cd4b2a7d9d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -78,6 +78,7 @@ struct mlxsw_sp_router { struct rhashtable neigh_ht; struct rhashtable nexthop_group_ht; struct rhashtable nexthop_ht; + struct list_head nexthop_list; struct { struct mlxsw_sp_lpm_tree *trees; unsigned int tree_count; @@ -2028,6 +2029,7 @@ struct mlxsw_sp_nexthop_key { struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ struct list_head rif_list_node; + struct list_head router_list_node; struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group * this belongs to */ @@ -2784,6 +2786,8 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + if (!dev) return 0; @@ -2807,6 +2811,7 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } @@ -4045,6 +4050,8 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + if (!dev) return 0; nh->ifindex = dev->ifindex; @@ -4056,6 +4063,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, @@ -5990,6 +5998,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_nexthop_group_ht_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list); err = mlxsw_sp_lpm_init(mlxsw_sp); if (err) goto err_lpm_init; From ec2437f42b44edc84054feb943d49e8030154c38 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:24 +0200 Subject: [PATCH 0314/2177] mlxsw: spectrum_router: Use helper to check for last neighbor Use list_is_last helper to check for last neighbor. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0cd4b2a7d9d0..65e59a989084 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1317,7 +1317,7 @@ mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, typeof(*neigh_entry), rif_list_node); } - if (neigh_entry->rif_list_node.next == &rif->neigh_list) + if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list)) return NULL; return list_next_entry(neigh_entry, rif_list_node); } From c556cd28930661f337d7989fe74ac31871fd3888 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:25 +0200 Subject: [PATCH 0315/2177] mlxsw: spectrum_router: Add helpers for nexthop access This is done as a preparation before introducing the ability to dump the adjacency table via dpipe, and to count the table size. The current table implementation avoids tunnel entries, thus a helper for checking if the nexthop group contains tunnel entries is also provided. The mlxsw's nexthop representative struct stays private to the router module. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 71 +++++++++++++++++++ .../ethernet/mellanox/mlxsw/spectrum_router.h | 12 ++++ 2 files changed, 83 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 65e59a989084..c062b4f666e3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2068,6 +2068,77 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh) { + if (list_empty(&router->nexthop_list)) + return NULL; + else + return list_first_entry(&router->nexthop_list, + typeof(*nh), router_list_node); + } + if (list_is_last(&nh->router_list_node, &router->nexthop_list)) + return NULL; + return list_next_entry(nh, router_list_node); +} + +bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh) +{ + return nh->offloaded; +} + +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) +{ + if (!nh->offloaded) + return NULL; + return nh->neigh_entry->ha; +} + +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_hash_index) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + u32 adj_hash_index = 0; + int i; + + if (!nh->offloaded || !nh_grp->adj_index_valid) + return -EINVAL; + + *p_adj_index = nh_grp->adj_index; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + + if (nh_iter == nh) + break; + if (nh_iter->offloaded) + adj_hash_index++; + } + + *p_adj_hash_index = adj_hash_index; + return 0; +} + +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) +{ + return nh->rif; +} + +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + + if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP) + return true; + } + return false; +} + static struct fib_info * 
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index ae4c99b3f2fc..d6951d516cf4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -62,6 +62,7 @@ enum mlxsw_sp_rif_counter_dir { }; struct mlxsw_sp_neigh_entry; +struct mlxsw_sp_nexthop; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); @@ -108,5 +109,16 @@ union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev); __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev); +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh); +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_hash_index); +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); +#define mlxsw_sp_nexthop_for_each(nh, router) \ + for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \ + nh = mlxsw_sp_nexthop_next(router, nh)) #endif /* _MLXSW_ROUTER_H_*/ From c538adb3c6e77e0f6563b71923a81de182b5132c Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:26 +0200 Subject: [PATCH 0316/2177] mlxsw: spectrum_dpipe: Add initial support for the router adjacency table Add initial support for router adjacency table. The table does lookup based on the nexthop-group index and the local nexthop offset. After locating the nexthop entry it sets the destination MAC address and the egress RIF. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 100 +++++++++++++++++- .../ethernet/mellanox/mlxsw/spectrum_dpipe.h | 1 + 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 9253273a5c03..ca16f8924c0a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -841,6 +841,97 @@ static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) MLXSW_SP_DPIPE_TABLE_NAME_HOST6); } +static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + return devlink_dpipe_match_put(skb, &match); +} + +static int mlxsw_sp_dpipe_table_adj_actions_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + int err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + err = devlink_dpipe_action_put(skb, &action); + if (err) + return err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + return devlink_dpipe_action_put(skb, &action); +} + +static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nexthop *nh; + u64 size = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) + if (mlxsw_sp_nexthop_offload(nh) && + !mlxsw_sp_nexthop_group_has_ipip(nh)) + size++; + return size; +} + +static u64 +mlxsw_sp_dpipe_table_adj_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + u64 size; + + rtnl_lock(); + size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); + rtnl_unlock(); + + return size; +} + +static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { + .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, + .size_get = mlxsw_sp_dpipe_table_adj_size_get, +}; + +static int mlxsw_sp_dpipe_adj_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ, + &mlxsw_sp_dpipe_table_adj_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_adj_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -861,8 +952,14 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); if (err) goto err_host6_table_init; - return 0; + err = mlxsw_sp_dpipe_adj_table_init(mlxsw_sp); + if (err) + goto err_adj_table_init; + + return 0; +err_adj_table_init: + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); err_host6_table_init: mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); 
err_host4_table_init: @@ -876,6 +973,7 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_adj_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index 283fde4e6783..815d543cf114 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -56,5 +56,6 @@ static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" +#define MLXSW_SP_DPIPE_TABLE_NAME_ADJ "mlxsw_adj" #endif /* _MLXSW_PIPELINE_H_*/ From f4de25fb530c936af7c3d9a158a7dde86adb2848 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:27 +0200 Subject: [PATCH 0317/2177] mlxsw: reg: Add support for counters on RATR In order to add the ability for setting counters on nexthops the RATR register should be extended. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 44 ++++++++++++++++++----- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 17eba19100de..d44e673a4c4e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4549,6 +4549,27 @@ MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32); */ MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24); +enum mlxsw_reg_flow_counter_set_type { + /* No count */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* reg_ratr_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_set_type, 0x28, 24, 8); + +/* reg_ratr_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_index, 0x28, 0, 24); + static inline void mlxsw_reg_ratr_pack(char *payload, enum mlxsw_reg_ratr_op op, bool valid, @@ -4576,6 +4597,20 @@ static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip) mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip); } +static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index, + bool counter_enable) +{ + enum mlxsw_reg_flow_counter_set_type set_type; + + if (counter_enable) + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES; + else + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT; + + mlxsw_reg_ratr_counter_index_set(payload, counter_index); + mlxsw_reg_ratr_counter_set_type_set(payload, set_type); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -5297,15 +5332,6 @@ enum mlxsw_reg_rauht_trap_id { */ MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); -enum mlxsw_reg_flow_counter_set_type { - /* No count */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, - /* Count packets and bytes */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, - /* Count only packets */ - 
MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, -}; - /* reg_rauht_counter_set_type * Counter set type for flow counters * Access: RW From a5390278a5eb573b76d2d28ce576b6b62c2200be Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:28 +0200 Subject: [PATCH 0318/2177] mlxsw: spectrum: Add support for setting counters on nexthops Add support for setting counters on nexthops based on dpipe's adjacency table counter status. This patch also adds the ability for getting the counter value, which will be used by the dpipe adjacency table dump implementation in the next patches. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 52 +++++++++++++++++-- .../ethernet/mellanox/mlxsw/spectrum_router.h | 2 + 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c062b4f666e3..a75064a8ba80 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2052,6 +2052,8 @@ struct mlxsw_sp_nexthop { struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_ipip_entry *ipip_entry; }; + unsigned int counter_index; + bool counter_valid; }; struct mlxsw_sp_nexthop_group { @@ -2068,6 +2070,41 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; +static void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct devlink *devlink; + + devlink = priv_to_devlink(mlxsw_sp->core); + if (!devlink_dpipe_table_counter_enabled(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index)) + return; + + nh->counter_valid = true; +} + +static void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh->counter_valid) + return; + mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index); + nh->counter_valid = false; +} + +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter) +{ + if (!nh->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index, + p_counter, NULL); +} + struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, struct mlxsw_sp_nexthop *nh) { @@ -2396,8 +2433,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; char ratr_pl[MLXSW_REG_RATR_LEN]; @@ -2406,6 +2443,11 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, true, MLXSW_REG_RATR_TYPE_ETHERNET, adj_index, neigh_entry->rif); mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + if (nh->counter_valid) + mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true); + else + mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } @@ -2440,7 +2482,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, if (nh->update || reallocate) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: - err = mlxsw_sp_nexthop_mac_update + err = mlxsw_sp_nexthop_update (mlxsw_sp, 
adj_index, nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: @@ -2857,6 +2899,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); if (!dev) @@ -2883,6 +2926,7 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } @@ -4120,6 +4164,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -4135,6 +4180,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index d6951d516cf4..a6e86590939f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -120,5 +120,7 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); #define mlxsw_sp_nexthop_for_each(nh, router) \ for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \ nh = mlxsw_sp_nexthop_next(router, nh)) +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter); #endif /* _MLXSW_ROUTER_H_*/ From 190d38a52a73ef8ac05c1931dda730e0f9b79095 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:29 +0200 Subject: [PATCH 0319/2177] mlxsw: spectrum_dpipe: Add support for adjacency table dump Add support for adjacency table dump. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 238 ++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index ca16f8924c0a..e6755a96b269 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -895,6 +895,243 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) return size; } +enum mlxsw_sp_dpipe_table_adj_match { + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT, +}; + +enum mlxsw_sp_dpipe_table_adj_action { + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *actions) +{ + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &mlxsw_sp_dpipe_header_metadata; + action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; +} + +static int +mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_values, + struct devlink_dpipe_action *actions) +{ struct devlink_dpipe_value *action_value; + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT; + + entry->action_values = action_values; + entry->action_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, 
GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + + action_value->action = action; + action_value->value_size = sizeof(u32); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry, + u32 adj_index, u32 adj_hash_index, + unsigned char *ha, + struct mlxsw_sp_rif *rif) +{ + struct devlink_dpipe_value *value; + u32 *p_rif_value; + u32 *p_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + p_index = value->value; + *p_index = adj_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + p_index = value->value; + *p_index = adj_hash_index; + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + ether_addr_copy(value->value, ha); + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + p_rif_value = value->value; + *p_rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; +} + +static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct devlink_dpipe_entry *entry) +{ + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); + unsigned char *ha = mlxsw_sp_nexthop_ha(nh); + u32 adj_hash_index = 0; + u32 adj_index = 0; + int err; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index); + __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, + adj_hash_index, ha, rif); + err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp_nexthop *nh; + int entry_index = 0; + int nh_count_max; + int nh_count = 0; + int nh_skip; + int j; + int err; + + rtnl_lock(); + nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + nh_skip = nh_count; + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_offload(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + if (nh_count < nh_skip) + goto skip; + + mlxsw_sp_dpipe_table_adj_entry_fill(mlxsw_sp, nh, entry); + entry->index = entry_index; + err = devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + break; + } + goto err_entry_append; + } + entry_index++; + j++; +skip: + nh_count++; + } + + devlink_dpipe_entry_ctx_close(dump_ctx); + if (nh_count != nh_count_max) + goto start_again; + rtnl_unlock(); + + return 0; + +err_ctx_prepare: +err_entry_append: + rtnl_unlock(); + return err; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink_dpipe_value action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_action actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_entry entry = 
{0}; + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(match_values[0])); + memset(actions, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(actions[0])); + memset(action_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(action_values[0])); + + mlxsw_sp_dpipe_table_adj_match_action_prepare(matches, actions); + err = mlxsw_sp_dpipe_table_adj_entry_prepare(&entry, + match_values, matches, + action_values, actions); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_adj_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx); +out: + devlink_dpipe_entry_clear(&entry); + return err; +} + static u64 mlxsw_sp_dpipe_table_adj_size_get(void *priv) { @@ -911,6 +1148,7 @@ mlxsw_sp_dpipe_table_adj_size_get(void *priv) static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump, .size_get = mlxsw_sp_dpipe_table_adj_size_get, }; From 427e652aa34d90960f729c0b902c3c4a8a821b2e Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:30 +0200 Subject: [PATCH 0320/2177] mlxsw: spectrum_dpipe: Add support for controlling nexthop counters Add support for controlling nexthop counters via dpipe. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 24 +++++++++++++++++++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 12 +++++----- .../ethernet/mellanox/mlxsw/spectrum_router.h | 6 +++++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index e6755a96b269..a056f23d3a0e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -1132,6 +1132,29 @@ out: return err; } +static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_nexthop *nh; + u32 adj_hash_index = 0; + u32 adj_index = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_offload(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index); + if (enable) + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + else + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_update(mlxsw_sp, + adj_index + adj_hash_index, nh); + } + return 0; +} + static u64 mlxsw_sp_dpipe_table_adj_size_get(void *priv) { @@ -1149,6 +1172,7 @@ static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_adj_counters_update, .size_get = mlxsw_sp_dpipe_table_adj_size_get, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a75064a8ba80..321f7356073c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2070,8 +2070,8 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; -static void 
mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { struct devlink *devlink; @@ -2086,8 +2086,8 @@ static void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, nh->counter_valid = true; } -static void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { if (!nh->counter_valid) return; @@ -2433,8 +2433,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; char ratr_pl[MLXSW_REG_RATR_LEN]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index a6e86590939f..3d449180b035 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -122,5 +122,11 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); nh = mlxsw_sp_nexthop_next(router, nh)) int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, u64 *p_counter); +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); #endif /* _MLXSW_ROUTER_H_*/ From f4344e0a48121d07cf8f69415278e84c39dbe9bf Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:31 -0400 Subject: [PATCH 0321/2177] net: dsa: return -ENODEV if there is no slave PHY Instead of returning -EOPNOTSUPP when a slave device has no PHY, directly return -ENODEV as ethtool and phylib do. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S.
Miller --- net/dsa/slave.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index bd51ef56ec5b..79c5a0cd9923 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -266,10 +266,10 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); - if (p->phy != NULL) - return phy_mii_ioctl(p->phy, ifr, cmd); + if (!p->phy) + return -ENODEV; - return -EOPNOTSUPP; + return phy_mii_ioctl(p->phy, ifr, cmd); } static int dsa_slave_port_attr_set(struct net_device *dev, @@ -429,7 +429,7 @@ dsa_slave_get_link_ksettings(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); if (!p->phy) - return -EOPNOTSUPP; + return -ENODEV; phy_ethtool_ksettings_get(p->phy, cmd); @@ -442,10 +442,10 @@ dsa_slave_set_link_ksettings(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); - if (p->phy != NULL) - return phy_ethtool_ksettings_set(p->phy, cmd); + if (!p->phy) + return -ENODEV; - return -EOPNOTSUPP; + return phy_ethtool_ksettings_set(p->phy, cmd); } static void dsa_slave_get_drvinfo(struct net_device *dev, @@ -481,22 +481,22 @@ static int dsa_slave_nway_reset(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - if (p->phy != NULL) - return genphy_restart_aneg(p->phy); + if (!p->phy) + return -ENODEV; - return -EOPNOTSUPP; + return genphy_restart_aneg(p->phy); } static u32 dsa_slave_get_link(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - if (p->phy != NULL) { - genphy_update_link(p->phy); - return p->phy->link; - } + if (!p->phy) + return -ENODEV; - return -EOPNOTSUPP; + genphy_update_link(p->phy); + + return p->phy->link; } static int dsa_slave_get_eeprom_len(struct net_device *dev) From 0115dcd1787d2d1c50719fab98d6bcb0c17931a1 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:32 -0400 Subject: [PATCH 0322/2177] net: dsa: use slave device phydev There is no need to store a phy_device in dsa_slave_priv since net_device already provides one. Simply s/p->phy/dev->phydev/. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 1 - net/dsa/slave.c | 116 +++++++++++++++++++++------------------------ 2 files changed, 53 insertions(+), 64 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 0298a0f6a349..eccc62776283 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -79,7 +79,6 @@ struct dsa_slave_priv { * The phylib phy_device pointer for the PHY connected * to this port. 
*/ - struct phy_device *phy; phy_interface_t phy_interface; int old_link; int old_pause; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 79c5a0cd9923..4ea1c6eb0da8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -96,12 +96,12 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - err = dsa_port_enable(dp, p->phy); + err = dsa_port_enable(dp, dev->phydev); if (err) goto clear_promisc; - if (p->phy) - phy_start(p->phy); + if (dev->phydev) + phy_start(dev->phydev); return 0; @@ -124,10 +124,10 @@ static int dsa_slave_close(struct net_device *dev) struct net_device *master = dsa_master_netdev(p); struct dsa_port *dp = p->dp; - if (p->phy) - phy_stop(p->phy); + if (dev->phydev) + phy_stop(dev->phydev); - dsa_port_disable(dp, p->phy); + dsa_port_disable(dp, dev->phydev); dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); @@ -264,12 +264,10 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(p->phy, ifr, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); } static int dsa_slave_port_attr_set(struct net_device *dev, @@ -426,12 +424,10 @@ static int dsa_slave_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - phy_ethtool_ksettings_get(p->phy, cmd); + phy_ethtool_ksettings_get(dev->phydev, cmd); return 0; } @@ -440,12 +436,10 @@ static int dsa_slave_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_ksettings_set(p->phy, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } static void dsa_slave_get_drvinfo(struct net_device *dev, @@ -479,24 +473,20 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) static int dsa_slave_nway_reset(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - return genphy_restart_aneg(p->phy); + return genphy_restart_aneg(dev->phydev); } static u32 dsa_slave_get_link(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - genphy_update_link(p->phy); + genphy_update_link(dev->phydev); - return p->phy->link; + return dev->phydev->link; } static int dsa_slave_get_eeprom_len(struct net_device *dev) @@ -631,7 +621,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!p->phy) + if (!dev->phydev) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -642,12 +632,12 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) return ret; if (e->eee_enabled) { - ret = phy_init_eee(p->phy, 0); + ret = phy_init_eee(dev->phydev, 0); if (ret) return ret; } - return phy_ethtool_set_eee(p->phy, e); + return phy_ethtool_set_eee(dev->phydev, e); } static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) @@ -657,7 +647,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!p->phy) + if (!dev->phydev) return -ENODEV; if (!ds->ops->get_mac_eee) @@ 
-667,7 +657,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) if (ret) return ret; - return phy_ethtool_get_eee(p->phy, e); + return phy_ethtool_get_eee(dev->phydev, e); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -976,26 +966,26 @@ static void dsa_slave_adjust_link(struct net_device *dev) struct dsa_switch *ds = p->dp->ds; unsigned int status_changed = 0; - if (p->old_link != p->phy->link) { + if (p->old_link != dev->phydev->link) { status_changed = 1; - p->old_link = p->phy->link; + p->old_link = dev->phydev->link; } - if (p->old_duplex != p->phy->duplex) { + if (p->old_duplex != dev->phydev->duplex) { status_changed = 1; - p->old_duplex = p->phy->duplex; + p->old_duplex = dev->phydev->duplex; } - if (p->old_pause != p->phy->pause) { + if (p->old_pause != dev->phydev->pause) { status_changed = 1; - p->old_pause = p->phy->pause; + p->old_pause = dev->phydev->pause; } if (ds->ops->adjust_link && status_changed) - ds->ops->adjust_link(ds, p->dp->index, p->phy); + ds->ops->adjust_link(ds, p->dp->index, dev->phydev); if (status_changed) - phy_print_status(p->phy); + phy_print_status(dev->phydev); } static int dsa_slave_fixed_link_update(struct net_device *dev, @@ -1020,17 +1010,18 @@ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) struct dsa_slave_priv *p = netdev_priv(slave_dev); struct dsa_switch *ds = p->dp->ds; - p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); - if (!p->phy) { + slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr); + if (!slave_dev->phydev) { netdev_err(slave_dev, "no phy at %d\n", addr); return -ENODEV; } /* Use already configured phy mode */ if (p->phy_interface == PHY_INTERFACE_MODE_NA) - p->phy_interface = p->phy->interface; - return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, - p->phy_interface); + p->phy_interface = slave_dev->phydev->interface; + + return phy_connect_direct(slave_dev, slave_dev->phydev, + dsa_slave_adjust_link, p->phy_interface); } static int dsa_slave_phy_setup(struct net_device *slave_dev) @@ -1082,22 +1073,23 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) return ret; } } else { - p->phy = of_phy_connect(slave_dev, phy_dn, - dsa_slave_adjust_link, - phy_flags, - p->phy_interface); + slave_dev->phydev = of_phy_connect(slave_dev, phy_dn, + dsa_slave_adjust_link, + phy_flags, + p->phy_interface); } of_node_put(phy_dn); } - if (p->phy && phy_is_fixed) - fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update); + if (slave_dev->phydev && phy_is_fixed) + fixed_phy_set_link_update(slave_dev->phydev, + dsa_slave_fixed_link_update); /* We could not connect to a designated PHY, so use the switch internal * MDIO bus instead */ - if (!p->phy) { + if (!slave_dev->phydev) { ret = dsa_slave_phy_connect(slave_dev, p->dp->index); if (ret) { netdev_err(slave_dev, "failed to connect to port %d: %d\n", @@ -1108,7 +1100,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) } } - phy_attached_info(p->phy); + phy_attached_info(slave_dev->phydev); return 0; } @@ -1128,12 +1120,12 @@ int dsa_slave_suspend(struct net_device *slave_dev) netif_device_detach(slave_dev); - if (p->phy) { - phy_stop(p->phy); + if (slave_dev->phydev) { + phy_stop(slave_dev->phydev); p->old_pause = -1; p->old_link = -1; p->old_duplex = -1; - phy_suspend(p->phy); + phy_suspend(slave_dev->phydev); } return 0; @@ -1141,13 +1133,11 @@ int dsa_slave_suspend(struct net_device *slave_dev) int dsa_slave_resume(struct net_device *slave_dev) { - struct dsa_slave_priv *p = 
netdev_priv(slave_dev); - netif_device_attach(slave_dev); - if (p->phy) { - phy_resume(p->phy); - phy_start(p->phy); + if (slave_dev->phydev) { + phy_resume(slave_dev->phydev); + phy_start(slave_dev->phydev); } return 0; @@ -1240,8 +1230,8 @@ void dsa_slave_destroy(struct net_device *slave_dev) port_dn = p->dp->dn; netif_carrier_off(slave_dev); - if (p->phy) { - phy_disconnect(p->phy); + if (slave_dev->phydev) { + phy_disconnect(slave_dev->phydev); if (of_phy_is_fixed_link(port_dn)) of_phy_deregister_fixed_link(port_dn); From 771df31ace8a0264bcc3576d3f02660e3366cd6d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:33 -0400 Subject: [PATCH 0323/2177] net: dsa: use phy_ethtool_get_link_ksettings Use phy_ethtool_get_link_ksettings now that dsa_slave_get_link_ksettings does exactly the same. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 4ea1c6eb0da8..bb0f64f47ae7 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -420,17 +420,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) } /* ethtool operations *******************************************************/ -static int -dsa_slave_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) -{ - if (!dev->phydev) - return -ENODEV; - - phy_ethtool_ksettings_get(dev->phydev, cmd); - - return 0; -} static int dsa_slave_set_link_ksettings(struct net_device *dev, @@ -921,8 +910,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, .get_eee = dsa_slave_get_eee, - .get_link_ksettings = dsa_slave_get_link_ksettings, .set_link_ksettings = dsa_slave_set_link_ksettings, + .get_link_ksettings = phy_ethtool_get_link_ksettings, .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, }; From aa62a8ca85cd05b4af4d7be70ecb735f65b0449a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:34 -0400 Subject: [PATCH 0324/2177] net: dsa: use phy_ethtool_set_link_ksettings Use phy_ethtool_set_link_ksettings now that dsa_slave_set_link_ksettings does exactly the same. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index bb0f64f47ae7..d2b632cae468 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -421,16 +421,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) /* ethtool operations *******************************************************/ -static int -dsa_slave_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) -{ - if (!dev->phydev) - return -ENODEV; - - return phy_ethtool_ksettings_set(dev->phydev, cmd); -} - static void dsa_slave_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -910,8 +900,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, .get_eee = dsa_slave_get_eee, - .set_link_ksettings = dsa_slave_set_link_ksettings, .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, }; From 69b2c1629649b1e765516c7b452797b9697de9ac Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:35 -0400 Subject: [PATCH 0325/2177] net: dsa: use phy_ethtool_nway_reset Use phy_ethtool_nway_reset now that dsa_slave_nway_reset does exactly the same. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d2b632cae468..bf8800de13c1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -450,14 +450,6 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) ds->ops->get_regs(ds, p->dp->index, regs, _p); } -static int dsa_slave_nway_reset(struct net_device *dev) -{ - if (!dev->phydev) - return -ENODEV; - - return genphy_restart_aneg(dev->phydev); -} - static u32 dsa_slave_get_link(struct net_device *dev) { if (!dev->phydev) @@ -888,7 +880,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, .get_regs = dsa_slave_get_regs, - .nway_reset = dsa_slave_nway_reset, + .nway_reset = phy_ethtool_nway_reset, .get_link = dsa_slave_get_link, .get_eeprom_len = dsa_slave_get_eeprom_len, .get_eeprom = dsa_slave_get_eeprom, From 2a52a8c6e594cdc562f503492ba89ac7bc0c4074 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Sep 2017 10:58:20 +0200 Subject: [PATCH 0326/2177] mlxsw: spectrum_acl: Propagate errors from mlxsw_afa_block_jump/continue Propagate error instead of doing WARN_ON right away. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/core_acl_flex_actions.c | 14 ++++++++------ .../mellanox/mlxsw/core_acl_flex_actions.h | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 6 +++--- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 10 +++++----- .../ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 6 +++++- .../net/ethernet/mellanox/mlxsw/spectrum_flower.c | 4 +++- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index ab3ffe7a8eda..bc55d0e76705 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -399,23 +399,25 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block) } EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index); -void mlxsw_afa_block_continue(struct mlxsw_afa_block *block) +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block) { - if (WARN_ON(block->finished)) - return; + if (block->finished) + return -EINVAL; mlxsw_afa_set_goto_set(block->cur_set, MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0); block->finished = true; + return 0; } EXPORT_SYMBOL(mlxsw_afa_block_continue); -void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) { - if (WARN_ON(block->finished)) - return; + if (block->finished) + return -EINVAL; mlxsw_afa_set_goto_set(block->cur_set, MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id); block->finished = true; + return 0; } EXPORT_SYMBOL(mlxsw_afa_block_jump); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 501819c790d6..06b0be432b8f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -57,8 +57,8 @@ void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block); int mlxsw_afa_block_commit(struct mlxsw_afa_block *block); char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block); u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); -void mlxsw_afa_block_continue(struct mlxsw_afa_block *block); -void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e907ec446a73..9355d914a4c8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -468,9 +468,9 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, enum mlxsw_afk_element element, const char *key_value, const char *mask_value, unsigned int len); -void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); -void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, - u16 group_id); +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id); int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int 
mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index eede75fbd585..93dcd315f7d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -378,15 +378,15 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, key_value, mask_value, len); } -void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) { - mlxsw_afa_block_continue(rulei->act_block); + return mlxsw_afa_block_continue(rulei->act_block); } -void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, - u16 group_id) +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id) { - mlxsw_afa_block_jump(rulei->act_block, group_id); + return mlxsw_afa_block_jump(rulei->act_block, group_id); } int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 50b40de1fb91..7e8284b46968 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -608,7 +608,10 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, goto err_rulei_create; } - mlxsw_sp_acl_rulei_act_continue(rulei); + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (WARN_ON(err)) + goto err_rulei_act_continue; + err = mlxsw_sp_acl_rulei_commit(rulei); if (err) goto err_rulei_commit; @@ -623,6 +626,7 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, err_rule_insert: err_rulei_commit: +err_rulei_act_continue: mlxsw_sp_acl_rulei_destroy(rulei); err_rulei_create: parman_item_remove(region->parman, parman_prio, parman_item); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 8aace9a06a5d..f1cedccb58cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -84,7 +84,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(ruleset); group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); - mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + if (err) + return err; } else if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; From 3b8e9238a8d194e82f0202a5fb68a63686ebe420 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Sep 2017 10:58:21 +0200 Subject: [PATCH 0327/2177] net: sched: introduce helper to identify gact pass action Introduce a helper called is_tcf_gact_pass which could be used to tell if the action is gact pass or not. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_gact.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 41afe1ce7b16..d979a0d48f9e 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -33,6 +33,11 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, return false; } +static inline bool is_tcf_gact_ok(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_OK, false); +} + static inline bool is_tcf_gact_shot(const struct tc_action *a) { return __is_tcf_gact_act(a, TC_ACT_SHOT, false); From b2925957ec1a9349c6ac42fc5ac95bcf0dd4a6a0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Sep 2017 10:58:22 +0200 Subject: [PATCH 0328/2177] mlxsw: spectrum_flower: Offload "ok" termination action If action is "gact_ok", offload it to HW. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index f1cedccb58cc..2f0e57857ea4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -63,7 +63,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) { + if (is_tcf_gact_ok(a)) { + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (err) + return err; + } else if (is_tcf_gact_shot(a)) { err = mlxsw_sp_acl_rulei_act_drop(rulei); if (err) return err; From 79ede4ae2d01b0282bfaaf761308a5ac485c8144 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:35 +0200 Subject: [PATCH 0329/2177] nfp: add helper to get flower cmsg length Add a helper function that returns the length of the cmsg data when given the cmsg skb Signed-off-by: John Hurley Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 5 +++++ drivers/net/ethernet/netronome/nfp/flower/metadata.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index a2ec60344236..7a5ccf0cc7c2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -323,6 +323,11 @@ static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb) return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN; } +static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb) +{ + return skb->len - NFP_FLOWER_CMSG_HLEN; +} + struct sk_buff * nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports); void diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 3226ddc55f99..193520ef23f0 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -140,7 +140,7 @@ exit_rcu_unlock: void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb) { - unsigned int msg_len = skb->len - NFP_FLOWER_CMSG_HLEN; + unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); struct nfp_fl_stats_frame *stats_frame; unsigned char *msg; int i; From 611aec101ab7c19755e8ea6d480f679aaffed5ad Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:36 +0200 Subject: [PATCH 0330/2177] nfp: compile flower vxlan tunnel metadata match fields Compile ovs-tc flower vxlan metadata match fields for offloading. Only support offload of tunnel data when the VXLAN port specifically matches well known port 4789. Signed-off-by: John Hurley Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.h | 38 ++++++++++ .../net/ethernet/netronome/nfp/flower/main.h | 2 + .../net/ethernet/netronome/nfp/flower/match.c | 60 ++++++++++++++-- .../ethernet/netronome/nfp/flower/offload.c | 70 ++++++++++++++++--- 4 files changed, 158 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 7a5ccf0cc7c2..af9165b3b652 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -83,6 +83,14 @@ #define NFP_FL_PUSH_VLAN_CFI BIT(12) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) +/* Tunnel ports */ +#define NFP_FL_PORT_TYPE_TUN 0x50000000 + +enum nfp_flower_tun_type { + NFP_FL_TUNNEL_NONE = 0, + NFP_FL_TUNNEL_VXLAN = 2, +}; + struct nfp_fl_output { __be16 a_op; __be16 flags; @@ -230,6 +238,36 @@ struct nfp_flower_ipv6 { struct in6_addr ipv6_dst; }; +/* Flow Frame VXLAN --> Tunnel details (4W/16B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | gpe_flags | Reserved | Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_vxlan { + __be32 ip_src; + __be32 ip_dst; + __be16 tun_flags; + u8 tos; + u8 ttl; + u8 gpe_flags; + u8 reserved[2]; + u8 nxt_proto; + __be32 tun_id; +}; + +#define NFP_FL_TUN_VNI_OFFSET 8 + /* The base header for a control message packet. * Defines an 8-bit version, and an 8-bit type, padded * to a 32-bit word. Rest of the packet is type-specific. 
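A minimal sketch of the VNI packing implied by the layout above (illustrative only; these helpers are not part of the patch and their names are invented here). Per NFP_FL_TUN_VNI_OFFSET == 8, the 24-bit VXLAN VNI sits in the upper three bytes of the big-endian tun_id word, mirroring how the VNI is carried in the VXLAN header, with the low byte left reserved:

/* Illustrative sketch, not part of this patch: pack/unpack a 24-bit
 * VXLAN VNI into the big-endian tun_id word of struct nfp_flower_vxlan,
 * assuming NFP_FL_TUN_VNI_OFFSET == 8 (low byte reserved).
 */
static inline __be32 nfp_example_pack_vni(u32 vni)
{
	/* Host-order VNI shifted into the top 24 bits, then stored big-endian. */
	return cpu_to_be32(vni << NFP_FL_TUN_VNI_OFFSET);
}

static inline u32 nfp_example_unpack_vni(__be32 tun_id)
{
	/* Reverse of the packing done in nfp_flower_compile_vxlan(). */
	return be32_to_cpu(tun_id) >> NFP_FL_TUN_VNI_OFFSET;
}
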
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index c20dd00a1cae..cd695eabce02 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -58,6 +58,8 @@ struct nfp_app; #define NFP_FL_MASK_REUSE_TIME_NS 40000 #define NFP_FL_MASK_ID_LOCATION 1 +#define NFP_FL_VXLAN_PORT 4789 + struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; struct timespec64 *last_used; diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index d25b5038c3a2..1fd1bab0611f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -77,14 +77,17 @@ nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type) static int nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, - bool mask_version) + bool mask_version, enum nfp_flower_tun_type tun_type) { if (mask_version) { frame->in_port = cpu_to_be32(~0); return 0; } - frame->in_port = cpu_to_be32(cmsg_port); + if (tun_type) + frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + else + frame->in_port = cpu_to_be32(cmsg_port); return 0; } @@ -189,15 +192,53 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, } } +static void +nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame, + struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_ipv4_addrs *vxlan_ips; + struct flow_dissector_key_keyid *vni; + + /* Wildcard TOS/TTL/GPE_FLAGS/NXT_PROTO for now. */ + memset(frame, 0, sizeof(struct nfp_flower_vxlan)); + + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID)) { + u32 temp_vni; + + vni = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + target); + temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET; + frame->tun_id = cpu_to_be32(temp_vni); + } + + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + vxlan_ips = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + target); + frame->ip_src = vxlan_ips->src; + frame->ip_dst = vxlan_ips->dst; + } +} + int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; int err; u8 *ext; u8 *msk; + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) + tun_type = NFP_FL_TUNNEL_VXLAN; + memset(nfp_flow->unmasked_data, 0, key_ls->key_size); memset(nfp_flow->mask_data, 0, key_ls->key_size); @@ -216,14 +257,14 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, /* Populate Exact Port data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext, nfp_repr_get_port_id(netdev), - false); + false, tun_type); if (err) return err; /* Populate Mask Port Data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, nfp_repr_get_port_id(netdev), - true); + true, tun_type); if (err) return err; @@ -291,5 +332,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, msk += sizeof(struct nfp_flower_ipv6); } + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) { + /* Populate Exact VXLAN Data. */ + nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext, + flow, false); + /* Populate Mask VXLAN Data. 
*/ + nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk, + flow, true); + ext += sizeof(struct nfp_flower_vxlan); + msk += sizeof(struct nfp_flower_vxlan); + } + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index a18b4d2b1d3e..637372ba8f55 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -52,8 +52,25 @@ BIT(FLOW_DISSECTOR_KEY_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_VLAN) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + static int nfp_flower_xmit_flow(struct net_device *netdev, struct nfp_fl_payload *nfp_flow, u8 mtype) @@ -125,15 +142,58 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) return -EOPNOTSUPP; + /* If any tun dissector is used then the required set must be used. */ + if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + return -EOPNOTSUPP; + + key_layer_two = 0; + key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC; + key_size = sizeof(struct nfp_flower_meta_one) + + sizeof(struct nfp_flower_in_port) + + sizeof(struct nfp_flower_mac_mpls); + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; + struct flow_dissector_key_ports *mask_enc_ports = NULL; + struct flow_dissector_key_ports *enc_ports = NULL; struct flow_dissector_key_control *mask_enc_ctl = skb_flow_dissector_target(flow->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL, flow->mask); - /* We are expecting a tunnel. For now we ignore offloading. */ - if (mask_enc_ctl->addr_type) + struct flow_dissector_key_control *enc_ctl = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + flow->key); + if (mask_enc_ctl->addr_type != 0xffff || + enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) return -EOPNOTSUPP; + + /* These fields are already verified as used. 
*/ + mask_ipv4 = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + flow->mask); + if (mask_ipv4->dst != cpu_to_be32(~0)) + return -EOPNOTSUPP; + + mask_enc_ports = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + flow->mask); + enc_ports = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + flow->key); + + if (mask_enc_ports->dst != cpu_to_be16(~0) || + enc_ports->dst != htons(NFP_FL_VXLAN_PORT)) + return -EOPNOTSUPP; + + key_layer |= NFP_FLOWER_LAYER_VXLAN; + key_size += sizeof(struct nfp_flower_vxlan); } if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) { @@ -151,12 +211,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, FLOW_DISSECTOR_KEY_IP, flow->mask); - key_layer_two = 0; - key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC; - key_size = sizeof(struct nfp_flower_meta_one) + - sizeof(struct nfp_flower_in_port) + - sizeof(struct nfp_flower_mac_mpls); - if (mask_basic && mask_basic->n_proto) { /* Ethernet type is present in the key. */ switch (key_basic->n_proto) { From b27d6a95a70de551df828de2b658efd949a9864e Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:37 +0200 Subject: [PATCH 0331/2177] nfp: compile flower vxlan tunnel set actions Compile set tunnel actions for tc flower. Only support VXLAN and ensure a tunnel destination port of 4789 is used. Signed-off-by: John Hurley Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/action.c | 171 ++++++++++++++++-- .../net/ethernet/netronome/nfp/flower/cmsg.h | 31 +++- 2 files changed, 180 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index db9750695dc7..38f3835ae176 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "cmsg.h" #include "main.h" @@ -80,14 +81,27 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } +static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev, + enum nfp_flower_tun_type tun_type) +{ + if (!out_dev->rtnl_link_ops) + return false; + + if (!strcmp(out_dev->rtnl_link_ops->kind, "vxlan")) + return tun_type == NFP_FL_TUNNEL_VXLAN; + + return false; +} + static int nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, struct nfp_fl_payload *nfp_flow, bool last, - struct net_device *in_dev) + struct net_device *in_dev, enum nfp_flower_tun_type tun_type, + int *tun_out_cnt) { size_t act_size = sizeof(struct nfp_fl_output); + u16 tmp_output_op, tmp_flags; struct net_device *out_dev; - u16 tmp_output_op; int ifindex; /* Set action opcode to output action. */ @@ -97,34 +111,126 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, output->a_op = cpu_to_be16(tmp_output_op); - /* Set action output parameters. */ - output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0); - ifindex = tcf_mirred_ifindex(action); out_dev = __dev_get_by_index(dev_net(in_dev), ifindex); if (!out_dev) return -EOPNOTSUPP; - /* Only offload egress ports are on the same device as the ingress - * port. - */ - if (!switchdev_port_same_parent_id(in_dev, out_dev)) - return -EOPNOTSUPP; + tmp_flags = last ? 
NFP_FL_OUT_FLAGS_LAST : 0; - output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); - if (!output->port) - return -EOPNOTSUPP; + if (tun_type) { + /* Verify the egress netdev matches the tunnel type. */ + if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) + return -EOPNOTSUPP; + if (*tun_out_cnt) + return -EOPNOTSUPP; + (*tun_out_cnt)++; + + output->flags = cpu_to_be16(tmp_flags | + NFP_FL_OUT_FLAGS_USE_TUN); + output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + } else { + /* Set action output parameters. */ + output->flags = cpu_to_be16(tmp_flags); + + /* Only offload if egress ports are on the same device as the + * ingress port. + */ + if (!switchdev_port_same_parent_id(in_dev, out_dev)) + return -EOPNOTSUPP; + + output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); + if (!output->port) + return -EOPNOTSUPP; + } nfp_flow->meta.shortcut = output->port; return 0; } +static bool nfp_fl_supported_tun_port(const struct tc_action *action) +{ + struct ip_tunnel_info *tun = tcf_tunnel_info(action); + + return tun->key.tp_dst == htons(NFP_FL_VXLAN_PORT); +} + +static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) +{ + size_t act_size = sizeof(struct nfp_fl_pre_tunnel); + struct nfp_fl_pre_tunnel *pre_tun_act; + u16 tmp_pre_tun_op; + + /* Pre_tunnel action must be first on action list. + * If other actions already exist they need pushed forward. + */ + if (act_len) + memmove(act_data + act_size, act_data, act_len); + + pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data; + + memset(pre_tun_act, 0, act_size); + + tmp_pre_tun_op = + FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PRE_TUNNEL); + + pre_tun_act->a_op = cpu_to_be16(tmp_pre_tun_op); + + return pre_tun_act; +} + +static int +nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan, + const struct tc_action *action, + struct nfp_fl_pre_tunnel *pre_tun) +{ + struct ip_tunnel_info *vxlan = tcf_tunnel_info(action); + size_t act_size = sizeof(struct nfp_fl_set_vxlan); + u32 tmp_set_vxlan_type_index = 0; + u16 tmp_set_vxlan_op; + /* Currently support one pre-tunnel so index is always 0. */ + int pretun_idx = 0; + + if (vxlan->options_len) { + /* Do not support options e.g. vxlan gpe. */ + return -EOPNOTSUPP; + } + + tmp_set_vxlan_op = + FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, + NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL); + + set_vxlan->a_op = cpu_to_be16(tmp_set_vxlan_op); + + /* Set tunnel type and pre-tunnel index. */ + tmp_set_vxlan_type_index |= + FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, NFP_FL_TUNNEL_VXLAN) | + FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx); + + set_vxlan->tun_type_index = cpu_to_be32(tmp_set_vxlan_type_index); + + set_vxlan->tun_id = vxlan->key.tun_id; + set_vxlan->tun_flags = vxlan->key.tun_flags; + set_vxlan->ipv4_ttl = vxlan->key.ttl; + set_vxlan->ipv4_tos = vxlan->key.tos; + + /* Complete pre_tunnel action. 
*/ + pre_tun->ipv4_dst = vxlan->key.u.ipv4.dst; + + return 0; +} + static int nfp_flower_loop_action(const struct tc_action *a, struct nfp_fl_payload *nfp_fl, int *a_len, - struct net_device *netdev) + struct net_device *netdev, + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt) { + struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_vxlan *s_vxl; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_pop_vlan *pop_v; struct nfp_fl_output *output; @@ -137,7 +243,8 @@ nfp_flower_loop_action(const struct tc_action *a, return -EOPNOTSUPP; output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(output, a, nfp_fl, true, netdev); + err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type, + tun_out_cnt); if (err) return err; @@ -147,7 +254,8 @@ nfp_flower_loop_action(const struct tc_action *a, return -EOPNOTSUPP; output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(output, a, nfp_fl, false, netdev); + err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type, + tun_out_cnt); if (err) return err; @@ -170,6 +278,29 @@ nfp_flower_loop_action(const struct tc_action *a, nfp_fl_push_vlan(psh_v, a); *a_len += sizeof(struct nfp_fl_push_vlan); + } else if (is_tcf_tunnel_set(a) && nfp_fl_supported_tun_port(a)) { + /* Pre-tunnel action is required for tunnel encap. + * This checks for next hop entries on NFP. + * If none, the packet falls back before applying other actions. + */ + if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + + sizeof(struct nfp_fl_set_vxlan) > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + *tun_type = NFP_FL_TUNNEL_VXLAN; + pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len); + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + *a_len += sizeof(struct nfp_fl_pre_tunnel); + + s_vxl = (struct nfp_fl_set_vxlan *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_set_vxlan(s_vxl, a, pre_tun); + if (err) + return err; + + *a_len += sizeof(struct nfp_fl_set_vxlan); + } else if (is_tcf_tunnel_release(a)) { + /* Tunnel decap is handled by default so accept action. */ + return 0; } else { /* Currently we do not handle any other actions. 
*/ return -EOPNOTSUPP; @@ -182,18 +313,22 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { - int act_len, act_cnt, err; + int act_len, act_cnt, err, tun_out_cnt; + enum nfp_flower_tun_type tun_type; const struct tc_action *a; LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); nfp_flow->meta.act_len = 0; + tun_type = NFP_FL_TUNNEL_NONE; act_len = 0; act_cnt = 0; + tun_out_cnt = 0; tcf_exts_to_list(flow->exts, &actions); list_for_each_entry(a, &actions, list) { - err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev); + err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev, + &tun_type, &tun_out_cnt); if (err) return err; act_cnt++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index af9165b3b652..ff42ce8a1e9c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -67,10 +67,12 @@ #define NFP_FL_LW_SIZ 2 /* Action opcodes */ -#define NFP_FL_ACTION_OPCODE_OUTPUT 0 -#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 -#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 -#define NFP_FL_ACTION_OPCODE_NUM 32 +#define NFP_FL_ACTION_OPCODE_OUTPUT 0 +#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 +#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_NUM 32 #define NFP_FL_ACT_JMP_ID GENMASK(15, 8) #define NFP_FL_ACT_LEN_LW GENMASK(7, 0) @@ -85,6 +87,8 @@ /* Tunnel ports */ #define NFP_FL_PORT_TYPE_TUN 0x50000000 +#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) enum nfp_flower_tun_type { NFP_FL_TUNNEL_NONE = 0, @@ -123,6 +127,25 @@ struct nfp_flower_meta_one { u16 reserved; }; +struct nfp_fl_pre_tunnel { + __be16 a_op; + __be16 reserved; + __be32 ipv4_dst; + /* reserved for use with IPv6 addresses */ + __be32 extra[3]; +}; + +struct nfp_fl_set_vxlan { + __be16 a_op; + __be16 reserved; + __be64 tun_id; + __be32 tun_type_index; + __be16 tun_flags; + u8 ipv4_ttl; + u8 ipv4_tos; + __be32 extra[2]; +} __packed; + /* Metadata with L2 (1W/4B) * ---------------------------------------------------------------- * 3 2 1 From fd0dd1ab1e107369c950796bb9b0e8eab6134bf1 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:38 +0200 Subject: [PATCH 0332/2177] nfp: offload flower vxlan endpoint MAC addresses Generate a list of MAC addresses of netdevs that could be used as VXLAN tunnel end points. Give offloaded MACs an index for storage on the NFP in the ranges: 0x100-0x1ff physical port representors 0x200-0x2ff VF port representors 0x300-0x3ff other offloads (e.g. vxlan netdevs, ovs bridges) Assign phys and vf indexes based on unique 8 bit values in the port num. Maintain list of other netdevs to ensure same netdev is not offloaded twice and each gets a unique ID without exhausting the entries. Because the IDs are unique but constant for a netdev, any changes are implemented by overwriting the index on NFP. Signed-off-by: John Hurley Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 3 +- .../net/ethernet/netronome/nfp/flower/cmsg.c | 7 - .../net/ethernet/netronome/nfp/flower/cmsg.h | 9 + .../net/ethernet/netronome/nfp/flower/main.c | 13 + .../net/ethernet/netronome/nfp/flower/main.h | 18 + .../net/ethernet/netronome/nfp/flower/match.c | 7 + .../netronome/nfp/flower/tunnel_conf.c | 374 ++++++++++++++++++ 7 files changed, 423 insertions(+), 8 deletions(-) create mode 100644 drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 96e579a15cbe..becaacf1554d 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -37,7 +37,8 @@ nfp-objs += \ flower/main.o \ flower/match.o \ flower/metadata.o \ - flower/offload.o + flower/offload.o \ + flower/tunnel_conf.o endif ifeq ($(CONFIG_BPF_SYSCALL),y) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index c3ca05d10fe1..b756006dba6f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -38,17 +38,10 @@ #include #include "main.h" -#include "../nfpcore/nfp_cpp.h" #include "../nfp_net.h" #include "../nfp_net_repr.h" #include "./cmsg.h" -#define nfp_flower_cmsg_warn(app, fmt, args...) \ - do { \ - if (net_ratelimit()) \ - nfp_warn((app)->cpp, fmt, ## args); \ - } while (0) - static struct nfp_flower_cmsg_hdr * nfp_flower_cmsg_get_hdr(struct sk_buff *skb) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index ff42ce8a1e9c..dc248193c996 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -39,6 +39,7 @@ #include #include "../nfp_app.h" +#include "../nfpcore/nfp_cpp.h" #define NFP_FLOWER_LAYER_META BIT(0) #define NFP_FLOWER_LAYER_PORT BIT(1) @@ -90,6 +91,12 @@ #define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) #define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) +#define nfp_flower_cmsg_warn(app, fmt, args...) 
\ + do { \ + if (net_ratelimit()) \ + nfp_warn((app)->cpp, fmt, ## args); \ + } while (0) + enum nfp_flower_tun_type { NFP_FL_TUNNEL_NONE = 0, NFP_FL_TUNNEL_VXLAN = 2, @@ -310,6 +317,7 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, + NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, NFP_FLOWER_CMSG_TYPE_MAX = 32, @@ -343,6 +351,7 @@ enum nfp_flower_cmsg_port_type { NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC = 0x0, NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT = 0x1, NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT = 0x2, + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT = 0x3, }; enum nfp_flower_cmsg_port_vnic_type { diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 91fe03617106..e46e7c60d491 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -436,6 +436,16 @@ static void nfp_flower_clean(struct nfp_app *app) app->priv = NULL; } +static int nfp_flower_start(struct nfp_app *app) +{ + return nfp_tunnel_config_start(app); +} + +static void nfp_flower_stop(struct nfp_app *app) +{ + nfp_tunnel_config_stop(app); +} + const struct nfp_app_type app_flower = { .id = NFP_APP_FLOWER_NIC, .name = "flower", @@ -453,6 +463,9 @@ const struct nfp_app_type app_flower = { .repr_open = nfp_flower_repr_netdev_open, .repr_stop = nfp_flower_repr_netdev_stop, + .start = nfp_flower_start, + .stop = nfp_flower_stop, + .ctrl_msg_rx = nfp_flower_cmsg_rx, .sriov_enable = nfp_flower_sriov_enable, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index cd695eabce02..9de375acc254 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -84,6 +84,13 @@ struct nfp_fl_stats_id { * @flow_table: Hash table used to store flower rules * @cmsg_work: Workqueue for control messages processing * @cmsg_skbs: List of skbs for control message processing + * @nfp_mac_off_list: List of MAC addresses to offload + * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs + * @nfp_mac_off_lock: Lock for the MAC address list + * @nfp_mac_index_lock: Lock for the MAC index list + * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs + * @nfp_mac_off_count: Number of MACs in address list + * @nfp_tun_mac_nb: Notifier to monitor link state */ struct nfp_flower_priv { struct nfp_app *app; @@ -96,6 +103,13 @@ struct nfp_flower_priv { DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS); struct work_struct cmsg_work; struct sk_buff_head cmsg_skbs; + struct list_head nfp_mac_off_list; + struct list_head nfp_mac_index_list; + struct mutex nfp_mac_off_lock; + struct mutex nfp_mac_index_lock; + struct ida nfp_mac_off_ids; + int nfp_mac_off_count; + struct notifier_block nfp_tun_mac_nb; }; struct nfp_fl_key_ls { @@ -165,4 +179,8 @@ nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb); +int nfp_tunnel_config_start(struct nfp_app *app); +void nfp_tunnel_config_stop(struct nfp_app *app); +void nfp_tunnel_write_macs(struct nfp_app *app); + #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 1fd1bab0611f..cb3ff6c126e8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ 
b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -232,6 +232,7 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_flow) { enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + struct nfp_repr *netdev_repr; int err; u8 *ext; u8 *msk; @@ -341,6 +342,12 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, flow, true); ext += sizeof(struct nfp_flower_vxlan); msk += sizeof(struct nfp_flower_vxlan); + + /* Configure tunnel end point MAC. */ + if (nfp_netdev_is_nfp_repr(netdev)) { + netdev_repr = netdev_priv(netdev); + nfp_tunnel_write_macs(netdev_repr->app); + } } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c new file mode 100644 index 000000000000..34be85803020 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -0,0 +1,374 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include + +#include "cmsg.h" +#include "main.h" +#include "../nfp_net_repr.h" +#include "../nfp_net.h" + +/** + * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP + * @reserved: reserved for future use + * @count: number of MAC addresses in the message + * @index: index of MAC address in the lookup table + * @addr: interface MAC address + * @addresses: series of MACs to offload + */ +struct nfp_tun_mac_addr { + __be16 reserved; + __be16 count; + struct index_mac_addr { + __be16 index; + u8 addr[ETH_ALEN]; + } addresses[]; +}; + +/** + * struct nfp_tun_mac_offload_entry - list of MACs to offload + * @index: index of MAC address for offloading + * @addr: interface MAC address + * @list: list pointer + */ +struct nfp_tun_mac_offload_entry { + __be16 index; + u8 addr[ETH_ALEN]; + struct list_head list; +}; + +#define NFP_MAX_MAC_INDEX 0xff + +/** + * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id + * @ifindex: netdev ifindex of the device + * @index: index of netdevs mac on NFP + * @list: list pointer + */ +struct nfp_tun_mac_non_nfp_idx { + int ifindex; + u8 index; + struct list_head list; +}; + +static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) +{ + if (!netdev->rtnl_link_ops) + return false; + if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) + return true; + if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan")) + return true; + + return false; +} + +static int +nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata) +{ + struct sk_buff *skb; + unsigned char *msg; + + skb = nfp_flower_cmsg_alloc(app, plen, mtype); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + memcpy(msg, pdata, nfp_flower_cmsg_get_data_len(skb)); + + nfp_ctrl_tx(app->ctrl, skb); + return 0; +} + +void nfp_tunnel_write_macs(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_offload_entry *entry; + struct nfp_tun_mac_addr *payload; + struct list_head *ptr, *storage; + int mac_count, err, pay_size; + + mutex_lock(&priv->nfp_mac_off_lock); + if (!priv->nfp_mac_off_count) { + mutex_unlock(&priv->nfp_mac_off_lock); + return; + } + + pay_size = sizeof(struct nfp_tun_mac_addr) + + sizeof(struct index_mac_addr) * priv->nfp_mac_off_count; + + payload = kzalloc(pay_size, GFP_KERNEL); + if (!payload) { + mutex_unlock(&priv->nfp_mac_off_lock); + return; + } + + payload->count = cpu_to_be16(priv->nfp_mac_off_count); + + mac_count = 0; + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + payload->addresses[mac_count].index = entry->index; + ether_addr_copy(payload->addresses[mac_count].addr, + entry->addr); + mac_count++; + } + + err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, + pay_size, payload); + + kfree(payload); + + if (err) { + mutex_unlock(&priv->nfp_mac_off_lock); + /* Write failed so retain list for future retry. */ + return; + } + + /* If list was successfully offloaded, flush it. 
*/ + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + list_del(&entry->list); + kfree(entry); + } + + priv->nfp_mac_off_count = 0; + mutex_unlock(&priv->nfp_mac_off_lock); +} + +static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_non_nfp_idx *entry; + struct list_head *ptr, *storage; + int idx; + + mutex_lock(&priv->nfp_mac_index_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); + if (entry->ifindex == ifindex) { + idx = entry->index; + mutex_unlock(&priv->nfp_mac_index_lock); + return idx; + } + } + + idx = ida_simple_get(&priv->nfp_mac_off_ids, 0, + NFP_MAX_MAC_INDEX, GFP_KERNEL); + if (idx < 0) { + mutex_unlock(&priv->nfp_mac_index_lock); + return idx; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->nfp_mac_index_lock); + return -ENOMEM; + } + entry->ifindex = ifindex; + entry->index = idx; + list_add_tail(&entry->list, &priv->nfp_mac_index_list); + mutex_unlock(&priv->nfp_mac_index_lock); + + return idx; +} + +static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_non_nfp_idx *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_mac_index_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); + if (entry->ifindex == ifindex) { + ida_simple_remove(&priv->nfp_mac_off_ids, + entry->index); + list_del(&entry->list); + kfree(entry); + break; + } + } + mutex_unlock(&priv->nfp_mac_index_lock); +} + +static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev, + struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_offload_entry *entry; + u16 nfp_mac_idx; + int port = 0; + + /* Check if MAC should be offloaded. */ + if (!is_valid_ether_addr(netdev->dev_addr)) + return; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_repr_get_port_id(netdev); + else if (!nfp_tun_is_netdev_to_offload(netdev)) + return; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n"); + return; + } + + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) { + nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT; + } else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) { + port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port); + nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT; + } else { + /* Must assign our own unique 8-bit index. 
*/ + int idx = nfp_tun_get_mac_idx(app, netdev->ifindex); + + if (idx < 0) { + nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n"); + kfree(entry); + return; + } + nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; + } + + entry->index = cpu_to_be16(nfp_mac_idx); + ether_addr_copy(entry->addr, netdev->dev_addr); + + mutex_lock(&priv->nfp_mac_off_lock); + priv->nfp_mac_off_count++; + list_add_tail(&entry->list, &priv->nfp_mac_off_list); + mutex_unlock(&priv->nfp_mac_off_lock); +} + +static int nfp_tun_mac_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct nfp_flower_priv *app_priv; + struct net_device *netdev; + struct nfp_app *app; + + if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) { + app_priv = container_of(nb, struct nfp_flower_priv, + nfp_tun_mac_nb); + app = app_priv->app; + netdev = netdev_notifier_info_to_dev(ptr); + + /* If non-nfp netdev then free its offload index. */ + if (nfp_tun_is_netdev_to_offload(netdev)) + nfp_tun_del_mac_idx(app, netdev->ifindex); + } else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR || + event == NETDEV_REGISTER) { + app_priv = container_of(nb, struct nfp_flower_priv, + nfp_tun_mac_nb); + app = app_priv->app; + netdev = netdev_notifier_info_to_dev(ptr); + + nfp_tun_add_to_mac_offload_list(netdev, app); + + /* Force a list write to keep NFP up to date. */ + nfp_tunnel_write_macs(app); + } + return NOTIFY_OK; +} + +int nfp_tunnel_config_start(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct net_device *netdev; + int err; + + /* Initialise priv data for MAC offloading. */ + priv->nfp_mac_off_count = 0; + mutex_init(&priv->nfp_mac_off_lock); + INIT_LIST_HEAD(&priv->nfp_mac_off_list); + priv->nfp_tun_mac_nb.notifier_call = nfp_tun_mac_event_handler; + mutex_init(&priv->nfp_mac_index_lock); + INIT_LIST_HEAD(&priv->nfp_mac_index_list); + ida_init(&priv->nfp_mac_off_ids); + + err = register_netdevice_notifier(&priv->nfp_tun_mac_nb); + if (err) + goto err_free_mac_ida; + + /* Parse netdevs already registered for MACs that need offloaded. */ + rtnl_lock(); + for_each_netdev(&init_net, netdev) + nfp_tun_add_to_mac_offload_list(netdev, app); + rtnl_unlock(); + + return 0; + +err_free_mac_ida: + ida_destroy(&priv->nfp_mac_off_ids); + return err; +} + +void nfp_tunnel_config_stop(struct nfp_app *app) +{ + struct nfp_tun_mac_offload_entry *mac_entry; + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_non_nfp_idx *mac_idx; + struct list_head *ptr, *storage; + + unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); + + /* Free any memory that may be occupied by MAC list. */ + mutex_lock(&priv->nfp_mac_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + list_del(&mac_entry->list); + kfree(mac_entry); + } + mutex_unlock(&priv->nfp_mac_off_lock); + + /* Free any memory that may be occupied by MAC index list. 
*/ + mutex_lock(&priv->nfp_mac_index_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, + list); + list_del(&mac_idx->list); + kfree(mac_idx); + } + mutex_unlock(&priv->nfp_mac_index_lock); + + ida_destroy(&priv->nfp_mac_off_ids); +} From 2d9ad71a8ce67eea9ee38512a215e1893bd5cf87 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:39 +0200 Subject: [PATCH 0333/2177] nfp: offload vxlan IPv4 endpoints of flower rules Maintain a list of IPv4 addresses used as the tunnel destination IP match fields in currently active flower rules. Offload the entire list of NFP_FL_IPV4_ADDRS_MAX (even if some are unused) when new IPs are added or removed. The NFP should only be aware of tunnel end points that are currently used by rules on the device Signed-off-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.h | 1 + .../net/ethernet/netronome/nfp/flower/main.h | 7 + .../net/ethernet/netronome/nfp/flower/match.c | 14 +- .../ethernet/netronome/nfp/flower/offload.c | 4 + .../netronome/nfp/flower/tunnel_conf.c | 120 ++++++++++++++++++ 5 files changed, 143 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index dc248193c996..6540bb1ceefb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -318,6 +318,7 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, NFP_FLOWER_CMSG_TYPE_MAX = 32, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 9de375acc254..53306af6cfe8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -86,8 +86,10 @@ struct nfp_fl_stats_id { * @cmsg_skbs: List of skbs for control message processing * @nfp_mac_off_list: List of MAC addresses to offload * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs + * @nfp_ipv4_off_list: List of IPv4 addresses to offload * @nfp_mac_off_lock: Lock for the MAC address list * @nfp_mac_index_lock: Lock for the MAC index list + * @nfp_ipv4_off_lock: Lock for the IPv4 address list * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs * @nfp_mac_off_count: Number of MACs in address list * @nfp_tun_mac_nb: Notifier to monitor link state @@ -105,8 +107,10 @@ struct nfp_flower_priv { struct sk_buff_head cmsg_skbs; struct list_head nfp_mac_off_list; struct list_head nfp_mac_index_list; + struct list_head nfp_ipv4_off_list; struct mutex nfp_mac_off_lock; struct mutex nfp_mac_index_lock; + struct mutex nfp_ipv4_off_lock; struct ida nfp_mac_off_ids; int nfp_mac_off_count; struct notifier_block nfp_tun_mac_nb; @@ -142,6 +146,7 @@ struct nfp_fl_payload { struct rcu_head rcu; spinlock_t lock; /* lock stats */ struct nfp_fl_stats stats; + __be32 nfp_tun_ipv4_addr; char *unmasked_data; char *mask_data; char *action_data; @@ -182,5 +187,7 @@ void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb); int nfp_tunnel_config_start(struct nfp_app *app); void nfp_tunnel_config_stop(struct nfp_app *app); void nfp_tunnel_write_macs(struct nfp_app *app); +void 
nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index cb3ff6c126e8..865a815ab92a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -195,7 +195,7 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, static void nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame, struct tc_cls_flower_offload *flow, - bool mask_version) + bool mask_version, __be32 *tun_dst) { struct fl_flow_key *target = mask_version ? flow->mask : flow->key; struct flow_dissector_key_ipv4_addrs *vxlan_ips; @@ -223,6 +223,7 @@ nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame, target); frame->ip_src = vxlan_ips->src; frame->ip_dst = vxlan_ips->dst; + *tun_dst = vxlan_ips->dst; } } @@ -232,6 +233,7 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_flow) { enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + __be32 tun_dst, tun_dst_mask = 0; struct nfp_repr *netdev_repr; int err; u8 *ext; @@ -336,10 +338,10 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) { /* Populate Exact VXLAN Data. */ nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext, - flow, false); + flow, false, &tun_dst); /* Populate Mask VXLAN Data. */ nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk, - flow, true); + flow, true, &tun_dst_mask); ext += sizeof(struct nfp_flower_vxlan); msk += sizeof(struct nfp_flower_vxlan); @@ -347,6 +349,12 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, if (nfp_netdev_is_nfp_repr(netdev)) { netdev_repr = netdev_priv(netdev); nfp_tunnel_write_macs(netdev_repr->app); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. 
+ */ + nfp_flow->nfp_tun_ipv4_addr = tun_dst; + nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst); } } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 637372ba8f55..3d9537ebdea4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -306,6 +306,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) if (!flow_pay->action_data) goto err_free_mask; + flow_pay->nfp_tun_ipv4_addr = 0; flow_pay->meta.flags = 0; spin_lock_init(&flow_pay->lock); @@ -415,6 +416,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_free_flow; + if (nfp_flow->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + err = nfp_flower_xmit_flow(netdev, nfp_flow, NFP_FLOWER_CMSG_TYPE_FLOW_DEL); if (err) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 34be85803020..185505140f5e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -32,6 +32,7 @@ */ #include +#include #include #include @@ -40,6 +41,30 @@ #include "../nfp_net_repr.h" #include "../nfp_net.h" +#define NFP_FL_IPV4_ADDRS_MAX 32 + +/** + * struct nfp_tun_ipv4_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv4_addr: array of IPV4_ADDRS_MAX 32 bit IPv4 addresses + */ +struct nfp_tun_ipv4_addr { + __be32 count; + __be32 ipv4_addr[NFP_FL_IPV4_ADDRS_MAX]; +}; + +/** + * struct nfp_ipv4_addr_entry - cached IPv4 addresses + * @ipv4_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv4_addr_entry { + __be32 ipv4_addr; + int ref_count; + struct list_head list; +}; + /** * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP * @reserved: reserved for future use @@ -112,6 +137,87 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata) return 0; } +static void nfp_tun_write_ipv4_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct nfp_tun_ipv4_addr payload; + struct list_head *ptr, *storage; + int count; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr)); + mutex_lock(&priv->nfp_ipv4_off_lock); + count = 0; + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + if (count >= NFP_FL_IPV4_ADDRS_MAX) { + mutex_unlock(&priv->nfp_ipv4_off_lock); + nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n"); + return; + } + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + payload.ipv4_addr[count++] = entry->ipv4_addr; + } + payload.count = cpu_to_be32(count); + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS, + sizeof(struct nfp_tun_ipv4_addr), + &payload); +} + +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count++; + mutex_unlock(&priv->nfp_ipv4_off_lock); + return; + } + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + 
mutex_unlock(&priv->nfp_ipv4_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return; + } + entry->ipv4_addr = ipv4; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->nfp_ipv4_off_list); + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count--; + if (!entry->ref_count) { + list_del(&entry->list); + kfree(entry); + } + break; + } + } + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + void nfp_tunnel_write_macs(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; @@ -324,6 +430,10 @@ int nfp_tunnel_config_start(struct nfp_app *app) INIT_LIST_HEAD(&priv->nfp_mac_index_list); ida_init(&priv->nfp_mac_off_ids); + /* Initialise priv data for IPv4 offloading. */ + mutex_init(&priv->nfp_ipv4_off_lock); + INIT_LIST_HEAD(&priv->nfp_ipv4_off_list); + err = register_netdevice_notifier(&priv->nfp_tun_mac_nb); if (err) goto err_free_mac_ida; @@ -346,6 +456,7 @@ void nfp_tunnel_config_stop(struct nfp_app *app) struct nfp_tun_mac_offload_entry *mac_entry; struct nfp_flower_priv *priv = app->priv; struct nfp_tun_mac_non_nfp_idx *mac_idx; + struct nfp_ipv4_addr_entry *ip_entry; struct list_head *ptr, *storage; unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); @@ -371,4 +482,13 @@ void nfp_tunnel_config_stop(struct nfp_app *app) mutex_unlock(&priv->nfp_mac_index_lock); ida_destroy(&priv->nfp_mac_off_ids); + + /* Free any memory that may be occupied by ipv4 list. */ + mutex_lock(&priv->nfp_ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + list_del(&ip_entry->list); + kfree(ip_entry); + } + mutex_unlock(&priv->nfp_ipv4_off_lock); } From 8e6a9046b66a7dfb11ae8be226afaaf417649411 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:40 +0200 Subject: [PATCH 0334/2177] nfp: flower vxlan neighbour offload Receive a request when the NFP does not know the next hop for a packet that is to be encapsulated in a VXLAN tunnel. Do a route lookup, determine the next hop entry and update neighbour table on NFP. Monitor the kernel neighbour table for link changes and update NFP with relevant information. Overwrite routes with zero values on the NFP when they expire. Signed-off-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.c | 6 + .../net/ethernet/netronome/nfp/flower/cmsg.h | 2 + .../net/ethernet/netronome/nfp/flower/main.h | 7 + .../netronome/nfp/flower/tunnel_conf.c | 253 ++++++++++++++++++ 4 files changed, 268 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index b756006dba6f..862787daaa68 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -181,6 +181,12 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_FLOW_STATS: nfp_flower_rx_flow_stats(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: + nfp_tunnel_request_route(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH: + /* Acks from the NFP that the route is added - ignore. */ + break; default: nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n", type); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 6540bb1ceefb..1dc72a1ed577 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -317,7 +317,9 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10, NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13, NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 53306af6cfe8..93ad969c3653 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -87,12 +87,15 @@ struct nfp_fl_stats_id { * @nfp_mac_off_list: List of MAC addresses to offload * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs * @nfp_ipv4_off_list: List of IPv4 addresses to offload + * @nfp_neigh_off_list: List of neighbour offloads * @nfp_mac_off_lock: Lock for the MAC address list * @nfp_mac_index_lock: Lock for the MAC index list * @nfp_ipv4_off_lock: Lock for the IPv4 address list + * @nfp_neigh_off_lock: Lock for the neighbour address list * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs * @nfp_mac_off_count: Number of MACs in address list * @nfp_tun_mac_nb: Notifier to monitor link state + * @nfp_tun_neigh_nb: Notifier to monitor neighbour state */ struct nfp_flower_priv { struct nfp_app *app; @@ -108,12 +111,15 @@ struct nfp_flower_priv { struct list_head nfp_mac_off_list; struct list_head nfp_mac_index_list; struct list_head nfp_ipv4_off_list; + struct list_head nfp_neigh_off_list; struct mutex nfp_mac_off_lock; struct mutex nfp_mac_index_lock; struct mutex nfp_ipv4_off_lock; + struct mutex nfp_neigh_off_lock; struct ida nfp_mac_off_ids; int nfp_mac_off_count; struct notifier_block nfp_tun_mac_nb; + struct notifier_block nfp_tun_neigh_nb; }; struct nfp_fl_key_ls { @@ -189,5 +195,6 @@ void nfp_tunnel_config_stop(struct nfp_app *app); void nfp_tunnel_write_macs(struct nfp_app *app); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); +void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c 
b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 185505140f5e..8c6b88a1306b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -41,6 +42,44 @@ #include "../nfp_net_repr.h" #include "../nfp_net.h" +/** + * struct nfp_tun_neigh - neighbour/route entry on the NFP + * @dst_ipv4: destination IPv4 address + * @src_ipv4: source IPv4 address + * @dst_addr: destination MAC address + * @src_addr: source MAC address + * @port_id: NFP port to output packet on - associated with source IPv4 + */ +struct nfp_tun_neigh { + __be32 dst_ipv4; + __be32 src_ipv4; + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** + * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv4_addr: destination ipv4 address for route + * @reserved: reserved for future use + */ +struct nfp_tun_req_route_ipv4 { + __be32 ingress_port; + __be32 ipv4_addr; + __be32 reserved[2]; +}; + +/** + * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP + * @ipv4_addr: destination of route + * @list: list pointer + */ +struct nfp_ipv4_route_entry { + __be32 ipv4_addr; + struct list_head list; +}; + #define NFP_FL_IPV4_ADDRS_MAX 32 /** @@ -137,6 +176,197 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata) return 0; } +static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + mutex_unlock(&priv->nfp_neigh_off_lock); + return true; + } + } + mutex_unlock(&priv->nfp_neigh_off_lock); + return false; +} + +static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + mutex_unlock(&priv->nfp_neigh_off_lock); + return; + } + } + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->nfp_neigh_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); + return; + } + + entry->ipv4_addr = ipv4_addr; + list_add_tail(&entry->list, &priv->nfp_neigh_off_list); + mutex_unlock(&priv->nfp_neigh_off_lock); +} + +static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + list_del(&entry->list); + kfree(entry); + break; + } + } + mutex_unlock(&priv->nfp_neigh_off_lock); +} + +static void +nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, + struct flowi4 *flow, struct neighbour *neigh) +{ + struct nfp_tun_neigh payload; + + /* Only offload representor IPv4s for now. 
*/ + if (!nfp_netdev_is_nfp_repr(netdev)) + return; + + memset(&payload, 0, sizeof(struct nfp_tun_neigh)); + payload.dst_ipv4 = flow->daddr; + + /* If entry has expired send dst IP with all other fields 0. */ + if (!(neigh->nud_state & NUD_VALID)) { + nfp_tun_del_route_from_cache(app, payload.dst_ipv4); + /* Trigger ARP to verify invalid neighbour state. */ + neigh_event_send(neigh, NULL); + goto send_msg; + } + + /* Have a valid neighbour so populate rest of entry. */ + payload.src_ipv4 = flow->saddr; + ether_addr_copy(payload.src_addr, netdev->dev_addr); + neigh_ha_snapshot(payload.dst_addr, neigh, netdev); + payload.port_id = cpu_to_be32(nfp_repr_get_port_id(netdev)); + /* Add destination of new route to NFP cache. */ + nfp_tun_add_route_to_cache(app, payload.dst_ipv4); + +send_msg: + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&payload); +} + +static int +nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nfp_flower_priv *app_priv; + struct netevent_redirect *redir; + struct flowi4 flow = {}; + struct neighbour *n; + struct nfp_app *app; + struct rtable *rt; + int err; + + switch (event) { + case NETEVENT_REDIRECT: + redir = (struct netevent_redirect *)ptr; + n = redir->neigh; + break; + case NETEVENT_NEIGH_UPDATE: + n = (struct neighbour *)ptr; + break; + default: + return NOTIFY_DONE; + } + + flow.daddr = *(__be32 *)n->primary_key; + + /* Only concerned with route changes for representors. */ + if (!nfp_netdev_is_nfp_repr(n->dev)) + return NOTIFY_DONE; + + app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb); + app = app_priv->app; + + /* Only concerned with changes to routes already added to NFP. */ + if (!nfp_tun_has_route(app, flow.daddr)) + return NOTIFY_DONE; + +#if IS_ENABLED(CONFIG_INET) + /* Do a route lookup to populate flow data. */ + rt = ip_route_output_key(dev_net(n->dev), &flow); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; +#else + return NOTIFY_DONE; +#endif + + flow.flowi4_proto = IPPROTO_UDP; + nfp_tun_write_neigh(n->dev, app, &flow, n); + + return NOTIFY_OK; +} + +void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv4 *payload; + struct net_device *netdev; + struct flowi4 flow = {}; + struct neighbour *n; + struct rtable *rt; + int err; + + payload = nfp_flower_cmsg_get_data(skb); + + netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port)); + if (!netdev) + goto route_fail_warning; + + flow.daddr = payload->ipv4_addr; + flow.flowi4_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) + /* Do a route lookup on same namespace as ingress port. */ + rt = ip_route_output_key(dev_net(netdev), &flow); + err = PTR_ERR_OR_ZERO(rt); + if (err) + goto route_fail_warning; +#else + goto route_fail_warning; +#endif + + /* Get the neighbour entry for the lookup */ + n = dst_neigh_lookup(&rt->dst, &flow.daddr); + ip_rt_put(rt); + if (!n) + goto route_fail_warning; + nfp_tun_write_neigh(n->dev, app, &flow, n); + neigh_release(n); + return; + +route_fail_warning: + nfp_flower_cmsg_warn(app, "Requested route not found.\n"); +} + static void nfp_tun_write_ipv4_list(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; @@ -434,10 +664,19 @@ int nfp_tunnel_config_start(struct nfp_app *app) mutex_init(&priv->nfp_ipv4_off_lock); INIT_LIST_HEAD(&priv->nfp_ipv4_off_list); + /* Initialise priv data for neighbour offloading. 
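+ * The nfp_neigh_off_list caches the destination IPs whose neighbour
+ * entries have been written to the firmware, so the netevent handler
+ * can ignore updates for hosts the NFP is not tracking.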
*/ + mutex_init(&priv->nfp_neigh_off_lock); + INIT_LIST_HEAD(&priv->nfp_neigh_off_list); + priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler; + err = register_netdevice_notifier(&priv->nfp_tun_mac_nb); if (err) goto err_free_mac_ida; + err = register_netevent_notifier(&priv->nfp_tun_neigh_nb); + if (err) + goto err_unreg_mac_nb; + /* Parse netdevs already registered for MACs that need offloaded. */ rtnl_lock(); for_each_netdev(&init_net, netdev) @@ -446,6 +685,8 @@ int nfp_tunnel_config_start(struct nfp_app *app) return 0; +err_unreg_mac_nb: + unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); err_free_mac_ida: ida_destroy(&priv->nfp_mac_off_ids); return err; @@ -455,11 +696,13 @@ void nfp_tunnel_config_stop(struct nfp_app *app) { struct nfp_tun_mac_offload_entry *mac_entry; struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *route_entry; struct nfp_tun_mac_non_nfp_idx *mac_idx; struct nfp_ipv4_addr_entry *ip_entry; struct list_head *ptr, *storage; unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); + unregister_netevent_notifier(&priv->nfp_tun_neigh_nb); /* Free any memory that may be occupied by MAC list. */ mutex_lock(&priv->nfp_mac_off_lock); @@ -491,4 +734,14 @@ void nfp_tunnel_config_stop(struct nfp_app *app) kfree(ip_entry); } mutex_unlock(&priv->nfp_ipv4_off_lock); + + /* Free any memory that may be occupied by the route list. */ + mutex_lock(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, + list); + list_del(&route_entry->list); + kfree(route_entry); + } + mutex_unlock(&priv->nfp_neigh_off_lock); } From 856f5b135758ad80053a49f7ce9d1dc0166e3006 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:41 +0200 Subject: [PATCH 0335/2177] nfp: flower vxlan neighbour keep-alive Periodically receive messages containing the destination IPs of tunnels that have recently forwarded traffic. Update the neighbour entries 'used' value for these IPs next hop. This prevents the neighbour entry from expiring on timeout but rather signals an ARP to verify the connection. From an NFP perspective, packets will not fall back mid-flow unless the link is verified to be down. Signed-off-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.c | 3 + .../net/ethernet/netronome/nfp/flower/cmsg.h | 1 + .../net/ethernet/netronome/nfp/flower/main.h | 1 + .../netronome/nfp/flower/tunnel_conf.c | 64 +++++++++++++++++++ 4 files changed, 69 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 862787daaa68..6b71c719deba 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -184,6 +184,9 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: nfp_tunnel_request_route(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: + nfp_tunnel_keep_alive(app, skb); + break; case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH: /* Acks from the NFP that the route is added - ignore. 
*/ break; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 1dc72a1ed577..504ddaa21701 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -319,6 +319,7 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10, NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS = 12, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13, NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 93ad969c3653..12c319a219d8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -196,5 +196,6 @@ void nfp_tunnel_write_macs(struct nfp_app *app); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 8c6b88a1306b..c495f8f38506 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -36,12 +36,36 @@ #include #include #include +#include #include "cmsg.h" #include "main.h" #include "../nfp_net_repr.h" #include "../nfp_net.h" +#define NFP_FL_MAX_ROUTES 32 + +/** + * struct nfp_tun_active_tuns - periodic message of active tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @ipv4: dest IPv4 address of active route + * @egress_port: port the encapsulated packet egressed + * @extra: reserved for future use + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info { + __be32 ipv4; + __be32 egress_port; + __be32 extra[2]; + } tun_info[]; +}; + /** * struct nfp_tun_neigh - neighbour/route entry on the NFP * @dst_ipv4: destination IPv4 address @@ -147,6 +171,46 @@ struct nfp_tun_mac_non_nfp_idx { struct list_head list; }; +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_active_tuns *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + __be32 ipv4_addr; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_MAX_ROUTES) { + nfp_flower_cmsg_warn(app, "Tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != sizeof(struct nfp_tun_active_tuns) + + sizeof(struct route_ip_info) * count) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + for (i = 0; i < count; i++) { + ipv4_addr = payload->tun_info[i].ipv4; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_repr_get(app, port); + if (!netdev) + continue; + + n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } +} + static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) { if 
(!netdev->rtnl_link_ops) From 85e482285bbbd508483cbe08de69c8fe00cdbbfe Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:11 +0200 Subject: [PATCH 0336/2177] fib: notifier: Add VIF add and delete event types In order for an interface to forward packets according to the kernel multicast routing table, it must be configured with a VIF index according to the mroute user API. The VIF index is then used to refer to that interface in the mroute user API, for example, to set the iif and oifs of an MFC entry. In order to allow drivers to be aware and offload multicast routes, they have to be aware of the VIF add and delete notifications. Due to the fact that a specific VIF can be deleted and re-added pointing to another netdevice, and the MFC routes that point to it will forward the matching packets to the new netdevice, a driver willing to offload MFC cache entries must be aware of the VIF add and delete events in addition to MFC routes notifications. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 669b9716dc7a..54cd6b839d2f 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -20,6 +20,8 @@ enum fib_event_type { FIB_EVENT_RULE_DEL, FIB_EVENT_NH_ADD, FIB_EVENT_NH_DEL, + FIB_EVENT_VIF_ADD, + FIB_EVENT_VIF_DEL, }; struct fib_notifier_ops { From 310ebbba3b7396b00bce08a33f1d2de2c74fa257 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:12 +0200 Subject: [PATCH 0337/2177] ipmr: Add reference count to MFC entries Next commits will introduce MFC notifications through the atomic fib_notification chain, thus allowing modules to be aware of MFC entries. Due to the fact that modules may need to hold a reference to an MFC entry, add reference count to MFC entries to prevent them from being freed while these modules use them. The reference counting is done only on resolved MFC entries currently. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute.h | 21 +++++++++++++++++++++ net/ipv4/ipmr.c | 8 +++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index d7f63339ef0b..10028f208efb 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -109,6 +109,7 @@ struct mfc_cache_cmp_arg { * @wrong_if: number of wrong source interface hits * @lastuse: time of last use of the group (traffic or update) * @ttls: OIF TTL threshold array + * @refcount: reference count for this entry * @list: global entry list * @rcu: used for entry destruction */ @@ -138,6 +139,7 @@ struct mfc_cache { unsigned long wrong_if; unsigned long lastuse; unsigned char ttls[MAXVIFS]; + refcount_t refcount; } res; } mfc_un; struct list_head list; @@ -148,4 +150,23 @@ struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid); + +#ifdef CONFIG_IP_MROUTE +void ipmr_cache_free(struct mfc_cache *mfc_cache); +#else +static inline void ipmr_cache_free(struct mfc_cache *mfc_cache) +{ +} +#endif + +static inline void ipmr_cache_put(struct mfc_cache *c) +{ + if (refcount_dec_and_test(&c->mfc_un.res.refcount)) + ipmr_cache_free(c); +} +static inline void ipmr_cache_hold(struct mfc_cache *c) +{ + refcount_inc(&c->mfc_un.res.refcount); +} + #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c9b3e6e069ae..86dc5f98c5dd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -652,10 +652,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head) kmem_cache_free(mrt_cachep, c); } -static inline void ipmr_cache_free(struct mfc_cache *c) +void ipmr_cache_free(struct mfc_cache *c) { call_rcu(&c->rcu, ipmr_cache_free_rcu); } +EXPORT_SYMBOL(ipmr_cache_free); /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. @@ -949,6 +950,7 @@ static struct mfc_cache *ipmr_cache_alloc(void) if (c) { c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + refcount_set(&c->mfc_un.res.refcount, 1); } return c; } @@ -1162,7 +1164,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_free(c); + ipmr_cache_put(c); return 0; } @@ -1264,7 +1266,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_free(c); + ipmr_cache_put(c); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { From 4d65b9487831170e699b2fc64a91b839d729bd78 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:13 +0200 Subject: [PATCH 0338/2177] ipmr: Add FIB notification access functions Make the ipmr module register as a FIB notifier. To do that, implement both the ipmr_seq_read and ipmr_dump ops. The ipmr_seq_read op returns a sequence counter that is incremented on every notification related operation done by the ipmr. To implement that, add a sequence counter in the netns_ipv4 struct and increment it whenever a new MFC route or VIF are added or deleted. The sequence operations are protected by the RTNL lock. The ipmr_dump iterates the list of MFC routes and the list of VIF entries and sends notifications about them. 
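(Illustrative aside, not part of the patch: a driver consuming these events would register a notifier block on the FIB notification chain and dispatch on the new RTNL_FAMILY_IPMR notifications. The sketch below is a minimal, hypothetical consumer -- my_mr_fib_event and its log strings are invented, while the structs, event types and helpers come from the patches in this series.)

    /* Hypothetical consumer of the RTNL_FAMILY_IPMR events; needs
     * <linux/mroute.h> and <net/fib_notifier.h>.  Everything prefixed
     * with my_ is made up for illustration.
     */
    static int my_mr_fib_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
    {
            struct fib_notifier_info *info = ptr;
            struct vif_entry_notifier_info *vif_info;
            struct mfc_entry_notifier_info *mfc_info;

            /* The chain also carries unicast FIB events; filter on family. */
            if (info->family != RTNL_FAMILY_IPMR)
                    return NOTIFY_DONE;

            switch (event) {
            case FIB_EVENT_VIF_ADD:
            case FIB_EVENT_VIF_DEL:
                    vif_info = container_of(info,
                                            struct vif_entry_notifier_info,
                                            info);
                    pr_debug("ipmr: VIF %d %s on %s\n", vif_info->vif_index,
                             event == FIB_EVENT_VIF_ADD ? "added" : "deleted",
                             netdev_name(vif_info->dev));
                    break;
            case FIB_EVENT_ENTRY_ADD:
            case FIB_EVENT_ENTRY_DEL:
                    mfc_info = container_of(info,
                                            struct mfc_entry_notifier_info,
                                            info);
                    /* Hold the MFC entry while it is handed to offload
                     * work; released later with ipmr_cache_put().
                     */
                    if (event == FIB_EVENT_ENTRY_ADD)
                            ipmr_cache_hold(mfc_info->mfc);
                    break;
            }
            return NOTIFY_DONE;
    }

A callback like this would be registered with register_fib_notifier(), at which point the ipmr_dump op described above replays the already-existing VIF and MFC entries to it.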
The entries dump is done under RCU where the VIF dump uses the mrt_lock too, as the vif->dev field can change under RCU. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 15 +++++ include/net/netns/ipv4.h | 3 + net/ipv4/ipmr.c | 137 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 153 insertions(+), 2 deletions(-) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 10028f208efb..54c5cb82ddcb 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #ifdef CONFIG_IP_MROUTE @@ -58,6 +59,14 @@ struct vif_device { int link; /* Physical interface index */ }; +struct vif_entry_notifier_info { + struct fib_notifier_info info; + struct net_device *dev; + vifi_t vif_index; + unsigned short vif_flags; + u32 tb_id; +}; + #define VIFF_STATIC 0x8000 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) @@ -146,6 +155,12 @@ struct mfc_cache { struct rcu_head rcu; }; +struct mfc_entry_notifier_info { + struct fib_notifier_info info; + struct mfc_cache *mfc; + u32 tb_id; +}; + struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8387f099115e..abc84d986da4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -163,6 +163,9 @@ struct netns_ipv4 { struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* protected by rtnl_mutex */ + struct fib_notifier_ops *ipmr_notifier_ops; + unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t rt_genid; }; #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 86dc5f98c5dd..49879c338357 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -264,6 +264,16 @@ static void __net_exit ipmr_rules_exit(struct net *net) fib_rules_unregister(net->ipv4.mr_rules_ops); rtnl_unlock(); } + +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); +} + +static unsigned int ipmr_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); +} #else #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) @@ -298,6 +308,16 @@ static void __net_exit ipmr_rules_exit(struct net *net) net->ipv4.mrt = NULL; rtnl_unlock(); } + +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} + +static unsigned int ipmr_rules_seq_read(struct net *net) +{ + return 0; +} #endif static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, @@ -587,6 +607,43 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) } #endif +static int call_ipmr_vif_entry_notifier(struct notifier_block *nb, + struct net *net, + enum fib_event_type event_type, + struct vif_device *vif, + vifi_t vif_index, u32 tb_id) +{ + struct vif_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .dev = vif->dev, + .vif_index = vif_index, + .vif_flags = vif->flags, + .tb_id = tb_id, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, + struct net *net, + enum fib_event_type event_type, + struct mfc_cache *mfc, u32 tb_id) +{ + struct mfc_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .mfc = mfc, + .tb_id = tb_id + 
}; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + /** * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call @@ -3050,14 +3107,87 @@ static const struct net_protocol pim_protocol = { }; #endif +static unsigned int ipmr_seq_read(struct net *net) +{ + ASSERT_RTNL(); + + return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); +} + +static int ipmr_dump(struct net *net, struct notifier_block *nb) +{ + struct mr_table *mrt; + int err; + + err = ipmr_rules_dump(net, nb); + if (err) + return err; + + ipmr_for_each_table(mrt, net) { + struct vif_device *v = &mrt->vif_table[0]; + struct mfc_cache *mfc; + int vifi; + + /* Notifiy on table VIF entries */ + read_lock(&mrt_lock); + for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { + if (!v->dev) + continue; + + call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD, + v, vifi, mrt->id); + } + read_unlock(&mrt_lock); + + /* Notify on table MFC entries */ + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) + call_ipmr_mfc_entry_notifier(nb, net, + FIB_EVENT_ENTRY_ADD, mfc, + mrt->id); + } + + return 0; +} + +static const struct fib_notifier_ops ipmr_notifier_ops_template = { + .family = RTNL_FAMILY_IPMR, + .fib_seq_read = ipmr_seq_read, + .fib_dump = ipmr_dump, + .owner = THIS_MODULE, +}; + +int __net_init ipmr_notifier_init(struct net *net) +{ + struct fib_notifier_ops *ops; + + net->ipv4.ipmr_seq = 0; + + ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv4.ipmr_notifier_ops = ops; + + return 0; +} + +static void __net_exit ipmr_notifier_exit(struct net *net) +{ + fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); + net->ipv4.ipmr_notifier_ops = NULL; +} + /* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; + err = ipmr_notifier_init(net); + if (err) + goto ipmr_notifier_fail; + err = ipmr_rules_init(net); if (err < 0) - goto fail; + goto ipmr_rules_fail; #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -3074,7 +3204,9 @@ proc_cache_fail: proc_vif_fail: ipmr_rules_exit(net); #endif -fail: +ipmr_rules_fail: + ipmr_notifier_exit(net); +ipmr_notifier_fail: return err; } @@ -3084,6 +3216,7 @@ static void __net_exit ipmr_net_exit(struct net *net) remove_proc_entry("ip_mr_cache", net->proc_net); remove_proc_entry("ip_mr_vif", net->proc_net); #endif + ipmr_notifier_exit(net); ipmr_rules_exit(net); } From b362053a7cc0fcc09b92642ba5dd1ca7fddc9004 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:14 +0200 Subject: [PATCH 0339/2177] ipmr: Send FIB notifications on MFC and VIF entries Use the newly introduced notification chain to send events upon VIF and MFC addition and deletion. The MFC notifications are sent only on resolved MFC entries, as unresolved cannot be offloaded. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 49879c338357..ba71bc402336 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -627,6 +627,27 @@ static int call_ipmr_vif_entry_notifier(struct notifier_block *nb, return call_fib_notifier(nb, net, event_type, &info.info); } +static int call_ipmr_vif_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct vif_device *vif, + vifi_t vif_index, u32 tb_id) +{ + struct vif_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .dev = vif->dev, + .vif_index = vif_index, + .vif_flags = vif->flags, + .tb_id = tb_id, + }; + + ASSERT_RTNL(); + net->ipv4.ipmr_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, @@ -644,6 +665,24 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, return call_fib_notifier(nb, net, event_type, &info.info); } +static int call_ipmr_mfc_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct mfc_cache *mfc, u32 tb_id) +{ + struct mfc_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .mfc = mfc, + .tb_id = tb_id + }; + + ASSERT_RTNL(); + net->ipv4.ipmr_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + /** * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call @@ -651,6 +690,7 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { + struct net *net = read_pnet(&mrt->net); struct vif_device *v; struct net_device *dev; struct in_device *in_dev; @@ -660,6 +700,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; + if (VIF_EXISTS(mrt, vifi)) + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi, + mrt->id); + write_lock_bh(&mrt_lock); dev = v->dev; v->dev = NULL; @@ -909,6 +953,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); return 0; } @@ -1209,6 +1254,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { + struct net *net = read_pnet(&mrt->net); struct mfc_cache *c; /* The entries are added/deleted only under RTNL */ @@ -1220,6 +1266,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) return -ENOENT; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_put(c); @@ -1248,6 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, + mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } @@ -1297,6 +1346,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, ipmr_cache_resolve(net, mrt, uc, c); ipmr_cache_free(uc); } + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); mroute_netlink_event(mrt, 
c, RTM_NEWROUTE); return 0; } @@ -1304,6 +1354,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, /* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt, bool all) { + struct net *net = read_pnet(&mrt->net); struct mfc_cache *c, *tmp; LIST_HEAD(list); int i; @@ -1322,6 +1373,8 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, + mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_put(c); }
From c7c0bbeae9501a7e42f2fd306d6a6399aca688b6 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:15 +0200 Subject: [PATCH 0340/2177] net: ipmr: Add MFC offload indication Allow drivers registered to the fib notification chain to indicate whether a multicast MFC route is offloaded or not, similarly to unicast routes. The indication of whether a route is offloaded is done using the mfc_flags field on an mfc_cache struct, and the information is sent to userspace via the RTNetlink interface only. Currently, MFC routes are either offloaded or not, thus there is no need to add per-VIF offload indication. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 ++ net/ipv4/ipmr.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 54c5cb82ddcb..5566580811ce 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -90,9 +90,11 @@ struct mr_table { /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) + * MFC_OFFLOAD - the entry was offloaded to the hardware */ enum { MFC_STATIC = BIT(0), + MFC_OFFLOAD = BIT(1), }; struct mfc_cache_cmp_arg { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ba71bc402336..2a795d2c0502 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2268,6 +2268,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) return -EMSGSIZE; + if (c->mfc_flags & MFC_OFFLOAD) + rtm->rtm_flags |= RTNH_F_OFFLOAD; + if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) return -EMSGSIZE;
From 478e4c2f0067d57d7c17059caafab026ca32084a Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:16 +0200 Subject: [PATCH 0341/2177] net: mroute: Check if rule is a default rule When the ipmr starts, it adds one default FIB rule that matches all packets and sends them to the DEFAULT (multicast) FIB table. A more complex rule can be added by the user to specify that for a specific interface, a packet should be looked up in either an arbitrary table or according to the l3mdev of the interface. For drivers that want to offload the ipmr logic into hardware but do not want to offload all the FIB rules functionality, provide a function that can indicate whether a FIB rule is the default multicast rule, meaning only one routing table is needed. This way, a driver can register to the FIB notification chain, get notifications about added FIB rules and trigger some kind of internal abort mechanism when a non-default rule is added by the user. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller
--- include/linux/mroute.h | 7 +++++++ net/ipv4/ipmr.c | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 5566580811ce..b072a84fbe1c 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); int ip_mr_init(void); +bool ipmr_rule_default(const struct fib_rule *rule); #else static inline int ip_mroute_setsockopt(struct sock *sock, int optname, char __user *optval, unsigned int optlen) @@ -46,6 +48,11 @@ static inline int ip_mroute_opt(int opt) { return 0; } + +static inline bool ipmr_rule_default(const struct fib_rule *rule) +{ + return true; +} #endif struct vif_device { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2a795d2c0502..292a8e80bdfa 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -274,6 +274,12 @@ static unsigned int ipmr_rules_seq_read(struct net *net) { return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); } + +bool ipmr_rule_default(const struct fib_rule *rule) +{ + return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; +} +EXPORT_SYMBOL(ipmr_rule_default); #else #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) @@ -318,6 +324,12 @@ static unsigned int ipmr_rules_seq_read(struct net *net) { return 0; } + +bool ipmr_rule_default(const struct fib_rule *rule) +{ + return true; +} +EXPORT_SYMBOL(ipmr_rule_default); #endif static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
From c011ec1bbfd69e091ca8d77e13fc251a07be57dc Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:17 +0200 Subject: [PATCH 0342/2177] mlxsw: spectrum: Add the multicast routing offloading logic Add the multicast router offloading logic, which is in charge of handling the VIF and MFC notifications and translating them to the hardware logic API. The offloading logic has to overcome several obstacles in order to safely comply with the kernel multicast router user API:
- It must keep track of the mapping between VIFs and netdevices. The user can add an MFC cache entry pointing to a VIF, delete the VIF and re-add it with a different netdevice. The offloading logic has to handle this in order to be compatible with the kernel logic.
- It must keep track of the mapping between netdevices and spectrum RIFs, as the current hardware implementation assumes having a RIF for every port in a multicast router.
- It must handle routes pointing to the pimreg device by trapping them to the kernel, as the packets should be delivered to userspace.
- It must handle routes pointing to tunnel VIFs. The current implementation does not support multicast forwarding to tunnels, thus routes that point to a tunnel should be trapped to the kernel.
- It must be aware of proxy multicast routes, which include both (*,*) routes and duplicate routes. Currently, proxy routes are not offloaded and trigger the abort mechanism: removal of all routes from hardware, letting the traffic go through the kernel.
The multicast routing offloading logic also updates the counters of the offloaded MFC routes in a periodic work item. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/Makefile | 3 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../net/ethernet/mellanox/mlxsw/spectrum_mr.c | 1014 +++++++++++++++++ .../net/ethernet/mellanox/mlxsw/spectrum_mr.h | 133 +++ 4 files changed, 1150 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 4b88158173f3..9b29764905f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -17,7 +17,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ - spectrum_ipip.o spectrum_acl_flex_actions.o + spectrum_ipip.o spectrum_acl_flex_actions.o \ + spectrum_mr.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 9355d914a4c8..44c5259e5548 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -153,6 +153,7 @@ struct mlxsw_sp { struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; + struct mlxsw_sp_mr *mr; struct mlxsw_afa *afa; struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c new file mode 100644 index 000000000000..09120259a45d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -0,0 +1,1014 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "spectrum_mr.h" +#include "spectrum_router.h" + +struct mlxsw_sp_mr { + const struct mlxsw_sp_mr_ops *mr_ops; + void *catchall_route_priv; + struct delayed_work stats_update_dw; + struct list_head table_list; +#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */ + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_vif { + struct net_device *dev; + const struct mlxsw_sp_rif *rif; + unsigned long vif_flags; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as one of the egress VIFs + */ + struct list_head route_evif_list; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as an ingress VIF + */ + struct list_head route_ivif_list; +}; + +struct mlxsw_sp_mr_route_vif_entry { + struct list_head vif_node; + struct list_head route_node; + struct mlxsw_sp_mr_vif *mr_vif; + struct mlxsw_sp_mr_route *mr_route; +}; + +struct mlxsw_sp_mr_table { + struct list_head node; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp *mlxsw_sp; + u32 vr_id; + struct mlxsw_sp_mr_vif vifs[MAXVIFS]; + struct list_head route_list; + struct rhashtable route_ht; + char catchall_route_priv[0]; + /* catchall_route_priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_route { + struct list_head node; + struct rhash_head ht_node; + struct mlxsw_sp_mr_route_key key; + enum mlxsw_sp_mr_route_action route_action; + u16 min_mtu; + struct mfc_cache *mfc4; + void *route_priv; + const struct mlxsw_sp_mr_table *mr_table; + /* A list of route_vif_entry structs that point to the egress VIFs */ + struct list_head evif_list; + /* A route_vif_entry struct that point to the ingress VIF */ + struct mlxsw_sp_mr_route_vif_entry ivif; +}; + +static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = { + .key_len = sizeof(struct mlxsw_sp_mr_route_key), + .key_offset = offsetof(struct mlxsw_sp_mr_route, key), + .head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node), + .automatic_shrinking = true, +}; + +static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif) +{ + return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER)); +} + +static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif) +{ + return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif; +} + +static bool mlxsw_sp_mr_vif_rif_invalid(const struct mlxsw_sp_mr_vif *vif) +{ + return mlxsw_sp_mr_vif_regular(vif) && vif->dev && !vif->rif; +} + +static bool +mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route) +{ + vifi_t ivif; + + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + ivif = mr_route->mfc4->mfc_parent; + return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } + return false; +} + +static int +mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry 
*rve; + int valid_evifs; + + valid_evifs = 0; + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + valid_evifs++; + return valid_evifs; +} + +static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route) +{ + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mr_route->key.source_mask.addr4 == INADDR_ANY; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } + return false; +} + +static enum mlxsw_sp_mr_route_action +mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* If the ingress port is not regular and resolved, trap the route */ + if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* The kernel does not match a (*,G) route that the ingress interface is + * not one of the egress interfaces, so trap these kind of routes. + */ + if (mlxsw_sp_mr_route_starg(mr_route) && + !mlxsw_sp_mr_route_ivif_in_evifs(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If the route has no valid eVIFs, trap it. */ + if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If either one of the eVIFs is not regular (VIF of type pimreg or + * tunnel) or one of the VIFs has no matching RIF, trap the packet. + */ + list_for_each_entry(rve, &mr_route->evif_list, route_node) { + if (!mlxsw_sp_mr_vif_regular(rve->mr_vif) || + mlxsw_sp_mr_vif_rif_invalid(rve->mr_vif)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + } + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD; +} + +static enum mlxsw_sp_mr_route_prio +mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route) +{ + return mlxsw_sp_mr_route_starg(mr_route) ? + MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG; +} + +static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_key *key, + const struct mfc_cache *mfc) +{ + bool starg = (mfc->mfc_origin == INADDR_ANY); + + memset(key, 0, sizeof(*key)); + key->vrid = mr_table->vr_id; + key->proto = mr_table->proto; + key->group.addr4 = mfc->mfc_mcastgrp; + key->group_mask.addr4 = 0xffffffff; + key->source.addr4 = mfc->mfc_origin; + key->source_mask.addr4 = starg ? 
0 : 0xffffffff; +} + +static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + rve = kzalloc(sizeof(*rve), GFP_KERNEL); + if (!rve) + return -ENOMEM; + rve->mr_route = mr_route; + rve->mr_vif = mr_vif; + list_add_tail(&rve->route_node, &mr_route->evif_list); + list_add_tail(&rve->vif_node, &mr_vif->route_evif_list); + return 0; +} + +static void +mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve) +{ + list_del(&rve->route_node); + list_del(&rve->vif_node); + kfree(rve); +} + +static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + mr_route->ivif.mr_route = mr_route; + mr_route->ivif.mr_vif = mr_vif; + list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list); +} + +static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route) +{ + list_del(&mr_route->ivif.vif_node); +} + +static int +mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + u16 *erif_indices; + u16 irif_index; + u16 erif = 0; + + erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices), + GFP_KERNEL); + if (!erif_indices) + return -ENOMEM; + + list_for_each_entry(rve, &mr_route->evif_list, route_node) { + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + + erif_indices[erif++] = rifi; + } + } + + if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif); + else + irif_index = 0; + + route_info->irif_index = irif_index; + route_info->erif_indices = erif_indices; + route_info->min_mtu = mr_route->min_mtu; + route_info->route_action = mr_route->route_action; + route_info->erif_num = erif; + return 0; +} + +static void +mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info) +{ + kfree(route_info->erif_indices); +} + +static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + bool replace) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_info route_info; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + int err; + + err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info); + if (err) + return err; + + if (!replace) { + struct mlxsw_sp_mr_route_params route_params; + + mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_route->route_priv) { + err = -ENOMEM; + goto out; + } + + route_params.key = mr_route->key; + route_params.value = route_info; + route_params.prio = mlxsw_sp_mr_route_prio(mr_route); + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_route->route_priv, + &route_params); + if (err) + kfree(mr_route->route_priv); + } else { + err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv, + &route_info); + } +out: + mlxsw_sp_mr_route_info_destroy(&route_info); + return err; +} + +static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv); + kfree(mr_route->route_priv); +} + +static struct mlxsw_sp_mr_route * +mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc) +{ + struct 
mlxsw_sp_mr_route_vif_entry *rve, *tmp; + struct mlxsw_sp_mr_route *mr_route; + int err; + int i; + + /* Allocate and init a new route and fill it with parameters */ + mr_route = kzalloc(sizeof(*mr_route), GFP_KERNEL); + if (!mr_route) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&mr_route->evif_list); + mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc); + + /* Find min_mtu and link iVIF and eVIFs */ + mr_route->min_mtu = ETH_MAX_MTU; + ipmr_cache_hold(mfc); + mr_route->mfc4 = mfc; + mr_route->mr_table = mr_table; + for (i = 0; i < MAXVIFS; i++) { + if (mfc->mfc_un.res.ttls[i] != 255) { + err = mlxsw_sp_mr_route_evif_link(mr_route, + &mr_table->vifs[i]); + if (err) + goto err; + if (mr_table->vifs[i].dev && + mr_table->vifs[i].dev->mtu < mr_route->min_mtu) + mr_route->min_mtu = mr_table->vifs[i].dev->mtu; + } + } + mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]); + if (err) + goto err; + + mr_route->route_action = mlxsw_sp_mr_route_action(mr_route); + return mr_route; +err: + ipmr_cache_put(mfc); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); + return ERR_PTR(err); +} + +static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + + mlxsw_sp_mr_route_ivif_unlink(mr_route); + ipmr_cache_put(mr_route->mfc4); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); +} + +static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + switch (mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_sp_mr_route4_destroy(mr_table, mr_route); + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route, + bool offload) +{ + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (offload) + mr_route->mfc4->mfc_flags |= MFC_OFFLOAD; + else + mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD; + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route) +{ + bool offload; + + offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_set(mr_route, offload); +} + +static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + mlxsw_sp_mr_mfc_offload_set(mr_route, false); + mlxsw_sp_mr_route_erase(mr_table, mr_route); + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_route->node); + mlxsw_sp_mr_route_destroy(mr_table, mr_route); +} + +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc, bool replace) +{ + struct mlxsw_sp_mr_route *mr_orig_route = NULL; + struct mlxsw_sp_mr_route *mr_route; + int err; + + /* If the route is a (*,*) route, abort, as these kind of routes are + * used for proxy routes. 
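+ * A (*,*) entry has both the origin and the group set to INADDR_ANY;
+ * such proxy routes are not offloaded and are left to the kernel.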
+ */ + if (mfc->mfc_origin == INADDR_ANY && mfc->mfc_mcastgrp == INADDR_ANY) { + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + return -EINVAL; + } + + /* Create a new route */ + mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc); + if (IS_ERR(mr_route)) + return PTR_ERR(mr_route); + + /* Find any route with a matching key */ + mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht, + &mr_route->key, + mlxsw_sp_mr_route_ht_params); + if (replace) { + /* On replace case, make the route point to the new route_priv. + */ + if (WARN_ON(!mr_orig_route)) { + err = -ENOENT; + goto err_no_orig_route; + } + mr_route->route_priv = mr_orig_route->route_priv; + } else if (mr_orig_route) { + /* On non replace case, if another route with the same key was + * found, abort, as duplicate routes are used for proxy routes. + */ + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + err = -EINVAL; + goto err_duplicate_route; + } + + /* Put it in the table data-structures */ + list_add_tail(&mr_route->node, &mr_table->route_list); + err = rhashtable_insert_fast(&mr_table->route_ht, + &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Write the route to the hardware */ + err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); + if (err) + goto err_mr_route_write; + + /* Destroy the original route */ + if (replace) { + rhashtable_remove_fast(&mr_table->route_ht, + &mr_orig_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_orig_route->node); + mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route); + } + + mlxsw_sp_mr_mfc_offload_update(mr_route); + return 0; + +err_mr_route_write: + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); +err_rhashtable_insert: + list_del(&mr_route->node); +err_no_orig_route: +err_duplicate_route: + mlxsw_sp_mr_route4_destroy(mr_table, mr_route); + return err; +} + +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc) +{ + struct mlxsw_sp_mr_route *mr_route; + struct mlxsw_sp_mr_route_key key; + + mlxsw_sp_mr_route4_key(mr_table, &key, mfc); + mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key, + mlxsw_sp_mr_route_ht_params); + if (mr_route) + __mlxsw_sp_mr_route_del(mr_table, mr_route); +} + +/* Should be called after the VIF struct is updated */ +static int +mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 irif_index; + int err; + + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return 0; + + /* rve->mr_vif->rif is guaranteed to be valid at this stage */ + irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv, + irif_index); + if (err) + return err; + + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + /* No need to rollback here because the iRIF change only takes + * place after the action has been updated. 
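+ * While the action is still TRAP the flow is handled by the kernel,
+ * so the iRIF written above is not yet used by the hardware.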
+ */ + return err; + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; +} + +static void +mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv, + MLXSW_SP_MR_ROUTE_ACTION_TRAP); + rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +/* Should be called after the RIF struct is updated */ +static int +mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 erif_index = 0; + int err; + + /* Update the route action, as the new eVIF can be a tunnel or a pimreg + * device which will require updating the action. + */ + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) { + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + return err; + } + + /* Add the eRIF */ + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_erif_add(mlxsw_sp, + rve->mr_route->route_priv, + erif_index); + if (err) + goto err_route_erif_add; + } + + /* Update the minimum MTU */ + if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = rve->mr_vif->dev->mtu; + err = mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->min_mtu); + if (err) + goto err_route_min_mtu_update; + } + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; + +err_route_min_mtu_update: + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, + erif_index); +err_route_erif_add: + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->route_action); + return err; +} + +/* Should be called before the RIF struct is updated */ +static void +mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 rifi; + + /* If the unresolved RIF was not valid, no need to delete it */ + if (!mlxsw_sp_mr_vif_valid(rve->mr_vif)) + return; + + /* Update the route action: if there is only one valid eVIF in the + * route, set the action to trap as the VIF deletion will lead to zero + * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to + * determine the route action. 
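+ * (This runs before the VIF itself is updated, so
+ * mlxsw_sp_mr_route_action() would still count the VIF being removed
+ * as valid, hence the explicit single-eVIF check.)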
+ */ + if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1) + route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + else + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + + /* Delete the erif from the route */ + rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi); + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_route_vif_entry *irve, *erve; + int err; + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = rif; + mr_vif->vif_flags = vif_flags; + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) { + err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve); + if (err) + goto err_irif_unresolve; + } + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) { + err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve); + if (err) + goto err_erif_unresolve; + } + return 0; + +err_erif_unresolve: + list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list, + vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, erve); +err_irif_unresolve: + list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list, + vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve); + mr_vif->rif = NULL; + return err; +} + +static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, rve); + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve); + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = NULL; +} + +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return -EINVAL; + if (mr_vif->dev) + return -EEXIST; + return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif); +} + +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return; + if (WARN_ON(!mr_vif->dev)) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif); +} + +struct mlxsw_sp_mr_vif * +mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, + const struct net_device *dev) +{ + vifi_t vif_index; + + for (vif_index = 0; vif_index < MAXVIFS; vif_index++) + if (mr_table->vifs[vif_index].dev == dev) + return &mr_table->vifs[vif_index]; + return NULL; +} + +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = 
mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return 0; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return 0; + return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif, + mr_vif->vif_flags, rif); +} + +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif); +} + +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_vif_entry *rve; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + /* Search for a VIF that use that RIF */ + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + + /* Update all the routes that uses that VIF as eVIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) { + if (mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = mtu; + mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + mtu); + } + } +} + +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 vr_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_mr_route_params catchall_route_params = { + .prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + .key = { + .vrid = vr_id, + }, + .value = { + .route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP, + } + }; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_table *mr_table; + int err; + int i; + + mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_table) + return ERR_PTR(-ENOMEM); + + mr_table->vr_id = vr_id; + mr_table->mlxsw_sp = mlxsw_sp; + mr_table->proto = proto; + INIT_LIST_HEAD(&mr_table->route_list); + + err = rhashtable_init(&mr_table->route_ht, + &mlxsw_sp_mr_route_ht_params); + if (err) + goto err_route_rhashtable_init; + + for (i = 0; i < MAXVIFS; i++) { + INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list); + INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list); + } + + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_table->catchall_route_priv, + &catchall_route_params); + if (err) + goto err_ops_route_create; + list_add_tail(&mr_table->node, &mr->table_list); + return mr_table; + +err_ops_route_create: + rhashtable_destroy(&mr_table->route_ht); +err_route_rhashtable_init: + kfree(mr_table); + return ERR_PTR(err); +} + +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + WARN_ON(!mlxsw_sp_mr_table_empty(mr_table)); + list_del(&mr_table->node); + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, + &mr_table->catchall_route_priv); + rhashtable_destroy(&mr_table->route_ht); + kfree(mr_table); +} + +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp_mr_route *mr_route, *tmp; + int i; + + list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node) + __mlxsw_sp_mr_route_del(mr_table, mr_route); + + for (i = 0; i < MAXVIFS; i++) { + mr_table->vifs[i].dev = NULL; + mr_table->vifs[i].rif = NULL; + } +} + +bool mlxsw_sp_mr_table_empty(const struct 
mlxsw_sp_mr_table *mr_table) +{ + int i; + + for (i = 0; i < MAXVIFS; i++) + if (mr_table->vifs[i].dev) + return false; + return list_empty(&mr_table->route_list); +} + +static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u64 packets, bytes; + + if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return; + + mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets, + &bytes); + + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (mr_route->mfc4->mfc_un.res.pkt != packets) + mr_route->mfc4->mfc_un.res.lastuse = jiffies; + mr_route->mfc4->mfc_un.res.pkt = packets; + mr_route->mfc4->mfc_un.res.bytes = bytes; + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_stats_update(struct work_struct *work) +{ + struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr, + stats_update_dw.work); + struct mlxsw_sp_mr_table *mr_table; + struct mlxsw_sp_mr_route *mr_route; + unsigned long interval; + + rtnl_lock(); + list_for_each_entry(mr_table, &mr->table_list, node) + list_for_each_entry(mr_route, &mr_table->route_list, node) + mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp, + mr_route); + rtnl_unlock(); + + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); +} + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops) +{ + struct mlxsw_sp_mr *mr; + unsigned long interval; + int err; + + mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL); + if (!mr) + return -ENOMEM; + mr->mr_ops = mr_ops; + mlxsw_sp->mr = mr; + INIT_LIST_HEAD(&mr->table_list); + + err = mr_ops->init(mlxsw_sp, mr->priv); + if (err) + goto err; + + /* Create the delayed work for counter updates */ + INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update); + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); + return 0; +err: + kfree(mr); + return err; +} + +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + cancel_delayed_work_sync(&mr->stats_update_dw); + mr->mr_ops->fini(mr->priv); + kfree(mr); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h new file mode 100644 index 000000000000..c851b237d253 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -0,0 +1,133 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_H +#define _MLXSW_SPECTRUM_MCROUTER_H + +#include +#include "spectrum_router.h" +#include "spectrum.h" + +enum mlxsw_sp_mr_route_action { + MLXSW_SP_MR_ROUTE_ACTION_FORWARD, + MLXSW_SP_MR_ROUTE_ACTION_TRAP, +}; + +enum mlxsw_sp_mr_route_prio { + MLXSW_SP_MR_ROUTE_PRIO_SG, + MLXSW_SP_MR_ROUTE_PRIO_STARG, + MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + __MLXSW_SP_MR_ROUTE_PRIO_MAX +}; + +#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1) + +struct mlxsw_sp_mr_route_key { + int vrid; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr group; + union mlxsw_sp_l3addr group_mask; + union mlxsw_sp_l3addr source; + union mlxsw_sp_l3addr source_mask; +}; + +struct mlxsw_sp_mr_route_info { + enum mlxsw_sp_mr_route_action route_action; + u16 irif_index; + u16 *erif_indices; + size_t erif_num; + u16 min_mtu; +}; + +struct mlxsw_sp_mr_route_params { + struct mlxsw_sp_mr_route_key key; + struct mlxsw_sp_mr_route_info value; + enum mlxsw_sp_mr_route_prio prio; +}; + +struct mlxsw_sp_mr_ops { + int priv_size; + int route_priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params); + int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info); + int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u64 *packets, u64 *bytes); + int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + enum mlxsw_sp_mr_route_action route_action); + int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 min_mtu); + int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 irif_index); + int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv); + void (*fini)(void *priv); +}; + +struct mlxsw_sp_mr; +struct mlxsw_sp_mr_table; + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops); +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc, bool replace); +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc); +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + 
unsigned long vif_flags, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index); +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu); +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + enum mlxsw_sp_l3proto proto); +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table); +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table); +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table); + +#endif From 0e14c7777acb6d58250cb746685dde0a74d60fe8 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:18 +0200 Subject: [PATCH 0343/2177] mlxsw: spectrum: Add the multicast routing hardware logic Implement the multicast routing hardware API introduced in previous patch for the specific spectrum hardware. The spectrum hardware multicast routes are written using the RMFT2 register and point to an ACL flexible action set. The actions used for multicast routes are: - Counter action, which allows counting bytes and packets on multicast routes. - Multicast route action, which provide RPF check and do the actual packet duplication to a list of RIFs. - Trap action, in the case the route action specified by the called is trap. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/Makefile | 2 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../mellanox/mlxsw/spectrum_mr_tcam.c | 828 ++++++++++++++++++ .../mellanox/mlxsw/spectrum_mr_tcam.h | 43 + 4 files changed, 873 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 9b29764905f3..4816504419fc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -18,7 +18,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ spectrum_ipip.o spectrum_acl_flex_actions.o \ - spectrum_mr.o + spectrum_mr.o spectrum_mr_tcam.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 44c5259e5548..ae67e6046098 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -139,6 +139,7 @@ struct mlxsw_sp_port_mall_tc_entry { struct mlxsw_sp_sb; struct mlxsw_sp_bridge; struct mlxsw_sp_router; +struct mlxsw_sp_mr; struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c new file mode 100644 index 000000000000..cda9e9ad10e3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -0,0 +1,828 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c + * Copyright (c) 2017 Mellanox 
Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_actions.h" +#include "spectrum_mr.h" + +struct mlxsw_sp_mr_tcam_region { + struct mlxsw_sp *mlxsw_sp; + enum mlxsw_reg_rtar_key_type rtar_key_type; + struct parman *parman; + struct parman_prio *parman_prios; +}; + +struct mlxsw_sp_mr_tcam { + struct mlxsw_sp_mr_tcam_region ipv4_tcam_region; +}; + +/* This struct maps to one RIGR2 register entry */ +struct mlxsw_sp_mr_erif_sublist { + struct list_head list; + u32 rigr2_kvdl_index; + int num_erifs; + u16 erif_indices[MLXSW_REG_RIGR2_MAX_ERIFS]; + bool synced; +}; + +struct mlxsw_sp_mr_tcam_erif_list { + struct list_head erif_sublists; + u32 kvdl_index; +}; + +static bool +mlxsw_sp_mr_erif_sublist_full(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + int erif_list_entries = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MC_ERIF_LIST_ENTRIES); + + return erif_sublist->num_erifs == erif_list_entries; +} + +static void +mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + INIT_LIST_HEAD(&erif_list->erif_sublists); +} + +#define MLXSW_SP_KVDL_RIGR2_SIZE 1 + +static struct mlxsw_sp_mr_erif_sublist * +mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + int err; + + erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL); + if (!erif_sublist) + return ERR_PTR(-ENOMEM); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE, + &erif_sublist->rigr2_kvdl_index); + if (err) { + kfree(erif_sublist); + return ERR_PTR(err); + } + + list_add_tail(&erif_sublist->list, &erif_list->erif_sublists); + return erif_sublist; +} + +static void 
+mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + list_del(&erif_sublist->list); + mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index); + kfree(erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + u16 erif_index) +{ + struct mlxsw_sp_mr_erif_sublist *sublist; + + /* If either there is no erif_entry or the last one is full, allocate a + * new one. + */ + if (list_empty(&erif_list->erif_sublists)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + erif_list->kvdl_index = sublist->rigr2_kvdl_index; + } else { + sublist = list_last_entry(&erif_list->erif_sublists, + struct mlxsw_sp_mr_erif_sublist, + list); + sublist->synced = false; + if (mlxsw_sp_mr_erif_sublist_full(mlxsw_sp, sublist)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, + erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + } + } + + /* Add the eRIF to the last entry's last index */ + sublist->erif_indices[sublist->num_erifs++] = erif_index; + return 0; +} + +static void +mlxsw_sp_mr_erif_list_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist, *tmp; + + list_for_each_entry_safe(erif_sublist, tmp, &erif_list->erif_sublists, + list) + mlxsw_sp_mr_erif_sublist_destroy(mlxsw_sp, erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *curr_sublist; + char rigr2_pl[MLXSW_REG_RIGR2_LEN]; + int err; + int i; + + list_for_each_entry(curr_sublist, &erif_list->erif_sublists, list) { + if (curr_sublist->synced) + continue; + + /* If the sublist is not the last one, pack the next index */ + if (list_is_last(&curr_sublist->list, + &erif_list->erif_sublists)) { + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + false, 0); + } else { + struct mlxsw_sp_mr_erif_sublist *next_sublist; + + next_sublist = list_next_entry(curr_sublist, list); + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + true, + next_sublist->rigr2_kvdl_index); + } + + /* Pack all the erifs */ + for (i = 0; i < curr_sublist->num_erifs; i++) { + u16 erif_index = curr_sublist->erif_indices[i]; + + mlxsw_reg_rigr2_erif_entry_pack(rigr2_pl, i, true, + erif_index); + } + + /* Write the entry */ + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rigr2), + rigr2_pl); + if (err) + /* No need of a rollback here because this + * hardware entry should not be pointed yet. 
+ */ + return err; + curr_sublist->synced = true; + } + return 0; +} + +static void mlxsw_sp_mr_erif_list_move(struct mlxsw_sp_mr_tcam_erif_list *to, + struct mlxsw_sp_mr_tcam_erif_list *from) +{ + list_splice(&from->erif_sublists, &to->erif_sublists); + to->kvdl_index = from->kvdl_index; +} + +struct mlxsw_sp_mr_tcam_route { + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + u32 counter_index; + struct parman_item parman_item; + struct parman_prio *parman_prio; + enum mlxsw_sp_mr_route_action action; + struct mlxsw_sp_mr_route_key key; + u16 irif_index; + u16 min_mtu; +}; + +static struct mlxsw_afa_block * +mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_mr_route_action route_action, + u16 irif_index, u32 counter_index, + u16 min_mtu, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_afa_block *afa_block; + int err; + + afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); + if (IS_ERR(afa_block)) + return afa_block; + + err = mlxsw_afa_block_append_counter(afa_block, counter_index); + if (err) + goto err; + + switch (route_action) { + case MLXSW_SP_MR_ROUTE_ACTION_TRAP: + err = mlxsw_afa_block_append_trap(afa_block, + MLXSW_TRAP_ID_ACL1); + if (err) + goto err; + break; + case MLXSW_SP_MR_ROUTE_ACTION_FORWARD: + /* If we are about to append a multicast router action, commit + * the erif_list. + */ + err = mlxsw_sp_mr_erif_list_commit(mlxsw_sp, erif_list); + if (err) + goto err; + + err = mlxsw_afa_block_append_mcrouter(afa_block, irif_index, + min_mtu, false, + erif_list->kvdl_index); + if (err) + goto err; + break; + default: + err = -EINVAL; + goto err; + } + + err = mlxsw_afa_block_commit(afa_block); + if (err) + goto err; + return afa_block; +err: + mlxsw_afa_block_destroy(afa_block); + return ERR_PTR(err); +} + +static void +mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block) +{ + mlxsw_afa_block_destroy(afa_block); +} + +static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp, + struct parman_item *parman_item, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index, + key->vrid, + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, + ntohl(key->group.addr4), + ntohl(key->group_mask.addr4), + ntohl(key->source.addr4), + ntohl(key->source_mask.addr4), + mlxsw_afa_block_first_set(afa_block)); + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid, + struct parman_item *parman_item) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, vrid, + 0, 0, 0, 0, 0, 0, NULL); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int +mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + struct mlxsw_sp_mr_route_info *route_info) +{ + int err; + int i; + + for (i = 0; i < route_info->erif_num; i++) { + u16 erif_index = route_info->erif_indices[i]; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, erif_list, + erif_index); + if (err) + return err; + } + return 0; +} + +static int +mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam, + struct mlxsw_sp_mr_tcam_route *route, + enum mlxsw_sp_mr_route_prio 
prio) +{ + struct parman_prio *parman_prio = NULL; + int err; + + switch (route->key.proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parman_prio = &mr_tcam->ipv4_tcam_region.parman_prios[prio]; + err = parman_item_add(mr_tcam->ipv4_tcam_region.parman, + parman_prio, &route->parman_item); + if (err) + return err; + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } + route->parman_prio = parman_prio; + return 0; +} + +static void +mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam, + struct mlxsw_sp_mr_tcam_route *route) +{ + switch (route->key.proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parman_item_remove(mr_tcam->ipv4_tcam_region.parman, + route->parman_prio, &route->parman_item); + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } +} + +static int +mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + int err; + + route->key = route_params->key; + route->irif_index = route_params->value.irif_index; + route->min_mtu = route_params->value.min_mtu; + route->action = route_params->value.route_action; + + /* Create the egress RIFs list */ + mlxsw_sp_mr_erif_list_init(&route->erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &route->erif_list, + &route_params->value); + if (err) + goto err_erif_populate; + + /* Create the flow counter */ + err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &route->counter_index); + if (err) + goto err_counter_alloc; + + /* Create the flexible action block */ + route->afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(route->afa_block)) { + err = PTR_ERR(route->afa_block); + goto err_afa_block_create; + } + + /* Allocate place in the TCAM */ + err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route, + route_params->prio); + if (err) + goto err_parman_item_add; + + /* Write the route to the TCAM */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, route->afa_block); + if (err) + goto err_route_replace; + return 0; + +err_route_replace: + mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); +err_parman_item_add: + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); +err_afa_block_create: + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); +err_erif_populate: +err_counter_alloc: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + return err; +} + +static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, + void *priv, void *route_priv) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid, + &route->parman_item); + mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); +} + +static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u64 *packets, + u64 *bytes) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index, + packets, bytes); +} + +static int +mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, + enum 
mlxsw_sp_mr_route_action route_action) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route_action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->action = route_action; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 min_mtu) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->min_mtu = min_mtu; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_irif_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 irif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return -EINVAL; + route->irif_index = irif_index; + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + int err; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &route->erif_list, + erif_index); + if (err) + return err; + + /* Commit the action only if the route action is not TRAP */ + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return mlxsw_sp_mr_erif_list_commit(mlxsw_sp, + &route->erif_list); + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + int i; + + /* Create a copy of the original erif_list without the deleted entry */ + mlxsw_sp_mr_erif_list_init(&erif_list); + list_for_each_entry(erif_sublist, &route->erif_list.erif_sublists, list) { + for (i = 0; i < erif_sublist->num_erifs; i++) { + u16 curr_erif = erif_sublist->erif_indices[i]; + + if (curr_erif == erif_index) + continue; + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &erif_list, + curr_erif); + if (err) + goto err_erif_list_add; + } + } + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = 
mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_list_add: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +static int +mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new erif_list */ + mlxsw_sp_mr_erif_list_init(&erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &erif_list, route_info); + if (err) + goto err_erif_populate; + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route_info->route_action, + route_info->irif_index, + route->counter_index, + route_info->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + route->action = route_info->route_action; + route->irif_index = route_info->irif_index; + route->min_mtu = route_info->min_mtu; + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_populate: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16 + +static int +mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE, + mr_tcam_region->rtar_key_type, + MLXSW_SP_MR_TCAM_REGION_BASE_COUNT); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void +mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE, + mr_tcam_region->rtar_key_type, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv, + unsigned long new_count) +{ + struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + u64 max_tcam_rules; + + max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL; + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE, + mr_tcam_region->rtar_key_type, new_count); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void mlxsw_sp_mr_tcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned 
long to_index, + unsigned long count) +{ + struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rrcr_pl[MLXSW_REG_RRCR_LEN]; + + mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE, + from_index, count, + mr_tcam_region->rtar_key_type, to_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl); +} + +static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = { + .base_count = MLXSW_SP_MR_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp_mr_tcam_region_parman_resize, + .move = mlxsw_sp_mr_tcam_region_parman_move, + .algo = PARMAN_ALGO_TYPE_LSORT, +}; + +static int +mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_region *mr_tcam_region, + enum mlxsw_reg_rtar_key_type rtar_key_type) +{ + struct parman_prio *parman_prios; + struct parman *parman; + int err; + int i; + + mr_tcam_region->rtar_key_type = rtar_key_type; + mr_tcam_region->mlxsw_sp = mlxsw_sp; + + err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region); + if (err) + return err; + + parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops, + mr_tcam_region); + if (!parman) { + err = -ENOMEM; + goto err_parman_create; + } + mr_tcam_region->parman = parman; + + parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, + sizeof(*parman_prios), GFP_KERNEL); + if (!parman_prios) + goto err_parman_prios_alloc; + mr_tcam_region->parman_prios = parman_prios; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_init(mr_tcam_region->parman, + &mr_tcam_region->parman_prios[i], i); + return 0; + +err_parman_prios_alloc: + parman_destroy(parman); +err_parman_create: + mlxsw_sp_mr_tcam_region_free(mr_tcam_region); + return err; +} + +static void +mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + int i; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_fini(&mr_tcam_region->parman_prios[i]); + kfree(mr_tcam_region->parman_prios); + parman_destroy(mr_tcam_region->parman); + mlxsw_sp_mr_tcam_region_free(mr_tcam_region); +} + +static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES)) + return -EIO; + + return mlxsw_sp_mr_tcam_region_init(mlxsw_sp, + &mr_tcam->ipv4_tcam_region, + MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST); +} + +static void mlxsw_sp_mr_tcam_fini(void *priv) +{ + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + mlxsw_sp_mr_tcam_region_fini(&mr_tcam->ipv4_tcam_region); +} + +const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp_mr_tcam), + .route_priv_size = sizeof(struct mlxsw_sp_mr_tcam_route), + .init = mlxsw_sp_mr_tcam_init, + .route_create = mlxsw_sp_mr_tcam_route_create, + .route_update = mlxsw_sp_mr_tcam_route_update, + .route_stats = mlxsw_sp_mr_tcam_route_stats, + .route_action_update = mlxsw_sp_mr_tcam_route_action_update, + .route_min_mtu_update = mlxsw_sp_mr_tcam_route_min_mtu_update, + .route_irif_update = mlxsw_sp_mr_tcam_route_irif_update, + .route_erif_add = mlxsw_sp_mr_tcam_route_erif_add, + .route_erif_del = mlxsw_sp_mr_tcam_route_erif_del, + .route_destroy = mlxsw_sp_mr_tcam_route_destroy, + .fini = mlxsw_sp_mr_tcam_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h new file mode 
100644 index 000000000000..f9b59ee25406 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h @@ -0,0 +1,43 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_TCAM_H +#define _MLXSW_SPECTRUM_MCROUTER_TCAM_H + +#include "spectrum.h" +#include "spectrum_mr.h" + +extern const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops; + +#endif From 7e50d435759accec4e17764a8d5a1ef63b79ffd6 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:19 +0200 Subject: [PATCH 0344/2177] mlxsw: spectrum: router: Squash the default route table to main Currently, the mlxsw Spectrum driver offloads only either the RT_TABLE_MAIN FIB table or the VRF tables, so the RT_TABLE_LOCAL table is squashed to the RT_TABLE_MAIN table to allow local routes to be offloaded too. By default, multicast MFC routes which are not assigned to any user requested table are put in the RT_TABLE_DEFAULT table. Due to the fact that offloading multicast MFC routes support in Spectrum router logic is going to be introduced soon, squash the default table to MAIN too. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 321f7356073c..28c0c84bc966 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -693,8 +693,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, static u32 mlxsw_sp_fix_tb_id(u32 tb_id) { - /* For our purpose, squash main and local table into one */ - if (tb_id == RT_TABLE_LOCAL) + /* For our purpose, squash main, default and local tables into one */ + if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT) tb_id = RT_TABLE_MAIN; return tb_id; } From d42b0965b1d4fe0808a2103a3f7c015515b1112e Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:20 +0200 Subject: [PATCH 0345/2177] mlxsw: spectrum_router: Add multicast routes notification handling functionality Add functionality for calling the multicast routing offloading logic upon MFC and VIF add and delete notifications. In addition, call the multicast routing upon RIF addition and deletion events. As the multicast routing offload logic may sleep, the actual calls are done in a deferred work. To ensure the MFC object is not freed in that interval, a reference is held to it. In case of a failure, the abort mechanism is used, which ejects all the routes from the hardware and triggers the traffic to flow through the kernel. Note: At that stage, the FIB notifications are still ignored, and will be enabled in a further patch. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 187 +++++++++++++++++- 1 file changed, 185 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 28c0c84bc966..77584422ed08 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -65,6 +65,8 @@ #include "spectrum_cnt.h" #include "spectrum_dpipe.h" #include "spectrum_ipip.h" +#include "spectrum_mr.h" +#include "spectrum_mr_tcam.h" #include "spectrum_router.h" struct mlxsw_sp_vr; @@ -459,6 +461,7 @@ struct mlxsw_sp_vr { unsigned int rif_count; struct mlxsw_sp_fib *fib4; struct mlxsw_sp_fib *fib6; + struct mlxsw_sp_mr_table *mr4_table; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -653,7 +656,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4 || !!vr->fib6; + return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -744,9 +747,18 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, err = PTR_ERR(vr->fib6); goto err_fib6_create; } + vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr->mr4_table)) { + err = PTR_ERR(vr->mr4_table); + goto err_mr_table_create; + } vr->tb_id = tb_id; return vr; +err_mr_table_create: + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; err_fib6_create: mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; @@ -755,6 +767,8 @@ err_fib6_create: static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { + mlxsw_sp_mr_table_destroy(vr->mr4_table); + 
vr->mr4_table = NULL; mlxsw_sp_fib_destroy(vr->fib6); vr->fib6 = NULL; mlxsw_sp_fib_destroy(vr->fib4); @@ -775,7 +789,8 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { if (!vr->rif_count && list_empty(&vr->fib4->node_list) && - list_empty(&vr->fib6->node_list)) + list_empty(&vr->fib6->node_list) && + mlxsw_sp_mr_table_empty(vr->mr4_table)) mlxsw_sp_vr_destroy(vr); } @@ -4731,6 +4746,75 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info, + bool replace) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return 0; + + vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace); +} + +static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return; + + vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id); + if (WARN_ON(!vr)) + return; + + mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc); + mlxsw_sp_vr_put(vr); +} + +static int +mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return 0; + + vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev); + return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev, + ven_info->vif_index, + ven_info->vif_flags, rif); +} + +static void +mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return; + + vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id); + if (WARN_ON(!vr)) + return; + + mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index); + mlxsw_sp_vr_put(vr); +} + static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) { enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; @@ -4741,6 +4825,10 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (err) return err; + /* The multicast router code does not need an abort trap as by default, + * packets that don't match any routes are trapped to the CPU. 
+ */ + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, MLXSW_SP_LPM_TREE_MIN + 1); @@ -4822,6 +4910,8 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; + + mlxsw_sp_mr_table_flush(vr->mr4_table); mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); /* If virtual router was only used for IPv4, then it's no @@ -4854,6 +4944,8 @@ struct mlxsw_sp_fib_event_work { struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; + struct mfc_entry_notifier_info men_info; + struct vif_entry_notifier_info ven_info; }; struct mlxsw_sp *mlxsw_sp; unsigned long event; @@ -4940,6 +5032,55 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) kfree(fib_work); } +static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; + bool replace; + int err; + + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + + err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, + replace); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + ipmr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info); + ipmr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: + err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, + &fib_work->ven_info); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_VIF_DEL: + mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, + &fib_work->ven_info); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rule = fib_work->fr_info.rule; + if (!ipmr_rule_default(rule) && !rule->l3mdev) + mlxsw_sp_router_fib_abort(mlxsw_sp); + fib_rule_put(rule); + break; + } + rtnl_unlock(); + kfree(fib_work); +} + static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { @@ -4985,6 +5126,30 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, } } +static void +mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); + ipmr_cache_hold(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: /* fall through */ + case FIB_EVENT_VIF_DEL: + memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); + dev_hold(fib_work->ven_info.dev); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + } +} + /* Called with rcu_read_lock() */ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) @@ -5014,6 +5179,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); mlxsw_sp_router_fib6_event(fib_work, info); break; + case 
RTNL_FAMILY_IPMR: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work); + mlxsw_sp_router_fibmr_event(fib_work, info); + break; } mlxsw_core_schedule_work(&fib_work->work); @@ -5227,12 +5396,18 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_configure; + err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif); + if (err) + goto err_mr_rif_add; + mlxsw_sp_rif_counters_alloc(rif); mlxsw_sp->router->rifs[rif_index] = rif; vr->rif_count++; return rif; +err_mr_rif_add: + ops->deconfigure(rif); err_configure: if (fid) mlxsw_sp_fid_put(fid); @@ -5257,6 +5432,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) vr->rif_count--; mlxsw_sp->router->rifs[rif->rif_index] = NULL; mlxsw_sp_rif_counters_free(rif); + mlxsw_sp_mr_rif_del(vr->mr4_table, rif); ops->deconfigure(rif); if (fid) /* Loopback RIFs are not associated with a FID. */ @@ -6120,6 +6296,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_lpm_init; + err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops); + if (err) + goto err_mr_init; + err = mlxsw_sp_vrs_init(mlxsw_sp); if (err) goto err_vrs_init; @@ -6141,6 +6321,8 @@ err_register_fib_notifier: err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: + mlxsw_sp_mr_fini(mlxsw_sp); +err_mr_init: mlxsw_sp_lpm_fini(mlxsw_sp); err_lpm_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); @@ -6162,6 +6344,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_fib_notifier(&mlxsw_sp->router->fib_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); + mlxsw_sp_mr_fini(mlxsw_sp); mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); From fd890fe98f8b026642d39011d03d22fb4aa66b0f Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:21 +0200 Subject: [PATCH 0346/2177] mlxsw: spectrum: Notify multicast router on RIF MTU changes Due to the fact that multicast routes hold the minimum MTU of all the egress RIFs and trap packets that don't meet it, notify the mulitcast router code on RIF MTU changes. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 77584422ed08..dbd9c196fcfb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5773,6 +5773,17 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) if (err) goto err_rif_fdb_op; + if (rif->mtu != dev->mtu) { + struct mlxsw_sp_vr *vr; + + /* The RIF is relevant only to its mr_table instance, as unlike + * unicast routing, in multicast routing a RIF cannot be shared + * between several multicast routing tables. + */ + vr = &mlxsw_sp->router->vrs[rif->vr_id]; + mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu); + } + ether_addr_copy(rif->addr, dev->dev_addr); rif->mtu = dev->mtu; From 664375e9567b5eeece8d9ebf85eaf5107cab382d Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:22 +0200 Subject: [PATCH 0347/2177] mlxsw: spectrum: router: Don't ignore IPMR notifications Make the Spectrum router logic not ignore the RTNL_FAMILY_IPMR FIB notifications. Past commits added the IPMR VIF and MFC add/del notifications via the fib_notifier chain. 
In addition, code for handling these notifications in the Spectrum router logic was added. Make the Spectrum router logic not ignore these notifications and forward the requests to the Spectrum multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index dbd9c196fcfb..ef4b86b3aa9b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5159,7 +5159,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_router *router; if (!net_eq(info->net, &init_net) || - (info->family != AF_INET && info->family != AF_INET6)) + (info->family != AF_INET && info->family != AF_INET6 && + info->family != RTNL_FAMILY_IPMR)) return NOTIFY_DONE; fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); From 7e7c1afb67074596f6ff96e5276d6084d1648ec1 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 28 Sep 2017 13:44:39 +0200 Subject: [PATCH 0348/2177] batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 05cc7637c064..edb2f239d04d 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.3" +#define BATADV_SOURCE_VERSION "2017.4" #endif /* B.A.T.M.A.N. parameters */ From 825ffe1f7b875127bc03faffec0ecfb05906650a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 Aug 2017 21:52:13 +0200 Subject: [PATCH 0349/2177] batman-adv: Remove unnecessary parentheses Commit 63b7c73ec86b ("checkpatch: add --strict check for ifs with unnecessary parentheses") introduced an additional checkpatch test which identifies some unnecessary parentheses. Remove these unnecessary parentheses to avoid the warnings and to unify the coding style slightly more.
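To make the class of change concrete, here is a minimal stand-alone sketch of the pattern checkpatch --strict now flags; the identifiers are made up for illustration and are not taken from this patch:

/* paren_style.c - minimal sketch of the "unnecessary parentheses" warning;
 * all identifiers here are hypothetical.
 */
#include <stdio.h>

enum if_status { IF_ACTIVE, IF_NOT_IN_USE, IF_TO_BE_REMOVED };

/* Old style: every comparison is wrapped in its own parentheses. */
static int skip_iface_old(enum if_status status)
{
	return ((status == IF_NOT_IN_USE) || (status == IF_TO_BE_REMOVED));
}

/* New style: comparison operators already bind tighter than ||, so the
 * extra parentheses can be dropped without changing the result.
 */
static int skip_iface_new(enum if_status status)
{
	return status == IF_NOT_IN_USE || status == IF_TO_BE_REMOVED;
}

int main(void)
{
	printf("%d %d\n", skip_iface_old(IF_ACTIVE),
	       skip_iface_new(IF_TO_BE_REMOVED));
	return 0;
}

Both forms behave identically; the change is purely stylistic, which is why the diff below can touch many call sites without any functional impact.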
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 24 ++++++++++++------------ net/batman-adv/bat_v.c | 2 +- net/batman-adv/bat_v_elp.c | 6 +++--- net/batman-adv/bat_v_ogm.c | 12 ++++++------ net/batman-adv/distributed-arp-table.c | 4 ++-- net/batman-adv/gateway_client.c | 8 ++++---- net/batman-adv/gateway_common.c | 18 +++++++++--------- net/batman-adv/hard-interface.c | 12 ++++++------ net/batman-adv/icmp_socket.c | 4 ++-- net/batman-adv/main.c | 4 ++-- net/batman-adv/multicast.c | 2 +- net/batman-adv/originator.c | 26 +++++++++++++------------- net/batman-adv/routing.c | 6 +++--- net/batman-adv/send.c | 6 +++--- net/batman-adv/soft-interface.c | 2 +- net/batman-adv/sysfs.c | 4 ++-- net/batman-adv/tp_meter.c | 2 +- 17 files changed, 71 insertions(+), 71 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 83ba5483455a..1b659ab652fb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -916,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) return; /* the interface gets activated here to avoid race conditions between @@ -1264,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, * drops as they can't send and receive at the same time. */ tq_iface_penalty = BATADV_TQ_MAX_VALUE; - if (if_outgoing && (if_incoming == if_outgoing) && + if (if_outgoing && if_incoming == if_outgoing && batadv_is_wifi_hardif(if_outgoing)) tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE, bat_priv); @@ -1369,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, ret = BATADV_NEIGH_DUP; } else { set_mark = 0; - if (is_dup && (ret != BATADV_NEIGH_DUP)) + if (is_dup && ret != BATADV_NEIGH_DUP) ret = BATADV_ORIG_DUP; } @@ -1515,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, /* drop packet if sender is not a direct neighbor and if we * don't route towards it */ - if (!is_single_hop_neigh && (!orig_neigh_router)) { + if (!is_single_hop_neigh && !orig_neigh_router) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM via unknown neighbor!\n"); goto out_neigh; @@ -1535,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno); similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl; - if (is_bidirect && ((dup_status == BATADV_NO_DUP) || + if (is_bidirect && (dup_status == BATADV_NO_DUP || (sameseq && similar_ttl))) { batadv_iv_ogm_orig_update(bat_priv, orig_node, orig_ifinfo, ethhdr, @@ -1553,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, /* OGMs from secondary interfaces should only scheduled once * per interface where it has been received, not multiple times */ - if ((ogm_packet->ttl <= 2) && - (if_incoming != if_outgoing)) { + if (ogm_packet->ttl <= 2 && + if_incoming != if_outgoing) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM from secondary interface and wrong outgoing interface\n"); goto out_neigh; @@ -1590,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, if_incoming, if_outgoing); out_neigh: - if ((orig_neigh_node) && 
(!is_single_hop_neigh)) + if (orig_neigh_node && !is_single_hop_neigh) batadv_orig_node_put(orig_neigh_node); out: if (router_ifinfo) @@ -2523,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) tmp_gw_factor *= 100 * 100; tmp_gw_factor >>= 18; - if ((tmp_gw_factor > max_gw_factor) || - ((tmp_gw_factor == max_gw_factor) && - (tq_avg > max_tq))) { + if (tmp_gw_factor > max_gw_factor || + (tmp_gw_factor == max_gw_factor && + tq_avg > max_tq)) { if (curr_gw) batadv_gw_node_put(curr_gw); curr_gw = gw_node; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 4e2724c5b33d..93ef1c06227e 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -767,7 +767,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) goto next; - if (curr_gw && (bw <= max_bw)) + if (curr_gw && bw <= max_bw) goto next; if (curr_gw) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index bd1064d98e16..1de992c58b35 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; throughput = link_settings.base.speed; - if (throughput && (throughput != SPEED_UNKNOWN)) + if (throughput && throughput != SPEED_UNKNOWN) return throughput * 10; } @@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) goto out; /* we are in the process of shutting this interface down */ - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) goto out; /* the interface was enabled but may not be ready yet */ diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 8be61734fc43..c251445a42a0 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, * due to the store & forward characteristics of WIFI. * Very low throughput values are the exception. */ - if ((throughput > 10) && - (if_incoming == if_outgoing) && + if (throughput > 10 && + if_incoming == if_outgoing && !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX)) return throughput / 2; @@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv, /* drop packets with old seqnos, however accept the first packet after * a host has been rebooted. 
*/ - if ((seq_diff < 0) && !protection_started) + if (seq_diff < 0 && !protection_started) goto out; neigh_node->last_seen = jiffies; @@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv, router_throughput = router_ifinfo->bat_v.throughput; neigh_throughput = neigh_ifinfo->bat_v.throughput; - if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) && - (router_throughput >= neigh_throughput)) + if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF && + router_throughput >= neigh_throughput) goto out; } @@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, return; /* only unknown & newer OGMs contain TVLVs we are interested in */ - if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT)) + if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT) batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL, (unsigned char *)(ogm2 + 1), diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b6cfa78e9381..760c0de72582 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, /* this is an hash collision with the temporary selected node. Choose * the one with the lowest address */ - if ((tmp_max == max) && max_orig_node && - (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)) + if (tmp_max == max && max_orig_node && + batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0) goto out; ret = true; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index de9955d5224d..10d521f0b17f 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv) } } - if ((curr_gw) && (!next_gw)) { + if (curr_gw && !next_gw) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Removing selected gateway - no gateway in range\n"); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL); - } else if ((!curr_gw) && (next_gw)) { + } else if (!curr_gw && next_gw) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n", next_gw->orig_node->orig, @@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, goto out; } - if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) && - (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))) + if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) && + gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)) goto out; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 33940c5c74a8..2c26039c23fc 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff, if (strncasecmp(tmp_ptr, "mbit", 4) == 0) bw_unit_type = BATADV_BW_UNIT_MBIT; - if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) || - (bw_unit_type == BATADV_BW_UNIT_MBIT)) + if (strncasecmp(tmp_ptr, "kbit", 4) == 0 || + bw_unit_type == BATADV_BW_UNIT_MBIT) *tmp_ptr = '\0'; } @@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, if (!up_new) up_new = 1; - if ((down_curr == down_new) && (up_curr == up_new)) + if (down_curr == down_new && up_curr == up_new) return count; batadv_gw_reselect(bat_priv); @@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct 
batadv_priv *bat_priv, /* only fetch the tvlv value if the handler wasn't called via the * CIFNOTFND flag and if there is data to fetch */ - if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) || - (tvlv_value_len < sizeof(gateway))) { + if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND || + tvlv_value_len < sizeof(gateway)) { gateway.bandwidth_down = 0; gateway.bandwidth_up = 0; } else { gateway_ptr = tvlv_value; gateway.bandwidth_down = gateway_ptr->bandwidth_down; gateway.bandwidth_up = gateway_ptr->bandwidth_up; - if ((gateway.bandwidth_down == 0) || - (gateway.bandwidth_up == 0)) { + if (gateway.bandwidth_down == 0 || + gateway.bandwidth_up == 0) { gateway.bandwidth_down = 0; gateway.bandwidth_up = 0; } @@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig, &gateway); /* restart gateway selection */ - if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)) + if (gateway.bandwidth_down != 0 && + atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e348f76ea8c1..d4aa99c060f9 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev) rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; if (hard_iface->net_dev == net_dev) @@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; if (hard_iface->soft_iface != soft_iface) @@ -654,8 +654,8 @@ out: static void batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) return; hard_iface->if_status = BATADV_IF_INACTIVE; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 8ead292886d1..bded31121d12 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf, size_t packet_len; int error; - if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0)) + if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0) return -EAGAIN; - if ((!buf) || (count < sizeof(struct batadv_icmp_packet))) + if (!buf || count < sizeof(struct batadv_icmp_packet)) return -EINVAL; if (!access_ok(VERIFY_WRITE, buf, count)) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index fb381fb26a66..033819aefc39 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -544,8 +544,8 @@ batadv_recv_handler_register(u8 packet_type, struct batadv_hard_iface *); curr = batadv_rx_handler[packet_type]; - if ((curr != batadv_recv_unhandled_packet) && - (curr != 
batadv_recv_unhandled_unicast_packet)) + if (curr != batadv_recv_unhandled_packet && + curr != batadv_recv_unhandled_unicast_packet) return -EBUSY; batadv_rx_handler[packet_type] = recv_handler; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d327670641ac..e553a8770a89 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, bool orig_initialized; if (orig_mcast_enabled && tvlv_value && - (tvlv_value_len >= sizeof(mcast_flags))) + tvlv_value_len >= sizeof(mcast_flags)) mcast_flags = *(u8 *)tvlv_value; spin_lock_bh(&orig->mcast_handler_lock); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8e2a4b205257..2967b86c13da 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, continue; /* don't purge if the interface is not (going) down */ - if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && - (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && - (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + if (if_outgoing->if_status != BATADV_IF_INACTIVE && + if_outgoing->if_status != BATADV_IF_NOT_IN_USE && + if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED) continue; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, continue; /* don't purge if the interface is not (going) down */ - if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && - (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && - (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + if (if_outgoing->if_status != BATADV_IF_INACTIVE && + if_outgoing->if_status != BATADV_IF_NOT_IN_USE && + if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED) continue; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; - if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) || - (if_incoming->if_status == BATADV_IF_INACTIVE) || - (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || - (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) { - if ((if_incoming->if_status == BATADV_IF_INACTIVE) || - (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || - (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) + if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) || + if_incoming->if_status == BATADV_IF_INACTIVE || + if_incoming->if_status == BATADV_IF_NOT_IN_USE || + if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) { + if (if_incoming->if_status == BATADV_IF_INACTIVE || + if_incoming->if_status == BATADV_IF_NOT_IN_USE || + if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n", orig_node->orig, neigh_node->addr, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index f10e3ff26f9d..40d9bf3e5bfe 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, batadv_orig_ifinfo_put(orig_ifinfo); /* route deleted */ - if ((curr_router) && (!neigh_node)) { + if (curr_router && !neigh_node) { batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n", orig_node->orig); batadv_tt_global_del_orig(bat_priv, orig_node, -1, "Deleted route towards originator"); /* route added */ - } else if 
((!curr_router) && (neigh_node)) { + } else if (!curr_router && neigh_node) { batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Adding route towards: %pM (via %pM)\n", orig_node->orig, neigh_node->addr); @@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* add record route information if not full */ if ((icmph->msg_type == BATADV_ECHO_REPLY || icmph->msg_type == BATADV_ECHO_REQUEST) && - (skb->len >= sizeof(struct batadv_icmp_packet_rr))) { + skb->len >= sizeof(struct batadv_icmp_packet_rr)) { if (skb_linearize(skb) < 0) goto free_skb; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 054a65e6eb68..7895323fd2a7 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb, #ifdef CONFIG_BATMAN_ADV_BATMAN_V hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr); - if ((hardif_neigh) && (ret != NET_XMIT_DROP)) + if (hardif_neigh && ret != NET_XMIT_DROP) hardif_neigh->bat_v.last_unicast_tx = jiffies; if (hardif_neigh) @@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list, * we delete only packets belonging to the given interface */ if (hard_iface && - (forw_packet->if_incoming != hard_iface) && - (forw_packet->if_outgoing != hard_iface)) + forw_packet->if_incoming != hard_iface && + forw_packet->if_outgoing != hard_iface) continue; hlist_del(&forw_packet->list); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index c2c986746d0b..7c8288245f80 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) { /* check ranges */ - if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev))) + if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; dev->mtu = new_mtu; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 0ae8b30e4eaa..aa187fd42475 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev, if (hard_iface->if_status == status_tmp) goto out; - if ((hard_iface->soft_iface) && - (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)) + if (hard_iface->soft_iface && + strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0) goto out; if (status_tmp == BATADV_IF_NOT_IN_USE) { diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index bfe8effe9238..4b90033f35a8 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -1206,7 +1206,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, /* send the ack */ r = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (unlikely(r < 0) || (r == NET_XMIT_DROP)) { + if (unlikely(r < 0) || r == NET_XMIT_DROP) { ret = BATADV_TP_REASON_DST_UNREACHABLE; goto out; } From 48915aed60c9615e5eaa0f1683640635dade788e Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 28 Sep 2017 17:16:51 +0200 Subject: [PATCH 0350/2177] batman-adv: Fix "line over 80 characters" checkpatch warning Fixes: 242c1a28eb61 ("net: Remove useless function skb_header_release") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7c8288245f80..3af4b0b29b23 
100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) int result; /* TODO: We must check if we can release all references to non-payload - * data using __skb_header_release in our skbs to allow skb_cow_header to - * work optimally. This means that those skbs are not allowed to read + * data using __skb_header_release in our skbs to allow skb_cow_header + * to work optimally. This means that those skbs are not allowed to read * or write any data which is before the current position of skb->data * after that call and thus allow other skbs with the same data buffer * to write freely in that area. From 8f1975e31d8ed0c021a1993a4d9123dd39c740ea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Sep 2017 09:14:14 -0700 Subject: [PATCH 0351/2177] inetpeer: speed up inetpeer_invalidate_tree() As measured in my prior patch ("sch_netem: faster rb tree removal"), rbtree_postorder_for_each_entry_safe() is nice looking but much slower than using rb_next() directly, except when tree is small enough to fit in CPU caches (then the cost is the same) From: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e7eb590c86ce..6e5626cc366c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -284,14 +284,17 @@ EXPORT_SYMBOL(inet_peer_xrlim_allow); void inetpeer_invalidate_tree(struct inet_peer_base *base) { - struct inet_peer *p, *n; + struct rb_node *p = rb_first(&base->rb_root); - rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) { - inet_putpeer(p); + while (p) { + struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node); + + p = rb_next(p); + rb_erase(&peer->rb_node, &base->rb_root); + inet_putpeer(peer); cond_resched(); } - base->rb_root = RB_ROOT; base->total = 0; } EXPORT_SYMBOL(inetpeer_invalidate_tree); From 76cf546c2802f6e25113ba481d7e85d0298768c6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 25 Sep 2017 10:13:49 -0700 Subject: [PATCH 0352/2177] net_sched: use idr to allocate bpf filter handles Instead of calling cls_bpf_get() in a loop to find a unused handle, just switch to idr API to allocate new handles. Cc: Daniel Borkmann Cc: Chris Mi Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_bpf.c | 57 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 36671b0fb125..6c6b21f6ba62 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier"); struct cls_bpf_head { struct list_head plist; - u32 hgen; + struct idr handle_idr; struct rcu_head rcu; }; @@ -238,6 +239,7 @@ static int cls_bpf_init(struct tcf_proto *tp) return -ENOBUFS; INIT_LIST_HEAD_RCU(&head->plist); + idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; @@ -264,6 +266,9 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) { + struct cls_bpf_head *head = rtnl_dereference(tp->root); + + idr_remove_ext(&head->handle_idr, prog->handle); cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); @@ -287,6 +292,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp) list_for_each_entry_safe(prog, tmp, &head->plist, link) __cls_bpf_delete(tp, prog); + idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } @@ -421,27 +427,6 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, return 0; } -static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, - struct cls_bpf_head *head) -{ - unsigned int i = 0x80000000; - u32 handle; - - do { - if (++head->hgen == 0x7FFFFFFF) - head->hgen = 1; - } while (--i > 0 && cls_bpf_get(tp, head->hgen)); - - if (unlikely(i == 0)) { - pr_err("Insufficient number of handles\n"); - handle = 0; - } else { - handle = head->hgen; - } - - return handle; -} - static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -451,6 +436,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct cls_bpf_prog *oldprog = *arg; struct nlattr *tb[TCA_BPF_MAX + 1]; struct cls_bpf_prog *prog; + unsigned long idr_index; int ret; if (tca[TCA_OPTIONS] == NULL) @@ -476,21 +462,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, } } - if (handle == 0) - prog->handle = cls_bpf_grab_new_handle(tp, head); - else + if (handle == 0) { + ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, + 1, 0x7FFFFFFF, GFP_KERNEL); + if (ret) + goto errout; + prog->handle = idr_index; + } else { + if (!oldprog) { + ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (ret) + goto errout; + } prog->handle = handle; - if (prog->handle == 0) { - ret = -EINVAL; - goto errout; } ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); if (ret < 0) - goto errout; + goto errout_idr; ret = cls_bpf_offload(tp, prog, oldprog); if (ret) { + if (!oldprog) + idr_remove_ext(&head->handle_idr, prog->handle); __cls_bpf_delete_prog(prog); return ret; } @@ -499,6 +494,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; if (oldprog) { + idr_replace_ext(&head->handle_idr, prog, handle); list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); @@ -509,6 +505,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = prog; return 0; +errout_idr: + if (!oldprog) + idr_remove_ext(&head->handle_idr, 
prog->handle); errout: tcf_exts_destroy(&prog->exts); kfree(prog); From 1d8134fea2eb460698a281f91c03c400f7e546ce Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 25 Sep 2017 10:13:50 -0700 Subject: [PATCH 0353/2177] net_sched: use idr to allocate basic filter handles Instead of calling basic_get() in a loop to find a unused handle, just switch to idr API to allocate new handles. Cc: Chris Mi Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index d89ebafd2239..cfeb6f158566 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -17,13 +17,14 @@ #include #include #include +#include #include #include #include struct basic_head { - u32 hgenerator; struct list_head flist; + struct idr handle_idr; struct rcu_head rcu; }; @@ -78,6 +79,7 @@ static int basic_init(struct tcf_proto *tp) if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->flist); + idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; } @@ -99,8 +101,10 @@ static void basic_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + idr_remove_ext(&head->handle_idr, f->handle); call_rcu(&f->rcu, basic_delete_filter); } + idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } @@ -111,6 +115,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + idr_remove_ext(&head->handle_idr, f->handle); call_rcu(&f->rcu, basic_delete_filter); *last = list_empty(&head->flist); return 0; @@ -154,6 +159,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, struct nlattr *tb[TCA_BASIC_MAX + 1]; struct basic_filter *fold = (struct basic_filter *) *arg; struct basic_filter *fnew; + unsigned long idr_index; if (tca[TCA_OPTIONS] == NULL) return -EINVAL; @@ -179,30 +185,31 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, err = -EINVAL; if (handle) { fnew->handle = handle; - } else if (fold) { - fnew->handle = fold->handle; - } else { - unsigned int i = 0x80000000; - do { - if (++head->hgenerator == 0x7FFFFFFF) - head->hgenerator = 1; - } while (--i > 0 && basic_get(tp, head->hgenerator)); - - if (i <= 0) { - pr_err("Insufficient number of handles\n"); - goto errout; + if (!fold) { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (err) + goto errout; } - - fnew->handle = head->hgenerator; + } else { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + 1, 0x7FFFFFFF, GFP_KERNEL); + if (err) + goto errout; + fnew->handle = idr_index; } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); - if (err < 0) + if (err < 0) { + if (!fold) + idr_remove_ext(&head->handle_idr, fnew->handle); goto errout; + } *arg = fnew; if (fold) { + idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, basic_delete_filter); From e7614370d6f04711c4e4b48f7055e5008fa4ed42 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 25 Sep 2017 10:13:51 -0700 Subject: [PATCH 0354/2177] net_sched: use idr to allocate u32 filter handles Instead of calling u32_lookup_ht() in a loop to find a unused handle, just switch to idr API to allocate new handles. 
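As with the cls_bpf and basic conversions above, the underlying pattern is roughly the following sketch (identifier names such as 'filter' are illustrative; the exact call sites are in the diff below):

	struct idr handle_idr;
	unsigned long index;
	int err;

	idr_init(&handle_idr);

	/* ask the idr for any unused handle in the range passed as
	 * (start, end) and bind it to 'filter'; on success 'index'
	 * holds the chosen handle
	 */
	err = idr_alloc_ext(&handle_idr, filter, &index, 1, 0x7FFFFFFF,
			    GFP_KERNEL);
	if (!err)
		filter->handle = index;

	/* when the filter is deleted, the handle returns to the pool */
	idr_remove_ext(&handle_idr, filter->handle);
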
u32 filters are special as the handle could contain a hash table id and a key id, so we need two IDR to allocate each of them. Cc: Chris Mi Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 106 +++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 10b8d851fc6b..094d224411a9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -46,6 +46,7 @@ #include #include #include +#include struct tc_u_knode { struct tc_u_knode __rcu *next; @@ -82,6 +83,7 @@ struct tc_u_hnode { struct tc_u_common *tp_c; int refcnt; unsigned int divisor; + struct idr handle_idr; struct rcu_head rcu; /* The 'ht' field MUST be the last field in structure to allow for * more entries allocated at end of structure. @@ -93,7 +95,7 @@ struct tc_u_common { struct tc_u_hnode __rcu *hlist; struct Qdisc *q; int refcnt; - u32 hgenerator; + struct idr handle_idr; struct hlist_node hnode; struct rcu_head rcu; }; @@ -311,19 +313,19 @@ static void *u32_get(struct tcf_proto *tp, u32 handle) return u32_lookup_key(ht, handle); } -static u32 gen_new_htid(struct tc_u_common *tp_c) +static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) { - int i = 0x800; + unsigned long idr_index; + int err; - /* hgenerator only used inside rtnl lock it is safe to increment + /* This is only used inside rtnl lock it is safe to increment * without read _copy_ update semantics */ - do { - if (++tp_c->hgenerator == 0x7FF) - tp_c->hgenerator = 1; - } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); - - return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; + err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index, + 1, 0x7FF, GFP_KERNEL); + if (err) + return 0; + return (u32)(idr_index | 0x800) << 20; } static struct hlist_head *tc_u_common_hash; @@ -366,8 +368,9 @@ static int u32_init(struct tcf_proto *tp) return -ENOBUFS; root_ht->refcnt++; - root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; + root_ht->handle = tp_c ? 
gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; + idr_init(&root_ht->handle_idr); if (tp_c == NULL) { tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); @@ -377,6 +380,7 @@ static int u32_init(struct tcf_proto *tp) } tp_c->q = tp->q; INIT_HLIST_NODE(&tp_c->hnode); + idr_init(&tp_c->handle_idr); h = tc_u_hash(tp); hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); @@ -565,6 +569,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); + idr_remove_ext(&ht->handle_idr, n->handle); call_rcu(&n->rcu, u32_delete_key_freepf_rcu); } } @@ -586,6 +591,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { u32_clear_hw_hnode(tp, ht); + idr_destroy(&ht->handle_idr); + idr_remove_ext(&tp_c->handle_idr, ht->handle); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; @@ -633,6 +640,7 @@ static void u32_destroy(struct tcf_proto *tp) kfree_rcu(ht, rcu); } + idr_destroy(&tp_c->handle_idr); kfree(tp_c); } @@ -701,27 +709,21 @@ ret: return ret; } -#define NR_U32_NODE (1<<12) -static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) +static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) { - struct tc_u_knode *n; - unsigned long i; - unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), - GFP_KERNEL); - if (!bitmap) - return handle | 0xFFF; + unsigned long idr_index; + u32 start = htid | 0x800; + u32 max = htid | 0xFFF; + u32 min = htid; - for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]); - n; - n = rtnl_dereference(n->next)) - set_bit(TC_U32_NODE(n->handle), bitmap); + if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, + start, max + 1, GFP_KERNEL)) { + if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, + min + 1, max + 1, GFP_KERNEL)) + return max; + } - i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800); - if (i >= NR_U32_NODE) - i = find_next_zero_bit(bitmap, NR_U32_NODE, 1); - - kfree(bitmap); - return handle | (i >= NR_U32_NODE ? 
0xFFF : i); + return (u32)idr_index; } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { @@ -806,6 +808,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, if (pins->handle == n->handle) break; + idr_replace_ext(&ht->handle_idr, n, n->handle); RCU_INIT_POINTER(n->next, pins->next); rcu_assign_pointer(*ins, n); } @@ -937,22 +940,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; if (TC_U32_KEY(handle)) return -EINVAL; - if (handle == 0) { - handle = gen_new_htid(tp->data); - if (handle == 0) - return -ENOMEM; - } ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; + if (handle == 0) { + handle = gen_new_htid(tp->data, ht); + if (handle == 0) { + kfree(ht); + return -ENOMEM; + } + } else { + err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL, + handle, handle + 1, GFP_KERNEL); + if (err) { + kfree(ht); + return err; + } + } ht->tp_c = tp_c; ht->refcnt = 1; ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + idr_init(&ht->handle_idr); err = u32_replace_hw_hnode(tp, ht, flags); if (err) { + idr_remove_ext(&tp_c->handle_idr, handle); kfree(ht); return err; } @@ -986,24 +1000,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) return -EINVAL; handle = htid | TC_U32_NODE(handle); + err = idr_alloc_ext(&ht->handle_idr, NULL, NULL, + handle, handle + 1, + GFP_KERNEL); + if (err) + return err; } else handle = gen_new_kid(ht, htid); - if (tb[TCA_U32_SEL] == NULL) - return -EINVAL; + if (tb[TCA_U32_SEL] == NULL) { + err = -EINVAL; + goto erridr; + } s = nla_data(tb[TCA_U32_SEL]); n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); - if (n == NULL) - return -ENOBUFS; + if (n == NULL) { + err = -ENOBUFS; + goto erridr; + } #ifdef CONFIG_CLS_U32_PERF size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); if (!n->pf) { - kfree(n); - return -ENOBUFS; + err = -ENOBUFS; + goto errfree; } #endif @@ -1066,9 +1089,12 @@ errhw: errout: tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF +errfree: free_percpu(n->pf); #endif kfree(n); +erridr: + idr_remove_ext(&ht->handle_idr, handle); return err; } From 61f26d92510ea1e30f9b69097b34b6a522daa1d2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 07:40:42 +0200 Subject: [PATCH 0355/2177] selftests: rtnetlink.sh: add rudimentary vrf test Acked-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 42 ++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 4b48de565cae..a048f7a5f94c 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -291,6 +291,47 @@ kci_test_ifalias() echo "PASS: set ifalias $namewant for $devdummy" } +kci_test_vrf() +{ + vrfname="test-vrf" + ret=0 + + ip link show type vrf 2>/dev/null + if [ $? -ne 0 ]; then + echo "SKIP: vrf: iproute2 too old" + return 0 + fi + + ip link add "$vrfname" type vrf table 10 + check_err $? + if [ $ret -ne 0 ];then + echo "FAIL: can't add vrf interface, skipping test" + return 0 + fi + + ip -br link show type vrf | grep -q "$vrfname" + check_err $? + if [ $ret -ne 0 ];then + echo "FAIL: created vrf device not found" + return 1 + fi + + ip link set dev "$vrfname" up + check_err $? 
+ + ip link set dev "$devdummy" master "$vrfname" + check_err $? + ip link del dev "$vrfname" + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: vrf" + return 1 + fi + + echo "PASS: vrf" +} + kci_test_rtnl() { kci_add_dummy @@ -306,6 +347,7 @@ kci_test_rtnl() kci_test_bridge kci_test_addrlabel kci_test_ifalias + kci_test_vrf kci_del_dummy } From 79110a0426d8179a51bf3cb698a84f6ec98ca60c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 13:58:40 +0200 Subject: [PATCH 0356/2177] rtnetlink: add helper to put master and link ifindexes rtnl_fill_ifinfo currently requires caller to hold the rtnl mutex. Unfortunately the function is quite large which makes it harder to see which spots require the lock, which spots assume it and which ones could do without. Add helpers to factor out the ifindex dumping, one can use rcu to avoid rtnl dependency. Reviewed-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a78fd61da0ec..c801212ee40e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1307,6 +1307,31 @@ static u32 rtnl_get_event(unsigned long event) return rtnl_event_type; } +static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) +{ + const struct net_device *upper_dev; + int ret = 0; + + rcu_read_lock(); + + upper_dev = netdev_master_upper_dev_get_rcu(dev); + if (upper_dev) + ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); + + rcu_read_unlock(); + return ret; +} + +static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) +{ + int ifindex = dev_get_iflink(dev); + + if (dev->ifindex == ifindex) + return 0; + + return nla_put_u32(skb, IFLA_LINK, ifindex); +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, @@ -1316,7 +1341,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct nlattr *af_spec; struct rtnl_af_ops *af_ops; - struct net_device *upper_dev = netdev_master_upper_dev_get(dev); ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -1345,10 +1369,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif - (dev->ifindex != dev_get_iflink(dev) && - nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || - (upper_dev && - nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || + nla_put_iflink(skb, dev) || + put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || From 250fc3dfdbd3e8b5c751ce7b5a26176ec62ca0f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 13:58:41 +0200 Subject: [PATCH 0357/2177] rtnetlink: add helpers to dump vf information similar to earlier patches, split out more parts of this function to better see what is happening and where we assume rtnl is locked. Reviewed-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 50 +++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c801212ee40e..d504e78cd01f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1211,6 +1211,36 @@ nla_put_vfinfo_failure: return -EMSGSIZE; } +static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, + struct net_device *dev, + u32 ext_filter_mask) +{ + struct nlattr *vfinfo; + int i, num_vfs; + + if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0)) + return 0; + + num_vfs = dev_num_vf(dev->dev.parent); + if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs)) + return -EMSGSIZE; + + if (!dev->netdev_ops->ndo_get_vf_config) + return 0; + + vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + if (!vfinfo) + return -EMSGSIZE; + + for (i = 0; i < num_vfs; i++) { + if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) + return -EMSGSIZE; + } + + nla_nest_end(skb, vfinfo); + return 0; +} + static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { struct rtnl_link_ifmap map; @@ -1407,27 +1437,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_fill_stats(skb, dev)) goto nla_put_failure; - if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && - nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent))) + if (rtnl_fill_vf(skb, dev, ext_filter_mask)) goto nla_put_failure; - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent && - ext_filter_mask & RTEXT_FILTER_VF) { - int i; - struct nlattr *vfinfo; - int num_vfs = dev_num_vf(dev->dev.parent); - - vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); - if (!vfinfo) - goto nla_put_failure; - for (i = 0; i < num_vfs; i++) { - if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) - goto nla_put_failure; - } - - nla_nest_end(skb, vfinfo); - } - if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; From b1e66b9a67d67d0e73091b04b51e524581c8c887 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 13:58:42 +0200 Subject: [PATCH 0358/2177] rtnetlink: add helpers to dump netnsid information Reviewed-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d504e78cd01f..d524609c587c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1362,6 +1362,23 @@ static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) return nla_put_u32(skb, IFLA_LINK, ifindex); } +static int rtnl_fill_link_netnsid(struct sk_buff *skb, + const struct net_device *dev) +{ + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id_alloc(dev_net(dev), link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + return -EMSGSIZE; + } + } + + return 0; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, @@ -1451,17 +1468,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } - if (dev->rtnl_link_ops && - dev->rtnl_link_ops->get_link_net) { - struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); - - if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(dev_net(dev), link_net); - - if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) - goto nla_put_failure; - } - } + if (rtnl_fill_link_netnsid(skb, dev)) + goto nla_put_failure; if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto nla_put_failure; From 4c82a95e523721637d44e0e8d879fa11fa825eec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 13:58:43 +0200 Subject: [PATCH 0359/2177] rtnetlink: rtnl_have_link_slave_info doesn't need rtnl it can be switched to rcu. Reviewed-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d524609c587c..e6955da0d58d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -522,11 +522,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev, static bool rtnl_have_link_slave_info(const struct net_device *dev) { struct net_device *master_dev; + bool ret = false; - master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + rcu_read_lock(); + + master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev); if (master_dev && master_dev->rtnl_link_ops) - return true; - return false; + ret = true; + rcu_read_unlock(); + return ret; } static int rtnl_link_slave_info_fill(struct sk_buff *skb, From 4d8806fd14e1492cd4fb2021f709b163ea3364ad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 26 Sep 2017 16:14:09 +0100 Subject: [PATCH 0360/2177] cxgb4: make function ch_flower_stats_cb, fixes warning The function ch_flower_stats_cb is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warnings: symbol 'ch_flower_stats_cb' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index a36bd66d2834..92a311767381 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -366,7 +366,7 @@ err: return ret; } -void ch_flower_stats_cb(unsigned long data) +static void ch_flower_stats_cb(unsigned long data) { struct adapter *adap = (struct adapter *)data; struct ch_tc_flower_entry *flower_entry; From 2b7c6ba945fd3c10ca3e030be402098aff2f89d3 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 26 Sep 2017 16:35:13 +0100 Subject: [PATCH 0361/2177] bpf/verifier: improve disassembly of BPF_END instructions print_bpf_insn() was treating all BPF_ALU[64] the same, but BPF_END has a different structure: it has a size in insn->imm (even if it's BPF_X) and uses the BPF_SRC (X or K) to indicate which endianness to use. So it needs different code to print it. Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f849eca36052..e8d7bb8e6b98 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -332,26 +332,40 @@ static const char *const bpf_jmp_string[16] = { [BPF_EXIT >> 4] = "exit", }; +static void print_bpf_end_insn(const struct bpf_verifier_env *env, + const struct bpf_insn *insn) +{ + verbose("(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg, + BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le", + insn->imm, insn->dst_reg); +} + static void print_bpf_insn(const struct bpf_verifier_env *env, const struct bpf_insn *insn) { u8 class = BPF_CLASS(insn->code); if (class == BPF_ALU || class == BPF_ALU64) { - if (BPF_SRC(insn->code) == BPF_X) + if (BPF_OP(insn->code) == BPF_END) { + if (class == BPF_ALU64) + verbose("BUG_alu64_%02x\n", insn->code); + else + print_bpf_end_insn(env, insn); + } else if (BPF_SRC(insn->code) == BPF_X) { verbose("(%02x) %sr%d %s %sr%d\n", insn->code, class == BPF_ALU ? "(u32) " : "", insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], class == BPF_ALU ? "(u32) " : "", insn->src_reg); - else + } else { verbose("(%02x) %sr%d %s %s%d\n", insn->code, class == BPF_ALU ? "(u32) " : "", insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], class == BPF_ALU ? "(u32) " : "", insn->imm); + } } else if (class == BPF_STX) { if (BPF_MODE(insn->code) == BPF_MEM) verbose("(%02x) *(%s *)(r%d %+d) = r%d\n", From 73c864b38383f4abc9b559025cd663f4a81afa89 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 26 Sep 2017 16:35:29 +0100 Subject: [PATCH 0362/2177] bpf/verifier: improve disassembly of BPF_NEG instructions BPF_NEG takes only one operand, unlike the bulk of BPF_ALU[64] which are compound-assignments. So give it its own format in print_bpf_insn(). Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e8d7bb8e6b98..4cf9b72c59a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -351,6 +351,11 @@ static void print_bpf_insn(const struct bpf_verifier_env *env, verbose("BUG_alu64_%02x\n", insn->code); else print_bpf_end_insn(env, insn); + } else if (BPF_OP(insn->code) == BPF_NEG) { + verbose("(%02x) r%d = %s-r%d\n", + insn->code, insn->dst_reg, + class == BPF_ALU ? "(u32) " : "", + insn->dst_reg); } else if (BPF_SRC(insn->code) == BPF_X) { verbose("(%02x) %sr%d %s %sr%d\n", insn->code, class == BPF_ALU ? "(u32) " : "", From 352f58b0d9f26d283b10f4c9f21e8717141c1334 Mon Sep 17 00:00:00 2001 From: Aviad Krawczyk Date: Wed, 27 Sep 2017 01:57:50 +0800 Subject: [PATCH 0363/2177] net-next/hinic: Set Rxq irq to specific cpu for NUMA Set Rxq irq to specific cpu for allocating and receiving the skb from the same node. Signed-off-by: Aviad Krawczyk Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_rx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 1d4f712b15a8..e2e5cdc7119c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "hinic_common.h" @@ -171,11 +172,10 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) struct hinic_sge sge; dma_addr_t dma_addr; struct sk_buff *skb; - int i, alloc_more; u16 prod_idx; + int i; free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq); - alloc_more = 0; /* Limit the allocation chunks */ if (free_wqebbs > nic_dev->rx_weight) @@ -185,7 +185,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) skb = rx_alloc_skb(rxq, &dma_addr); if (!skb) { netdev_err(rxq->netdev, "Failed to alloc Rx skb\n"); - alloc_more = 1; goto skb_out; } @@ -195,7 +194,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) &prod_idx); if (!rq_wqe) { rx_free_skb(rxq, skb, dma_addr); - alloc_more = 1; goto skb_out; } @@ -211,9 +209,7 @@ skb_out: hinic_rq_update(rxq->rq, prod_idx); } - if (alloc_more) - tasklet_schedule(&rxq->rx_task); - + tasklet_schedule(&rxq->rx_task); return i; } @@ -357,7 +353,7 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) } if (pkts) - tasklet_schedule(&rxq->rx_task); /* hinic_rx_alloc_pkts */ + tasklet_schedule(&rxq->rx_task); /* rx_alloc_pkts */ u64_stats_update_begin(&rxq->rxq_stats.syncp); rxq->rxq_stats.pkts += pkts; @@ -417,6 +413,8 @@ static int rx_request_irq(struct hinic_rxq *rxq) struct hinic_dev *nic_dev = netdev_priv(rxq->netdev); struct hinic_hwdev *hwdev = nic_dev->hwdev; struct hinic_rq *rq = rxq->rq; + struct hinic_qp *qp; + struct cpumask mask; int err; rx_add_napi(rxq); @@ -432,7 +430,9 @@ static int rx_request_irq(struct hinic_rxq *rxq) return err; } - return 0; + qp = container_of(rq, struct hinic_qp, rq); + cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask); + return irq_set_affinity_hint(rq->irq, &mask); } static void rx_free_irq(struct hinic_rxq *rxq) From bbdc9e687fb3c2920961d7716f1c5519ff7bc595 Mon Sep 17 00:00:00 2001 From: Aviad Krawczyk Date: Wed, 27 Sep 2017 02:11:33 +0800 Subject: [PATCH 0364/2177] net-next/hinic: Fix a case of Tx Queue is Stopped forever Fix the following scenario: 1. tx_free_poll is running on cpu X 2. xmit function is running on cpu Y and fails to get sq wqe 3. 
tx_free_poll frees wqes on cpu X and checks the queue is not stopped 4. xmit function stops the queue after failed to get sq wqe 5. The queue is stopped forever Signed-off-by: Aviad Krawczyk Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_tx.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index abe3e38cd342..9128858479c4 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -212,10 +212,19 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx); if (!sq_wqe) { - tx_unmap_skb(nic_dev, skb, txq->sges); - netif_stop_subqueue(netdev, qp->q_id); + /* Check for the case free_tx_poll is called in another cpu + * and we stopped the subqueue after free_tx_poll check. + */ + sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx); + if (sq_wqe) { + netif_wake_subqueue(nic_dev->netdev, qp->q_id); + goto process_sq_wqe; + } + + tx_unmap_skb(nic_dev, skb, txq->sges); + u64_stats_update_begin(&txq->txq_stats.syncp); txq->txq_stats.tx_busy++; u64_stats_update_end(&txq->txq_stats.syncp); @@ -223,6 +232,7 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) goto flush_skbs; } +process_sq_wqe: hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges); hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size); From 6ade97da601f8af793f6c7a861af754d0f0b6767 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 26 Sep 2017 23:12:28 +0300 Subject: [PATCH 0365/2177] arp: make arp_hdr_len() return unsigned int Negative ARP header length are not a thing. Constify arguments while I'm at it. Space savings: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3 (-3) function old new delta arpt_do_table 1163 1160 -3 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 ++- include/linux/if_arp.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c99dc59d729b..d2e94b8559f0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2491,7 +2491,8 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *curr_active_slave, *curr_arp_slave; unsigned char *arp_ptr; __be32 sip, tip; - int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); + int is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); + unsigned int alen; if (!slave_do_arp_validate(bond, slave)) { if ((slave_do_arp_validate_only(bond) && is_arp) || diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 3355efc89781..6756fea18b69 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -31,7 +31,7 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb) return (struct arphdr *)skb_network_header(skb); } -static inline int arp_hdr_len(struct net_device *dev) +static inline unsigned int arp_hdr_len(const struct net_device *dev) { switch (dev->type) { #if IS_ENABLED(CONFIG_FIREWIRE_NET) From 9ffe79a9c2eec0f30687c2fd8b452bda5c8287b0 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:23 +0800 Subject: [PATCH 0366/2177] net: hns3: Support for dynamically assigning tx buffer to TC This patch add support of dynamically assigning tx buffer to TC when the TC is enabled. 
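Roughly, as a simplified sketch of the calculation added below (names abbreviated for illustration, see hclge_tx_buffer_calc()/hclge_rx_buffer_calc() in the diff): each enabled TC first gets the default tx buffer carved out of the packet buffer, and only the remainder is budgeted for the rx side:

	/* sketch only */
	remaining = pkt_buf_size;
	for (i = 0; i < max_tc_num; i++) {
		tx_buf_size[i] = (hw_tc_map & BIT(i)) ? default_tx_buf : 0;
		remaining -= tx_buf_size[i];
	}
	rx_all = remaining;	/* rx private and shared buffers come from this */
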
It will save buffer for rx direction to avoid packet loss. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 1 + .../hisilicon/hns3/hns3pf/hclge_main.c | 64 ++++++++++++++++--- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 758cf3948131..a81c6cb93ed5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -311,6 +311,7 @@ struct hclge_tc_thrd { struct hclge_priv_buf { struct hclge_waterline wl; /* Waterline for low and high*/ u32 buf_size; /* TC private buffer size */ + u32 tx_buf_size; u32 enable; /* Enable TC private buffer or not */ }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e0685e630afe..eaa3fc355568 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1324,7 +1324,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) return 0; } -static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size) +static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev) { /* TX buffer size is unit by 128 byte */ #define HCLGE_BUF_SIZE_UNIT_SHIFT 7 @@ -1337,10 +1337,13 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size) req = (struct hclge_tx_buff_alloc *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0); - for (i = 0; i < HCLGE_TC_NUM; i++) + for (i = 0; i < HCLGE_TC_NUM; i++) { + u32 buf_size = hdev->priv_buf[i].tx_buf_size; + req->tx_pkt_buff[i] = cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) | HCLGE_BUF_SIZE_UPDATE_EN_MSK); + } ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -1352,9 +1355,9 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size) return 0; } -static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size) +static int hclge_tx_buffer_alloc(struct hclge_dev *hdev) { - int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size); + int ret = hclge_cmd_alloc_tx_buff(hdev); if (ret) { dev_err(&hdev->pdev->dev, @@ -1433,6 +1436,16 @@ static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev) return rx_priv; } +static u32 hclge_get_tx_buff_alloced(struct hclge_dev *hdev) +{ + u32 i, total_tx_size = 0; + + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) + total_tx_size += hdev->priv_buf[i].tx_buf_size; + + return total_tx_size; +} + static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) { u32 shared_buf_min, shared_buf_tc, shared_std; @@ -1477,18 +1490,43 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) return true; } +static int hclge_tx_buffer_calc(struct hclge_dev *hdev) +{ + u32 i, total_size; + + total_size = hdev->pkt_buf_size; + + /* alloc tx buffer for all enabled tc */ + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { + struct hclge_priv_buf *priv = &hdev->priv_buf[i]; + + if (total_size < HCLGE_DEFAULT_TX_BUF) + return -ENOMEM; + + if (hdev->hw_tc_map & BIT(i)) + priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF; + else + priv->tx_buf_size = 0; + + total_size -= priv->tx_buf_size; + } + + return 0; +} + /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev - * @tx_size: the allocated tx buffer for all TCs * @return: 0: calculate sucessful, negative: fail */ -int hclge_rx_buffer_calc(struct 
hclge_dev *hdev, u32 tx_size) +int hclge_rx_buffer_calc(struct hclge_dev *hdev) { - u32 rx_all = hdev->pkt_buf_size - tx_size; + u32 rx_all = hdev->pkt_buf_size; int no_pfc_priv_num, pfc_priv_num; struct hclge_priv_buf *priv; int i; + rx_all -= hclge_get_tx_buff_alloced(hdev); + /* When DCB is not supported, rx private * buffer is not allocated. */ @@ -1771,7 +1809,6 @@ static int hclge_common_wl_config(struct hclge_dev *hdev) int hclge_buffer_alloc(struct hclge_dev *hdev) { - u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF; int ret; hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM, @@ -1780,14 +1817,21 @@ int hclge_buffer_alloc(struct hclge_dev *hdev) if (!hdev->priv_buf) return -ENOMEM; - ret = hclge_tx_buffer_alloc(hdev, tx_buf_size); + ret = hclge_tx_buffer_calc(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, + "could not calc tx buffer size for all TCs %d\n", ret); + return ret; + } + + ret = hclge_tx_buffer_alloc(hdev); if (ret) { dev_err(&hdev->pdev->dev, "could not alloc tx buffers %d\n", ret); return ret; } - ret = hclge_rx_buffer_calc(hdev, tx_buf_size); + ret = hclge_rx_buffer_calc(hdev); if (ret) { dev_err(&hdev->pdev->dev, "could not calc rx priv buffer size for all TCs %d\n", From acf61ecd44feae2a78c13d0d7cb8e386741c5cf0 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:24 +0800 Subject: [PATCH 0367/2177] net: hns3: Add support for dynamically buffer reallocation Current buffer allocation can only happen at init, when doing buffer reallocation after init, care must be taken care of memory which priv_buf points to. This patch fixes it by using a dynamic allocated temporary memory. Because we only do buffer reallocation at init or when setting up the DCB parameter, and priv_buf is only used at buffer allocation process, so it is ok to use a dynamic allocated temporary memory. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 5 + .../hisilicon/hns3/hns3pf/hclge_main.c | 150 ++++++++++-------- .../hisilicon/hns3/hns3pf/hclge_main.h | 2 - 3 files changed, 87 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index a81c6cb93ed5..6b6d28eff664 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -322,6 +322,11 @@ struct hclge_shared_buf { u32 buf_size; }; +struct hclge_pkt_buf_alloc { + struct hclge_priv_buf priv_buf[HCLGE_MAX_TC_NUM]; + struct hclge_shared_buf s_buf; +}; + #define HCLGE_RX_COM_WL_EN_B 15 struct hclge_rx_com_wl_buf { __le16 high_wl; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index eaa3fc355568..61632feb8c4e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1324,7 +1324,8 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) return 0; } -static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev) +static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { /* TX buffer size is unit by 128 byte */ #define HCLGE_BUF_SIZE_UNIT_SHIFT 7 @@ -1338,7 +1339,7 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0); for (i = 0; i < HCLGE_TC_NUM; i++) { - u32 buf_size = hdev->priv_buf[i].tx_buf_size; + u32 buf_size = buf_alloc->priv_buf[i].tx_buf_size; req->tx_pkt_buff[i] = cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) | @@ -1355,9 +1356,10 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev) return 0; } -static int hclge_tx_buffer_alloc(struct hclge_dev *hdev) +static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - int ret = hclge_cmd_alloc_tx_buff(hdev); + int ret = hclge_cmd_alloc_tx_buff(hdev, buf_alloc); if (ret) { dev_err(&hdev->pdev->dev, @@ -1390,13 +1392,14 @@ static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev) } /* Get the number of pfc enabled TCs, which have private buffer */ -static int hclge_get_pfc_priv_num(struct hclge_dev *hdev) +static int hclge_get_pfc_priv_num(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; int i, cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if ((hdev->tm_info.hw_pfc_map & BIT(i)) && priv->enable) cnt++; @@ -1406,13 +1409,14 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev) } /* Get the number of pfc disabled TCs, which have private buffer */ -static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev) +static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; int i, cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && !(hdev->tm_info.hw_pfc_map & BIT(i)) && priv->enable) @@ -1422,31 +1426,33 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev) return cnt; } -static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev) +static u32 hclge_get_rx_priv_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; u32 rx_priv = 0; int i; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - 
priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (priv->enable) rx_priv += priv->buf_size; } return rx_priv; } -static u32 hclge_get_tx_buff_alloced(struct hclge_dev *hdev) +static u32 hclge_get_tx_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc) { u32 i, total_tx_size = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) - total_tx_size += hdev->priv_buf[i].tx_buf_size; + total_tx_size += buf_alloc->priv_buf[i].tx_buf_size; return total_tx_size; } -static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) +static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc, + u32 rx_all) { u32 shared_buf_min, shared_buf_tc, shared_std; int tc_num, pfc_enable_num; @@ -1467,30 +1473,31 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) hdev->mps; shared_std = max_t(u32, shared_buf_min, shared_buf_tc); - rx_priv = hclge_get_rx_priv_buff_alloced(hdev); + rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc); if (rx_all <= rx_priv + shared_std) return false; shared_buf = rx_all - rx_priv; - hdev->s_buf.buf_size = shared_buf; - hdev->s_buf.self.high = shared_buf; - hdev->s_buf.self.low = 2 * hdev->mps; + buf_alloc->s_buf.buf_size = shared_buf; + buf_alloc->s_buf.self.high = shared_buf; + buf_alloc->s_buf.self.low = 2 * hdev->mps; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { if ((hdev->hw_tc_map & BIT(i)) && (hdev->tm_info.hw_pfc_map & BIT(i))) { - hdev->s_buf.tc_thrd[i].low = hdev->mps; - hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps; + buf_alloc->s_buf.tc_thrd[i].low = hdev->mps; + buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps; } else { - hdev->s_buf.tc_thrd[i].low = 0; - hdev->s_buf.tc_thrd[i].high = hdev->mps; + buf_alloc->s_buf.tc_thrd[i].low = 0; + buf_alloc->s_buf.tc_thrd[i].high = hdev->mps; } } return true; } -static int hclge_tx_buffer_calc(struct hclge_dev *hdev) +static int hclge_tx_buffer_calc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { u32 i, total_size; @@ -1498,7 +1505,7 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev) /* alloc tx buffer for all enabled tc */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - struct hclge_priv_buf *priv = &hdev->priv_buf[i]; + struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; if (total_size < HCLGE_DEFAULT_TX_BUF) return -ENOMEM; @@ -1516,22 +1523,24 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev) /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev + * @buf_alloc: pointer to buffer calculation data * @return: 0: calculate sucessful, negative: fail */ -int hclge_rx_buffer_calc(struct hclge_dev *hdev) +int hclge_rx_buffer_calc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { u32 rx_all = hdev->pkt_buf_size; int no_pfc_priv_num, pfc_priv_num; struct hclge_priv_buf *priv; int i; - rx_all -= hclge_get_tx_buff_alloced(hdev); + rx_all -= hclge_get_tx_buff_alloced(buf_alloc); /* When DCB is not supported, rx private * buffer is not allocated. 
*/ if (!hnae3_dev_dcb_supported(hdev)) { - if (!hclge_is_rx_buf_ok(hdev, rx_all)) + if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return -ENOMEM; return 0; @@ -1539,7 +1548,7 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) /* step 1, try to alloc private buffer for all enabled tc */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i)) { priv->enable = 1; if (hdev->tm_info.hw_pfc_map & BIT(i)) { @@ -1560,14 +1569,14 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) } } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 2, try to decrease the buffer size of * no pfc TC's private buffer */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; priv->enable = 0; priv->wl.low = 0; @@ -1590,18 +1599,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) } } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 3, try to reduce the number of pfc disabled TCs, * which have private buffer */ /* get the total no pfc enable TC number, which have private buffer */ - no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev); + no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc); /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && !(hdev->tm_info.hw_pfc_map & BIT(i))) { @@ -1613,22 +1622,22 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) no_pfc_priv_num--; } - if (hclge_is_rx_buf_ok(hdev, rx_all) || + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) || no_pfc_priv_num == 0) break; } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 4, try to reduce the number of pfc enabled TCs * which have private buffer. 
*/ - pfc_priv_num = hclge_get_pfc_priv_num(hdev); + pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc); /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && hdev->tm_info.hw_pfc_map & BIT(i)) { @@ -1640,17 +1649,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) pfc_priv_num--; } - if (hclge_is_rx_buf_ok(hdev, rx_all) || + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) || pfc_priv_num == 0) break; } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; return -ENOMEM; } -static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) +static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_rx_priv_buff *req; struct hclge_desc desc; @@ -1662,7 +1672,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) /* Alloc private buffer TCs */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - struct hclge_priv_buf *priv = &hdev->priv_buf[i]; + struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; req->buf_num[i] = cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S); @@ -1671,7 +1681,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) } req->shared_buf = - cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) | + cpu_to_le16((buf_alloc->s_buf.buf_size >> HCLGE_BUF_UNIT_S) | (1 << HCLGE_TC0_PRI_BUF_EN_B)); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -1686,7 +1696,8 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) #define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0) -static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) +static int hclge_rx_priv_wl_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_rx_priv_wl_buf *req; struct hclge_priv_buf *priv; @@ -1706,7 +1717,9 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT); for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) { - priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j]; + u32 idx = i * HCLGE_TC_NUM_ONE_DESC + j; + + priv = &buf_alloc->priv_buf[idx]; req->tc_wl[j].high = cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S); req->tc_wl[j].high |= @@ -1731,9 +1744,10 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) return 0; } -static int hclge_common_thrd_config(struct hclge_dev *hdev) +static int hclge_common_thrd_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - struct hclge_shared_buf *s_buf = &hdev->s_buf; + struct hclge_shared_buf *s_buf = &buf_alloc->s_buf; struct hclge_rx_com_thrd *req; struct hclge_desc desc[2]; struct hclge_tc_thrd *tc; @@ -1777,9 +1791,10 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev) return 0; } -static int hclge_common_wl_config(struct hclge_dev *hdev) +static int hclge_common_wl_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - struct hclge_shared_buf *buf = &hdev->s_buf; + struct hclge_shared_buf *buf = &buf_alloc->s_buf; struct hclge_rx_com_wl *req; struct hclge_desc desc; int ret; @@ -1809,69 +1824,68 @@ static int hclge_common_wl_config(struct hclge_dev *hdev) int hclge_buffer_alloc(struct hclge_dev *hdev) { + struct hclge_pkt_buf_alloc *pkt_buf; int ret; - hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM, - sizeof(struct hclge_priv_buf), - GFP_KERNEL | __GFP_ZERO); - if (!hdev->priv_buf) + pkt_buf = kzalloc(sizeof(*pkt_buf), GFP_KERNEL); + if (!pkt_buf) return -ENOMEM; - ret 
= hclge_tx_buffer_calc(hdev); + ret = hclge_tx_buffer_calc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not calc tx buffer size for all TCs %d\n", ret); - return ret; + goto out; } - ret = hclge_tx_buffer_alloc(hdev); + ret = hclge_tx_buffer_alloc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not alloc tx buffers %d\n", ret); - return ret; + goto out; } - ret = hclge_rx_buffer_calc(hdev); + ret = hclge_rx_buffer_calc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not calc rx priv buffer size for all TCs %d\n", ret); - return ret; + goto out; } - ret = hclge_rx_priv_buf_alloc(hdev); + ret = hclge_rx_priv_buf_alloc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n", ret); - return ret; + goto out; } if (hnae3_dev_dcb_supported(hdev)) { - ret = hclge_rx_priv_wl_config(hdev); + ret = hclge_rx_priv_wl_config(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not configure rx private waterline %d\n", ret); - return ret; + goto out; } - ret = hclge_common_thrd_config(hdev); + ret = hclge_common_thrd_config(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not configure common threshold %d\n", ret); - return ret; + goto out; } } - ret = hclge_common_wl_config(hdev); - if (ret) { + ret = hclge_common_wl_config(hdev, pkt_buf); + if (ret) dev_err(&hdev->pdev->dev, "could not configure common waterline %d\n", ret); - return ret; - } - return 0; +out: + kfree(pkt_buf); + return ret; } static int hclge_init_roce_base_info(struct hclge_vport *vport) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 9fcfd9395424..4fc36f04c971 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -463,8 +463,6 @@ struct hclge_dev { u32 pkt_buf_size; /* Total pf buf size for tx/rx */ u32 mps; /* Max packet size */ - struct hclge_priv_buf *priv_buf; - struct hclge_shared_buf s_buf; enum hclge_mta_dmac_sel_type mta_mac_sel_type; bool enable_mta; /* Mutilcast filter enable */ From 9dc2145d910e94d1987fd165ac7643777fcf17c4 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:25 +0800 Subject: [PATCH 0368/2177] net: hns3: Add support for PFC setting in TM module This patch add a pfc_pause_en cmd, and use it to configure PFC option according to fc_mode in hdev->tm_info. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 68 +++++++++++++++++-- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 5 ++ 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 73a75d7cc551..0b4b5d9b0798 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -124,6 +124,20 @@ static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx) return hclge_cmd_send(&hdev->hw, &desc, 1); } +static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap, + u8 pfc_bitmap) +{ + struct hclge_desc desc; + struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)&desc.data; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PFC_PAUSE_EN, false); + + pfc->tx_rx_en_bitmap = tx_rx_bitmap; + pfc->pri_en_bitmap = pfc_bitmap; + + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id) { u8 tc; @@ -969,20 +983,64 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev) return hclge_tm_schd_mode_hw(hdev); } +static int hclge_pfc_setup_hw(struct hclge_dev *hdev) +{ + u8 enable_bitmap = 0; + + if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) + enable_bitmap = HCLGE_TX_MAC_PAUSE_EN_MSK | + HCLGE_RX_MAC_PAUSE_EN_MSK; + + return hclge_pfc_pause_en_cfg(hdev, enable_bitmap, + hdev->tm_info.hw_pfc_map); +} + +static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +{ + bool tx_en, rx_en; + + switch (hdev->tm_info.fc_mode) { + case HCLGE_FC_NONE: + tx_en = false; + rx_en = false; + break; + case HCLGE_FC_RX_PAUSE: + tx_en = false; + rx_en = true; + break; + case HCLGE_FC_TX_PAUSE: + tx_en = true; + rx_en = false; + break; + case HCLGE_FC_FULL: + tx_en = true; + rx_en = true; + break; + default: + tx_en = true; + rx_en = true; + } + + return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en); +} + int hclge_pause_setup_hw(struct hclge_dev *hdev) { - bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC; int ret; u8 i; - ret = hclge_mac_pause_en_cfg(hdev, en, en); - if (ret) - return ret; + if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) + return hclge_mac_pause_setup_hw(hdev); - /* Only DCB-supported dev supports qset back pressure setting */ + /* Only DCB-supported dev supports qset back pressure and pfc cmd */ if (!hnae3_dev_dcb_supported(hdev)) return 0; + /* When MAC is GE Mode, hdev does not support pfc setting */ + ret = hclge_pfc_setup_hw(hdev); + if (ret) + dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret); + for (i = 0; i < hdev->tm_info.num_tc; i++) { ret = hclge_tm_qs_bp_cfg(hdev, i); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 85158b0d73fe..8ecd83c50f47 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -94,6 +94,11 @@ struct hclge_bp_to_qs_map_cmd { u32 rsvd1; }; +struct hclge_pfc_en_cmd { + u8 tx_rx_en_bitmap; + u8 pri_en_bitmap; +}; + #define hclge_tm_set_field(dest, string, val) \ hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \ (HCLGE_TM_SHAP_##string##_LSH), val) From 0a5677d39ef12739c9c10ef6e8e5f4b0805bfe71 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:26 +0800 Subject: [PATCH 0369/2177] net: hns3: Add support for port shaper setting in TM module This patch add a tm_port_shaper cmd and set port shaper 
to HCLGE_ETHER_MAX_RATE on TM initialization process. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 32 +++++++++++++++++++ .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 4 +++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 0b4b5d9b0798..f79cebd7c95b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -301,6 +301,34 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, return hclge_cmd_send(&hdev->hw, &desc, 1); } +static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) +{ + struct hclge_port_shapping_cmd *shap_cfg_cmd; + struct hclge_desc desc; + u32 shapping_para = 0; + u8 ir_u, ir_b, ir_s; + int ret; + + ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE, + HCLGE_SHAPER_LVL_PORT, + &ir_b, &ir_u, &ir_s); + if (ret) + return ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false); + shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data; + + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, HCLGE_SHAPER_BS_U_DEF); + hclge_tm_set_field(shapping_para, BS_S, HCLGE_SHAPER_BS_S_DEF); + + shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para); + + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, enum hclge_shap_bucket bucket, u8 pri_id, u8 ir_b, u8 ir_u, u8 ir_s, @@ -864,6 +892,10 @@ static int hclge_tm_shaper_cfg(struct hclge_dev *hdev) { int ret; + ret = hclge_tm_port_shaper_cfg(hdev); + if (ret) + return ret; + ret = hclge_tm_pg_shaper_cfg(hdev); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 8ecd83c50f47..19a01e41c8b0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -99,6 +99,10 @@ struct hclge_pfc_en_cmd { u8 pri_en_bitmap; }; +struct hclge_port_shapping_cmd { + __le32 port_shapping_para; +}; + #define hclge_tm_set_field(dest, string, val) \ hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \ (HCLGE_TM_SHAP_##string##_LSH), val) From cc9bb43ab394f14096a55ee6101af0e804c05f0f Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:27 +0800 Subject: [PATCH 0370/2177] net: hns3: Add tc-based TM support for sriov enabled port When sriov is enabled and TM is in tc-based mode, vf's TM parameters is not set in TM initialization process. This patch add the tc_based TM support for sriov enabled using the information in vport struct. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index f79cebd7c95b..ea94d23a79f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -388,13 +388,13 @@ static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id) return hclge_cmd_send(&hdev->hw, &desc, 1); } -static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id) +static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id, u8 mode) { struct hclge_desc desc; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false); - if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR) + if (mode == HCLGE_SCH_MODE_DWRR) desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK); else desc.data[1] = 0; @@ -638,17 +638,18 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; int ret; - u32 i; + u32 i, k; if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) { /* Cfg qs -> pri mapping, one by one mapping */ - for (i = 0; i < hdev->tm_info.num_tc; i++) { - ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i); - if (ret) - return ret; - } + for (k = 0; k < hdev->num_alloc_vport; k++) + for (i = 0; i < hdev->tm_info.num_tc; i++) { + ret = hclge_tm_qs_to_pri_map_cfg( + hdev, vport[k].qs_offset + i, i); + if (ret) + return ret; + } } else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) { - int k; /* Cfg qs -> pri mapping, qs = tc, pri = vf, 8 qs -> 1 pri */ for (k = 0; k < hdev->num_alloc_vport; k++) for (i = 0; i < HNAE3_MAX_TC; i++) { @@ -797,10 +798,11 @@ static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) { + struct hclge_vport *vport = hdev->vport; struct hclge_pg_info *pg_info; u8 dwrr; int ret; - u32 i; + u32 i, k; for (i = 0; i < hdev->tm_info.num_tc; i++) { pg_info = @@ -811,9 +813,13 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr); - if (ret) - return ret; + for (k = 0; k < hdev->num_alloc_vport; k++) { + ret = hclge_tm_qs_weight_cfg( + hdev, vport[k].qs_offset + i, + vport[k].dwrr); + if (ret) + return ret; + } } return 0; @@ -944,7 +950,10 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport) return ret; for (i = 0; i < kinfo->num_tc; i++) { - ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i); + u8 sch_mode = hdev->tm_info.tc_info[i].tc_sch_mode; + + ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i, + sch_mode); if (ret) return ret; } @@ -956,7 +965,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; int ret; - u8 i; + u8 i, k; if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) { for (i = 0; i < hdev->tm_info.num_tc; i++) { @@ -964,9 +973,13 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_tm_qs_schd_mode_cfg(hdev, i); - if (ret) - return ret; + for (k = 0; k < hdev->num_alloc_vport; k++) { + ret = hclge_tm_qs_schd_mode_cfg( + hdev, vport[k].qs_offset + i, + HCLGE_SCH_MODE_DWRR); + if (ret) + return ret; + } } } else { for (i = 0; i < hdev->num_alloc_vport; i++) { From 77f255c1c695c72acb1d1c47d30323a273774ae6 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin 
Date: Wed, 27 Sep 2017 09:45:28 +0800 Subject: [PATCH 0371/2177] net: hns3: Add some interface for the support of DCB feature This patch adds some interfaces and exports some interfaces from hclge_tm and hclge_main to support the upcoming DCB feature. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 3 +- .../hisilicon/hns3/hns3pf/hclge_main.h | 3 ++ .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 48 +++++++++++++++++-- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 6 +++ 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 61632feb8c4e..644f7ff54081 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -30,7 +30,6 @@ #define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f)) #define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f)) -static int hclge_rss_init_hw(struct hclge_dev *hdev); static int hclge_set_mta_filter_mode(struct hclge_dev *hdev, enum hclge_mta_dmac_sel_type mta_mac_sel, bool enable); @@ -2655,7 +2654,7 @@ static int hclge_get_tc_size(struct hnae3_handle *handle) return hdev->rss_size_max; } -static int hclge_rss_init_hw(struct hclge_dev *hdev) +int hclge_rss_init_hw(struct hclge_dev *hdev) { const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ; struct hclge_vport *vport = hdev->vport; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 4fc36f04c971..394b58788065 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -515,4 +515,7 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue) int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex); int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid, bool is_kill, u16 vlan, u8 qos, __be16 proto); + +int hclge_buffer_alloc(struct hclge_dev *hdev); +int hclge_rss_init_hw(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index ea94d23a79f7..8295684da5ba 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -883,10 +883,14 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev) return 0; } -static int hclge_tm_map_cfg(struct hclge_dev *hdev) +int hclge_tm_map_cfg(struct hclge_dev *hdev) { int ret; + ret = hclge_up_to_tc_map(hdev); + if (ret) + return ret; + ret = hclge_tm_pg_to_pri_map(hdev); if (ret) return ret; @@ -994,7 +998,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) return 0; } -static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) +int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) { int ret; @@ -1092,7 +1096,45 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev) return ret; } - return hclge_up_to_tc_map(hdev); + return 0; +} + +int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_knic_private_info *kinfo; + u32 i, k; + + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { + if (prio_tc[i] >= hdev->tm_info.num_tc) + return -EINVAL; + hdev->tm_info.prio_tc[i] = prio_tc[i]; + + for (k = 0; k < hdev->num_alloc_vport; k++) { + kinfo = &vport[k].nic.kinfo; +
kinfo->prio_tc[i] = prio_tc[i]; + } + } + return 0; +} + +void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) +{ + u8 i, bit_map = 0; + + hdev->tm_info.num_tc = num_tc; + + for (i = 0; i < hdev->tm_info.num_tc; i++) + bit_map |= BIT(i); + + if (!bit_map) { + bit_map = 1; + hdev->tm_info.num_tc = 1; + } + + hdev->hw_tc_map = bit_map; + + hclge_tm_schd_info_init(hdev); } int hclge_tm_init_hw(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 19a01e41c8b0..bf59961918ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -112,4 +112,10 @@ struct hclge_port_shapping_cmd { int hclge_tm_schd_init(struct hclge_dev *hdev); int hclge_pause_setup_hw(struct hclge_dev *hdev); +int hclge_tm_schd_mode_hw(struct hclge_dev *hdev); +int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc); +void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc); +int hclge_tm_dwrr_cfg(struct hclge_dev *hdev); +int hclge_tm_map_cfg(struct hclge_dev *hdev); +int hclge_tm_init_hw(struct hclge_dev *hdev); #endif From cacde272dd00496c2c1c36606a56b340cd967603 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:29 +0800 Subject: [PATCH 0372/2177] net: hns3: Add hclge_dcb module for the support of DCB feature The hclge_dcb module calls the interface from hclge_main/tm and provide interface for the dcb netlink interface. This patch also update Makefiles required to build the DCB supported code in HNS3 Ethernet driver and update the existing Kconfig file in the hisilicon folder. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/Kconfig | 9 + drivers/net/ethernet/hisilicon/hns3/hnae3.h | 17 + .../ethernet/hisilicon/hns3/hns3pf/Makefile | 2 + .../hisilicon/hns3/hns3pf/hclge_dcb.c | 304 ++++++++++++++++++ .../hisilicon/hns3/hns3pf/hclge_dcb.h | 21 ++ .../hisilicon/hns3/hns3pf/hclge_main.c | 25 +- .../hisilicon/hns3/hns3pf/hclge_main.h | 3 + 7 files changed, 375 insertions(+), 6 deletions(-) create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 91c7bdb9b43c..9d7cb0387bf7 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -103,4 +103,13 @@ config HNS3_ENET family of SoCs. This module depends upon HNAE3 driver to access the HNAE3 devices and their associated operations. +config HNS3_DCB + bool "Hisilicon HNS3 Data Center Bridge Support" + default n + depends on HNS3 && HNS3_HCLGE && DCB + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the HNS3 driver. + + If unsure, say N. 
+ endif # NET_VENDOR_HISILICON diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 1a01cadfe5f3..c677530841cf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -28,6 +28,7 @@ */ #include +#include #include #include #include @@ -131,6 +132,7 @@ struct hnae3_client_ops { int (*init_instance)(struct hnae3_handle *handle); void (*uninit_instance)(struct hnae3_handle *handle, bool reset); void (*link_status_change)(struct hnae3_handle *handle, bool state); + int (*setup_tc)(struct hnae3_handle *handle, u8 tc); }; #define HNAE3_CLIENT_NAME_LENGTH 16 @@ -363,6 +365,20 @@ struct hnae3_ae_ops { u16 vlan, u8 qos, __be16 proto); }; +struct hnae3_dcb_ops { + /* IEEE 802.1Qaz std */ + int (*ieee_getets)(struct hnae3_handle *, struct ieee_ets *); + int (*ieee_setets)(struct hnae3_handle *, struct ieee_ets *); + int (*ieee_getpfc)(struct hnae3_handle *, struct ieee_pfc *); + int (*ieee_setpfc)(struct hnae3_handle *, struct ieee_pfc *); + + /* DCBX configuration */ + u8 (*getdcbx)(struct hnae3_handle *); + u8 (*setdcbx)(struct hnae3_handle *, u8); + + int (*map_update)(struct hnae3_handle *); +}; + struct hnae3_ae_algo { const struct hnae3_ae_ops *ops; struct list_head node; @@ -394,6 +410,7 @@ struct hnae3_knic_private_info { u16 num_tqps; /* total number of TQPs in this handle */ struct hnae3_queue **tqp; /* array base of all TQPs in this instance */ + const struct hnae3_dcb_ops *dcb_ops; }; struct hnae3_roce_private_info { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index 162e8a42acd0..7023dc878086 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -7,5 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGE) += hclge.o hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o +hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o + obj-$(CONFIG_HNS3_ENET) += hns3.o hns3-objs = hns3_enet.o hns3_ethtool.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c new file mode 100644 index 000000000000..1b30a6f966d8 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2016-2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "hclge_main.h" +#include "hclge_tm.h" +#include "hnae3.h" + +#define BW_PERCENT 100 + +static int hclge_ieee_ets_to_tm_info(struct hclge_dev *hdev, + struct ieee_ets *ets) +{ + u8 i; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + hdev->tm_info.tc_info[i].tc_sch_mode = + HCLGE_SCH_MODE_SP; + hdev->tm_info.pg_info[0].tc_dwrr[i] = 0; + break; + case IEEE_8021QAZ_TSA_ETS: + hdev->tm_info.tc_info[i].tc_sch_mode = + HCLGE_SCH_MODE_DWRR; + hdev->tm_info.pg_info[0].tc_dwrr[i] = + ets->tc_tx_bw[i]; + break; + default: + /* Hardware only supports SP (strict priority) + * or ETS (enhanced transmission selection) + * algorithms, if we receive some other value + * from dcbnl, then throw an error. 
+ */ + return -EINVAL; + } + } + + return hclge_tm_prio_tc_info_update(hdev, ets->prio_tc); +} + +static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev, + struct ieee_ets *ets) +{ + u32 i; + + memset(ets, 0, sizeof(*ets)); + ets->willing = 1; + ets->ets_cap = hdev->tc_max; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + ets->prio_tc[i] = hdev->tm_info.prio_tc[i]; + ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i]; + + if (hdev->tm_info.tc_info[i].tc_sch_mode == + HCLGE_SCH_MODE_SP) + ets->tc_tsa[i] = IEEE_8021QAZ_TSA_STRICT; + else + ets->tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + } +} + +/* IEEE std */ +static int hclge_ieee_getets(struct hnae3_handle *h, struct ieee_ets *ets) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + hclge_tm_info_to_ieee_ets(hdev, ets); + + return 0; +} + +static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, + u8 *tc, bool *changed) +{ + u32 total_ets_bw = 0; + u8 max_tc = 0; + u8 i; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + if (ets->prio_tc[i] >= hdev->tc_max || + i >= hdev->tc_max) + return -EINVAL; + + if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i]) + *changed = true; + + if (ets->prio_tc[i] > max_tc) + max_tc = ets->prio_tc[i]; + + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + if (hdev->tm_info.tc_info[i].tc_sch_mode != + HCLGE_SCH_MODE_SP) + *changed = true; + break; + case IEEE_8021QAZ_TSA_ETS: + if (hdev->tm_info.tc_info[i].tc_sch_mode != + HCLGE_SCH_MODE_DWRR) + *changed = true; + + total_ets_bw += ets->tc_tx_bw[i]; + break; + default: + return -EINVAL; + } + } + + if (total_ets_bw != BW_PERCENT) + return -EINVAL; + + *tc = max_tc + 1; + if (*tc != hdev->tm_info.num_tc) + *changed = true; + + return 0; +} + +static int hclge_map_update(struct hnae3_handle *h) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_tm_map_cfg(hdev); + if (ret) + return ret; + + ret = hclge_tm_schd_mode_hw(hdev); + if (ret) + return ret; + + ret = hclge_pause_setup_hw(hdev); + if (ret) + return ret; + + ret = hclge_buffer_alloc(hdev); + if (ret) + return ret; + + return hclge_rss_init_hw(hdev); +} + +static int hclge_client_setup_tc(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_client *client; + struct hnae3_handle *handle; + int ret; + u32 i; + + for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { + handle = &vport[i].nic; + client = handle->client; + + if (!client || !client->ops || !client->ops->setup_tc) + continue; + + ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc); + if (ret) + return ret; + } + + return 0; +} + +static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + bool map_changed = false; + u8 num_tc = 0; + int ret; + + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed); + if (ret) + return ret; + + hclge_tm_schd_info_update(hdev, num_tc); + + ret = hclge_ieee_ets_to_tm_info(hdev, ets); + if (ret) + return ret; + + if (map_changed) { + ret = hclge_client_setup_tc(hdev); + if (ret) + return ret; + } + + return hclge_tm_dwrr_cfg(hdev); +} + +static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + u8 i, j, pfc_map, *prio_tc; + + memset(pfc, 0, sizeof(*pfc)); + 
pfc->pfc_cap = hdev->pfc_max; + prio_tc = hdev->tm_info.prio_tc; + pfc_map = hdev->tm_info.hw_pfc_map; + + /* Pfc setting is based on TC */ + for (i = 0; i < hdev->tm_info.num_tc; i++) { + for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { + if ((prio_tc[j] == i) && (pfc_map & BIT(i))) + pfc->pfc_en |= BIT(j); + } + } + + return 0; +} + +static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + u8 i, j, pfc_map, *prio_tc; + + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + prio_tc = hdev->tm_info.prio_tc; + pfc_map = 0; + + for (i = 0; i < hdev->tm_info.num_tc; i++) { + for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { + if ((prio_tc[j] == i) && (pfc->pfc_en & BIT(j))) { + pfc_map |= BIT(i); + break; + } + } + } + + if (pfc_map == hdev->tm_info.hw_pfc_map) + return 0; + + hdev->tm_info.hw_pfc_map = pfc_map; + + return hclge_pause_setup_hw(hdev); +} + +/* DCBX configuration */ +static u8 hclge_getdcbx(struct hnae3_handle *h) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + return hdev->dcbx_cap; +} + +static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + /* No support for LLD_MANAGED modes or CEE */ + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + (mode & DCB_CAP_DCBX_VER_CEE) || + !(mode & DCB_CAP_DCBX_HOST)) + return 1; + + hdev->dcbx_cap = mode; + + return 0; +} + +static const struct hnae3_dcb_ops hns3_dcb_ops = { + .ieee_getets = hclge_ieee_getets, + .ieee_setets = hclge_ieee_setets, + .ieee_getpfc = hclge_ieee_getpfc, + .ieee_setpfc = hclge_ieee_setpfc, + .getdcbx = hclge_getdcbx, + .setdcbx = hclge_setdcbx, + .map_update = hclge_map_update, +}; + +void hclge_dcb_ops_set(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_knic_private_info *kinfo; + + /* Hdev does not support DCB or vport is + * not a pf, then dcb_ops is not set. + */ + if (!hnae3_dev_dcb_supported(hdev) || + vport->vport_id != 0) + return; + + kinfo = &vport->nic.kinfo; + kinfo->dcb_ops = &hns3_dcb_ops; + hdev->dcbx_cap = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_HOST; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h new file mode 100644 index 000000000000..7d808ee96694 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2016~2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef __HCLGE_DCB_H__ +#define __HCLGE_DCB_H__ + +#include "hclge_main.h" + +#ifdef CONFIG_HNS3_DCB +void hclge_dcb_ops_set(struct hclge_dev *hdev); +#else +static inline void hclge_dcb_ops_set(struct hclge_dev *hdev) {} +#endif + +#endif /* __HCLGE_DCB_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 644f7ff54081..dd220eab7f53 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -19,6 +19,7 @@ #include #include "hclge_cmd.h" +#include "hclge_dcb.h" #include "hclge_main.h" #include "hclge_mdio.h" #include "hclge_tm.h" @@ -1057,7 +1058,7 @@ static int hclge_configure(struct hclge_dev *hdev) hdev->hw.mac.phy_addr = cfg.phy_addr; hdev->num_desc = cfg.tqp_desc_num; hdev->tm_info.num_pg = 1; - hdev->tm_info.num_tc = cfg.tc_num; + hdev->tc_max = cfg.tc_num; hdev->tm_info.hw_pfc_map = 0; ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed); @@ -1066,15 +1067,25 @@ static int hclge_configure(struct hclge_dev *hdev) return ret; } - if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) || - (hdev->tm_info.num_tc < 1)) { + if ((hdev->tc_max > HNAE3_MAX_TC) || + (hdev->tc_max < 1)) { dev_warn(&hdev->pdev->dev, "TC num = %d.\n", - hdev->tm_info.num_tc); - hdev->tm_info.num_tc = 1; + hdev->tc_max); + hdev->tc_max = 1; } + /* Dev does not support DCB */ + if (!hnae3_dev_dcb_supported(hdev)) { + hdev->tc_max = 1; + hdev->pfc_max = 0; + } else { + hdev->pfc_max = hdev->tc_max; + } + + hdev->tm_info.num_tc = hdev->tc_max; + /* Currently not support uncontiuous tc */ - for (i = 0; i < cfg.tc_num; i++) + for (i = 0; i < hdev->tm_info.num_tc; i++) hnae_set_bit(hdev->hw_tc_map, i, 1); if (!hdev->num_vmdq_vport && !hdev->num_req_vfs) @@ -4238,6 +4249,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + hclge_dcb_ops_set(hdev); + setup_timer(&hdev->service_timer, hclge_service_timer, (unsigned long)hdev); INIT_WORK(&hdev->service_task, hclge_service_task); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 394b58788065..7c66c00e8a3e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -421,8 +421,11 @@ struct hclge_dev { #define HCLGE_FLAG_TC_BASE_SCH_MODE 1 #define HCLGE_FLAG_VNET_BASE_SCH_MODE 2 u8 tx_sch_mode; + u8 tc_max; + u8 pfc_max; u8 default_up; + u8 dcbx_cap; struct hclge_tm_info tm_info; u16 num_msi; From 986743dbf0a70211bba594b5abee33b6661feaa9 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:30 +0800 Subject: [PATCH 0373/2177] net: hns3: Add dcb netlink interface for the support of DCB feature This patch add dcb netlink interface by calling the interface from hclge_dcb module. This patch also update Makefile in order to build hns3_dcbnl module. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns3/hns3pf/Makefile | 2 + .../hisilicon/hns3/hns3pf/hns3_dcbnl.c | 106 ++++++++++++++++++ .../hisilicon/hns3/hns3pf/hns3_enet.c | 2 + .../hisilicon/hns3/hns3pf/hns3_enet.h | 7 ++ 4 files changed, 117 insertions(+) create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index 7023dc878086..d2b20d01a58c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -11,3 +11,5 @@ hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o obj-$(CONFIG_HNS3_ENET) += hns3.o hns3-objs = hns3_enet.o hns3_ethtool.o + +hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c new file mode 100644 index 000000000000..9832172bfb08 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016-2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "hnae3.h" +#include "hns3_enet.h" + +static +int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->ieee_getets) + return h->kinfo.dcb_ops->ieee_getets(h, ets); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->ieee_setets) + return h->kinfo.dcb_ops->ieee_setets(h, ets); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->ieee_getpfc) + return h->kinfo.dcb_ops->ieee_getpfc(h, pfc); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->ieee_setpfc) + return h->kinfo.dcb_ops->ieee_setpfc(h, pfc); + + return -EOPNOTSUPP; +} + +/* DCBX configuration */ +static u8 hns3_dcbnl_getdcbx(struct net_device *ndev) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->getdcbx) + return h->kinfo.dcb_ops->getdcbx(h); + + return 0; +} + +/* return 0 if successful, otherwise fail */ +static u8 hns3_dcbnl_setdcbx(struct net_device *ndev, u8 mode) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->setdcbx) + return h->kinfo.dcb_ops->setdcbx(h, mode); + + return 1; +} + +static const struct dcbnl_rtnl_ops hns3_dcbnl_ops = { + .ieee_getets = hns3_dcbnl_ieee_getets, + .ieee_setets = hns3_dcbnl_ieee_setets, + .ieee_getpfc = hns3_dcbnl_ieee_getpfc, + .ieee_setpfc = hns3_dcbnl_ieee_setpfc, + .getdcbx = hns3_dcbnl_getdcbx, + .setdcbx = hns3_dcbnl_setdcbx, +}; + +/* hclge_dcbnl_setup - DCBNL setup + * @handle: the corresponding vport handle + * Set up 
DCBNL + */ +void hns3_dcbnl_setup(struct hnae3_handle *handle) +{ + struct net_device *dev = handle->kinfo.netdev; + + if (!handle->kinfo.dcb_ops) + return; + + dev->dcbnl_ops = &hns3_dcbnl_ops; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 35369e1c8036..11dab26f3543 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2790,6 +2790,8 @@ static int hns3_client_init(struct hnae3_handle *handle) goto out_reg_netdev_fail; } + hns3_dcbnl_setup(handle); + /* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */ netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h index 7e8746189747..481eada73e2d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h @@ -590,4 +590,11 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value) void hns3_ethtool_set_ops(struct net_device *netdev); int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); + +#ifdef CONFIG_HNS3_DCB +void hns3_dcbnl_setup(struct hnae3_handle *handle); +#else +static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {} +#endif + #endif From 7979a223305016625d211dd051569933c433f81e Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:31 +0800 Subject: [PATCH 0374/2177] net: hns3: Setting for fc_mode and dcb enable flag in TM module After the DCB feature is supported, fc_mode and dcb enable flag must be set according to the DCB parameter. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 34 ++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 8295684da5ba..359ee670d1e1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -486,7 +486,11 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) hdev->tm_info.prio_tc[i] = (i >= hdev->tm_info.num_tc) ? 0 : i; - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + /* DCB is enabled if we have more than 1 TC */ + if (hdev->tm_info.num_tc > 1) + hdev->flag |= HCLGE_FLAG_DCB_ENABLE; + else + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; } static void hclge_tm_pg_info_init(struct hclge_dev *hdev) @@ -512,6 +516,24 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) } } +static void hclge_pfc_info_init(struct hclge_dev *hdev) +{ + if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) { + if (hdev->fc_mode_last_time == HCLGE_FC_PFC) + dev_warn(&hdev->pdev->dev, + "DCB is disable, but last mode is FC_PFC\n"); + + hdev->tm_info.fc_mode = hdev->fc_mode_last_time; + } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) { + /* fc_mode_last_time record the last fc_mode when + * DCB is enabled, so that fc_mode can be set to + * the correct value when DCB is disabled. 
+ */ + hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + hdev->tm_info.fc_mode = HCLGE_FC_PFC; + } +} + static int hclge_tm_schd_info_init(struct hclge_dev *hdev) { if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) && @@ -524,8 +546,7 @@ static int hclge_tm_schd_info_init(struct hclge_dev *hdev) hclge_tm_vport_info_update(hdev); - hdev->tm_info.fc_mode = HCLGE_FC_NONE; - hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + hclge_pfc_info_init(hdev); return 0; } @@ -1158,8 +1179,13 @@ int hclge_tm_init_hw(struct hclge_dev *hdev) int hclge_tm_schd_init(struct hclge_dev *hdev) { - int ret = hclge_tm_schd_info_init(hdev); + int ret; + /* fc_mode is HCLGE_FC_FULL on reset */ + hdev->tm_info.fc_mode = HCLGE_FC_FULL; + hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + + ret = hclge_tm_schd_info_init(hdev); if (ret) return ret; From 9df8f79a4d2957fa3083e8fda0843d8c010351a7 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:32 +0800 Subject: [PATCH 0375/2177] net: hns3: Add DCB support when interacting with network stack When using lldptool to configure DCB parameter, hclge_dcb module call the client_ops->setup_tc to tell network stack which queue and priority is using for specific tc. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hns3_enet.c | 102 +++++++++++++++--- 1 file changed, 87 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 11dab26f3543..4a0890f98b70 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -196,6 +196,32 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector) tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW; } +static int hns3_nic_set_real_num_queue(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + struct hnae3_knic_private_info *kinfo = &h->kinfo; + unsigned int queue_size = kinfo->rss_size * kinfo->num_tc; + int ret; + + ret = netif_set_real_num_tx_queues(netdev, queue_size); + if (ret) { + netdev_err(netdev, + "netif_set_real_num_tx_queues fail, ret=%d!\n", + ret); + return ret; + } + + ret = netif_set_real_num_rx_queues(netdev, queue_size); + if (ret) { + netdev_err(netdev, + "netif_set_real_num_rx_queues fail, ret=%d!\n", ret); + return ret; + } + + return 0; +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -232,26 +258,13 @@ out_start_err: static int hns3_nic_net_open(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; int ret; netif_carrier_off(netdev); - ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps); - if (ret) { - netdev_err(netdev, - "netif_set_real_num_tx_queues fail, ret=%d!\n", - ret); + ret = hns3_nic_set_real_num_queue(netdev); + if (ret) return ret; - } - - ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps); - if (ret) { - netdev_err(netdev, - "netif_set_real_num_rx_queues fail, ret=%d!\n", ret); - return ret; - } ret = hns3_nic_net_up(netdev); if (ret) { @@ -2848,10 +2861,69 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup) } } +static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) +{ + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct net_device *ndev = kinfo->netdev; + bool 
if_running = netif_running(ndev); + int ret; + u8 i; + + if (tc > HNAE3_MAX_TC) + return -EINVAL; + + if (!ndev) + return -ENODEV; + + ret = netdev_set_num_tc(ndev, tc); + if (ret) + return ret; + + if (if_running) { + (void)hns3_nic_net_stop(ndev); + msleep(100); + } + + ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ? + kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP; + if (ret) + goto err_out; + + if (tc <= 1) { + netdev_reset_tc(ndev); + goto out; + } + + for (i = 0; i < HNAE3_MAX_TC; i++) { + struct hnae3_tc_info *tc_info = &kinfo->tc_info[i]; + + if (tc_info->enable) + netdev_set_tc_queue(ndev, + tc_info->tc, + tc_info->tqp_count, + tc_info->tqp_offset); + } + + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { + netdev_set_prio_tc_map(ndev, i, + kinfo->prio_tc[i]); + } + +out: + ret = hns3_nic_set_real_num_queue(ndev); + +err_out: + if (if_running) + (void)hns3_nic_net_open(ndev); + + return ret; +} + const struct hnae3_client_ops client_ops = { .init_instance = hns3_client_init, .uninit_instance = hns3_client_uninit, .link_status_change = hns3_link_status_change, + .setup_tc = hns3_client_setup_tc, }; /* hns3_init_module - Driver registration routine From 5af48b59f35cf712793badabe1a574a0d0ce3bd3 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 27 Sep 2017 16:12:44 +0300 Subject: [PATCH 0376/2177] net: bridge: add per-port group_fwd_mask with less restrictions We need to be able to transparently forward most link-local frames via tunnels (e.g. vxlan, qinq). Currently the bridge's group_fwd_mask has a mask which restricts the forwarding of STP and LACP, but we need to be able to forward these over tunnels and control that forwarding on a per-port basis thus add a new per-port group_fwd_mask option which only disallows mac pause frames to be forwarded (they're always dropped anyway). The patch does not change the current default situation - all of the others are still restricted unless configured for forwarding. We have successfully tested this patch with LACP and STP forwarding over VxLAN and qinq tunnels. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_input.c | 1 + net/bridge/br_netlink.c | 14 +++++++++++++- net/bridge/br_private.h | 10 +++++++++- net/bridge/br_sysfs_if.c | 18 ++++++++++++++++++ 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8d062c58d5cb..ea87bd708ee9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -325,6 +325,7 @@ enum { IFLA_BRPORT_MCAST_TO_UCAST, IFLA_BRPORT_VLAN_TUNNEL, IFLA_BRPORT_BCAST_FLOOD, + IFLA_BRPORT_GROUP_FWD_MASK, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7637f58c1226..7cb613776b31 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -289,6 +289,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) * * Others reserved for future standardization */ + fwd_mask |= p->group_fwd_mask; switch (dest[5]) { case 0x00: /* Bridge Group Address */ /* If STP is turned off, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 3bc890716c89..dea88a255d26 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -152,6 +152,7 @@ static inline size_t br_port_info_size(void) #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ #endif + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */ + 0; } @@ -208,7 +209,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, p->topology_change_ack) || nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & - BR_VLAN_TUNNEL))) + BR_VLAN_TUNNEL)) || + nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask)) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -637,6 +639,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 }, }; /* Change the state of the port and notify spanning tree */ @@ -773,6 +776,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) return err; } #endif + + if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { + u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]); + + if (fwd_mask & BR_GROUPFWD_MACPAUSE) + return -EINVAL; + p->group_fwd_mask = fwd_mask; + } + br_port_flags_change(p, old_flags ^ p->flags); return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e870cfc85b14..020c709a017f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -36,7 +36,14 @@ /* Control of forwarding link local multicast */ #define BR_GROUPFWD_DEFAULT 0 /* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ -#define BR_GROUPFWD_RESTRICTED 0x0007u +enum { + BR_GROUPFWD_STP = BIT(0), + BR_GROUPFWD_MACPAUSE = BIT(1), + BR_GROUPFWD_LACP = BIT(2), +}; + +#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \ + BR_GROUPFWD_LACP) /* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ #define BR_GROUPFWD_8021AD 0xB801u @@ -268,6 +275,7 @@ struct net_bridge_port { #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif + u16 group_fwd_mask; }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 5d5d413a6cf8..9110d5e56085 100644 
--- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v) } static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); +static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%#x\n", p->group_fwd_mask); +} + +static int store_group_fwd_mask(struct net_bridge_port *p, + unsigned long v) +{ + if (v & BR_GROUPFWD_MACPAUSE) + return -EINVAL; + p->group_fwd_mask = v; + + return 0; +} +static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, + store_group_fwd_mask); + BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); @@ -223,6 +240,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_proxyarp_wifi, &brport_attr_multicast_flood, &brport_attr_broadcast_flood, + &brport_attr_group_fwd_mask, NULL }; From cf5d74b85ef40c202c76d90959db4d850f301b95 Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Wed, 27 Sep 2017 18:30:58 +0200 Subject: [PATCH 0377/2177] tcp: fix under-evaluated ssthresh in TCP Vegas With the commit 76174004a0f19785 (tcp: do not slow start when cwnd equals ssthresh), the comparison to the reduced cwnd in tcp_vegas_ssthresh() would under-evaluate the ssthresh. Signed-off-by: Hoang Tran Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 218cfcc77650..ee113ff15fd0 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { - return min(tp->snd_ssthresh, tp->snd_cwnd-1); + return min(tp->snd_ssthresh, tp->snd_cwnd); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) From cb4d2b3f03d8eed90be3a194e5b54b734ec4bbe9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:52 -0700 Subject: [PATCH 0378/2177] bpf: Add name, load_time, uid and map_ids to bpf_prog_info The patch adds name and load_time to struct bpf_prog_aux. They are also exported to bpf_prog_info. The bpf_prog's name is passed by userspace during BPF_PROG_LOAD. The kernel only stores the first (BPF_PROG_NAME_LEN - 1) bytes and the name stored in the kernel is always \0 terminated. The kernel will reject name that contains characters other than isalnum() and '_'. It will also reject name that is not null terminated. The existing 'user->uid' of the bpf_prog_aux is also exported to the bpf_prog_info as created_by_uid. The existing 'used_maps' of the bpf_prog_aux is exported to the newly added members 'nr_map_ids' and 'map_ids' of the bpf_prog_info. On the input, nr_map_ids tells how big the userspace's map_ids buffer is. On the output, nr_map_ids tells the exact user_map_cnt and it will only copy up to the userspace's map_ids buffer is allowed. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 8 +++++++ kernel/bpf/syscall.c | 51 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b672c50f160..33ccc474fb04 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -187,6 +187,8 @@ struct bpf_prog_aux { struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; + u64 load_time; /* ns since boottime */ + u8 name[BPF_OBJ_NAME_LEN]; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e43491ac4823..bd6348269bf5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -175,6 +175,8 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -210,6 +212,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; + __u8 prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -812,6 +815,11 @@ struct bpf_prog_info { __u32 xlated_prog_len; __aligned_u64 jited_prog_insns; __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 25d074920a00..45970df3f820 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -23,6 +23,9 @@ #include #include #include +#include +#include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ @@ -312,6 +315,30 @@ int bpf_map_new_fd(struct bpf_map *map) offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ sizeof(attr->CMD##_LAST_FIELD)) != NULL +/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes. + * Return 0 on success and < 0 on error. 
+ */ +static int bpf_obj_name_cpy(char *dst, const char *src) +{ + const char *end = src + BPF_OBJ_NAME_LEN; + + /* Copy all isalnum() and '_' char */ + while (src < end && *src) { + if (!isalnum(*src) && *src != '_') + return -EINVAL; + *dst++ = *src++; + } + + /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */ + if (src == end) + return -EINVAL; + + /* '\0' terminates dst */ + *dst = 0; + + return 0; +} + #define BPF_MAP_CREATE_LAST_FIELD numa_node /* called via syscall */ static int map_create(union bpf_attr *attr) @@ -973,7 +1000,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) EXPORT_SYMBOL_GPL(bpf_prog_get_type); /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_flags +#define BPF_PROG_LOAD_LAST_FIELD prog_name static int bpf_prog_load(union bpf_attr *attr) { @@ -1037,6 +1064,11 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_prog; + prog->aux->load_time = ktime_get_boot_ns(); + err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); + if (err) + goto free_prog; + /* run eBPF verifier */ err = bpf_check(&prog, attr); if (err < 0) @@ -1358,8 +1390,25 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.type = prog->type; info.id = prog->aux->id; + info.load_time = prog->aux->load_time; + info.created_by_uid = from_kuid_munged(current_user_ns(), + prog->aux->user->uid); memcpy(info.tag, prog->tag, sizeof(prog->tag)); + memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); + + ulen = info.nr_map_ids; + info.nr_map_ids = prog->aux->used_map_cnt; + ulen = min_t(u32, info.nr_map_ids, ulen); + if (ulen) { + u32 *user_map_ids = (u32 *)info.map_ids; + u32 i; + + for (i = 0; i < ulen; i++) + if (put_user(prog->aux->used_maps[i]->id, + &user_map_ids[i])) + return -EFAULT; + } if (!capable(CAP_SYS_ADMIN)) { info.jited_prog_len = 0; From ad5b177bd73f5107d97c36f56395c4281fb6f089 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:53 -0700 Subject: [PATCH 0379/2177] bpf: Add map_name to bpf_map_info This patch allows userspace to specify a name for a map during BPF_MAP_CREATE. The map's name can later be exported to user space via BPF_OBJ_GET_INFO_BY_FD. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 7 ++++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 33ccc474fb04..252f4bc9eb25 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,6 +56,7 @@ struct bpf_map { struct work_struct work; atomic_t usercnt; struct bpf_map *inner_map_meta; + u8 name[BPF_OBJ_NAME_LEN]; }; /* function argument constraints */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bd6348269bf5..6d2137b4cf38 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -190,6 +190,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). 
*/ + __u8 map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -829,6 +830,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 45970df3f820..11a7f82a55d1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -339,7 +339,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) return 0; } -#define BPF_MAP_CREATE_LAST_FIELD numa_node +#define BPF_MAP_CREATE_LAST_FIELD map_name /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -361,6 +361,10 @@ static int map_create(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + err = bpf_obj_name_cpy(map->name, attr->map_name); + if (err) + goto free_map_nouncharge; + atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); @@ -1462,6 +1466,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, info.value_size = map->value_size; info.max_entries = map->max_entries; info.map_flags = map->map_flags; + memcpy(info.name, map->name, sizeof(map->name)); if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) From 88cda1c9da02c8aa31e1d5dcf22e8a35cc8c19f2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:54 -0700 Subject: [PATCH 0380/2177] bpf: libbpf: Provide basic API support to specify BPF obj name This patch extends the libbpf to provide API support to allow specifying BPF object name. In tools/lib/bpf/libbpf, the C symbol of the function and the map is used. Regarding section name, all maps are under the same section named "maps". Hence, section name is not a good choice for map's name. To be consistent with map, bpf_prog also follows and uses its function symbol as the prog's name. This patch adds logic to collect function's symbols in libbpf. There is existing codes to collect the map's symbols and no change is needed. The bpf_load_program_name() and bpf_map_create_name() are added to take the name argument. For the other bpf_map_create_xxx() variants, a name argument is directly added to them. In samples/bpf, bpf_load.c in particular, the symbol is also used as the map's name and the map symbols has already been collected in the existing code. For bpf_prog, bpf_load.c does not collect the function symbol name. We can consider to collect them later if there is a need to continue supporting the bpf_load.c. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- samples/bpf/bpf_load.c | 2 + samples/bpf/map_perf_test_user.c | 1 + tools/include/uapi/linux/bpf.h | 10 ++ tools/lib/bpf/bpf.c | 57 +++++++--- tools/lib/bpf/bpf.h | 23 +++-- tools/lib/bpf/libbpf.c | 109 +++++++++++++++----- tools/testing/selftests/bpf/test_verifier.c | 2 +- 7 files changed, 157 insertions(+), 47 deletions(-) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 6aa50098dfb8..18b1c8dd0391 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -221,6 +221,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, + maps[i].name, maps[i].def.key_size, inner_map_fd, maps[i].def.max_entries, @@ -228,6 +229,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, numa_node); } else { map_fd[i] = bpf_create_map_node(maps[i].def.type, + maps[i].name, maps[i].def.key_size, maps[i].def.value_size, maps[i].def.max_entries, diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index a0310fc70057..519d9af4b04a 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -137,6 +137,7 @@ static void do_test_lru(enum test_type test, int cpu) inner_lru_map_fds[cpu] = bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, + test_map_names[INNER_LRU_HASH_PREALLOC], sizeof(uint32_t), sizeof(long), inner_lru_hash_size, 0, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e43491ac4823..6d2137b4cf38 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -175,6 +175,8 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -188,6 +190,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). */ + __u8 map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -210,6 +213,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; + __u8 prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -812,6 +816,11 @@ struct bpf_prog_info { __u32 xlated_prog_len; __aligned_u64 jited_prog_insns; __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -821,6 +830,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1d6907d379c9..daf624e4c720 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -46,6 +46,8 @@ # endif #endif +#define min(x, y) ((x) < (y) ? 
(x) : (y)) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -57,10 +59,11 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } -int bpf_create_map_node(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags, - int node) +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) { + __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -70,6 +73,8 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size, attr.value_size = value_size; attr.max_entries = max_entries; attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; attr.numa_node = node; @@ -81,14 +86,23 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size, int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, key_size, value_size, + return bpf_create_map_node(map_type, NULL, key_size, value_size, max_entries, map_flags, -1); } -int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags) +{ + return bpf_create_map_node(map_type, name, key_size, value_size, + max_entries, map_flags, -1); +} + +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { + __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -99,6 +113,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; attr.numa_node = node; @@ -107,19 +123,24 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags) +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags) { - return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd, - max_entries, map_flags, -1); + return bpf_create_map_in_map_node(map_type, name, key_size, + inner_map_fd, max_entries, map_flags, + -1); } -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, size_t log_buf_sz) +int bpf_load_program_name(enum bpf_prog_type type, const char *name, + const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) { int fd; union bpf_attr attr; + __u32 name_len = name ? 
strlen(name) : 0; bzero(&attr, sizeof(attr)); attr.prog_type = type; @@ -130,6 +151,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_size = 0; attr.log_level = 0; attr.kern_version = kern_version; + memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); if (fd >= 0 || !log_buf || !log_buf_sz) @@ -143,6 +165,15 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) +{ + return bpf_load_program_name(type, NULL, insns, insns_cnt, license, + kern_version, log_buf, log_buf_sz); +} + int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, int strict_alignment, const char *license, __u32 kern_version, diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index b8ea5843c39e..118d00535a0d 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,19 +24,28 @@ #include #include -int bpf_create_map_node(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags, - int node); +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node); +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags); int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); -int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node); -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags); +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags); /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE 65536 +int bpf_load_program_name(enum bpf_prog_type type, const char *name, + const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz); int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 35f6dfcdc565..4f402dcdf372 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -171,6 +171,7 @@ int libbpf_strerror(int err, char *buf, size_t size) struct bpf_program { /* Index in elf obj file, for relocation use. 
*/ int idx; + char *name; char *section_name; struct bpf_insn *insns; size_t insns_cnt; @@ -283,6 +284,7 @@ static void bpf_program__exit(struct bpf_program *prog) prog->clear_priv = NULL; bpf_program__unload(prog); + zfree(&prog->name); zfree(&prog->section_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -293,26 +295,27 @@ static void bpf_program__exit(struct bpf_program *prog) } static int -bpf_program__init(void *data, size_t size, char *name, int idx, - struct bpf_program *prog) +bpf_program__init(void *data, size_t size, char *section_name, int idx, + struct bpf_program *prog) { if (size < sizeof(struct bpf_insn)) { - pr_warning("corrupted section '%s'\n", name); + pr_warning("corrupted section '%s'\n", section_name); return -EINVAL; } bzero(prog, sizeof(*prog)); - prog->section_name = strdup(name); + prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog %s\n", - name); + pr_warning("failed to alloc name for prog under section %s\n", + section_name); goto errout; } prog->insns = malloc(size); if (!prog->insns) { - pr_warning("failed to alloc insns for %s\n", name); + pr_warning("failed to alloc insns for prog under section %s\n", + section_name); goto errout; } prog->insns_cnt = size / sizeof(struct bpf_insn); @@ -331,12 +334,12 @@ errout: static int bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, - char *name, int idx) + char *section_name, int idx) { struct bpf_program prog, *progs; int nr_progs, err; - err = bpf_program__init(data, size, name, idx, &prog); + err = bpf_program__init(data, size, section_name, idx, &prog); if (err) return err; @@ -350,8 +353,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, * is still valid, so don't need special treat for * bpf_close_object(). 
*/ - pr_warning("failed to alloc a new program '%s'\n", - name); + pr_warning("failed to alloc a new program under section '%s'\n", + section_name); bpf_program__exit(&prog); return -ENOMEM; } @@ -364,6 +367,54 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, return 0; } +static int +bpf_object__init_prog_names(struct bpf_object *obj) +{ + Elf_Data *symbols = obj->efile.symbols; + struct bpf_program *prog; + size_t pi, si; + + for (pi = 0; pi < obj->nr_programs; pi++) { + char *name = NULL; + + prog = &obj->programs[pi]; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; + si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (sym.st_shndx != prog->idx) + continue; + + name = elf_strptr(obj->efile.elf, + obj->efile.strtabidx, + sym.st_name); + if (!name) { + pr_warning("failed to get sym name string for prog %s\n", + prog->section_name); + return -LIBBPF_ERRNO__LIBELF; + } + } + + if (!name) { + pr_warning("failed to find sym for prog %s\n", + prog->section_name); + return -EINVAL; + } + + prog->name = strdup(name); + if (!prog->name) { + pr_warning("failed to allocate memory for prog sym %s\n", + name); + return -ENOMEM; + } + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, void *obj_buf, size_t obj_buf_sz) @@ -766,8 +817,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } - if (obj->efile.maps_shndx >= 0) + if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj); + if (err) + goto out; + } + err = bpf_object__init_prog_names(obj); out: return err; } @@ -870,11 +925,12 @@ bpf_object__create_maps(struct bpf_object *obj) struct bpf_map_def *def = &obj->maps[i].def; int *pfd = &obj->maps[i].fd; - *pfd = bpf_create_map(def->type, - def->key_size, - def->value_size, - def->max_entries, - 0); + *pfd = bpf_create_map_name(def->type, + obj->maps[i].name, + def->key_size, + def->value_size, + def->max_entries, + 0); if (*pfd < 0) { size_t j; int err = *pfd; @@ -982,7 +1038,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, struct bpf_insn *insns, +load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns, int insns_cnt, char *license, u32 kern_version, int *pfd) { int ret; @@ -995,8 +1051,8 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns, if (!log_buf) pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); - ret = bpf_load_program(type, insns, insns_cnt, license, - kern_version, log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_name(type, name, insns, insns_cnt, license, + kern_version, log_buf, BPF_LOG_BUF_SIZE); if (ret >= 0) { *pfd = ret; @@ -1021,9 +1077,9 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns, if (type != BPF_PROG_TYPE_KPROBE) { int fd; - fd = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, - insns_cnt, license, kern_version, - NULL, 0); + fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name, + insns, insns_cnt, license, + kern_version, NULL, 0); if (fd >= 0) { close(fd); ret = -LIBBPF_ERRNO__PROGTYPE; @@ -1067,8 +1123,8 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + err = load_program(prog->type, prog->name, prog->insns, + 
prog->insns_cnt, license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1096,7 +1152,8 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, result.new_insn_ptr, + err = load_program(prog->type, prog->name, + result.new_insn_ptr, result.new_insn_cnt, license, kern_version, &fd); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a0426147523d..290d5056c165 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6939,7 +6939,7 @@ static int create_map_in_map(void) return inner_map_fd; } - outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, + outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, sizeof(int), inner_map_fd, 1, 0); if (outer_map_fd < 0) printf("Failed to create array of maps '%s'!\n", From 6e525d06674a21468b4e728ef880770a0ca5c60d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:55 -0700 Subject: [PATCH 0381/2177] bpf: Swap the order of checking prog_info and map_info This patch swaps the checking order. It now checks the map_info first and then prog_info. It is a prep work for adding test to the newly added fields (the map_ids of prog_info field in particular). Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_progs.c | 58 ++++++++++++------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 11ee25cea227..31ae27dc8d04 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -316,6 +316,36 @@ static void test_bpf_obj_id(void) error_cnt++; assert(!err); + /* Insert a magic value to the map */ + map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); + assert(map_fds[i] >= 0); + err = bpf_map_update_elem(map_fds[i], &array_key, + &array_magic_value, 0); + assert(!err); + + /* Check getting map info */ + info_len = sizeof(struct bpf_map_info) * 2; + bzero(&map_infos[i], info_len); + err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i], + &info_len); + if (CHECK(err || + map_infos[i].type != BPF_MAP_TYPE_ARRAY || + map_infos[i].key_size != sizeof(__u32) || + map_infos[i].value_size != sizeof(__u64) || + map_infos[i].max_entries != 1 || + map_infos[i].map_flags != 0 || + info_len != sizeof(struct bpf_map_info), + "get-map-info(fd)", + "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n", + err, errno, + map_infos[i].type, BPF_MAP_TYPE_ARRAY, + info_len, sizeof(struct bpf_map_info), + map_infos[i].key_size, + map_infos[i].value_size, + map_infos[i].max_entries, + map_infos[i].map_flags)) + goto done; + /* Check getting prog info */ info_len = sizeof(struct bpf_prog_info) * 2; bzero(&prog_infos[i], info_len); @@ -347,34 +377,6 @@ static void test_bpf_obj_id(void) !!memcmp(xlated_insns, zeros, sizeof(zeros)))) goto done; - map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - assert(map_fds[i] >= 0); - err = bpf_map_update_elem(map_fds[i], &array_key, - &array_magic_value, 0); - assert(!err); - - /* Check getting map info */ - info_len = sizeof(struct bpf_map_info) * 2; - bzero(&map_infos[i], info_len); - err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i], - &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || 
- map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags)) - goto done; } /* Check bpf_prog_get_next_id() */ From 3a8ad560a9674235d1dd2e174434f540924e249f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:56 -0700 Subject: [PATCH 0382/2177] bpf: Test new fields in bpf_attr and bpf_{prog, map}_info This patch tests newly added fields of the bpf_attr, bpf_prog_info and bpf_map_info. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_progs.c | 143 +++++++++++++++++++++-- 1 file changed, 132 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 31ae27dc8d04..69427531408d 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include typedef __u16 __sum16; @@ -19,6 +20,8 @@ typedef __u16 __sum16; #include #include #include +#include +#include #include #include @@ -273,16 +276,26 @@ static void test_bpf_obj_id(void) const int nr_iters = 2; const char *file = "./test_obj_id.o"; const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + const char *expected_prog_name = "test_obj_id"; + const char *expected_map_name = "test_map_id"; + const __u64 nsec_per_sec = 1000000000; struct bpf_object *objs[nr_iters]; int prog_fds[nr_iters], map_fds[nr_iters]; /* +1 to test for the info_len returned by kernel */ struct bpf_prog_info prog_infos[nr_iters + 1]; struct bpf_map_info map_infos[nr_iters + 1]; + /* Each prog only uses one map. +1 to test nr_map_ids + * returned by kernel. + */ + __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128]; __u32 i, next_id, info_len, nr_id_found, duration = 0; + struct timespec real_time_ts, boot_time_ts; int sysctl_fd, jit_enabled = 0, err = 0; __u64 array_value; + uid_t my_uid = getuid(); + time_t now, load_time; sysctl_fd = open(jit_sysctl, 0, O_RDONLY); if (sysctl_fd != -1) { @@ -307,6 +320,7 @@ static void test_bpf_obj_id(void) /* Check bpf_obj_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); for (i = 0; i < nr_iters; i++) { + now = time(NULL); err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER, &objs[i], &prog_fds[i]); /* test_obj_id.o is a dumb prog. 
It should never fail @@ -334,16 +348,18 @@ static void test_bpf_obj_id(void) map_infos[i].value_size != sizeof(__u64) || map_infos[i].max_entries != 1 || map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info), + info_len != sizeof(struct bpf_map_info) || + strcmp((char *)map_infos[i].name, expected_map_name), "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n", + "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", err, errno, map_infos[i].type, BPF_MAP_TYPE_ARRAY, info_len, sizeof(struct bpf_map_info), map_infos[i].key_size, map_infos[i].value_size, map_infos[i].max_entries, - map_infos[i].map_flags)) + map_infos[i].map_flags, + map_infos[i].name, expected_map_name)) goto done; /* Check getting prog info */ @@ -355,8 +371,16 @@ static void test_bpf_obj_id(void) prog_infos[i].jited_prog_len = sizeof(jited_insns); prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns); prog_infos[i].xlated_prog_len = sizeof(xlated_insns); + prog_infos[i].map_ids = ptr_to_u64(map_ids + i); + prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); + assert(!err); + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); + assert(!err); err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); + load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + + (prog_infos[i].load_time / nsec_per_sec); if (CHECK(err || prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || info_len != sizeof(struct bpf_prog_info) || @@ -364,9 +388,14 @@ static void test_bpf_obj_id(void) (jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))) || !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, zeros, sizeof(zeros)), + !memcmp(xlated_insns, zeros, sizeof(zeros)) || + load_time < now - 60 || load_time > now + 60 || + prog_infos[i].created_by_uid != my_uid || + prog_infos[i].nr_map_ids != 1 || + *(int *)prog_infos[i].map_ids != map_infos[i].id || + strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d\n", + "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), @@ -374,9 +403,13 @@ static void test_bpf_obj_id(void) prog_infos[i].jited_prog_len, prog_infos[i].xlated_prog_len, !!memcmp(jited_insns, zeros, sizeof(zeros)), - !!memcmp(xlated_insns, zeros, sizeof(zeros)))) + !!memcmp(xlated_insns, zeros, sizeof(zeros)), + load_time, now, + prog_infos[i].created_by_uid, my_uid, + prog_infos[i].nr_map_ids, 1, + *(int *)prog_infos[i].map_ids, map_infos[i].id, + prog_infos[i].name, expected_prog_name)) goto done; - } /* Check bpf_prog_get_next_id() */ @@ -384,6 +417,7 @@ static void test_bpf_obj_id(void) next_id = 0; while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; + __u32 saved_map_id; int prog_fd; info_len = sizeof(prog_info); @@ -406,16 +440,33 @@ static void test_bpf_obj_id(void) nr_id_found++; + /* Negative test: + * prog_info.nr_map_ids = 1 + * prog_info.map_ids = NULL + */ + prog_info.nr_map_ids = 1; + err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + if (CHECK(!err || errno != 
EFAULT, + "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", + err, errno, EFAULT)) + break; + bzero(&prog_info, sizeof(prog_info)); + info_len = sizeof(prog_info); + + saved_map_id = *(int *)(prog_infos[i].map_ids); + prog_info.map_ids = prog_infos[i].map_ids; + prog_info.nr_map_ids = 2; err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len), + memcmp(&prog_info, &prog_infos[i], info_len) || + *(int *)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%lu) memcmp %d\n", + "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len)); - + memcmp(&prog_info, &prog_infos[i], info_len), + *(int *)prog_info.map_ids, saved_map_id); close(prog_fd); } CHECK(nr_id_found != nr_iters, @@ -497,6 +548,75 @@ static void test_pkt_md_access(void) bpf_object__close(obj); } +static void test_obj_name(void) +{ + struct { + const char *name; + int success; + int expected_errno; + } tests[] = { + { "", 1, 0 }, + { "_123456789ABCDE", 1, 0 }, + { "_123456789ABCDEF", 0, EINVAL }, + { "_123456789ABCD\n", 0, EINVAL }, + }; + struct bpf_insn prog[] = { + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 duration = 0; + int i; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + size_t name_len = strlen(tests[i].name) + 1; + union bpf_attr attr; + size_t ncopy; + int fd; + + /* test different attr.prog_name during BPF_PROG_LOAD */ + ncopy = name_len < sizeof(attr.prog_name) ? + name_len : sizeof(attr.prog_name); + bzero(&attr, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SCHED_CLS; + attr.insn_cnt = 2; + attr.insns = ptr_to_u64(prog); + attr.license = ptr_to_u64(""); + memcpy(attr.prog_name, tests[i].name, ncopy); + + fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + CHECK((tests[i].success && fd < 0) || + (!tests[i].success && fd != -1) || + (!tests[i].success && errno != tests[i].expected_errno), + "check-bpf-prog-name", + "fd %d(%d) errno %d(%d)\n", + fd, tests[i].success, errno, tests[i].expected_errno); + + if (fd != -1) + close(fd); + + /* test different attr.map_name during BPF_MAP_CREATE */ + ncopy = name_len < sizeof(attr.map_name) ? + name_len : sizeof(attr.map_name); + bzero(&attr, sizeof(attr)); + attr.map_type = BPF_MAP_TYPE_ARRAY; + attr.key_size = 4; + attr.value_size = 4; + attr.max_entries = 1; + attr.map_flags = 0; + memcpy(attr.map_name, tests[i].name, ncopy); + fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); + CHECK((tests[i].success && fd < 0) || + (!tests[i].success && fd != -1) || + (!tests[i].success && errno != tests[i].expected_errno), + "check-bpf-map-name", + "fd %d(%d) errno %d(%d)\n", + fd, tests[i].success, errno, tests[i].expected_errno); + + if (fd != -1) + close(fd); + } +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -509,6 +629,7 @@ int main(void) test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); + test_obj_name(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; From c7c3e5913bf18eda3cf38932bebdce48351baac9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 27 Sep 2017 19:08:00 -0700 Subject: [PATCH 0383/2177] net: ipv4: remove fib_weight fib_weight in fib_info is set but not used. 
Remove it and the helpers for setting it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 --- net/ipv4/fib_semantics.c | 9 --------- 2 files changed, 12 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1a7f7e424320..f80524396c06 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -122,9 +122,6 @@ struct fib_info { #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int fib_weight; -#endif struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 57a5d48acee8..be0874620ecc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -601,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi) atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); } endfor_nexthops(fi); } - -static inline void fib_add_weight(struct fib_info *fi, - const struct fib_nh *nh) -{ - fi->fib_weight += nh->nh_weight; -} - #else /* CONFIG_IP_ROUTE_MULTIPATH */ #define fib_rebalance(fi) do { } while (0) -#define fib_add_weight(fi, nh) do { } while (0) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -1275,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, change_nexthops(fi) { fib_info_update_nh_saddr(net, nexthop_nh); - fib_add_weight(fi, nexthop_nh); } endfor_nexthops(fi) fib_rebalance(fi); From fa8fefaa678ea390b873195d19c09930da84a4bb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 27 Sep 2017 20:41:59 -0700 Subject: [PATCH 0384/2177] net: ipv4: remove fib_info arg to fib_check_nh fib_check_nh does not use the fib_info arg; remove it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index be0874620ecc..467b3c15395f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -766,8 +766,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) * | * |-> {local prefix} (terminal node) */ -static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, - struct fib_nh *nh, struct netlink_ext_ack *extack) +static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, + struct netlink_ext_ack *extack) { int err = 0; struct net *net; @@ -1250,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, int linkdown = 0; change_nexthops(fi) { - err = fib_check_nh(cfg, fi, nexthop_nh, extack); + err = fib_check_nh(cfg, nexthop_nh, extack); if (err != 0) goto failure; if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) From 2b634bb0686e43a6338fe779fbabd72b6b928fdc Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:10:14 -0400 Subject: [PATCH 0385/2177] i40e/i40evf: rename bytes_per_int to bytes_per_usec This value is not calculating bytes_per_int, which would actually just be bytes/ITR_COUNTDOWN_START, but rather it's calculating bytes/usecs. Rename the variable for clarity so that future developers understand what the value is actually calculating.
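As a quick illustration of the renamed quantity, the stand-alone sketch below (plain user-space C, not driver code; the ITR_COUNTDOWN_START value of 100 and the 2-microsecond ITR granularity are assumptions for the example, not taken from this patch) mirrors the arithmetic in the hunks that follow: the measurement window is roughly (itr << 1) * ITR_COUNTDOWN_START microseconds, so dividing the byte count by it yields a throughput in bytes per microsecond rather than a per-interrupt byte count.

/* Stand-alone sketch, not driver code; assumed constants are marked. */
#include <stdio.h>

#define ITR_COUNTDOWN_START 100			/* assumed interrupts per update window */

int main(void)
{
	unsigned int itr = 50;			/* assumed 2 usec units -> 100 usecs per interrupt */
	unsigned long total_bytes = 250000;	/* bytes handled during the window */

	unsigned int usecs = (itr << 1) * ITR_COUNTDOWN_START;
	unsigned long bytes_per_usec = total_bytes / usecs;

	/* 250000 bytes / 10000 usecs = 25 bytes/usec, i.e. about 200 Mbit/s */
	printf("%lu bytes over %u usecs -> %lu bytes/usec (~%lu Mbit/s)\n",
	       total_bytes, usecs, bytes_per_usec, bytes_per_usec * 8);
	return 0;
}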
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 12 ++++++------ drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f426762bd83a..d9fdf69bbc6e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -960,14 +960,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; - int bytes_per_int; + int bytes_per_usec; unsigned int usecs, estimated_usecs; if (rc->total_packets == 0 || !rc->itr) return false; usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_int = rc->total_bytes / usecs; + bytes_per_usec = rc->total_bytes / usecs; /* The calculations in this algorithm depend on interrupts actually * firing at the ITR rate. This may not happen if the packet rate is @@ -993,18 +993,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) */ switch (new_latency_range) { case I40E_LOWEST_LATENCY: - if (bytes_per_int > 10) + if (bytes_per_usec > 10) new_latency_range = I40E_LOW_LATENCY; break; case I40E_LOW_LATENCY: - if (bytes_per_int > 20) + if (bytes_per_usec > 20) new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_int <= 10) + else if (bytes_per_usec <= 10) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: default: - if (bytes_per_int <= 20) + if (bytes_per_usec <= 20) new_latency_range = I40E_LOW_LATENCY; break; } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index c32c62462c84..37e1de886d48 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -358,14 +358,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; - int bytes_per_int; + int bytes_per_usec; unsigned int usecs, estimated_usecs; if (rc->total_packets == 0 || !rc->itr) return false; usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_int = rc->total_bytes / usecs; + bytes_per_usec = rc->total_bytes / usecs; /* The calculations in this algorithm depend on interrupts actually * firing at the ITR rate. This may not happen if the packet rate is @@ -391,18 +391,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) */ switch (new_latency_range) { case I40E_LOWEST_LATENCY: - if (bytes_per_int > 10) + if (bytes_per_usec > 10) new_latency_range = I40E_LOW_LATENCY; break; case I40E_LOW_LATENCY: - if (bytes_per_int > 20) + if (bytes_per_usec > 20) new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_int <= 10) + else if (bytes_per_usec <= 10) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: default: - if (bytes_per_int <= 20) + if (bytes_per_usec <= 20) new_latency_range = I40E_LOW_LATENCY; break; } From 16badf758b25bd00528246ab9af938296b9d368d Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Fri, 14 Jul 2017 09:10:15 -0400 Subject: [PATCH 0386/2177] i40e: Fix unqualified module message while bringing link up In current driver, when ifconfig ethx up is done, the link state doesn't transition to UP inside i40e_open(). It changes after AQ command response is handled in i40e_handle_link_event(). 
When pf->hw.phy.link_info.link_info is DOWN inside i40e_open(), The state is transient and invalid. So log message gets printed based on incorrect info (i.e link_info and an_info). This commit removes check for unqualified module inside i40e_up_complete(). The existing check in i40e_handle_link_event() logs the error message based on correct link state information. Signed-off-by: Sudheer Mogilappagari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6498da8806cb..b235a27232a8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5470,15 +5470,6 @@ static int i40e_up_complete(struct i40e_vsi *vsi) i40e_print_link_message(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - } else if (vsi->netdev) { - i40e_print_link_message(vsi, false); - /* need to check for qualified module here*/ - if ((pf->hw.phy.link_info.link_info & - I40E_AQ_MEDIA_AVAILABLE) && - (!(pf->hw.phy.link_info.an_info & - I40E_AQ_QUALIFIED_MODULE))) - netdev_err(vsi->netdev, - "the driver failed to link because an unqualified module was detected."); } /* replay FDIR SB filters */ From 9a03449d3ea0f6b497ff3a3bf6203a5e72c7e6be Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Fri, 14 Jul 2017 09:10:16 -0400 Subject: [PATCH 0387/2177] i40e: Fix link down message when interface is brought up i40e_print_link_message() is intended to compare new link state with current link state and print log message only if the new state is different from current state. However in current driver the new state does not get updated when link is going down because of the if condition. When an interface is brought down, vsi->state is set to I40E_VSI_DOWN in i40e_vsi_close() and later i40e_print_link_message() does not get invoked in i40e_link_event due to if condition. Hence link down message doesn't appear when link is going down. The down state is seen later during i40e_open() and old state gets printed. The actual link state doesn't get updated in i40e_close() or i40e_open() but when i40e_handle_link_event is called inside i40e_clean_adminq_subtask. This change allows i40e_print_link_message() to be called when interface is going down and keeps the state information updated. Signed-off-by: Sudheer Mogilappagari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b235a27232a8..2e8fe6186b38 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6420,8 +6420,7 @@ static void i40e_link_event(struct i40e_pf *pf) new_link == netif_carrier_ok(vsi->netdev))) return; - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - i40e_print_link_message(vsi, new_link); + i40e_print_link_message(vsi, new_link); /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. 
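Calling i40e_print_link_message() unconditionally, as the hunk above now does, is safe because the helper itself only reports state transitions. A generic, stand-alone sketch of that change-gated logging idiom (illustrative C, not i40e code; the structure and values are invented for the example):

/* Illustrative user-space sketch, not i40e code. */
#include <stdbool.h>
#include <stdio.h>

struct link_reporter {
	bool cur_isup;
	int cur_speed_gbps;
};

/* Print only when the reported state differs from what was last printed. */
static void print_link_message(struct link_reporter *r, bool isup, int speed_gbps)
{
	if (r->cur_isup == isup && r->cur_speed_gbps == speed_gbps)
		return;				/* no change, stay silent */

	r->cur_isup = isup;
	r->cur_speed_gbps = speed_gbps;

	if (isup)
		printf("NIC Link is Up, %d Gbps\n", speed_gbps);
	else
		printf("NIC Link is Down\n");
}

int main(void)
{
	struct link_reporter r = { .cur_isup = false, .cur_speed_gbps = 0 };

	print_link_message(&r, true, 40);	/* prints the Up message */
	print_link_message(&r, true, 40);	/* duplicate event, nothing printed */
	print_link_message(&r, false, 0);	/* prints the Down message */
	return 0;
}

Because the duplicate check lives inside the helper, the caller can report every link event without flooding the log, which is what lets the patch drop the __I40E_VSI_DOWN guard.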
From 3fded4663b07f8fa99b9424ca3d5c46b79f6b27e Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Fri, 14 Jul 2017 09:10:18 -0400 Subject: [PATCH 0388/2177] i40e: simplify member variable accesses This commit replaces usage of vsi->back in i40e_print_link_message() (which is actually a PF pointer) with temp variable. Signed-off-by: Sudheer Mogilappagari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2e8fe6186b38..3c650917b54f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5346,13 +5346,14 @@ out: void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) { enum i40e_aq_link_speed new_speed; + struct i40e_pf *pf = vsi->back; char *speed = "Unknown"; char *fc = "Unknown"; char *fec = ""; char *req_fec = ""; char *an = ""; - new_speed = vsi->back->hw.phy.link_info.link_speed; + new_speed = pf->hw.phy.link_info.link_speed; if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed)) return; @@ -5366,13 +5367,13 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) /* Warn user if link speed on NPAR enabled partition is not at * least 10GB */ - if (vsi->back->hw.func_caps.npar_enable && - (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || - vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) + if (pf->hw.func_caps.npar_enable && + (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || + pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) netdev_warn(vsi->netdev, "The partition detected link speed that is less than 10Gbps\n"); - switch (vsi->back->hw.phy.link_info.link_speed) { + switch (pf->hw.phy.link_info.link_speed) { case I40E_LINK_SPEED_40GB: speed = "40 G"; break; @@ -5395,7 +5396,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - switch (vsi->back->hw.fc.current_mode) { + switch (pf->hw.fc.current_mode) { case I40E_FC_FULL: fc = "RX/TX"; break; @@ -5410,18 +5411,18 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { + if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { req_fec = ", Requested FEC: None"; fec = ", FEC: None"; an = ", Autoneg: False"; - if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) an = ", Autoneg: True"; - if (vsi->back->hw.phy.link_info.fec_info & + if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) fec = ", FEC: CL74 FC-FEC/BASE-R"; - else if (vsi->back->hw.phy.link_info.fec_info & + else if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) fec = ", FEC: CL108 RS-FEC"; From e04ea00217904fc3f6fddac0b74e74e5ac488fda Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Fri, 14 Jul 2017 09:10:19 -0400 Subject: [PATCH 0389/2177] i40e: relax warning message in case of version mismatch Fortville and Fort Park devices are often on different firmware release schedules. This change relaxes the minor version warning message, so it is only displayed for older FW warning version for old firmware Fortville 3 or earlier. 
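For clarity, a stand-alone sketch of the relaxed gate (the cut-off mirrors the new condition in the diff below; the helper name and sample versions are illustrative only): the "older NVM image" hint now fires only for admin-queue API 1.x firmware with a minor below 4, rather than being derived from the driver's own expected minor version.

/* Illustrative only -- mirrors the relaxed condition, not the driver itself. */
#include <stdbool.h>
#include <stdio.h>

static bool warn_old_nvm(unsigned int api_maj_ver, unsigned int api_min_ver)
{
	return api_maj_ver == 1 && api_min_ver < 4;
}

int main(void)
{
	printf("API 1.2: warn=%d\n", warn_old_nvm(1, 2));	/* 1: old firmware, keep the hint */
	printf("API 1.5: warn=%d\n", warn_old_nvm(1, 5));	/* 0 */
	printf("API 2.0: warn=%d\n", warn_old_nvm(2, 0));	/* 0: different release schedule, no nag */
	return 0;
}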
Signed-off-by: Mariusz Stachura Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3c650917b54f..a887087d08cd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11374,8 +11374,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) dev_info(&pdev->dev, "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); - else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR || - hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1)) + else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) dev_info(&pdev->dev, "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n"); From 0dc8692e914ac49931d69b5217d5fe0171fc026e Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Fri, 14 Jul 2017 09:27:00 -0400 Subject: [PATCH 0390/2177] i40e: fix for flow director counters not wrapping as expected An errata with GLQF_PCNT causes it to not wrap as expected. This can cause an error in flow director statistics. This patch resets affected counters just after reading. Signed-off-by: Mariusz Stachura Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 35 ++++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a887087d08cd..638f5bad0bd7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -599,6 +599,20 @@ static void i40e_stat_update32(struct i40e_hw *hw, u32 reg, *stat = (u32)((new_data + BIT_ULL(32)) - *offset); } +/** + * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat + * @hw: ptr to the hardware info + * @reg: the hw reg to read and clear + * @stat: ptr to the stat + **/ +static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat) +{ + u32 new_data = rd32(hw, reg); + + wr32(hw, reg, 1); /* must write a nonzero value to clear register */ + *stat += new_data; +} + /** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. 
* @vsi: the VSI to be updated @@ -1040,18 +1054,15 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) &osd->rx_jabber, &nsd->rx_jabber); /* FDIR stats */ - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_atr_match, &nsd->fd_atr_match); - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_sb_match, &nsd->fd_sb_match); - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)), + &nsd->fd_atr_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)), + &nsd->fd_sb_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)), + &nsd->fd_atr_tunnel_match); val = rd32(hw, I40E_PRTPM_EEE_STAT); nsd->tx_lpi_status = From 905770fa3e6f30b393829ba1c238554e7f238aee Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 14 Jul 2017 09:27:01 -0400 Subject: [PATCH 0391/2177] i40evf: lower message level We see this message regularly on VF reset or unload (which invokes a reset). It's essentially meaningless unless it's happening constantly. To prevent consternation, lower the log level to debug so it's not seen under normal circumstance. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 85876f4fb1fb..2bb0fe00361f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -52,7 +52,7 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter, err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); if (err) - dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", + dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", op, i40evf_stat_str(hw, err), i40evf_aq_str(hw, hw->aq.asq_last_status)); return err; From c17401a1dd210a5f22ab1ec7c7366037c158a14c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:02 -0400 Subject: [PATCH 0392/2177] i40e: use separate state bit for miscellaneous IRQ setup We currently (mis)use the __I40E_RECOVERY_PENDING bit to determine when we should actually request a new IRQ in i40e_setup_misc_vector(). This led to a design mistake where we open-coded the re-setup of the miscellaneous vector in i40e_resume() instead of using the function provided. If we did not open-code this and instead tried to use the i40e_setup_misc_vector() function, it would lead to never reallocating the IRQ. This would lead to a second i40e_suspend() call failing to free the vector due to a NULL pointer dereference. A future patch is going to re-work how the i40e_suspend() and i40e_resume() flows work to clear all IRQ vectors, which would require us to use i40e_setup_misc_vector() directly. Since during this time the __I40E_RECOVERY_PENDING bit is set, we'll never re-allocate the vector. Rather than leaving the open-coded setup in i40e_resume() lets just fix the problem properly in i40e_setup_misc_vector(). 
Introduce a new state bit which indicates when the IRQ has been assigned, which will be set when i40e_setup_misc_vector is first called. This ultimately resolves the issue of re-requesting the vector, without overloading the __I40E_RECOVERY_PENDING state. This ensures that the suspend/resume cycle can use the setup function instead of open-coding the re-request during resume. Additionally, since the only callers of i40e_stop_misc_vector also want to free it, move this code directly into the function to avoid duplication. Due to the new functionality, rename it to i40e_free_misc_vector(). This lets us drop the extra calls to free and re-enable the vector during i40e_suspend() and i40e_resume(). We don't need to call i40e_setup_misc_vector() in i40e_resume() because it gets called by the i40e_rebuild() call. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 39 ++++++++------------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d0c1bf5441d8..b7a539cdca00 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -136,6 +136,7 @@ enum i40e_state_t { __I40E_MDD_EVENT_PENDING, __I40E_VFLR_EVENT_PENDING, __I40E_RESET_RECOVERY_PENDING, + __I40E_MISC_IRQ_REQUESTED, __I40E_RESET_INTR_RECEIVED, __I40E_REINIT_REQUESTED, __I40E_PF_RESET_REQUESTED, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 638f5bad0bd7..3ea4f8b942c3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3604,14 +3604,20 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) } /** - * i40e_stop_misc_vector - Stop the vector that handles non-queue events + * i40e_free_misc_vector - Free the vector that handles non-queue events * @pf: board private structure **/ -static void i40e_stop_misc_vector(struct i40e_pf *pf) +static void i40e_free_misc_vector(struct i40e_pf *pf) { /* Disable ICR 0 */ wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0); i40e_flush(&pf->hw); + + if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { + synchronize_irq(pf->msix_entries[0].vector); + free_irq(pf->msix_entries[0].vector, pf); + clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); + } } /** @@ -4466,11 +4472,7 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { int i; - i40e_stop_misc_vector(pf); - if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } + i40e_free_misc_vector(pf); i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, I40E_IWARP_IRQ_PILE_ID); @@ -8365,13 +8367,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) struct i40e_hw *hw = &pf->hw; int err = 0; - /* Only request the irq if this is the first time through, and - * not when we're rebuilding after a Reset - */ - if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) { + /* Only request the IRQ once, the first time through. 
*/ + if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) { err = request_irq(pf->msix_entries[0].vector, i40e_intr, 0, pf->int_name, pf); if (err) { + clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); dev_info(&pf->pdev->dev, "request_irq for %s failed: %d\n", pf->int_name, err); @@ -12069,11 +12070,8 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); - i40e_stop_misc_vector(pf); - if (pf->msix_entries) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } + i40e_free_misc_vector(pf); + retval = pci_save_state(pdev); if (retval) return retval; @@ -12113,15 +12111,6 @@ static int i40e_resume(struct pci_dev *pdev) /* handling the reset will rebuild the device state */ if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { clear_bit(__I40E_DOWN, pf->state); - if (pf->msix_entries) { - err = request_irq(pf->msix_entries[0].vector, - i40e_intr, 0, pf->int_name, pf); - if (err) { - dev_err(&pf->pdev->dev, - "request_irq for %s failed: %d\n", - pf->int_name, err); - } - } i40e_reset_and_rebuild(pf, false, false); } From 0e5d3da400558b7d30586a2cc1afe02276445636 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:03 -0400 Subject: [PATCH 0393/2177] i40e: use newer generic PM support instead of legacy PM callbacks Stop using the old legacy PM support, since we now have stable support for the newer generic PM callbacks. This has several advantages. First, we no longer have to manage our own pci_save_state() and power changes, as it's preferred to have the PCI stack do this. Second, these routines get called for both hibernate and suspend to ram, so we can have the driver properly handle all the suspend/resume flows that it needs to. 
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 54 +++++++-------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3ea4f8b942c3..c82360437024 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12050,14 +12050,14 @@ static void i40e_shutdown(struct pci_dev *pdev) #ifdef CONFIG_PM /** - * i40e_suspend - PCI callback for moving to D3 - * @pdev: PCI device information struct + * i40e_suspend - PM callback for moving to D3 + * @dev: generic device information structure **/ -static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) +static int i40e_suspend(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; - int retval = 0; set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); @@ -12072,41 +12072,17 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) i40e_free_misc_vector(pf); - retval = pci_save_state(pdev); - if (retval) - return retval; - - pci_wake_from_d3(pdev, pf->wol_en); - pci_set_power_state(pdev, PCI_D3hot); - - return retval; + return 0; } /** - * i40e_resume - PCI callback for waking up from D3 - * @pdev: PCI device information struct + * i40e_resume - PM callback for waking up from D3 + * @dev: generic device information structure **/ -static int i40e_resume(struct pci_dev *pdev) +static int i40e_resume(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); - u32 err; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - /* pci_restore_state() clears dev->state_saves, so - * call pci_save_state() again to restore it. - */ - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); - return err; - } - pci_set_master(pdev); - - /* no wakeup events while running */ - pci_wake_from_d3(pdev, false); /* handling the reset will rebuild the device state */ if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { @@ -12117,22 +12093,26 @@ static int i40e_resume(struct pci_dev *pdev) return 0; } -#endif +#endif /* CONFIG_PM */ + static const struct pci_error_handlers i40e_err_handler = { .error_detected = i40e_pci_error_detected, .slot_reset = i40e_pci_error_slot_reset, .resume = i40e_pci_error_resume, }; +static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); + static struct pci_driver i40e_driver = { .name = i40e_driver_name, .id_table = i40e_pci_tbl, .probe = i40e_probe, .remove = i40e_remove, #ifdef CONFIG_PM - .suspend = i40e_suspend, - .resume = i40e_resume, -#endif + .driver = { + .pm = &i40e_pm_ops, + }, +#endif /* CONFIG_PM */ .shutdown = i40e_shutdown, .err_handler = &i40e_err_handler, .sriov_configure = i40e_pci_sriov_configure, From 401586c2b9bb16147f3dcc64d3596013625e2c44 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:04 -0400 Subject: [PATCH 0394/2177] i40e: don't clear suspended state until we finish resuming When handling suspend and resume callbacks we want to make sure that (a) we don't suspend again if we're already suspended and (b) we don't resume again if we're already resuming. 
Let's make sure we test_and_set the __I40E_SUSPENDED bit in i40e_suspend which ensures that a suspend call when already suspended will exit early. Additionally, if __I40E_SUSPENDED is not set when we begin resuming, exit early as well. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c82360437024..494cafde6b26 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12059,7 +12059,10 @@ static int i40e_suspend(struct device *dev) struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; - set_bit(__I40E_SUSPENDED, pf->state); + /* If we're already suspended, then there is nothing to do */ + if (test_and_set_bit(__I40E_SUSPENDED, pf->state)) + return 0; + set_bit(__I40E_DOWN, pf->state); if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) @@ -12084,11 +12087,15 @@ static int i40e_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); - /* handling the reset will rebuild the device state */ - if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { - clear_bit(__I40E_DOWN, pf->state); - i40e_reset_and_rebuild(pf, false, false); - } + /* If we're not suspended, then there is nothing to do */ + if (!test_bit(__I40E_SUSPENDED, pf->state)) + return 0; + + clear_bit(__I40E_DOWN, pf->state); + i40e_reset_and_rebuild(pf, false, false); + + /* Clear suspended state last after everything is recovered */ + clear_bit(__I40E_SUSPENDED, pf->state); return 0; } From 5c499228803a77bd4e878c7119fbd40a1dc6d773 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:05 -0400 Subject: [PATCH 0395/2177] i40e: prevent service task from running while we're suspended Although the service task does check the suspended status before running, it might already be part way through running when we go to suspend. Let's ensure that the service task is stopped and will not be restarted again until we finish resuming. This ensures that service task code does not cause strange interactions with the suspend/resume handlers. 
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 494cafde6b26..368373459ad5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12065,6 +12065,10 @@ static int i40e_suspend(struct device *dev) set_bit(__I40E_DOWN, pf->state); + /* Ensure service task will not be running */ + del_timer_sync(&pf->service_timer); + cancel_work_sync(&pf->service_task); + if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) i40e_enable_mc_magic_wake(pf); @@ -12097,6 +12101,10 @@ static int i40e_resume(struct device *dev) /* Clear suspended state last after everything is recovered */ clear_bit(__I40E_SUSPENDED, pf->state); + /* Restart the service task */ + mod_timer(&pf->service_timer, + round_jiffies(jiffies + pf->service_timer_period)); + return 0; } From b980c0634fe56928a45cc3c0f688d96e36705403 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:06 -0400 Subject: [PATCH 0396/2177] i40e: shutdown all IRQs and disable MSI-X when suspended On some platforms with a large number of CPUs, we will allocate many IRQ vectors. When hibernating, the system will attempt to migrate all of the vectors back to CPU0 when shutting down all the other CPUs. It is possible that we have so many vectors that it cannot re-assign them to CPU0. This is even more likely if we have many devices installed in one platform. The end result is failure to hibernate, as it is not possible to shutdown the CPUs. We can avoid this by disabling MSI-X and clearing our interrupt scheme when the device is suspended. A more ideal solution would be some method for the stack to properly handle this for all drivers, rather than on a case-by-case basis for each driver to fix itself. However, until this more ideal solution exists, we can do our part and shutdown our IRQs during suspend, which should allow systems with a large number of CPUs to safely suspend or hibernate. It may be worth investigating if we should shut down even further when we suspend as it may make the path cleaner, but this was the minimum fix for the hibernation issue mentioned here. Testing-hints: This affects systems with a large number of CPUs, and with multiple devices enabled. Without this change, those platforms are unable to hibernate at all. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 68 ++++++++++++++++++- .../net/ethernet/intel/i40evf/i40evf_main.c | 2 +- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 368373459ad5..8a44793d5390 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8354,6 +8354,57 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) return 0; } +#ifdef CONFIG_PM +/** + * i40e_restore_interrupt_scheme - Restore the interrupt scheme + * @pf: private board data structure + * + * Restore the interrupt scheme that was cleared when we suspended the + * device. This should be called during resume to re-allocate the q_vectors + * and reacquire IRQs. 
+ */ +static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) +{ + int err, i; + + /* We cleared the MSI and MSI-X flags when disabling the old interrupt + * scheme. We need to re-enabled them here in order to attempt to + * re-acquire the MSI or MSI-X vectors + */ + pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED); + + err = i40e_init_interrupt_scheme(pf); + if (err) + return err; + + /* Now that we've re-acquired IRQs, we need to remap the vectors and + * rings together again. + */ + for (i = 0; i < pf->num_alloc_vsi; i++) { + if (pf->vsi[i]) { + err = i40e_vsi_alloc_q_vectors(pf->vsi[i]); + if (err) + goto err_unwind; + i40e_vsi_map_rings_to_vectors(pf->vsi[i]); + } + } + + err = i40e_setup_misc_vector(pf); + if (err) + goto err_unwind; + + return 0; + +err_unwind: + while (i--) { + if (pf->vsi[i]) + i40e_vsi_free_q_vectors(pf->vsi[i]); + } + + return err; +} +#endif /* CONFIG_PM */ + /** * i40e_setup_misc_vector - Setup the misc vector to handle non queue events * @pf: board private structure @@ -12077,7 +12128,12 @@ static int i40e_suspend(struct device *dev) wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); - i40e_free_misc_vector(pf); + /* Clear the interrupt scheme and release our IRQs so that the system + * can safely hibernate even when there are a large number of CPUs. + * Otherwise hibernation might fail when mapping all the vectors back + * to CPU0. + */ + i40e_clear_interrupt_scheme(pf); return 0; } @@ -12090,11 +12146,21 @@ static int i40e_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); + int err; /* If we're not suspended, then there is nothing to do */ if (!test_bit(__I40E_SUSPENDED, pf->state)) return 0; + /* We cleared the interrupt scheme when we suspended, so we need to + * restore it now to resume device functionality. + */ + err = i40e_restore_interrupt_scheme(pf); + if (err) { + dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n", + err); + } + clear_bit(__I40E_DOWN, pf->state); i40e_reset_and_rebuild(pf, false, false); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index c243f9da95ae..80ade6510279 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -46,7 +46,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 3 #define DRV_VERSION_MINOR 0 -#define DRV_VERSION_BUILD 0 +#define DRV_VERSION_BUILD 1 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ From c97fc9b6a798f4253c176231ba0aceda6b59b058 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Fri, 14 Jul 2017 09:27:07 -0400 Subject: [PATCH 0397/2177] i40evf: fix ring to vector mapping The current implementation for mapping queues to vectors is broken because it attempts to map each Tx and Rx ring to its own vector, however we use combined queues so we should actually be mapping the Tx/Rx rings together on one vector. Also in the current implementation, in the case where we have more queues than vectors, we attempt to group the queues together into 'chunks' and map each 'chunk' of queues to a vector. 
Chunking them together would be more ideal if, and only if, we only had RSS because of the way the hashing algorithm works but in the case of a future patch that enables VF ADq, round robin assignment is better and still works with RSS. This patch resolves both those issues and simplifies the code needed to accomplish this. Instead of treating the case where we have more queues than vectors as special, if we notice our vector index is greater than vectors, reset the vector index to zero and continue mapping. This should ensure that in both cases, whether we have enough vectors for each queue or not, the queues get appropriately mapped. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40evf/i40evf_main.c | 48 ++++--------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 80ade6510279..69ef6c1d5364 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -432,52 +432,24 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) **/ static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) { + int rings_remaining = adapter->num_active_queues; + int ridx = 0, vidx = 0; int q_vectors; - int v_start = 0; - int rxr_idx = 0, txr_idx = 0; - int rxr_remaining = adapter->num_active_queues; - int txr_remaining = adapter->num_active_queues; - int i, j; - int rqpv, tqpv; int err = 0; q_vectors = adapter->num_msix_vectors - NONQ_VECS; - /* The ideal configuration... - * We have enough vectors to map one per queue. - */ - if (q_vectors >= (rxr_remaining * 2)) { - for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++) - i40evf_map_vector_to_rxq(adapter, v_start, rxr_idx); + for (; ridx < rings_remaining; ridx++) { + i40evf_map_vector_to_rxq(adapter, vidx, ridx); + i40evf_map_vector_to_txq(adapter, vidx, ridx); - for (; txr_idx < txr_remaining; v_start++, txr_idx++) - i40evf_map_vector_to_txq(adapter, v_start, txr_idx); - goto out; + /* In the case where we have more queues than vectors, continue + * round-robin on vectors until all queues are mapped. + */ + if (++vidx >= q_vectors) + vidx = 0; } - /* If we don't have enough vectors for a 1-to-1 - * mapping, we'll have to group them so there are - * multiple queues per vector. - * Re-adjusting *qpv takes care of the remainder. - */ - for (i = v_start; i < q_vectors; i++) { - rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i); - for (j = 0; j < rqpv; j++) { - i40evf_map_vector_to_rxq(adapter, i, rxr_idx); - rxr_idx++; - rxr_remaining--; - } - } - for (i = v_start; i < q_vectors; i++) { - tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i); - for (j = 0; j < tqpv; j++) { - i40evf_map_vector_to_txq(adapter, i, txr_idx); - txr_idx++; - txr_remaining--; - } - } - -out: adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; return err; From a3f5aa907340b5d7b54223ddbaa90410f168864d Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Fri, 14 Jul 2017 09:27:08 -0400 Subject: [PATCH 0398/2177] i40e: Enable VF to negotiate number of allocated queues Currently the PF allocates a default number of queues for each VF and cannot be changed. This patch enables the VF to request a different number of queues allocated to it. This patch also adds a new virtchnl op and capability flag to facilitate this negotiation. After the PF receives a request message, it will set a requested number of queues for that VF. 
Then when the VF resets, its VSI will get a new number of queues allocated to it. This is a best effort request and since we only allocate a guaranteed default number, if the VF tries to ask for more than the guaranteed number, there may not be enough in HW to accommodate it unless other queues for other VFs are freed. It should also be noted decreasing the number queues allocated to a VF to below the default will NOT enable the allocation of more than 32 VFs per PF and will not free queues guaranteed to each VF by default. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 75 +++++++++++++++++++ .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 + include/linux/avf/virtchnl.h | 20 +++++ 4 files changed, 97 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b7a539cdca00..439c63cb2a0c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -77,6 +77,7 @@ #define i40e_default_queues_per_vmdq(pf) \ (((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1) #define I40E_DEFAULT_QUEUES_PER_VF 4 +#define I40E_MAX_VF_QUEUES 16 #define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */ #define i40e_pf_get_max_q_per_tc(pf) \ (((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 4d1e670f490e..a75396c157d9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -815,6 +815,14 @@ static void i40e_free_vf_res(struct i40e_vf *vf) */ clear_bit(I40E_VF_STATE_INIT, &vf->vf_states); + /* It's possible the VF had requeuested more queues than the default so + * do the accounting here when we're about to free them. + */ + if (vf->num_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) { + pf->queues_left += vf->num_queue_pairs - + I40E_DEFAULT_QUEUES_PER_VF; + } + /* free vsi & disconnect it from the parent uplink */ if (vf->lan_vsi_idx) { i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]); @@ -868,12 +876,27 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) int total_queue_pairs = 0; int ret; + if (vf->num_req_queues && + vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF) + pf->num_vf_qps = vf->num_req_queues; + else + pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; + /* allocate hw vsi context & associated resources */ ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV); if (ret) goto error_alloc; total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + /* We account for each VF to get a default number of queue pairs. If + * the VF has now requested more, we need to account for that to make + * certain we never request more queues than we actually have left in + * HW. 
+ */ + if (total_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) + pf->queues_left -= + total_queue_pairs - I40E_DEFAULT_QUEUES_PER_VF; + if (vf->trusted) set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); else @@ -1579,6 +1602,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; } + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -1986,6 +2012,52 @@ error_param: aq_ret); } +/** + * i40e_vc_request_queues_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * VFs get a default number of queues but can use this message to request a + * different number. Will respond with either the number requested or the + * maximum we can support. + **/ +static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) +{ + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + int req_pairs = vfres->num_queue_pairs; + int cur_pairs = vf->num_queue_pairs; + struct i40e_pf *pf = vf->pf; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) + return -EINVAL; + + if (req_pairs <= 0) { + dev_err(&pf->pdev->dev, + "VF %d tried to request %d queues. Ignoring.\n", + vf->vf_id, req_pairs); + } else if (req_pairs > I40E_MAX_VF_QUEUES) { + dev_err(&pf->pdev->dev, + "VF %d tried to request more than %d queues.\n", + vf->vf_id, + I40E_MAX_VF_QUEUES); + vfres->num_queue_pairs = I40E_MAX_VF_QUEUES; + } else if (req_pairs - cur_pairs > pf->queues_left) { + dev_warn(&pf->pdev->dev, + "VF %d requested %d more queues, but only %d left.\n", + vf->vf_id, + req_pairs - cur_pairs, + pf->queues_left); + vfres->num_queue_pairs = pf->queues_left + cur_pairs; + } else { + vf->num_req_queues = req_pairs; + } + + return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0, + (u8 *)vfres, sizeof(vfres)); +} + /** * i40e_vc_get_stats_msg * @vf: pointer to the VF info @@ -2708,6 +2780,9 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: ret = i40e_vc_disable_vlan_stripping(vf, msg, msglen); break; + case VIRTCHNL_OP_REQUEST_QUEUES: + ret = i40e_vc_request_queues_msg(vf, msg, msglen); + break; case VIRTCHNL_OP_UNKNOWN: default: diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 1f4b0c504368..5111d05d5f2f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -97,6 +97,7 @@ struct i40e_vf { u16 lan_vsi_id; /* ID as used by firmware */ u8 num_queue_pairs; /* num of qps assigned to VF vsis */ + u8 num_req_queues; /* num of requested qps */ u64 num_mdd_events; /* num of mdd events detected */ /* num of continuous malformed or invalid msgs detected */ u64 num_invalid_msgs; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 2b038442c352..60e5d90cb18a 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -135,6 +135,7 @@ enum virtchnl_ops { VIRTCHNL_OP_SET_RSS_HENA = 26, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, + VIRTCHNL_OP_REQUEST_QUEUES = 29, }; /* This macro is used to generate a compilation error if a structure @@ -235,6 +236,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define 
VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 #define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 #define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 +#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES 0x00000040 #define VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 #define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 @@ -325,6 +327,21 @@ struct virtchnl_vsi_queue_config_info { struct virtchnl_queue_pair_info qpair[1]; }; +/* VIRTCHNL_OP_REQUEST_QUEUES + * VF sends this message to request the PF to allocate additional queues to + * this VF. Each VF gets a guaranteed number of queues on init but asking for + * additional queues must be negotiated. This is a best effort request as it + * is possible the PF does not have enough queues left to support the request. + * If the PF cannot support the number requested it will respond with the + * maximum number it is able to support; otherwise it will respond with the + * number requested. + */ + +/* VF resource request */ +struct virtchnl_vf_res_request { + u16 num_queue_pairs; +}; + VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info); /* VIRTCHNL_OP_CONFIG_IRQ_MAP @@ -691,6 +708,9 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: break; + case VIRTCHNL_OP_REQUEST_QUEUES: + valid_len = sizeof(struct virtchnl_vf_res_request); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: From 22b96551f213d7e7d743442c923c266a10306b9b Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 14 Jul 2017 09:27:09 -0400 Subject: [PATCH 0399/2177] i40e: refactor FW version checking The i40e driver now supports two different devices with two different firmware versions. So be smart about how we handle these. Move the FW version macros to the appropriate header file, and add a convenience macro that checks the version based on the device. Then use this macro to check whether or not the driver can use the new link info API. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 10 +++++++++- drivers/net/ethernet/intel/i40e/i40e_common.c | 6 ++++-- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 10 +++++++++- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 5d5f422cbae5..e2a9ec80a623 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -34,7 +34,15 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0005 +#define I40E_FW_API_VERSION_MINOR_X722 0x0005 +#define I40E_FW_API_VERSION_MINOR_X710 0x0007 + +#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? 
\ + I40E_FW_API_VERSION_MINOR_X710 : \ + I40E_FW_API_VERSION_MINOR_X722) + +/* API version 1.7 implements additional link and PHY-specific APIs */ +#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 struct i40e_aq_desc { __le16 flags; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 111426ba5fbc..7346d8850c8e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1593,8 +1593,10 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw, status = I40E_ERR_UNKNOWN_PHY; if (report_init) { - hw->phy.phy_types = le32_to_cpu(abilities->phy_type); - hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32); + if (hw->mac.type == I40E_MAC_XL710 && + hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) + status = i40e_aq_get_link_info(hw, true, NULL, NULL); } return status; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8a44793d5390..47f71d7c3ae0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11434,7 +11434,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_nvm_version_str(hw)); if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) + hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) dev_info(&pdev->dev, "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 83e63e55c4b4..f9f48d1900b0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -34,7 +34,15 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0005 +#define I40E_FW_API_VERSION_MINOR_X722 0x0005 +#define I40E_FW_API_VERSION_MINOR_X710 0x0007 + +#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \ + I40E_FW_API_VERSION_MINOR_X710 : \ + I40E_FW_API_VERSION_MINOR_X722) + +/* API version 1.7 implements additional link and PHY-specific APIs */ +#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 struct i40e_aq_desc { __le16 flags; From 1f372c7bfb23286d2bf4ce0423ab488e86b74bb2 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Mon, 25 Sep 2017 22:01:36 +0100 Subject: [PATCH 0400/2177] net: ipv6: send NS for DAD when link operationally up The NS for DAD are sent on admin up as long as a valid qdisc is found. A race condition exists by which these packets will not egress the interface if the operational state of the lower device is not yet up. The solution is to delay DAD until the link is operationally up according to RFC2863. Rather than only doing this, follow the existing code checks by deferring IPv6 device initialization altogether. The fix allows DAD on devices like tunnels that are controlled by userspace control plane. The fix has no impact on regular deployments, but means that there is no IPv6 connectivity until the port has been opened in the case of port-based network access control, which should be desirable. Signed-off-by: Mike Manning Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 13c3b697f8c0..f553f72d0bee 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_policy = 0, }; -/* Check if a valid qdisc is available */ -static inline bool addrconf_qdisc_ok(const struct net_device *dev) +/* Check if link is ready: is it up and is a valid qdisc available */ +static inline bool addrconf_link_ready(const struct net_device *dev) { - return !qdisc_tx_is_noop(dev); + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); } static void addrconf_del_rs_timer(struct inet6_dev *idev) @@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->token = in6addr_any; - if (netif_running(dev) && addrconf_qdisc_ok(dev)) + if (netif_running(dev) && addrconf_link_ready(dev)) ndev->if_flags |= IF_READY; ipv6_mc_init_dev(ndev); @@ -3403,7 +3403,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, /* restore routes for permanent addresses */ addrconf_permanent_addr(dev); - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", dev->name); @@ -3418,7 +3418,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } } else if (event == NETDEV_CHANGE) { - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is still not ready. */ break; } From 84e14fe353de7624872e582887712079ba0b2d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 26 Sep 2017 21:32:42 -0700 Subject: [PATCH 0401/2177] net-ipv6: add support for sockopt(SOL_IPV6, IPV6_FREEBIND) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far we've been relying on sockopt(SOL_IP, IP_FREEBIND) being usable even on IPv6 sockets. However, it turns out it is perfectly reasonable to want to set freebind on an AF_INET6 SOCK_RAW socket - but there is no way to set any SOL_IP socket option on such a socket (they're all blindly errored out). One use case for this is to allow spoofing src ip on a raw socket via sendmsg cmsg. Tested: built, and booted # python >>> import socket >>> SOL_IP = socket.SOL_IP >>> SOL_IPV6 = socket.IPPROTO_IPV6 >>> IP_FREEBIND = 15 >>> IPV6_FREEBIND = 78 >>> s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM, 0) >>> s.getsockopt(SOL_IP, IP_FREEBIND) 0 >>> s.getsockopt(SOL_IPV6, IPV6_FREEBIND) 0 >>> s.setsockopt(SOL_IPV6, IPV6_FREEBIND, 1) >>> s.getsockopt(SOL_IP, IP_FREEBIND) 1 >>> s.getsockopt(SOL_IPV6, IPV6_FREEBIND) 1 Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. 
Miller --- include/uapi/linux/in6.h | 1 + net/ipv6/ipv6_sockglue.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 46444f8fbee4..4f8f3eb0699f 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -284,6 +284,7 @@ struct in6_flowlabel_req { #define IPV6_TRANSPARENT 75 #define IPV6_UNICAST_IF 76 #define IPV6_RECVFRAGSIZE 77 +#define IPV6_FREEBIND 78 /* * Multicast Routing: diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a5e466d4e093..b9404feabd78 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -377,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; + case IPV6_FREEBIND: + if (optlen < sizeof(int)) + goto e_inval; + /* we also don't have a separate freebind bit for IPV6 */ + inet_sk(sk)->freebind = valbool; + retv = 0; + break; + case IPV6_RECVORIGDSTADDR: if (optlen < sizeof(int)) goto e_inval; @@ -1214,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = inet_sk(sk)->transparent; break; + case IPV6_FREEBIND: + val = inet_sk(sk)->freebind; + break; + case IPV6_RECVORIGDSTADDR: val = np->rxopt.bits.rxorigdstaddr; break; From 45c1fd61d5cead2ae23880e0677b8660ab9006a4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 27 Sep 2017 22:45:13 +0100 Subject: [PATCH 0402/2177] mkiss: remove redundant check on len being zero The check on len is redundant as it is always greater than 1, so just remove it and make the printk less complex. Detected by CoverityScan, CID#1226729 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/hamradio/mkiss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index aec6c26563cf..54bf8e6e4a09 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -477,7 +477,8 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) cmd = 0; } ax->crcauto = (cmd ? 0 : 1); - printk(KERN_INFO "mkiss: %s: crc mode %s %d\n", ax->dev->name, (len) ? "set to" : "is", cmd); + printk(KERN_INFO "mkiss: %s: crc mode set to %d\n", + ax->dev->name, cmd); } spin_unlock_bh(&ax->buflock); netif_start_queue(dev); From 706cc9f51d9a22528af18d4b3ffbd17b30a1d3b0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 30 Sep 2017 09:24:28 +0200 Subject: [PATCH 0403/2177] batman-adv: Add argument names for function ptr definitions checkpatch started to report unnamed arguments in function pointer definitions. Add the corresponding names to these definitions to avoid this warning. 
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 033819aefc39..4daed7ad46f2 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -73,8 +73,8 @@ * list traversals just rcu-locked */ struct list_head batadv_hardif_list; -static int (*batadv_rx_handler[256])(struct sk_buff *, - struct batadv_hard_iface *); +static int (*batadv_rx_handler[256])(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -540,8 +540,8 @@ batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)) { - int (*curr)(struct sk_buff *, - struct batadv_hard_iface *); + int (*curr)(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); curr = batadv_rx_handler[packet_type]; if (curr != batadv_recv_unhandled_packet && From 21a2774ef5d4fde772fbcb9d0eabb76f585e5af5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 28 Sep 2017 11:19:06 -0700 Subject: [PATCH 0404/2177] Revert "net: dsa: bcm_sf2: Defer port enabling to calling port_enable" This reverts commit e85ec74ace29 ("net: dsa: bcm_sf2: Defer port enabling to calling port_enable") because this now makes an unbind followed by a bind to fail connecting to the ingrated PHY. What this patch missed is that we need the PHY to be enabled with bcm_sf2_gphy_enable_set() before probing it on the MDIO bus. This is correctly done in the ops->setup() function, but by the time ops->port_enable() runs, this is too late. Upon unbind we would power down the PHY, and so when we would bind again, the PHY would be left powered off. Fixes: e85ec74ace29 ("net: dsa: bcm_sf2: Defer port enabling to calling port_enable") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 898d5642b516..7aecc98d0a18 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -754,11 +754,14 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; - /* Disable unused ports and configure IMP port */ + /* Enable all valid ports and disable those unused */ for (port = 0; port < priv->hw_params.num_ports; port++) { - if (dsa_is_cpu_port(ds, port)) + /* IMP port receives special treatment */ + if ((1 << port) & ds->enabled_port_mask) + bcm_sf2_port_setup(ds, port, NULL); + else if (dsa_is_cpu_port(ds, port)) bcm_sf2_imp_setup(ds, port); - else if (!((1 << port) & ds->enabled_port_mask)) + else bcm_sf2_port_disable(ds, port, NULL); } From e876a8a7e9dd89dc88c12ca2e81beb478dbe9897 Mon Sep 17 00:00:00 2001 From: Mick Tarsel Date: Thu, 28 Sep 2017 13:53:18 -0700 Subject: [PATCH 0405/2177] ibmvnic: Set state UP State is initially reported as UNKNOWN. Before register call netif_carrier_off(). Once the device is opened, call netif_carrier_on() in order to set the state to UP. Signed-off-by: Mick Tarsel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cb8182f4fdfa..4bc14a901571 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -927,6 +927,7 @@ static int ibmvnic_open(struct net_device *netdev) } rc = __ibmvnic_open(netdev); + netif_carrier_on(netdev); mutex_unlock(&adapter->reset_lock); return rc; @@ -3899,6 +3900,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) if (rc) goto ibmvnic_init_fail; + netif_carrier_off(netdev); rc = register_netdev(netdev); if (rc) { dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); From ef739d8aabaa26dd99f5661e38a86f4d85d8dfed Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 14:34:22 +0100 Subject: [PATCH 0406/2177] net: ipmr: make function ipmr_notifier_init static The function ipmr_notifier_init is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: warning: symbol 'ipmr_notifier_init' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 292a8e80bdfa..a844738b38bd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3224,7 +3224,7 @@ static const struct fib_notifier_ops ipmr_notifier_ops_template = { .owner = THIS_MODULE, }; -int __net_init ipmr_notifier_init(struct net *net) +static int __net_init ipmr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; From b1c49d14200dae47544f7ea6ee9040ded01f8425 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 15:01:16 +0100 Subject: [PATCH 0407/2177] net_sched: remove redundant assignment to ret The assignment of -EINVAL to variable ret is redundant as it is being overwritten on the following error exit paths or to the return value from the following call to basic_set_parms. Fix this up by removing it. Cleans up clang warning message: net/sched/cls_basic.c:185:2: warning: Value stored to 'err' is never read Fixes: 1d8134fea2eb ("net_sched: use idr to allocate basic filter handles") Signed-off-by: Colin Ian King Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index cfeb6f158566..700b345b07f9 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -182,7 +182,6 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = -EINVAL; if (handle) { fnew->handle = handle; if (!fold) { From 721e08dad17e226ef68819d0a23dc53c25fe8ea5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 29 Sep 2017 10:52:17 -0700 Subject: [PATCH 0408/2177] bpf: Fix compiler warning on info.map_ids for 32bit platform This patch uses u64_to_user_ptr() to cast info.map_ids to a userspace ptr. It also tags the user_map_ids with '__user' for sparse check. Fixes: cb4d2b3f03d8 ("bpf: Add name, load_time, uid and map_ids to bpf_prog_info") Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 11a7f82a55d1..b927da66f653 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1405,7 +1405,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.nr_map_ids = prog->aux->used_map_cnt; ulen = min_t(u32, info.nr_map_ids, ulen); if (ulen) { - u32 *user_map_ids = (u32 *)info.map_ids; + u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); u32 i; for (i = 0; i < ulen; i++) From 09af87d18f6ba05588e6316c47fdacf06e28cce8 Mon Sep 17 00:00:00 2001 From: Simon Xiao Date: Fri, 29 Sep 2017 11:39:46 -0700 Subject: [PATCH 0409/2177] hv_netvsc: report stop_queue and wake_queue Report the numbers of events for stop_queue and wake_queue in ethtool stats. Example: ethtool -S eth0 NIC statistics: ... stop_queue: 7 wake_queue: 7 ... Signed-off-by: Simon Xiao Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc.c | 12 ++++++++++-- drivers/net/hyperv/netvsc_drv.c | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 5176be76ca7d..6f550e15a41c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -686,6 +686,8 @@ struct netvsc_ethtool_stats { unsigned long tx_busy; unsigned long tx_send_full; unsigned long rx_comp_busy; + unsigned long stop_queue; + unsigned long wake_queue; }; struct netvsc_vf_pcpu_stats { diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index b0d323e24978..6e5194916bbe 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -609,6 +609,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, { struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device *ndev = hv_get_drvdata(device); + struct net_device_context *ndev_ctx = netdev_priv(ndev); struct vmbus_channel *channel = device->channel; u16 q_idx = 0; int queue_sends; @@ -643,8 +644,10 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || - queue_sends < 1)) + queue_sends < 1)) { netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); + ndev_ctx->eth_stats.wake_queue++; + } } static void netvsc_send_completion(struct netvsc_device *net_device, @@ -749,6 +752,7 @@ static inline int netvsc_send_pkt( &net_device->chan_table[packet->q_idx]; struct vmbus_channel *out_channel = nvchan->channel; struct net_device *ndev = hv_get_drvdata(device); + struct net_device_context *ndev_ctx = netdev_priv(ndev); struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx); u64 req_id; int ret; @@ -789,12 +793,16 @@ static inline int netvsc_send_pkt( if (ret == 0) { atomic_inc_return(&nvchan->queue_sends); - if (ring_avail < RING_AVAIL_PERCENT_LOWATER) + if (ring_avail < RING_AVAIL_PERCENT_LOWATER) { netif_tx_stop_queue(txq); + ndev_ctx->eth_stats.stop_queue++; + } } else if (ret == -EAGAIN) { netif_tx_stop_queue(txq); + ndev_ctx->eth_stats.stop_queue++; if (atomic_read(&nvchan->queue_sends) < 1) { netif_tx_wake_queue(txq); + ndev_ctx->eth_stats.wake_queue++; ret = -ENOSPC; } } else { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e9d54c9ee78c..f300ae61c6c6 100644 --- a/drivers/net/hyperv/netvsc_drv.c 
+++ b/drivers/net/hyperv/netvsc_drv.c @@ -1126,6 +1126,8 @@ static const struct { { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) }, { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) }, + { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) }, + { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) }, }, vf_stats[] = { { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) }, { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) }, From 075cfdd659cb1e86f948f11ba577f27706f0756e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 20:51:23 +0100 Subject: [PATCH 0410/2177] net: hns3: fix null pointer dereference before null check pointer ndev is being dereferenced with the call to netif_running before it is being null checked. Re-order the code to only dereference ndev after it has been null checked. Detected by CoverityScan, CID#1457206 ("Dereference before null check") Fixes: 9df8f79a4d29 ("net: hns3: Add DCB support when interacting with network stack") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 4a0890f98b70..c31506514e5d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2865,7 +2865,7 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; struct net_device *ndev = kinfo->netdev; - bool if_running = netif_running(ndev); + bool if_running; int ret; u8 i; @@ -2875,6 +2875,8 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) if (!ndev) return -ENODEV; + if_running = netif_running(ndev); + ret = netdev_set_num_tc(ndev, tc); if (ret) return ret; From 3775b1b7f0c330e59c434d1852d7762ae0a9c164 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:15 -0400 Subject: [PATCH 0411/2177] net: dsa: add master helper to look up slaves The DSA tagging code does not need to know about the DSA architecture, it only needs to return the slave device corresponding to the source port index (and eventually the source device index for cascade-capable switches) parsed from the frame received on the master device. For this purpose, provide an inline dsa_master_get_slave helper which validates the device and port indexes and look up the slave device. This makes the tagging rcv functions more concise and robust, and also makes dsa_get_cpu_port obsolete. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa_priv.h | 24 +++++++++++++++++++----- net/dsa/tag_brcm.c | 9 ++------- net/dsa/tag_dsa.c | 18 ++---------------- net/dsa/tag_edsa.c | 18 ++---------------- net/dsa/tag_ksz.c | 9 +++------ net/dsa/tag_lan9303.c | 20 ++------------------ net/dsa/tag_mtk.c | 16 +++------------- net/dsa/tag_qca.c | 17 +++-------------- net/dsa/tag_trailer.c | 9 +++------ 9 files changed, 39 insertions(+), 101 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index eccc62776283..d429505dc4e7 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -113,6 +113,25 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int dsa_master_ethtool_setup(struct net_device *dev); void dsa_master_ethtool_restore(struct net_device *dev); +static inline struct net_device *dsa_master_get_slave(struct net_device *dev, + int device, int port) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + + if (device < 0 || device >= DSA_MAX_SWITCHES) + return NULL; + + ds = dst->ds[device]; + if (!ds) + return NULL; + + if (port < 0 || port >= ds->num_ports) + return NULL; + + return ds->ports[port].netdev; +} + /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); @@ -182,9 +201,4 @@ static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p) return p->dp->cpu_dp->netdev; } -static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst) -{ - return dst->cpu_dp; -} - #endif diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index dbb016434ace..8e4bdb9d9ae3 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -92,9 +92,6 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; int source_port; u8 *brcm_tag; @@ -117,8 +114,8 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Locate which port this is coming from */ source_port = brcm_tag[3] & BRCM_EG_PID_MASK; - /* Validate port against switch setup, either the port is totally */ - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_get_slave(dev, 0, source_port); + if (!skb->dev) return NULL; /* Remove Broadcom tag and update checksum */ @@ -129,8 +126,6 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->data - ETH_HLEN - BRCM_TAG_LEN, 2 * ETH_ALEN); - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index fbf9ca954773..c77218f173d1 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -67,8 +67,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; u8 *dsa_header; int source_device; int source_port; @@ -93,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; - /* - * Check that the source device exists and that the source - * port is a registered DSA port. 
- */ - if (source_device >= DSA_MAX_SWITCHES) - return NULL; - - ds = dst->ds[source_device]; - if (!ds) - return NULL; - - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_get_slave(dev, source_device, source_port); + if (!skb->dev) return NULL; /* @@ -153,8 +141,6 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 76367ba1b2e2..0b83cbe0c9e8 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -80,8 +80,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; u8 *edsa_header; int source_device; int source_port; @@ -106,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, source_device = edsa_header[0] & 0x1f; source_port = (edsa_header[1] >> 3) & 0x1f; - /* - * Check that the source device exists and that the source - * port is a registered DSA port. - */ - if (source_device >= DSA_MAX_SWITCHES) - return NULL; - - ds = dst->ds[source_device]; - if (!ds) - return NULL; - - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_get_slave(dev, source_device, source_port); + if (!skb->dev) return NULL; /* @@ -172,8 +160,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 010ca0a336c4..b241c990cde0 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -80,22 +80,19 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; u8 *tag; int source_port; tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; source_port = tag[0] & 7; - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + + skb->dev = dsa_master_get_slave(dev, 0, source_port); + if (!skb->dev) return NULL; pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN); - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 0b9826105e42..4f211e56c81f 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -71,17 +71,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { u16 *lan9303_tag; - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; unsigned int source_port; - ds = dst->ds[0]; - - if (unlikely(!ds)) { - dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n"); - return NULL; - } - if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) { dev_warn_ratelimited(&dev->dev, "Dropping packet, cannot pull\n"); @@ -103,16 +94,12 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, source_port = ntohs(lan9303_tag[1]) & 0x3; - if (source_port >= ds->num_ports) { + skb->dev = dsa_master_get_slave(dev, 0, source_port); + if (!skb->dev) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); return NULL; } - if 
(!ds->ports[source_port].netdev) { - dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n"); - return NULL; - } - /* remove the special VLAN tag between the MAC addresses * and the current ethertype field. */ @@ -120,9 +107,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), 2 * ETH_ALEN); - /* forward the packet to the dedicated interface */ - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index ec8ee5f43255..968586c5d40e 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -46,8 +46,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; int port; __be16 *phdr, hdr; @@ -68,20 +66,12 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->data - ETH_HLEN - MTK_HDR_LEN, 2 * ETH_ALEN); - /* This protocol doesn't support cascading multiple - * switches so it's safe to assume the switch is first - * in the tree. - */ - ds = dst->ds[0]; - if (!ds) - return NULL; - /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); - if (!ds->ports[port].netdev) - return NULL; - skb->dev = ds->ports[port].netdev; + skb->dev = dsa_master_get_slave(dev, 0, port); + if (!skb->dev) + return NULL; return skb; } diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 1d4c70711c0f..8d33d9ebf910 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -65,9 +65,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds; u8 ver; int port; __be16 *phdr, hdr; @@ -92,20 +89,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, ETH_HLEN - QCA_HDR_LEN); - /* This protocol doesn't support cascading multiple switches so it's - * safe to assume the switch is first in the tree - */ - ds = cpu_dp->ds; - if (!ds) - return NULL; - /* Get source port information */ port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); - if (!ds->ports[port].netdev) - return NULL; - /* Update skb & forward the frame accordingly */ - skb->dev = ds->ports[port].netdev; + skb->dev = dsa_master_get_slave(dev, 0, port); + if (!skb->dev) + return NULL; return skb; } diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index d2fd4923aa3e..61668be267f5 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -58,9 +58,6 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; u8 *trailer; int source_port; @@ -73,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, return NULL; source_port = trailer[1] & 7; - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + + skb->dev = dsa_master_get_slave(dev, 0, source_port); + if (!skb->dev) return NULL; 
pskb_trim_rcsum(skb, skb->len - 4); - skb->dev = ds->ports[source_port].netdev; - return skb; } From 7ec764eef934409c4e15539440c31bca0b8de005 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:16 -0400 Subject: [PATCH 0412/2177] net: dsa: use cpu_dp in master code Make it clear that the master device is linked to a CPU port by using "cpu_dp" for the dsa_port variable in master.c instead of "port", then use a "port" variable to describe the port index, as usually seen in other places of DSA core. This will make the future patch touching dsa_ptr more readable. There is no functional changes. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/master.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/net/dsa/master.c b/net/dsa/master.c index 5e5147ec5a44..ef15d35f1574 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -17,9 +17,10 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, uint64_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; - struct dsa_switch *ds = port->ds; - const struct ethtool_ops *ops = port->orig_ethtool_ops; + struct dsa_port *cpu_dp = dst->cpu_dp; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; int count = 0; if (ops && ops->get_sset_count && ops->get_ethtool_stats) { @@ -28,15 +29,15 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, } if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, port->index, data + count); + ds->ops->get_ethtool_stats(ds, port, data + count); } static int dsa_master_get_sset_count(struct net_device *dev, int sset) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; - struct dsa_switch *ds = port->ds; - const struct ethtool_ops *ops = port->orig_ethtool_ops; + struct dsa_port *cpu_dp = dst->cpu_dp; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; int count = 0; if (ops && ops->get_sset_count) @@ -52,16 +53,17 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; - struct dsa_switch *ds = port->ds; - const struct ethtool_ops *ops = port->orig_ethtool_ops; + struct dsa_port *cpu_dp = dst->cpu_dp; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; int len = ETH_GSTRING_LEN; int mcount = 0, count; unsigned int i; uint8_t pfx[4]; uint8_t *ndata; - snprintf(pfx, sizeof(pfx), "p%.2d", port->index); + snprintf(pfx, sizeof(pfx), "p%.2d", port); /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; @@ -76,7 +78,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, * the output after to prepend our CPU port prefix we * constructed earlier */ - ds->ops->get_strings(ds, port->index, ndata); + ds->ops->get_strings(ds, port, ndata); count = ds->ops->get_sset_count(ds); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), @@ -89,17 +91,17 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; - struct dsa_switch *ds = port->ds; 
+ struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_switch *ds = cpu_dp->ds; struct ethtool_ops *ops; ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); if (!ops) return -ENOMEM; - port->orig_ethtool_ops = dev->ethtool_ops; - if (port->orig_ethtool_ops) - memcpy(ops, port->orig_ethtool_ops, sizeof(*ops)); + cpu_dp->orig_ethtool_ops = dev->ethtool_ops; + if (cpu_dp->orig_ethtool_ops) + memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops)); ops->get_sset_count = dsa_master_get_sset_count; ops->get_ethtool_stats = dsa_master_get_ethtool_stats; @@ -113,8 +115,8 @@ int dsa_master_ethtool_setup(struct net_device *dev) void dsa_master_ethtool_restore(struct net_device *dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; + struct dsa_port *cpu_dp = dst->cpu_dp; - dev->ethtool_ops = port->orig_ethtool_ops; - port->orig_ethtool_ops = NULL; + dev->ethtool_ops = cpu_dp->orig_ethtool_ops; + cpu_dp->orig_ethtool_ops = NULL; } From 62fc95876298987c35e9fb10badd467f4787aae7 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:17 -0400 Subject: [PATCH 0413/2177] net: dsa: use temporary dsa_device_ops variable When resolving the DSA tagging protocol used by a CPU switch, use a temporary "tag_ops" variable to store the dsa_device_ops instead of using directly dst->tag_ops. This will make the future patches moving this pointer around easier to read. There is no functional changes. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 8 +++++--- net/dsa/legacy.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index dcccaebde708..6a10c5c1639f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -485,6 +485,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, struct dsa_switch_tree *dst, struct dsa_switch *ds) { + const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; struct net_device *ethernet_dev; struct device_node *ethernet; @@ -514,13 +515,14 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, ds->cpu_port_mask |= BIT(index); tag_protocol = ds->ops->get_tag_protocol(ds); - dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(dst->tag_ops)) { + tag_ops = dsa_resolve_tag_protocol(tag_protocol); + if (IS_ERR(tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); ds->cpu_port_mask &= ~BIT(index); - return PTR_ERR(dst->tag_ops); + return PTR_ERR(tag_ops); } + dst->tag_ops = tag_ops; dst->rcv = dst->tag_ops->rcv; return 0; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index ae505d8e4417..8e849013f69d 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -144,13 +144,15 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, * switch. 
*/ if (dst->cpu_dp->ds == ds) { + const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; tag_protocol = ops->get_tag_protocol(ds); - dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(dst->tag_ops)) - return PTR_ERR(dst->tag_ops); + tag_ops = dsa_resolve_tag_protocol(tag_protocol); + if (IS_ERR(tag_ops)) + return PTR_ERR(tag_ops); + dst->tag_ops = tag_ops; dst->rcv = dst->tag_ops->rcv; } From 152402483ed75b167d5628d414e876ffa7a6d4c4 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:18 -0400 Subject: [PATCH 0414/2177] net: dsa: add tagging ops to port The DSA tagging protocol operations are specific to each CPU port, thus the dsa_device_ops pointer belongs to the dsa_port structure. >From now on assign a slave's xmit copy from its CPU port tagging operations. This will ease the future support for multiple CPU ports. Also keep the tag_ops at the beginning of the dsa_port structure so that we ensure copies for hot path are in cacheline 1. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 3 +++ net/dsa/dsa2.c | 1 + net/dsa/dsa_priv.h | 2 +- net/dsa/legacy.c | 1 + net/dsa/slave.c | 3 +-- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 8dee216a5a9b..4d1df2f086e8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -175,6 +175,9 @@ struct dsa_mall_tc_entry { struct dsa_port { + /* CPU port tagging operations used by master or slave devices */ + const struct dsa_device_ops *tag_ops; + struct dsa_switch *ds; unsigned int index; const char *name; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 6a10c5c1639f..9eac4726dc0c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -522,6 +522,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, return PTR_ERR(tag_ops); } + dst->cpu_dp->tag_ops = tag_ops; dst->tag_ops = tag_ops; dst->rcv = dst->tag_ops->rcv; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index d429505dc4e7..9397291bb3aa 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -66,7 +66,7 @@ struct dsa_notifier_vlan_info { }; struct dsa_slave_priv { - /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */ + /* Copy of CPU port xmit for faster access in slave transmit hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 8e849013f69d..4d374541815a 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -152,6 +152,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, if (IS_ERR(tag_ops)) return PTR_ERR(tag_ops); + dst->cpu_dp->tag_ops = tag_ops; dst->tag_ops = tag_ops; dst->rcv = dst->tag_ops->rcv; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index bf8800de13c1..4b634db05cee 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1117,7 +1117,6 @@ int dsa_slave_resume(struct net_device *slave_dev) int dsa_slave_create(struct dsa_port *port, const char *name) { struct dsa_switch *ds = port->ds; - struct dsa_switch_tree *dst = ds->dst; struct net_device *master; struct net_device *slave_dev; struct dsa_slave_priv *p; @@ -1162,7 +1161,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name) } p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); - p->xmit = dst->tag_ops->xmit; + p->xmit = cpu_dp->tag_ops->xmit; p->old_pause = -1; p->old_link = -1; From 3e41f93b358a8800194b87995ad076fc50919719 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:19 -0400 Subject: [PATCH 0415/2177] net: 
dsa: prepare master receive hot path In preparation to make DSA master devices point to their corresponding CPU port instead of the whole tree, add copies of dst and rcv in the dsa_port structure so that we keep fast access in the receive hot path. Also keep the copies at the beginning of the dsa_port structure in order to ensure they are available in cacheline 1. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ net/dsa/dsa2.c | 4 ++++ net/dsa/legacy.c | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 4d1df2f086e8..6bda01fa5747 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -178,6 +178,11 @@ struct dsa_port { /* CPU port tagging operations used by master or slave devices */ const struct dsa_device_ops *tag_ops; + /* Copies for faster access in master receive hot path */ + struct dsa_switch_tree *dst; + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt); + struct dsa_switch *ds; unsigned int index; const char *name; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9eac4726dc0c..b71e3bb478e4 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -524,7 +524,11 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, dst->cpu_dp->tag_ops = tag_ops; dst->tag_ops = tag_ops; + + /* Make a few copies for faster access in master receive hot path */ + dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; dst->rcv = dst->tag_ops->rcv; + dst->cpu_dp->dst = dst; return 0; } diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 4d374541815a..96c7e3f8b8bb 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -154,7 +154,11 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, dst->cpu_dp->tag_ops = tag_ops; dst->tag_ops = tag_ops; + + /* Few copies for faster access in master receive hot path */ + dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; dst->rcv = dst->tag_ops->rcv; + dst->cpu_dp->dst = dst; } memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); From 2f657a600409f1961d67642fe384a9d4be71d36a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:20 -0400 Subject: [PATCH 0416/2177] net: dsa: change dsa_ptr for a dsa_port With DSA, a master net device (CPU facing interface) has a dsa_ptr pointer to which hangs a dsa_switch_tree. This is not correct because a master interface is wired to a dedicated switch port, and because we can theoretically have several master interfaces pointing to several CPU ports of the same switch fabric. Change the master interface's dsa_ptr for the CPU dsa_port pointer. This is a step towards supporting multiple CPU ports. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- net/dsa/dsa.c | 6 +++--- net/dsa/dsa2.c | 2 +- net/dsa/dsa_priv.h | 3 ++- net/dsa/legacy.c | 2 +- net/dsa/master.c | 15 +++++---------- 6 files changed, 14 insertions(+), 18 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f535779d9dc1..e1d6ef130611 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -55,7 +55,7 @@ struct netpoll_info; struct device; struct phy_device; -struct dsa_switch_tree; +struct dsa_port; /* 802.11 specific */ struct wireless_dev; @@ -1752,7 +1752,7 @@ struct net_device { struct vlan_info __rcu *vlan_info; #endif #if IS_ENABLED(CONFIG_NET_DSA) - struct dsa_switch_tree *dsa_ptr; + struct dsa_port *dsa_ptr; #endif #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 81c852e32821..51ca2a524a27 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -160,12 +160,12 @@ EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *unused) { - struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; struct pcpu_sw_netstats *s; struct dsa_slave_priv *p; - if (unlikely(dst == NULL)) { + if (unlikely(!cpu_dp)) { kfree_skb(skb); return 0; } @@ -174,7 +174,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; - nskb = dst->rcv(skb, dev, pt); + nskb = cpu_dp->rcv(skb, dev, pt); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index b71e3bb478e4..62302558f38c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -438,7 +438,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) * sent to the tag format's receive function. */ wmb(); - dst->cpu_dp->netdev->dsa_ptr = dst; + dst->cpu_dp->netdev->dsa_ptr = dst->cpu_dp; err = dsa_master_ethtool_setup(dst->cpu_dp->netdev); if (err) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 9397291bb3aa..2850077cc9cc 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -116,7 +116,8 @@ void dsa_master_ethtool_restore(struct net_device *dev); static inline struct net_device *dsa_master_get_slave(struct net_device *dev, int device, int port) { - struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; struct dsa_switch *ds; if (device < 0 || device >= DSA_MAX_SWITCHES) diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 96c7e3f8b8bb..71917505a5cc 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -607,7 +607,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, * sent to the tag format's receive function. 
*/ wmb(); - dev->dsa_ptr = dst; + dev->dsa_ptr = dst->cpu_dp; return dsa_master_ethtool_setup(dst->cpu_dp->netdev); } diff --git a/net/dsa/master.c b/net/dsa/master.c index ef15d35f1574..5f3f57e372e0 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -16,8 +16,7 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; struct dsa_switch *ds = cpu_dp->ds; int port = cpu_dp->index; @@ -34,8 +33,7 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, static int dsa_master_get_sset_count(struct net_device *dev, int sset) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; struct dsa_switch *ds = cpu_dp->ds; int count = 0; @@ -52,8 +50,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset) static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; struct dsa_switch *ds = cpu_dp->ds; int port = cpu_dp->index; @@ -90,8 +87,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, int dsa_master_ethtool_setup(struct net_device *dev) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch *ds = cpu_dp->ds; struct ethtool_ops *ops; @@ -114,8 +110,7 @@ int dsa_master_ethtool_setup(struct net_device *dev) void dsa_master_ethtool_restore(struct net_device *dev) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; dev->ethtool_ops = cpu_dp->orig_ethtool_ops; cpu_dp->orig_ethtool_ops = NULL; From aa193d9b1d7ea6893ce24a9d141f676950563987 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:21 -0400 Subject: [PATCH 0417/2177] net: dsa: remove tag ops from the switch tree Now that the dsa_ptr is a dsa_port instance, there is no need to keep the tag operations in the dsa_switch_tree structure. Remove it. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 11 ----------- net/dsa/dsa2.c | 2 -- net/dsa/legacy.c | 2 -- 3 files changed, 15 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 6bda01fa5747..10dceccd9ce8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -130,11 +130,6 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* Copy of tag_ops->rcv for faster access in hot path */ - struct sk_buff * (*rcv)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt); - /* * The switch port to which the CPU is attached. */ @@ -144,12 +139,6 @@ struct dsa_switch_tree { * Data for the individual switch chips. */ struct dsa_switch *ds[DSA_MAX_SWITCHES]; - - /* - * Tagging protocol operations for adding and removing an - * encapsulation tag. 
- */ - const struct dsa_device_ops *tag_ops; }; /* TC matchall action types, only mirroring for now */ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 62302558f38c..54ed054777bd 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -523,11 +523,9 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, } dst->cpu_dp->tag_ops = tag_ops; - dst->tag_ops = tag_ops; /* Make a few copies for faster access in master receive hot path */ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; - dst->rcv = dst->tag_ops->rcv; dst->cpu_dp->dst = dst; return 0; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 71917505a5cc..19ff6e0a21dc 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -153,11 +153,9 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, return PTR_ERR(tag_ops); dst->cpu_dp->tag_ops = tag_ops; - dst->tag_ops = tag_ops; /* Few copies for faster access in master receive hot path */ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; - dst->rcv = dst->tag_ops->rcv; dst->cpu_dp->dst = dst; } From e1cfcbe82b4534bd0f99fef92a6d33843fd85e0e Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:40 +0800 Subject: [PATCH 0418/2177] ipv4: Namespaceify tcp_fastopen knob Different namespace application might require enable TCP Fast Open feature independently of the host. This patch series continues making more of the TCP Fast Open related sysctl knobs be per net-namespace. Reported-by: Luca BRUNO Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/af_inet.c | 7 ++++--- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_fastopen.c | 11 +++++------ net/ipv4/tcp_ipv4.c | 2 ++ 7 files changed, 21 insertions(+), 19 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index abc84d986da4..16420ccaef15 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -128,6 +128,7 @@ struct netns_ipv4 { int sysctl_tcp_timestamps; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; + int sysctl_tcp_fastopen; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/include/net/tcp.h b/include/net/tcp.h index 770b608c8439..9e414a99034f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e31108e5ef79..ddd126d120ac 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; unsigned char old_state; - int err; + int err, tcp_fastopen; lock_sock(sk); @@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). 
*/ - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && - (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0d3c038d7b04..e31e853cf486 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_fastopen", - .data = &sysctl_tcp_fastopen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_fastopen_key", .mode = 0600, @@ -1085,6 +1078,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fastopen", + .data = &init_net.ipv4.sysctl_tcp_fastopen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5091402720ab..dac56c4ad357 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1126,7 +1126,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; @@ -2759,7 +2759,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index e3c33220c418..31b08ec38cb8 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -9,8 +9,6 @@ #include #include -int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE; - struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); @@ -279,21 +277,22 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc) { - struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); - if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + if (!((tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { foc->len = -1; return NULL; } - if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) + if (syn_data && (tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (foc->len >= 0 && /* Client presents or requests a cookie */ @@ -347,7 +346,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, return false; } - if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { + if (sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { 
cookie->len = -1; return true; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9416b5162bc..88409b13c9d2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2472,6 +2472,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; + net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; + return 0; fail: tcp_sk_exit(net); From dd000598a39b6937fcefdf143720ec9fb5250e72 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:41 +0800 Subject: [PATCH 0419/2177] ipv4: Remove the 'publish' logic in tcp_fastopen_init_key_once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'publish' logic is not necessary after commit dfea2aa65424 ("tcp: Do not call tcp_fastopen_reset_cipher from interrupt context"), because in tcp_fastopen_cookie_gen,it wouldn't call tcp_fastopen_init_key_once. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 5 ----- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 4 ++-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9e414a99034f..d9376e2458e9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1555,7 +1555,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc); -void tcp_fastopen_init_key_once(bool publish); +void tcp_fastopen_init_key_once(void); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ddd126d120ac..e73ce79d7176 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog) (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); - tcp_fastopen_init_key_once(true); + tcp_fastopen_init_key_once(); } err = inet_csk_listen_start(sk, backlog); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e31e853cf486..f6324ead0e19 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -282,11 +282,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, ret = -EINVAL; goto bad_key; } - /* Generate a dummy secret but don't publish it. 
This - * is needed so we don't regenerate a new key on the - * first invocation of tcp_fastopen_cookie_gen - */ - tcp_fastopen_init_key_once(false); tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dac56c4ad357..4e395452d69f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - tcp_fastopen_init_key_once(true); + tcp_fastopen_init_key_once(); fastopen_queue_tune(sk, val); } else { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 31b08ec38cb8..8c8f0f0af59d 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -13,7 +13,7 @@ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); -void tcp_fastopen_init_key_once(bool publish) +void tcp_fastopen_init_key_once(void) { static u8 key[TCP_FASTOPEN_KEY_LENGTH]; @@ -23,7 +23,7 @@ void tcp_fastopen_init_key_once(bool publish) * All call sites of tcp_fastopen_cookie_gen also check * for a valid cookie, so this is an acceptable risk. */ - if (net_get_random_once(key, sizeof(key)) && publish) + if (net_get_random_once(key, sizeof(key))) tcp_fastopen_reset_cipher(key, sizeof(key)); } From 437138485656c41e32b8c63c0987cfa0348be0e6 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:42 +0800 Subject: [PATCH 0420/2177] ipv4: Namespaceify tcp_fastopen_key knob Different namespace application might require different tcp_fastopen_key independently of the host. David Miller pointed out there is a leak without releasing the context of tcp_fastopen_key during netns teardown. So add the release action in exit_batch path. Tested: 1. Container namespace: # cat /proc/sys/net/ipv4/tcp_fastopen_key: 2817fff2-f803cf97-eadfd1f3-78c0992b cookie key in tcp syn packets: Fast Open Cookie Kind: TCP Fast Open Cookie (34) Length: 10 Fast Open Cookie: 1e5dd82a8c492ca9 2. Host: # cat /proc/sys/net/ipv4/tcp_fastopen_key: 107d7c5f-68eb2ac7-02fb06e6-ed341702 cookie key in tcp syn packets: Fast Open Cookie Kind: TCP Fast Open Cookie (34) Length: 10 Fast Open Cookie: e213c02bf0afbc8a Signed-off-by: Haishuang Yan Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 4 +++ include/net/tcp.h | 6 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 21 +++++++------ net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 64 +++++++++++++++++++++++++------------- net/ipv4/tcp_ipv4.c | 6 ++++ 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 16420ccaef15..7bb9603ff66c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -36,6 +36,8 @@ struct inet_timewait_death_row { int sysctl_max_tw_buckets; }; +struct tcp_fastopen_context; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -129,6 +131,8 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; + struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; + spinlock_t tcp_fastopen_ctx_lock; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/include/net/tcp.h b/include/net/tcp.h index d9376e2458e9..6d25d8305054 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1549,13 +1549,13 @@ struct tcp_fastopen_request { }; void tcp_free_fastopen_req(struct tcp_sock *tp); -extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; -int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_ctx_destroy(struct net *net); +int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc); -void tcp_fastopen_init_key_once(void); +void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e73ce79d7176..43a1bbed7a42 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog) (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); - tcp_fastopen_init_key_once(); + tcp_fastopen_init_key_once(sock_net(sk)); } err = inet_csk_listen_start(sk, backlog); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f6324ead0e19..20e19fe78dbd 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -251,10 +251,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl, return ret; } -static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, +static int proc_tcp_fastopen_key(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_fastopen); struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; int ret; @@ -265,7 +267,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, return -ENOMEM; rcu_read_lock(); - ctxt = rcu_dereference(tcp_fastopen_ctx); + ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); if (ctxt) memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); else @@ -282,7 +284,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, ret = -EINVAL; goto bad_key; } - tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); + tcp_fastopen_reset_cipher(net, user_key, 
TCP_FASTOPEN_KEY_LENGTH); } bad_key: @@ -395,12 +397,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_fastopen_key", - .mode = 0600, - .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), - .proc_handler = proc_tcp_fastopen_key, - }, { .procname = "tcp_fastopen_blackhole_timeout_sec", .data = &sysctl_tcp_fastopen_blackhole_timeout, @@ -1080,6 +1076,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_fastopen_key", + .mode = 0600, + .data = &init_net.ipv4.sysctl_tcp_fastopen, + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + .proc_handler = proc_tcp_fastopen_key, + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4e395452d69f..23225c98d287 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - tcp_fastopen_init_key_once(); + tcp_fastopen_init_key_once(net); fastopen_queue_tune(sk, val); } else { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8c8f0f0af59d..4eae44ac3cb0 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -9,13 +9,18 @@ #include #include -struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; - -static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); - -void tcp_fastopen_init_key_once(void) +void tcp_fastopen_init_key_once(struct net *net) { - static u8 key[TCP_FASTOPEN_KEY_LENGTH]; + u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct tcp_fastopen_context *ctxt; + + rcu_read_lock(); + ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctxt) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); /* tcp_fastopen_reset_cipher publishes the new context * atomically, so we allow this race happening here. @@ -23,8 +28,8 @@ void tcp_fastopen_init_key_once(void) * All call sites of tcp_fastopen_cookie_gen also check * for a valid cookie, so this is an acceptable risk. 
*/ - if (net_get_random_once(key, sizeof(key))) - tcp_fastopen_reset_cipher(key, sizeof(key)); + get_random_bytes(key, sizeof(key)); + tcp_fastopen_reset_cipher(net, key, sizeof(key)); } static void tcp_fastopen_ctx_free(struct rcu_head *head) @@ -35,7 +40,22 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head) kfree(ctx); } -int tcp_fastopen_reset_cipher(void *key, unsigned int len) +void tcp_fastopen_ctx_destroy(struct net *net) +{ + struct tcp_fastopen_context *ctxt; + + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); + + ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); + rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL); + spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); + + if (ctxt) + call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); +} + +int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len) { int err; struct tcp_fastopen_context *ctx, *octx; @@ -59,26 +79,27 @@ error: kfree(ctx); } memcpy(ctx->key, key, len); - spin_lock(&tcp_fastopen_ctx_lock); + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); - octx = rcu_dereference_protected(tcp_fastopen_ctx, - lockdep_is_held(&tcp_fastopen_ctx_lock)); - rcu_assign_pointer(tcp_fastopen_ctx, ctx); - spin_unlock(&tcp_fastopen_ctx_lock); + octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); + rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); + spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); return err; } -static bool __tcp_fastopen_cookie_gen(const void *path, +static bool __tcp_fastopen_cookie_gen(struct net *net, + const void *path, struct tcp_fastopen_cookie *foc) { struct tcp_fastopen_context *ctx; bool ok = false; rcu_read_lock(); - ctx = rcu_dereference(tcp_fastopen_ctx); + ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); if (ctx) { crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); foc->len = TCP_FASTOPEN_COOKIE_SIZE; @@ -94,7 +115,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path, * * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. 
*/ -static bool tcp_fastopen_cookie_gen(struct request_sock *req, +static bool tcp_fastopen_cookie_gen(struct net *net, + struct request_sock *req, struct sk_buff *syn, struct tcp_fastopen_cookie *foc) { @@ -102,7 +124,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, const struct iphdr *iph = ip_hdr(syn); __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; - return __tcp_fastopen_cookie_gen(path, foc); + return __tcp_fastopen_cookie_gen(net, path, foc); } #if IS_ENABLED(CONFIG_IPV6) @@ -110,13 +132,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, const struct ipv6hdr *ip6h = ipv6_hdr(syn); struct tcp_fastopen_cookie tmp; - if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { + if (__tcp_fastopen_cookie_gen(net, &ip6h->saddr, &tmp)) { struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; - return __tcp_fastopen_cookie_gen(buf, foc); + return __tcp_fastopen_cookie_gen(net, buf, foc); } } #endif @@ -296,7 +318,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, goto fastopen; if (foc->len >= 0 && /* Client presents or requests a cookie */ - tcp_fastopen_cookie_gen(req, skb, &valid_foc) && + tcp_fastopen_cookie_gen(sock_net(sk), req, skb, &valid_foc) && foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 88409b13c9d2..49c74c0d0d21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2473,6 +2473,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_timestamps = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; + spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); return 0; fail: @@ -2483,7 +2484,12 @@ fail: static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { + struct net *net; + inet_twsk_purge(&tcp_hashinfo, AF_INET); + + list_for_each_entry(net, net_exit_list, exit_list) + tcp_fastopen_ctx_destroy(net); } static struct pernet_operations __net_initdata tcp_sk_ops = { From 3733be14a32bae288b61ed28341e593baba983af Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:43 +0800 Subject: [PATCH 0421/2177] ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob Different namespace application might require different time period in second to disable Fastopen on active TCP sockets. Tested: Simulate following similar situation that the server's data gets dropped after 3WHS. C ---- syn-data ---> S C <--- syn/ack ----- S C ---- ack --------> S S (accept & write) C? X <- data ------ S [retry and timeout] And then print netstat of TCPFastOpenBlackhole, the counter increased as expected when the firewall blackhole issue is detected and active TFO is disabled. # cat /proc/net/netstat | awk '{print $91}' TCPFastOpenBlackhole 1 Signed-off-by: Haishuang Yan Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 3 +++ net/ipv4/sysctl_net_ipv4.c | 20 +++++++++++--------- net/ipv4/tcp_fastopen.c | 30 +++++++++++------------------- net/ipv4/tcp_ipv4.c | 2 ++ 4 files changed, 27 insertions(+), 28 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7bb9603ff66c..2c4222a5d102 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,9 @@ struct netns_ipv4 { int sysctl_tcp_fastopen; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; spinlock_t tcp_fastopen_ctx_lock; + unsigned int sysctl_tcp_fastopen_blackhole_timeout; + atomic_t tfo_active_disable_times; + unsigned long tfo_active_disable_stamp; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 20e19fe78dbd..cac8dd309f39 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -355,11 +355,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_fastopen_blackhole_timeout); int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) - tcp_fastopen_active_timeout_reset(); + atomic_set(&net->ipv4.tfo_active_disable_times, 0); return ret; } @@ -397,14 +399,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_fastopen_blackhole_timeout_sec", - .data = &sysctl_tcp_fastopen_blackhole_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_tfo_blackhole_detect_timeout, - .extra1 = &zero, - }, { .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, @@ -1083,6 +1077,14 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), .proc_handler = proc_tcp_fastopen_key, }, + { + .procname = "tcp_fastopen_blackhole_timeout_sec", + .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tfo_blackhole_detect_timeout, + .extra1 = &zero, + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4eae44ac3cb0..de470e7e586f 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -422,25 +422,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect); * TFO connection with data exchanges. 
*/ -/* Default to 1hr */ -unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60; -static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0); -static unsigned long tfo_active_disable_stamp __read_mostly; - /* Disable active TFO and record current jiffies and * tfo_active_disable_times */ void tcp_fastopen_active_disable(struct sock *sk) { - atomic_inc(&tfo_active_disable_times); - tfo_active_disable_stamp = jiffies; - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE); -} + struct net *net = sock_net(sk); -/* Reset tfo_active_disable_times to 0 */ -void tcp_fastopen_active_timeout_reset(void) -{ - atomic_set(&tfo_active_disable_times, 0); + atomic_inc(&net->ipv4.tfo_active_disable_times); + net->ipv4.tfo_active_disable_stamp = jiffies; + NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); } /* Calculate timeout for tfo active disable @@ -449,17 +440,18 @@ void tcp_fastopen_active_timeout_reset(void) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - int tfo_da_times = atomic_read(&tfo_active_disable_times); - int multiplier; + unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; + int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); unsigned long timeout; + int multiplier; if (!tfo_da_times) return false; /* Limit timout to max: 2^6 * initial timeout */ multiplier = 1 << min(tfo_da_times - 1, 6); - timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ; - if (time_before(jiffies, tfo_active_disable_stamp + timeout)) + timeout = multiplier * tfo_bh_timeout * HZ; + if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) return true; /* Mark check bit so we can check for successful active TFO @@ -495,10 +487,10 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) } } } else if (tp->syn_fastopen_ch && - atomic_read(&tfo_active_disable_times)) { + atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { dst = sk_dst_get(sk); if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) - tcp_fastopen_active_timeout_reset(); + atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); dst_release(dst); } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 49c74c0d0d21..ad3b5bbaf942 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2474,6 +2474,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; + atomic_set(&net->ipv4.tfo_active_disable_times, 0); return 0; fail: From b80ccfe9bbcac70e66fdfaef73f0988a27f9a68c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 26 Sep 2017 20:37:22 -0700 Subject: [PATCH 0422/2177] net-ipv6: remove unused IP6_ECN_clear() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is unused, and furthermore it is buggy since it suffers from the same issue that requires IP6_ECN_set_ce() to take a pointer to the skb so that it may (in case of CHECKSUM_COMPLETE) update skb->csum Instead of fixing it, let's just outright remove it. Tested: builds, and 'git grep IP6_ECN_clear' comes up empty Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. 
Miller --- include/net/inet_ecn.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index dce2d586d9ce..f5ff16d72fe6 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -133,11 +133,6 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) return 1; } -static inline void IP6_ECN_clear(struct ipv6hdr *iph) -{ - *(__be32*)iph &= ~htonl(INET_ECN_MASK << 20); -} - static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; From 9c86b846ce02f7e35d7234cf090b80553eba5389 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:15 +0200 Subject: [PATCH 0423/2177] bcm63xx_enet: correct clock usage Check the return code of prepare_enable and change one last instance of enable only to prepare_enable. Also properly disable and release the clock in error paths and on remove for enetsw. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 33 ++++++++++++++------ 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index c6221f04a748..a45ec97b5b1e 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1773,7 +1773,9 @@ static int bcm_enet_probe(struct platform_device *pdev) ret = PTR_ERR(priv->mac_clk); goto out; } - clk_prepare_enable(priv->mac_clk); + ret = clk_prepare_enable(priv->mac_clk); + if (ret) + goto out_put_clk_mac; /* initialize default and fetch platform data */ priv->rx_ring_size = BCMENET_DEF_RX_DESC; @@ -1805,9 +1807,11 @@ static int bcm_enet_probe(struct platform_device *pdev) if (IS_ERR(priv->phy_clk)) { ret = PTR_ERR(priv->phy_clk); priv->phy_clk = NULL; - goto out_put_clk_mac; + goto out_disable_clk_mac; } - clk_prepare_enable(priv->phy_clk); + ret = clk_prepare_enable(priv->phy_clk); + if (ret) + goto out_put_clk_phy; } /* do minimal hardware init to be able to probe mii bus */ @@ -1900,13 +1904,16 @@ out_free_mdio: out_uninit_hw: /* turn off mdc clock */ enet_writel(priv, 0, ENET_MIISC_REG); - if (priv->phy_clk) { + if (priv->phy_clk) clk_disable_unprepare(priv->phy_clk); - clk_put(priv->phy_clk); - } -out_put_clk_mac: +out_put_clk_phy: + if (priv->phy_clk) + clk_put(priv->phy_clk); + +out_disable_clk_mac: clk_disable_unprepare(priv->mac_clk); +out_put_clk_mac: clk_put(priv->mac_clk); out: free_netdev(dev); @@ -2748,7 +2755,9 @@ static int bcm_enetsw_probe(struct platform_device *pdev) ret = PTR_ERR(priv->mac_clk); goto out_unmap; } - clk_enable(priv->mac_clk); + ret = clk_prepare_enable(priv->mac_clk); + if (ret) + goto out_put_clk; priv->rx_chan = 0; priv->tx_chan = 1; @@ -2769,7 +2778,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) ret = register_netdev(dev); if (ret) - goto out_put_clk; + goto out_disable_clk; netif_carrier_off(dev); platform_set_drvdata(pdev, dev); @@ -2778,6 +2787,9 @@ static int bcm_enetsw_probe(struct platform_device *pdev) return 0; +out_disable_clk: + clk_disable_unprepare(priv->mac_clk); + out_put_clk: clk_put(priv->mac_clk); @@ -2809,6 +2821,9 @@ static int bcm_enetsw_remove(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); + clk_disable_unprepare(priv->mac_clk); + clk_put(priv->mac_clk); + free_netdev(dev); return 0; } From d6213c1f2ad54a964b77471690264ed685718928 Mon Sep 17 00:00:00 2001 From: 
Jonas Gorski Date: Sun, 1 Oct 2017 13:02:16 +0200 Subject: [PATCH 0424/2177] bcm63xx_enet: do not write to random DMA channel on BCM6345 The DMA controller regs actually point to DMA channel 0, so the write to ENETDMA_CFG_REG will actually modify a random DMA channel. Since DMA controller registers do not exist on BCM6345, guard the write with the usual check for dma_has_sram. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index a45ec97b5b1e..a1e1e12e187a 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1062,7 +1062,8 @@ static int bcm_enet_open(struct net_device *dev) val = enet_readl(priv, ENET_CTL_REG); val |= ENET_CTL_ENABLE_MASK; enet_writel(priv, val, ENET_CTL_REG); - enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG); + if (priv->dma_has_sram) + enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG); enet_dmac_writel(priv, priv->dma_chan_en_mask, ENETDMAC_CHANCFG, priv->rx_chan); From 527a48713b01057d94aeec8f4383b1e20c82522c Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:17 +0200 Subject: [PATCH 0425/2177] bcm63xx_enet: do not rely on probe order Do not rely on the shared device being probed before the enet(sw) devices. This makes it easier to eventually move out the shared device as a dma controller driver (what it should be). Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index a1e1e12e187a..8caf6abab3a6 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1722,10 +1722,8 @@ static int bcm_enet_probe(struct platform_device *pdev) const char *clk_name; int i, ret; - /* stop if shared driver failed, assume driver->probe will be - * called in the same order we register devices (correct ?) */ if (!bcm_enet_shared_base[0]) - return -ENODEV; + return -EPROBE_DEFER; res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); res_irq_rx = platform_get_resource(pdev, IORESOURCE_IRQ, 1); @@ -2696,11 +2694,8 @@ static int bcm_enetsw_probe(struct platform_device *pdev) struct resource *res_mem; int ret, irq_rx, irq_tx; - /* stop if shared driver failed, assume driver->probe will be - * called in the same order we register devices (correct ?) - */ if (!bcm_enet_shared_base[0]) - return -ENODEV; + return -EPROBE_DEFER; res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq_rx = platform_get_irq(pdev, 0); From 7e697ce99ceb09538cdc1dfa9ebb3db60236b0a7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:18 +0200 Subject: [PATCH 0426/2177] bcm63xx_enet: use managed functions for clock/ioremap Use managed functions where possible to reduce the amount of resource handling on error and remove paths. Signed-off-by: Jonas Gorski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 54 +++++--------------- 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 8caf6abab3a6..059ef4f1d137 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1767,14 +1767,14 @@ static int bcm_enet_probe(struct platform_device *pdev) clk_name = "enet1"; } - priv->mac_clk = clk_get(&pdev->dev, clk_name); + priv->mac_clk = devm_clk_get(&pdev->dev, clk_name); if (IS_ERR(priv->mac_clk)) { ret = PTR_ERR(priv->mac_clk); goto out; } ret = clk_prepare_enable(priv->mac_clk); if (ret) - goto out_put_clk_mac; + goto out; /* initialize default and fetch platform data */ priv->rx_ring_size = BCMENET_DEF_RX_DESC; @@ -1802,7 +1802,7 @@ static int bcm_enet_probe(struct platform_device *pdev) if (priv->mac_id == 0 && priv->has_phy && !priv->use_external_mii) { /* using internal PHY, enable clock */ - priv->phy_clk = clk_get(&pdev->dev, "ephy"); + priv->phy_clk = devm_clk_get(&pdev->dev, "ephy"); if (IS_ERR(priv->phy_clk)) { ret = PTR_ERR(priv->phy_clk); priv->phy_clk = NULL; @@ -1810,7 +1810,7 @@ static int bcm_enet_probe(struct platform_device *pdev) } ret = clk_prepare_enable(priv->phy_clk); if (ret) - goto out_put_clk_phy; + goto out_disable_clk_mac; } /* do minimal hardware init to be able to probe mii bus */ @@ -1906,14 +1906,8 @@ out_uninit_hw: if (priv->phy_clk) clk_disable_unprepare(priv->phy_clk); -out_put_clk_phy: - if (priv->phy_clk) - clk_put(priv->phy_clk); - out_disable_clk_mac: clk_disable_unprepare(priv->mac_clk); -out_put_clk_mac: - clk_put(priv->mac_clk); out: free_netdev(dev); return ret; @@ -1949,12 +1943,10 @@ static int bcm_enet_remove(struct platform_device *pdev) } /* disable hw block clocks */ - if (priv->phy_clk) { + if (priv->phy_clk) clk_disable_unprepare(priv->phy_clk); - clk_put(priv->phy_clk); - } + clk_disable_unprepare(priv->mac_clk); - clk_put(priv->mac_clk); free_netdev(dev); return 0; @@ -2734,26 +2726,20 @@ static int bcm_enetsw_probe(struct platform_device *pdev) if (ret) goto out; - if (!request_mem_region(res_mem->start, resource_size(res_mem), - "bcm63xx_enetsw")) { - ret = -EBUSY; + priv->base = devm_ioremap_resource(&pdev->dev, res_mem); + if (IS_ERR(priv->base)) { + ret = PTR_ERR(priv->base); goto out; } - priv->base = ioremap(res_mem->start, resource_size(res_mem)); - if (priv->base == NULL) { - ret = -ENOMEM; - goto out_release_mem; - } - - priv->mac_clk = clk_get(&pdev->dev, "enetsw"); + priv->mac_clk = devm_clk_get(&pdev->dev, "enetsw"); if (IS_ERR(priv->mac_clk)) { ret = PTR_ERR(priv->mac_clk); - goto out_unmap; + goto out; } ret = clk_prepare_enable(priv->mac_clk); if (ret) - goto out_put_clk; + goto out; priv->rx_chan = 0; priv->tx_chan = 1; @@ -2785,15 +2771,6 @@ static int bcm_enetsw_probe(struct platform_device *pdev) out_disable_clk: clk_disable_unprepare(priv->mac_clk); - -out_put_clk: - clk_put(priv->mac_clk); - -out_unmap: - iounmap(priv->base); - -out_release_mem: - release_mem_region(res_mem->start, resource_size(res_mem)); out: free_netdev(dev); return ret; @@ -2805,20 +2782,13 @@ static int bcm_enetsw_remove(struct platform_device *pdev) { struct bcm_enet_priv *priv; struct net_device *dev; - struct resource *res; /* stop netdevice */ dev = platform_get_drvdata(pdev); priv = netdev_priv(dev); unregister_netdev(dev); - /* release device resources */ - iounmap(priv->base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 
0); - release_mem_region(res->start, resource_size(res)); - clk_disable_unprepare(priv->mac_clk); - clk_put(priv->mac_clk); free_netdev(dev); return 0; From 4e78e5c5d881bf2d6267545a554c1baf245257b7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:19 +0200 Subject: [PATCH 0427/2177] bcm63xx_enet: drop unneeded NULL phy_clk check clk_disable and clk_unprepare are NULL-safe, so need to duplicate the NULL check of the functions. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 059ef4f1d137..f6bc13fe8a99 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1903,8 +1903,7 @@ out_free_mdio: out_uninit_hw: /* turn off mdc clock */ enet_writel(priv, 0, ENET_MIISC_REG); - if (priv->phy_clk) - clk_disable_unprepare(priv->phy_clk); + clk_disable_unprepare(priv->phy_clk); out_disable_clk_mac: clk_disable_unprepare(priv->mac_clk); @@ -1943,9 +1942,7 @@ static int bcm_enet_remove(struct platform_device *pdev) } /* disable hw block clocks */ - if (priv->phy_clk) - clk_disable_unprepare(priv->phy_clk); - + clk_disable_unprepare(priv->phy_clk); clk_disable_unprepare(priv->mac_clk); free_netdev(dev); From 840f922317fb5c20841d6d7f3853ead506546ade Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:20 +0200 Subject: [PATCH 0428/2177] bcm63xx_enet: remove unneeded include We don't use anyhing from that file, so drop it. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h index 0a1b7b2e55bd..dd6ae3077433 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h @@ -8,7 +8,6 @@ #include #include -#include #include #include From 45bfbc013b4294cadafbef821d377d3a99c7ab1e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 1 Oct 2017 17:27:35 +0100 Subject: [PATCH 0429/2177] mlxsw: spectrum: fix uninitialized value in err In the unlikely event that mfc->mfc_un.res.ttls[i] is 255 for all values of i from 0 to MAXIVS-1, the err is not set at all and hence has a garbage value on the error return at the end of the function, so initialize it to 0. Also, the error return check on err and goto to err: inside the for loop makes it impossible for err to be zero at the end of the for loop, so we can remove the redundant err check at the end of the loop. Detected by CoverityScan CID#1457207 ("Unitialized scalar value") Fixes: c011ec1bbfd6 ("mlxsw: spectrum: Add the multicast routing offloading logic") Signed-off-by: Colin Ian King Reviewed-by: Yotam Gigi Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 09120259a45d..4aaf6ca1be7c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -349,7 +349,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, { struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; struct mlxsw_sp_mr_route *mr_route; - int err; + int err = 0; int i; /* Allocate and init a new route and fill it with parameters */ @@ -376,8 +376,6 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, } } mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]); - if (err) - goto err; mr_route->route_action = mlxsw_sp_mr_route_action(mr_route); return mr_route; From 0929567a7a2dab8455a7313956973ff0d339709a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 1 Oct 2017 14:07:34 -0700 Subject: [PATCH 0430/2177] samples/bpf: fix warnings in xdp_monitor_user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make local functions static to fix HOSTCC samples/bpf/xdp_monitor_user.o samples/bpf/xdp_monitor_user.c:64:7: warning: no previous prototype for ‘gettime’ [-Wmissing-prototypes] __u64 gettime(void) ^~~~~~~ samples/bpf/xdp_monitor_user.c:209:6: warning: no previous prototype for ‘print_bpf_prog_info’ [-Wmissing-prototypes] void print_bpf_prog_info(void) ^~~~~~~~~~~~~~~~~~~ Fixes: 3ffab5460264 ("samples/bpf: xdp_monitor tool based on tracepoints") Signed-off-by: Stephen Hemminger Acked-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- samples/bpf/xdp_monitor_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index b51b4f5e3257..c5ab8b776973 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -61,7 +61,7 @@ static void usage(char *argv[]) } #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ -__u64 gettime(void) +static __u64 gettime(void) { struct timespec t; int res; @@ -206,7 +206,7 @@ static void stats_poll(int interval, bool err_only) } } -void print_bpf_prog_info(void) +static void print_bpf_prog_info(void) { int i; From 504871e602d9a9ea2321d47ca506887417f54e75 Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Tue, 26 Sep 2017 00:39:15 +0530 Subject: [PATCH 0431/2177] mac80211: fix bandwidth computation for TDLS peers Section 11.23.1 of 80211-2016 specification allows TDLS peers to operate on wider bandwidths though they are connected to a BSS which do not support wider bandwidth operations, provided both the peers advertise wider bandwidth capabilities. The existing logic considers the minimum of station's and AP's capability for bandwidth computation. The same logic applies for TDLS peers as well, this restricts operating on wider bandwidths over a TDLS link when the peers are connected to legacy APs. As an example, if 80Mhz VHT capable peers are connected to a 20Mhz 5 GHz AP, then as per the existing logic TDLS operation will be restricted to 20Mhz. Address this problem by not considering BSS capability in bandwidth computation if the participating TDLS peers have wider bandwidth capability. 
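A minimal sketch of the intended width selection, using hypothetical plain-integer widths in place of the mac80211 bandwidth enums:

        #include <stdbool.h>

        /* Illustrative only: all names here are hypothetical. The real logic
         * lives in ieee80211_sta_cur_vht_bw() and operates on
         * ieee80211_sta_rx_bandwidth values, not MHz integers.
         */
        static int tdls_link_width(int sta_cap, int peer_cap, int bss_width,
                                   bool peers_support_wider_bw)
        {
                int bw = sta_cap < peer_cap ? sta_cap : peer_cap;

                if (peers_support_wider_bw)
                        return bw;              /* e.g. 80 MHz on a 20 MHz BSS */

                return bw < bss_width ? bw : bss_width;
        }

With both peers VHT80-capable and the AP limited to 20 MHz, this yields 80 rather than clamping to the BSS width.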
Signed-off-by: Manikanta Pubbisetty [lots of wording/typo fixes] Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 19ec2189d3ac..b9276ac849fa 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) bw = ieee80211_sta_cap_rx_bw(sta); bw = min(bw, sta->cur_max_bandwidth); + + /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of + * IEEE80211-2016 specification makes higher bandwidth operation + * possible on the TDLS link if the peers have wider bandwidth + * capability. + */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) + return bw; + bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); return bw; From 8f797c288e3a788e9578dd4db08b624a6d4b6a9b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 26 Sep 2017 13:48:05 +0200 Subject: [PATCH 0432/2177] mac80211: fix STA_SLOW_THRESHOLD htmldocs failure Patch fixes htmldocs build problem: Error(.//net/mac80211/sta_info.h:416): cannot understand prototype: 'STA_SLOW_THRESHOLD 6000 ' Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3acbdfa9f649..a35c964f6217 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -398,7 +398,7 @@ struct ieee80211_sta_rx_stats { u64 msdu[IEEE80211_NUM_TIDS + 1]; }; -/** +/* * The bandwidth threshold below which the per-station CoDel parameters will be * scaled to be more lenient (to prevent starvation of slow stations). This * value will be scaled by the number of active stations when it is being From 66b1bedf662518e9b6367990a87e9601b35a94c1 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 29 Sep 2017 14:21:14 +0300 Subject: [PATCH 0433/2177] ieee80211: Add WFA TPC report element OUI type Add Transmit Power Control OUI type definition for WLAN_OUI_MICROSOFT. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 55a604ad459f..ee6657a0ed69 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2445,6 +2445,7 @@ enum ieee80211_sa_query_action { #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 #define WLAN_OUI_TYPE_MICROSOFT_WMM 2 #define WLAN_OUI_TYPE_MICROSOFT_WPS 4 +#define WLAN_OUI_TYPE_MICROSOFT_TPC 8 /* * WMM/802.11e Tspec Element From 503c1fb98ba3859c13863957c7c65c92371a9e50 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 29 Sep 2017 14:21:49 +0200 Subject: [PATCH 0434/2177] cfg80211/nl80211: add a port authorized event Add an event that indicates that a connection is authorized (i.e. the 4 way handshake was performed by the driver). This event should be sent by the driver after sending a connect/roamed event. This is useful for networks that require 802.1X authentication. In cases that the driver supports 4 way handshake offload, but the 802.1X authentication is managed by user space, the driver needs to inform user space right after the 802.11 association was completed so user space can initialize its 802.1X state machine etc. However, it is also possible that the AP will choose to skip the 802.1X authentication (e.g. 
when PMKSA caching is used) and proceed with the 4 way handshake immediately. In this case the driver needs to inform user space that 802.1X authentication is no longer required (e.g. to prevent user space from disconnecting since it did not get any EAPOLs from the AP). This is also useful for roaming, in which case it is possible that the driver used the Fast Transition protocol so 802.1X is not required. Since there will now be a dedicated notification indicating that the connection is authorized, the authorized flag can be removed from the roamed event. Drivers can send the new port authorized event right after sending the roamed event to indicate the new AP is already authorized. This therefore reserves the old PORT_AUTHORIZED attribute. Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 +++++++++++++---- include/uapi/linux/nl80211.h | 28 +++++++++++++--------- net/wireless/core.h | 5 ++++ net/wireless/nl80211.c | 34 ++++++++++++++++++++++++--- net/wireless/nl80211.h | 2 ++ net/wireless/sme.c | 45 +++++++++++++++++++++++++++++++++++- net/wireless/util.c | 3 +++ 7 files changed, 119 insertions(+), 19 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cc1996081463..8b8118a7fadb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5428,9 +5428,6 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @authorized: true if the 802.1X authentication was done by the driver or is - * not needed (e.g., when Fast Transition protocol was used), false - * otherwise. Ignored for networks that don't use 802.1X authentication. */ struct cfg80211_roam_info { struct ieee80211_channel *channel; @@ -5440,7 +5437,6 @@ struct cfg80211_roam_info { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; - bool authorized; }; /** @@ -5464,6 +5460,23 @@ struct cfg80211_roam_info { void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, gfp_t gfp); +/** + * cfg80211_port_authorized - notify cfg80211 of successful security association + * + * @dev: network device + * @bssid: the BSSID of the AP + * @gfp: allocation flags + * + * This function should be called by a driver that supports 4 way handshake + * offload after a security association was successfully established (i.e., + * the 4 way handshake was completed successfully). The call to this function + * should be preceded with a call to cfg80211_connect_result(), + * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to + * indicate the 802.11 association. + */ +void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, + gfp_t gfp); + /** * cfg80211_disconnected - notify cfg80211 that connection was dropped * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 59ba6ca66a0d..95832ce03a44 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -569,13 +569,14 @@ * authentication/association or not receiving a response from the AP. * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as * well to remain backwards compatible. - * @NL80211_CMD_ROAM: notifcation indicating the card/driver roamed by itself. 
- * When the driver roamed in a network that requires 802.1X authentication, - * %NL80211_ATTR_PORT_AUTHORIZED should be set if the 802.1X authentication - * was done by the driver or if roaming was done using Fast Transition - * protocol (in which case 802.1X authentication is not needed). If - * %NL80211_ATTR_PORT_AUTHORIZED is not set, user space is responsible for - * the 802.1X authentication. + * When establishing a security association, drivers that support 4 way + * handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when + * the 4 way handshake is completed successfully. + * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself. + * When a security association was established with the new AP (e.g. if + * the FT protocol was used for roaming or the driver completed the 4 way + * handshake), this event should be followed by an + * %NL80211_CMD_PORT_AUTHORIZED event. * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify * userspace that a connection was dropped by the AP or due to other * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and @@ -982,6 +983,12 @@ * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously * configured PMK for the authenticator address identified by * &NL80211_ATTR_MAC. + * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way + * handshake was completed successfully by the driver. The BSSID is + * specified with &NL80211_ATTR_MAC. Drivers that support 4 way handshake + * offload should send this event after indicating 802.11 association with + * &NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed + * &NL80211_CMD_DISCONNECT should be indicated instead. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1185,6 +1192,8 @@ enum nl80211_commands { NL80211_CMD_SET_PMK, NL80211_CMD_DEL_PMK, + NL80211_CMD_PORT_AUTHORIZED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2138,10 +2147,7 @@ enum nl80211_commands { * in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it * wants to use the supported offload of the 4-way handshake. * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT. - * @NL80211_ATTR_PORT_AUTHORIZED: flag attribute used in %NL80211_CMD_ROAMED - * notification indicating that that 802.1X authentication was done by - * the driver or is not needed (because roaming used the Fast Transition - * protocol). 
+ * @NL80211_ATTR_PORT_AUTHORIZED: (reserved) * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined diff --git a/net/wireless/core.h b/net/wireless/core.h index 6e809325af3b..35165f42c2a8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -216,6 +216,7 @@ enum cfg80211_event_type { EVENT_DISCONNECTED, EVENT_IBSS_JOINED, EVENT_STOPPED, + EVENT_PORT_AUTHORIZED, }; struct cfg80211_event { @@ -235,6 +236,9 @@ struct cfg80211_event { u8 bssid[ETH_ALEN]; struct ieee80211_channel *channel; } ij; + struct { + u8 bssid[ETH_ALEN]; + } pa; }; }; @@ -385,6 +389,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_autodisconnect_wk(struct work_struct *work); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1e39ba3cfd06..90e212db6889 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13830,9 +13830,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, info->req_ie)) || (info->resp_ie && nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len, - info->resp_ie)) || - (info->authorized && - nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED))) + info->resp_ie))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -13846,6 +13844,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, GFP_KERNEL); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b96933322077..bf9e772a30b9 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -58,6 +58,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_roam_info *info, gfp_t gfp); +void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid); void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 0a49b88070d0..f38ed490e42b 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -960,7 +960,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, ev->rm.resp_ie_len = info->resp_ie_len; memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len); ev->rm.bss = info->bss; - ev->rm.authorized = info->authorized; 
spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); @@ -969,6 +968,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, } EXPORT_SYMBOL(cfg80211_roamed); +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) +{ + ASSERT_WDEV_LOCK(wdev); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(!wdev->current_bss) || + WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + return; + + nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, + bssid); +} + +void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, + gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + if (WARN_ON(!bssid)) + return; + + ev = kzalloc(sizeof(*ev), gfp); + if (!ev) + return; + + ev->type = EVENT_PORT_AUTHORIZED; + memcpy(ev->pa.bssid, bssid, ETH_ALEN); + + /* + * Use the wdev event list so that if there are pending + * connected/roamed events, they will be reported first. + */ + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + queue_work(cfg80211_wq, &rdev->event_work); +} +EXPORT_SYMBOL(cfg80211_port_authorized); + void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { diff --git a/net/wireless/util.c b/net/wireless/util.c index 7a1fcc6ee060..ff21c314a609 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -846,6 +846,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) case EVENT_STOPPED: __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); break; + case EVENT_PORT_AUTHORIZED: + __cfg80211_port_authorized(wdev, ev->pa.bssid); + break; } wdev_unlock(wdev); From 73f2c8e933b1dcf432ac8c6965a6e67af630077f Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sat, 16 Sep 2017 21:08:22 -0700 Subject: [PATCH 0435/2177] brcmfmac: Avoid possible out-of-bounds read In brcmf_p2p_notify_rx_mgmt_p2p_probereq(), chanspec is assigned before the length of rxframe is validated. This could lead to uninitialized data being accessed (but not printed). Since we already have a perfectly good endian-swapped copy of rxframe->chanspec in ch.chspec, and ch.chspec is not modified by decchspec(), avoid the extra assignment and use ch.chspec in the debug print. 
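The shape of the fix as a self-contained sketch (hypothetical frame layout, not the brcmfmac structures): check the length before reading any length-dependent field, and convert once rather than re-reading the raw buffer.

        #include <stdint.h>
        #include <stddef.h>
        #include <arpa/inet.h>  /* ntohs() */

        struct rx_frame {               /* hypothetical on-wire layout */
                uint16_t chanspec;      /* stored big endian */
        };

        static int handle_rx(const void *data, size_t len, uint16_t *chspec)
        {
                const struct rx_frame *f = data;

                if (len < sizeof(*f))           /* validate first ... */
                        return -1;

                *chspec = ntohs(f->chanspec);   /* ... then convert once and reuse */
                return 0;
        }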
Suggested-by: Mattias Nissler Signed-off-by: Kevin Cernekee Reviewed-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 2ce675ab40ef..1c450c0727cb 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -1853,7 +1853,6 @@ s32 brcmf_p2p_notify_rx_mgmt_p2p_probereq(struct brcmf_if *ifp, struct afx_hdl *afx_hdl = &p2p->afx_hdl; struct brcmf_cfg80211_vif *vif = ifp->vif; struct brcmf_rx_mgmt_data *rxframe = (struct brcmf_rx_mgmt_data *)data; - u16 chanspec = be16_to_cpu(rxframe->chanspec); struct brcmu_chan ch; u8 *mgmt_frame; u32 mgmt_frame_len; @@ -1906,7 +1905,7 @@ s32 brcmf_p2p_notify_rx_mgmt_p2p_probereq(struct brcmf_if *ifp, cfg80211_rx_mgmt(&vif->wdev, freq, 0, mgmt_frame, mgmt_frame_len, 0); brcmf_dbg(INFO, "mgmt_frame_len (%d) , e->datalen (%d), chanspec (%04x), freq (%d)\n", - mgmt_frame_len, e->datalen, chanspec, freq); + mgmt_frame_len, e->datalen, ch.chspec, freq); return 0; } From a7c9acc452b21b56c99dd7dfe0ab542f7baa6563 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sat, 16 Sep 2017 21:08:23 -0700 Subject: [PATCH 0436/2177] brcmfmac: Delete redundant length check brcmf_fweh_process_event() sets event->datalen to the endian-swapped value of event_packet->msg.datalen, which is the same as emsg.datalen. This length is already validated in brcmf_fweh_process_event(), so there is no need to check it again upon dequeuing the event. Suggested-by: Arend van Spriel Signed-off-by: Kevin Cernekee Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 4eb1e1ce9ace..27e661fa356f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -257,11 +257,6 @@ static void brcmf_fweh_event_worker(struct work_struct *work) brcmf_dbg_hex_dump(BRCMF_EVENT_ON(), event->data, min_t(u32, emsg.datalen, 64), "event payload, len=%d\n", emsg.datalen); - if (emsg.datalen > event->datalen) { - brcmf_err("event invalid length header=%d, msg=%d\n", - event->datalen, emsg.datalen); - goto event_free; - } /* special handling of interface event */ if (event->code == BRCMF_E_IF) { From 17a91809942ca32c70026d2d5ba3348a2c4fdf8f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 2 Oct 2017 07:17:50 -0700 Subject: [PATCH 0437/2177] fm10k: ensure we process SM mbx when processing VF mbx When we process VF mailboxes, the driver is likely going to also queue up messages to the switch manager. This process merely queues up the FIFO, but doesn't actually begin the transmission process. Because we hold the mailbox lock during this VF processing, the PF<->SM mailbox is not getting processed at this time. Ensure that we actually process the PF<->SM mailbox in between each PF<->VF mailbox. This should ensure prompt transmission of the messages queued up after each VF message is received and handled. 
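The resulting loop shape, sketched with placeholder helpers standing in for the fm10k mailbox ops:

        /* Hypothetical helpers; in the driver these correspond to
         * hw->mbx.ops.process() and the per-VF mailbox handling in
         * fm10k_iov.c.
         */
        void process_sm_mbx(void);
        void process_vf_mbx(int vf);

        void iov_poll_mailboxes(int num_vfs)
        {
                for (int vf = 0; vf < num_vfs; vf++) {
                        process_sm_mbx();       /* drain messages queued so far */
                        process_vf_mbx(vf);     /* may queue more SM messages */
                }
        }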
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 5f4dac0d36ef..2ec49116fe91 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -126,6 +126,9 @@ process_mbx: struct fm10k_mbx_info *mbx = &vf_info->mbx; u16 glort = vf_info->glort; + /* process the SM mailbox first to drain outgoing messages */ + hw->mbx.ops.process(hw, &hw->mbx); + /* verify port mapping is valid, if not reset port */ if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) hw->iov.ops.reset_lport(hw, vf_info); From b52b7f7059f2df8eb3258a25bc69e12dc21ebcd7 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 8 Mar 2017 15:55:43 -0800 Subject: [PATCH 0438/2177] fm10k: reschedule service event if we stall the PF<->SM mailbox When we are handling PF<->VF mailbox messages, it is possible that the VF will send us so many messages that the PF<->SM FIFO will fill up. In this case, we stop the loop and wait until the service event is rescheduled. Normally this should happen due to an interrupt. But it is possible that we don't get another interrupt for a while and it isn't until the service timer actually reschedules us. Instead, simply reschedule immediately which will cause the service event to be run again as soon as we exit. This ensures that we promptly handle all of the PF<->VF messages with minimal delay, while still giving time for the SM mailbox to drain. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 2ec49116fe91..d8356c494f06 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -143,6 +143,10 @@ process_mbx: if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) { /* keep track of how many times this occurs */ interface->hw_sm_mbx_full++; + + /* make sure we try again momentarily */ + fm10k_service_event_schedule(interface); + break; } From 95f49d4bdee34dd0f68446bb260ab537f62ed9b3 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 8 May 2017 18:18:09 +0200 Subject: [PATCH 0439/2177] fm10k: Use seq_putc() in fm10k_dbg_desc_break() Two single characters should be put into a sequence. Thus use the corresponding function "seq_putc". This issue was detected by using the Coccinelle software. 
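For reference, the single-character form (a sketch modeled on the fm10k_dbg_desc_break() change below):

        #include <linux/seq_file.h>

        static void dbg_break(struct seq_file *s, int width)
        {
                while (width--)
                        seq_putc(s, '-');       /* one character, avoids the string
                                                 * handling of seq_puts(s, "-") */
                seq_putc(s, '\n');
        }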
Signed-off-by: Markus Elfring Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c index 5116fd043630..14df09e2d964 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c @@ -52,9 +52,9 @@ static void fm10k_dbg_desc_seq_stop(struct seq_file __always_unused *s, static void fm10k_dbg_desc_break(struct seq_file *s, int i) { while (i--) - seq_puts(s, "-"); + seq_putc(s, '-'); - seq_puts(s, "\n"); + seq_putc(s, '\n'); } static int fm10k_dbg_tx_desc_seq_show(struct seq_file *s, void *v) From 5c66d1251d67714e9f6e6b0af18ca989109b876f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:04 -0700 Subject: [PATCH 0440/2177] fm10k: stop spurious link down messages when Tx FIFO is full In fm10k_get_host_state_generic, we check the mailbox tx_read() function to ensure that the mailbox is still open. This function also checks to make sure we have space to transmit another message. Unfortunately, if we just recently sent a bunch of messages (such as enabling hundreds of VLANs on a VF) this can result in a race where the watchdog task thinks the link went down just because we haven't had time to process all these messages yet. Instead, lets just check whether the mailbox is still open. This ensures that we don't race with the Tx FIFO, and we only link down once the mailbox is not open. This is safe, because if the FIFO fills up and we're unable to send a message for too long, we'll end up triggering the timeout detection which results in a reset. Additionally, since we still check to ensure the mailbox state is OPEN, we'll transition to link down whenever the mailbox closes as well. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index 62a6ad9b3eed..736a9f087bc9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -517,8 +517,8 @@ s32 fm10k_get_host_state_generic(struct fm10k_hw *hw, bool *host_ready) goto out; } - /* verify Mailbox is still valid */ - if (!mbx->ops.tx_ready(mbx, FM10K_VFMBX_MSG_MTU)) + /* verify Mailbox is still open */ + if (mbx->state != FM10K_STATE_OPEN) goto out; /* interface cannot receive traffic without logical ports */ From 375ce90eab7ee1c87eefa2cd312b0be9ac961082 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:05 -0700 Subject: [PATCH 0441/2177] fm10k: fix typos on fall through comments Newer versions of GCC since version 7 now warn when a case statement may fall through without an explicit comment. "Fallthough" does not count as it is misspelled. Fix the typos for these comments to appease the new warnings. 
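A minimal standalone example of the comment form that GCC's -Wimplicit-fallthrough recognizes (the misspelled "fallthough" does not silence the warning):

        int classify(int mode, int flags)
        {
                switch (mode) {
                case 2:
                        if (flags & 2)
                                return 2;
                        /* fall through */
                case 1:
                        if (flags & 1)
                                return 1;
                        /* fall through */
                default:
                        return 0;
                }
        }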
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_mbx.c | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_pf.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c index 334088a101c3..244d3ad58ca7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1586,7 +1586,7 @@ s32 fm10k_pfvf_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx, mbx->mbmem_reg = FM10K_MBMEM_VF(id, 0); break; } - /* fallthough */ + /* fall through */ default: return FM10K_MBX_ERR_NO_MBX; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 40ee0242a80a..9e4fb3a44376 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1334,19 +1334,19 @@ static u8 fm10k_iov_supported_xcast_mode_pf(struct fm10k_vf_info *vf_info, case FM10K_XCAST_MODE_PROMISC: if (vf_flags & FM10K_VF_FLAG_PROMISC_CAPABLE) return FM10K_XCAST_MODE_PROMISC; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_ALLMULTI: if (vf_flags & FM10K_VF_FLAG_ALLMULTI_CAPABLE) return FM10K_XCAST_MODE_ALLMULTI; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_MULTI: if (vf_flags & FM10K_VF_FLAG_MULTI_CAPABLE) return FM10K_XCAST_MODE_MULTI; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_NONE: if (vf_flags & FM10K_VF_FLAG_NONE_CAPABLE) return FM10K_XCAST_MODE_NONE; - /* fallthough */ + /* fall through */ default: break; } From b94dd008c401fc73a8d843e3219356255f40c1ed Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:06 -0700 Subject: [PATCH 0442/2177] fm10k: avoid possible truncation of q_vector->name New versions of GCC since version 7 began warning about possible truncation of calls to snprintf. We can fix this and avoid false positives. First, we should pass the full buffer size to snprintf, because it guarantees a NULL character as part of its passed length, so passing len-1 is simply wasting a byte of possible storage. Second, if we make the ri and ti variables unsigned, the compiler is able to correctly reason that the value never gets larger than 256, so it doesn't need to warn about the full space required to print a signed integer. 
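The pattern in isolation, as a plain C sketch with a hypothetical helper:

        #include <stdio.h>
        #include <stddef.h>

        /* Pass the full buffer size (snprintf() reserves the terminating NUL
         * itself) and keep the index unsigned so the compiler can bound the
         * printed width.
         */
        static void name_vector(char *name, size_t len, const char *dev,
                                unsigned int idx)
        {
                snprintf(name, len, "%s-TxRx-%u", dev, idx);
        }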
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 63784576ae8b..9212b3fa3b62 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1544,7 +1544,7 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface) struct net_device *dev = interface->netdev; struct fm10k_hw *hw = &interface->hw; struct msix_entry *entry; - int ri = 0, ti = 0; + unsigned int ri = 0, ti = 0; int vector, err; entry = &interface->msix_entries[NON_Q_VECTORS(hw)]; @@ -1554,15 +1554,15 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface) /* name the vector */ if (q_vector->tx.count && q_vector->rx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-TxRx-%d", dev->name, ri++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-TxRx-%u", dev->name, ri++); ti++; } else if (q_vector->rx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-rx-%d", dev->name, ri++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-rx-%u", dev->name, ri++); } else if (q_vector->tx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-tx-%d", dev->name, ti++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-tx-%u", dev->name, ti++); } else { /* skip this unused q_vector */ continue; From 523a0b558db4ca205522976077911e5efe235781 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:07 -0700 Subject: [PATCH 0443/2177] fm10k: add missing fall through comment Newer versions of GCC starting with 7 now additionally warn when a case statement may fall through without an explicit comment mentioning it. Add such a comment to silence the warning, as this is expected. Unfortunately the comment must come directly before the next case statement, so we put it outside the #ifdef. Otherwise, the compiler cannot properly detect it and thus the warning is displayed regardless. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 9dffaba85ae6..189d52a8a605 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -876,6 +876,7 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring, case IPPROTO_GRE: if (skb->encapsulation) break; + /* fall through */ default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, From 8bac58be1700dab3cac8cb53ed0651da40777024 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:08 -0700 Subject: [PATCH 0444/2177] fm10k: avoid needless delay when loading driver When we load the driver, we set the last_reset to be in the future, which delays the initial driver reset. Additionally, the service task isn't scheduled to run automatically until the timer runs out. This causes a needless delay of the first reset to begin talking to the switch manager. We can avoid this by simply not setting last_reset and immediately scheduling the service task while in probe. This allows the device to wake up faster, and avoids this delay. 
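The probe-time kick reduces to something like this sketch (placeholder bit index and work item, not the fm10k names):

        #include <linux/workqueue.h>
        #include <linux/bitops.h>

        #define SERVICE_DISABLE_BIT     0       /* hypothetical bit index */

        static void kick_service_task(unsigned long *state,
                                      struct work_struct *service_task)
        {
                clear_bit(SERVICE_DISABLE_BIT, state);
                schedule_work(service_task);    /* run now; don't wait for the timer */
        }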
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 9212b3fa3b62..6c2c4bffaedf 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1800,9 +1800,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, netdev->vlan_features |= NETIF_F_HIGHDMA; } - /* delay any future reset requests */ - interface->last_reset = jiffies + (10 * HZ); - /* reset and initialize the hardware so it is in a known state */ err = hw->mac.ops.reset_hw(hw); if (err) { @@ -2079,8 +2076,9 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* enable SR-IOV after registering netdev to enforce PF/VF ordering */ fm10k_iov_configure(pdev, 0); - /* clear the service task disable bit to allow service task to start */ + /* clear the service task disable bit and kick off service task */ clear_bit(__FM10K_SERVICE_DISABLE, interface->state); + fm10k_service_event_schedule(interface); return 0; From 4abf01b43b62525e4f1a20dd1a2bc4a1967d8928 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:09 -0700 Subject: [PATCH 0445/2177] fm10k: simplify reading PFVFLRE register We're doing a really convoluted bitshift and read for the PFVFLRE register. Just reading the PFVFLRE(1), shifting it by 32, then reading PFVFLRE(0) should be sufficient. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index d8356c494f06..dfc88a463735 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -67,10 +67,8 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface) /* read VFLRE to determine if any VFs have been reset */ do { - vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(0)); + vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1)); vflre <<= 32; - vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(1)); - vflre = (vflre << 32) | (vflre >> 32); vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0)); i = iov_data->num_vfs; From d876c1583bb1b7f7264880265b824e88b791aa5d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:10 -0700 Subject: [PATCH 0446/2177] fm10k: don't loop while resetting VFs due to VFLR event We've always had a really weird looping construction for resetting VFs. We read the VFLRE register and reset the VF if the corresponding bit is set, which makes sense. However we loop continuously until we no longer have any bits left unset. At first this makes sense, as a sort of "keep trying until we succeed" concept. Unfortunately this causes a problem if we happen to surprise remove while this code is executing, because in this case we'll always read all 1s for the VFLRE register. This results in a hard lockup on the CPU because the loop will never terminate. 
Because our own reset function will clear the VFLR event register always, (except when we've lost PCIe link obviously) there is no real reason to loop. In practice, we'll loop over once and find that no VFs are pending anymore. Lets just check once. Since we're clear the notification when we reset there's no benefit to the loop. Additionally, there shouldn't be a race as future VLFRE events should trigger an interrupt. Additionally, we didn't warn or do anything in the looped case anyways. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 24 +++++++++----------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index dfc88a463735..03897720bf0b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -66,23 +66,21 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface) goto read_unlock; /* read VFLRE to determine if any VFs have been reset */ - do { - vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1)); - vflre <<= 32; - vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0)); + vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1)); + vflre <<= 32; + vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0)); - i = iov_data->num_vfs; + i = iov_data->num_vfs; - for (vflre <<= 64 - i; vflre && i--; vflre += vflre) { - struct fm10k_vf_info *vf_info = &iov_data->vf_info[i]; + for (vflre <<= 64 - i; vflre && i--; vflre += vflre) { + struct fm10k_vf_info *vf_info = &iov_data->vf_info[i]; - if (vflre >= 0) - continue; + if (vflre >= 0) + continue; - hw->iov.ops.reset_resources(hw, vf_info); - vf_info->mbx.ops.connect(hw, &vf_info->mbx); - } - } while (i != iov_data->num_vfs); + hw->iov.ops.reset_resources(hw, vf_info); + vf_info->mbx.ops.connect(hw, &vf_info->mbx); + } read_unlock: rcu_read_unlock(); From dd5eede2b711350f684e8510300cb3762a821ae6 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:11 -0700 Subject: [PATCH 0447/2177] fm10k: avoid divide by zero in rare cases when device is resetting It is possible that under rare circumstances the device is undergoing a reset, such as when a PFLR occurs, and the device may be transmitting simultaneously. In this case, we might attempt to divide by zero when finding the proper r_idx. Instead, lets read the num_tx_queues once, and make sure it's non-zero. 
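A sketch of the guard with a hypothetical private struct; in the driver the transmit path returns NETDEV_TX_BUSY when the count reads as zero:

        #include <linux/compiler.h>

        struct my_priv {                        /* hypothetical */
                unsigned int num_tx_queues;
        };

        static int pick_tx_queue(const struct my_priv *priv, unsigned int r_idx)
        {
                /* read once so a concurrent reset cannot change the value
                 * between the zero check and the modulo below
                 */
                unsigned int nq = READ_ONCE(priv->num_tx_queues);

                if (!nq)
                        return -1;      /* caller backs off (NETDEV_TX_BUSY) */

                return r_idx >= nq ? r_idx % nq : r_idx;
        }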
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index e69d49d91d67..77d495fedced 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -643,9 +643,13 @@ int fm10k_close(struct net_device *netdev) static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) { struct fm10k_intfc *interface = netdev_priv(dev); + int num_tx_queues = READ_ONCE(interface->num_tx_queues); unsigned int r_idx = skb->queue_mapping; int err; + if (!num_tx_queues) + return NETDEV_TX_BUSY; + if ((skb->protocol == htons(ETH_P_8021Q)) && !skb_vlan_tag_present(skb)) { /* FM10K only supports hardware tagging, any tags in frame @@ -698,8 +702,8 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) __skb_put(skb, pad_len); } - if (r_idx >= interface->num_tx_queues) - r_idx %= interface->num_tx_queues; + if (r_idx >= num_tx_queues) + r_idx %= num_tx_queues; err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]); From 65b0a469e9e6d025ce9cc46f6cc94d28abf5561c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:12 -0700 Subject: [PATCH 0448/2177] fm10k: move fm10k_prepare_for_reset and fm10k_handle_reset A future patch needs these functions defined earlier in the file. Move them closer to above where they will be called. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 58 ++++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 6c2c4bffaedf..41335154d6b1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -132,35 +132,6 @@ static void fm10k_service_timer(unsigned long data) fm10k_service_event_schedule(interface); } -static void fm10k_detach_subtask(struct fm10k_intfc *interface) -{ - struct net_device *netdev = interface->netdev; - u32 __iomem *hw_addr; - u32 value; - - /* do nothing if device is still present or hw_addr is set */ - if (netif_device_present(netdev) || interface->hw.hw_addr) - return; - - /* check the real address space to see if we've recovered */ - hw_addr = READ_ONCE(interface->uc_addr); - value = readl(hw_addr); - if (~value) { - interface->hw.hw_addr = interface->uc_addr; - netif_device_attach(netdev); - set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); - netdev_warn(netdev, "PCIe link restored, device now attached\n"); - return; - } - - rtnl_lock(); - - if (netif_running(netdev)) - dev_close(netdev); - - rtnl_unlock(); -} - static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) { struct net_device *netdev = interface->netdev; @@ -270,6 +241,35 @@ reinit_err: return err; } +static void fm10k_detach_subtask(struct fm10k_intfc *interface) +{ + struct net_device *netdev = interface->netdev; + u32 __iomem *hw_addr; + u32 value; + + /* do nothing if device is still present or hw_addr is set */ + if (netif_device_present(netdev) || interface->hw.hw_addr) + return; + + /* check the real address space to see if we've recovered */ + hw_addr = READ_ONCE(interface->uc_addr); + value = readl(hw_addr); + if (~value) { + interface->hw.hw_addr = 
interface->uc_addr; + netif_device_attach(netdev); + set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); + netdev_warn(netdev, "PCIe link restored, device now attached\n"); + return; + } + + rtnl_lock(); + + if (netif_running(netdev)) + dev_close(netdev); + + rtnl_unlock(); +} + static void fm10k_reinit(struct fm10k_intfc *interface) { int err; From 04914390f5a197da1e042f585e1263ad8ebff632 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:13 -0700 Subject: [PATCH 0449/2177] fm10k: prevent race condition of __FM10K_SERVICE_SCHED Although very unlikely, it is possible that cancel_work_sync() may stop the service_task before it actually started. In this case, the __FM10K_SERVICE_SCHED bit will never be cleared. This results in the service task being unable to reschedule in the future. Add a helper function which sets the service disable bit, waits for the service task to stop and clears the schedule bit, thus avoiding the race condition. We know the schedule bit is safe to clear because the cancel_work_sync() guarantees the service task is not running. Add a helper function also to restart the service task, for symmetry. This is not strictly needed but helps the mental model of how to stop and start the service task. This race could only happen in fm10k_suspend/fm10k_resume as this is the only place where the service task is actually restarted. Thus, suspend/resume testing would be ideal. However, note that the chance of this happening is very slim as the service event is scheduled for immediate execution, and you would have to trigger a suspend at almost the exact same time as the service task was scheduled. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 32 +++++++++++++++----- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 41335154d6b1..9575f7c1862d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -118,6 +118,27 @@ static void fm10k_service_event_complete(struct fm10k_intfc *interface) fm10k_service_event_schedule(interface); } +static void fm10k_stop_service_event(struct fm10k_intfc *interface) +{ + set_bit(__FM10K_SERVICE_DISABLE, interface->state); + cancel_work_sync(&interface->service_task); + + /* It's possible that cancel_work_sync stopped the service task from + * running before it could actually start. In this case the + * __FM10K_SERVICE_SCHED bit will never be cleared. Since we know that + * the service task cannot be running at this point, we need to clear + * the scheduled bit, as otherwise the service task may never be + * restarted. 
+ */ + clear_bit(__FM10K_SERVICE_SCHED, interface->state); +} + +static void fm10k_start_service_event(struct fm10k_intfc *interface) +{ + clear_bit(__FM10K_SERVICE_DISABLE, interface->state); + fm10k_service_event_schedule(interface); +} + /** * fm10k_service_timer - Timer Call-back * @data: pointer to interface cast into an unsigned long @@ -2116,8 +2137,7 @@ static void fm10k_remove(struct pci_dev *pdev) del_timer_sync(&interface->service_timer); - set_bit(__FM10K_SERVICE_DISABLE, interface->state); - cancel_work_sync(&interface->service_task); + fm10k_stop_service_event(interface); /* free netdev, this may bounce the interrupts due to setup_tc */ if (netdev->reg_state == NETREG_REGISTERED) @@ -2155,8 +2175,7 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface) * stopped. We stop the watchdog task until after we resume software * activity. */ - set_bit(__FM10K_SERVICE_DISABLE, interface->state); - cancel_work_sync(&interface->service_task); + fm10k_stop_service_event(interface); fm10k_prepare_for_reset(interface); } @@ -2183,9 +2202,8 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) interface->link_down_event = jiffies + (HZ); set_bit(__FM10K_LINK_DOWN, interface->state); - /* clear the service task disable bit to allow service task to start */ - clear_bit(__FM10K_SERVICE_DISABLE, interface->state); - fm10k_service_event_schedule(interface); + /* restart the service task */ + fm10k_start_service_event(interface); return err; } From 32f16369e59fcc505c5ed93a6a8cad3d5636b463 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 2 Oct 2017 10:41:15 +0200 Subject: [PATCH 0450/2177] net/dst: Make skb parameter of skb{metadata_dst, tunnel_info}() const Make the skb parameter of skb_metadata_dst() and skb_tunnel_info() const as they are not modified. This is in preparation for using them in call-sites where skb is const. Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index a803129a4849..9fba2ebf6dda 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -24,7 +24,7 @@ struct metadata_dst { } u; }; -static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) +static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); @@ -34,7 +34,8 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) return NULL; } -static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) +static inline struct ip_tunnel_info * +skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; From a38402bc50709aac76796a955a15152a76e3fd4e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 2 Oct 2017 10:41:16 +0200 Subject: [PATCH 0451/2177] flow_dissector: dissect tunnel info Move dissection of tunnel info from the flower classifier to the flow dissector where all other dissection occurs. This should not have any behavioural affect on other users of the flow dissector. Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/core/flow_dissector.c | 100 ++++++++++++++++++++++++++++++++++++++ net/sched/cls_flower.c | 25 ---------- 2 files changed, 100 insertions(+), 25 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0a977373d003..1f5caafb4492 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,102 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, } EXPORT_SYMBOL(__skb_flow_get_ports); +static void +skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct flow_dissector_key_control *ctrl; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) + return; + + ctrl = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + target_container); + ctrl->addr_type = type; +} + +static void +__skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct ip_tunnel_info *info; + struct ip_tunnel_key *key; + + /* A quick check to see if there might be something to do. */ + if (!dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS)) + return; + + info = skb_tunnel_info(skb); + if (!info) + return; + + key = &info->key; + + switch (ip_tunnel_info_af(info)) { + case AF_INET: + skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS, + flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *ipv4; + + ipv4 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + target_container); + ipv4->src = key->u.ipv4.src; + ipv4->dst = key->u.ipv4.dst; + } + break; + case AF_INET6: + skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS, + flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_dissector_key_ipv6_addrs *ipv6; + + ipv6 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + target_container); + ipv6->src = key->u.ipv6.src; + ipv6->dst = key->u.ipv6.dst; + } + break; + } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *keyid; + + keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + target_container); + keyid->keyid = tunnel_id_to_key32(key->tun_id); + } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *tp; + + tp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + target_container); + tp->src = key->tp_src; + tp->dst = key->tp_dst; + } +} + static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@ -478,6 +575,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); + __skb_flow_dissect_tunnel_info(skb, flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, 
FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d230cb4c8094..db831ac708f6 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -152,37 +152,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; - struct ip_tunnel_info *info; if (!atomic_read(&head->ht.nelems)) return -1; fl_clear_masked_range(&skb_key, &head->mask); - info = skb_tunnel_info(skb); - if (info) { - struct ip_tunnel_key *key = &info->key; - - switch (ip_tunnel_info_af(info)) { - case AF_INET: - skb_key.enc_control.addr_type = - FLOW_DISSECTOR_KEY_IPV4_ADDRS; - skb_key.enc_ipv4.src = key->u.ipv4.src; - skb_key.enc_ipv4.dst = key->u.ipv4.dst; - break; - case AF_INET6: - skb_key.enc_control.addr_type = - FLOW_DISSECTOR_KEY_IPV6_ADDRS; - skb_key.enc_ipv6.src = key->u.ipv6.src; - skb_key.enc_ipv6.dst = key->u.ipv6.dst; - break; - } - - skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); - skb_key.enc_tp.src = key->tp_src; - skb_key.enc_tp.dst = key->tp_dst; - } - skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, * so do it rather here. From 6227efc1a20b24a21ce8e9122c0569696d644aff Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 2 Oct 2017 12:05:29 +0200 Subject: [PATCH 0452/2177] selftests: rtnetlink.sh: add vxlan and fou test cases fou test lifted from ip-fou man page. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 96 ++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index a048f7a5f94c..62c87da92770 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -332,6 +332,101 @@ kci_test_vrf() echo "PASS: vrf" } +kci_test_encap_vxlan() +{ + ret=0 + vxlan="test-vxlan0" + vlan="test-vlan0" + testns="$1" + + ip netns exec "$testns" ip link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ + dev "$devdummy" dstport 4789 2>/dev/null + if [ $? -ne 0 ]; then + echo "FAIL: can't add vxlan interface, skipping test" + return 0 + fi + check_err $? + + ip netns exec "$testns" ip addr add 10.2.11.49/24 dev "$vxlan" + check_err $? + + ip netns exec "$testns" ip link set up dev "$vxlan" + check_err $? + + ip netns exec "$testns" ip link add link "$vxlan" name "$vlan" type vlan id 1 + check_err $? + + ip netns exec "$testns" ip link del "$vxlan" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: vxlan" + return 1 + fi + echo "PASS: vxlan" +} + +kci_test_encap_fou() +{ + ret=0 + name="test-fou" + testns="$1" + + ip fou help 2>&1 |grep -q 'Usage: ip fou' + if [ $? -ne 0 ];then + echo "SKIP: fou: iproute2 too old" + return 1 + fi + + ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null + if [ $? -ne 0 ];then + echo "FAIL: can't add fou port 7777, skipping test" + return 1 + fi + + ip netns exec "$testns" ip fou add port 8888 ipproto 4 + check_err $? + + ip netns exec "$testns" ip fou del port 9999 2>/dev/null + check_fail $? + + ip netns exec "$testns" ip fou del port 7777 + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: fou" + return 1 + fi + + echo "PASS: fou" +} + +# test various encap methods, use netns to avoid unwanted interference +kci_test_encap() +{ + testns="testns" + ret=0 + + ip netns add "$testns" + if [ $? 
-ne 0 ]; then + echo "SKIP encap tests: cannot add net namespace $testns" + return 1 + fi + + ip netns exec "$testns" ip link set lo up + check_err $? + + ip netns exec "$testns" ip link add name "$devdummy" type dummy + check_err $? + ip netns exec "$testns" ip link set "$devdummy" up + check_err $? + + kci_test_encap_vxlan "$testns" + kci_test_encap_fou "$testns" + + ip netns del "$testns" +} + kci_test_rtnl() { kci_add_dummy @@ -348,6 +443,7 @@ kci_test_rtnl() kci_test_addrlabel kci_test_ifalias kci_test_vrf + kci_test_encap kci_del_dummy } From 7ff176f81dd4675d08951d2395c7ddb1d9974da6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Oct 2017 12:21:57 +0200 Subject: [PATCH 0453/2177] mlxsw: spectrum_router: Fix a typo Signed-off-by: Petr Machata Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ef4b86b3aa9b..75078a3fbee8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1680,7 +1680,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { - dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n"); break; } num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); From 85f44a15b1920babc2d28c11f514f5b217a29968 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Oct 2017 12:21:58 +0200 Subject: [PATCH 0454/2177] mlxsw: spectrum_router: Drop a redundant condition Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 75078a3fbee8..58bc04cbbef4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3251,7 +3251,7 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, return; if (mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_set(fib_entry); - else if (!mlxsw_sp_fib_entry_should_offload(fib_entry)) + else mlxsw_sp_fib_entry_offload_unset(fib_entry); return; default: From f2f2efb807d339513199b1bb771806c90cce83ae Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:28 +0300 Subject: [PATCH 0455/2177] byteorder: Move {cpu_to_be32, be32_to_cpu}_array() from Thunderbolt to core We will be using these when communicating XDomain discovery protocol over Thunderbolt link but they might be useful for other drivers as well. Make them available through byteorder/generic.h. Suggested-by: Andy Shevchenko Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. 
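As a usage illustration for the relocated helpers: a driver that needs to push an array of CPU-order words into a big-endian wire format (and read it back) can now do the conversion in one call instead of an explicit loop. A minimal sketch, with buffer names made up for the example:

#include <linux/kernel.h>
#include <linux/types.h>
#include <asm/byteorder.h>

static void example_endian_roundtrip(void)
{
	u32 regs[4] = { 0x12345678, 0x9abcdef0, 0, 1 };
	__be32 wire[ARRAY_SIZE(regs)];

	/* CPU endianness -> big endian, one u32 per element */
	cpu_to_be32_array(wire, regs, ARRAY_SIZE(regs));

	/* ... hand wire[] to the hardware or protocol ... */

	/* and the reverse direction on receive */
	be32_to_cpu_array(regs, wire, ARRAY_SIZE(wire));
}
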
Miller --- drivers/thunderbolt/ctl.c | 14 -------------- include/linux/byteorder/generic.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index fb40dd0588b9..e6a4c9458c76 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -289,20 +289,6 @@ static void tb_cfg_print_error(struct tb_ctl *ctl, } } -static void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) -{ - int i; - for (i = 0; i < len; i++) - dst[i] = cpu_to_be32(src[i]); -} - -static void be32_to_cpu_array(u32 *dst, __be32 *src, size_t len) -{ - int i; - for (i = 0; i < len; i++) - dst[i] = be32_to_cpu(src[i]); -} - static __be32 tb_crc(const void *data, size_t len) { return cpu_to_be32(~__crc32c_le(~0, data, len)); diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 89f67c1c3160..805d16654459 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -170,4 +170,20 @@ static inline void be64_add_cpu(__be64 *var, u64 val) *var = cpu_to_be64(be64_to_cpu(*var) + val); } +static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] = cpu_to_be32(src[i]); +} + +static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] = be32_to_cpu(src[i]); +} + #endif /* _LINUX_BYTEORDER_GENERIC_H */ From 806717081ab6088957c4857d42941159a07cc571 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:29 +0300 Subject: [PATCH 0456/2177] thunderbolt: Remove __packed from ICM message structures These messages are all 32-bit aligned and they should be packed without the __packed attribute just fine. It also allows compiler to generate better code on some architectures. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. 
Miller --- drivers/thunderbolt/tb_msgs.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h index de6441e4a060..f3adf58a40ce 100644 --- a/drivers/thunderbolt/tb_msgs.h +++ b/drivers/thunderbolt/tb_msgs.h @@ -130,7 +130,7 @@ struct icm_pkg_header { u8 flags; u8 packet_id; u8 total_packets; -} __packed; +}; #define ICM_FLAGS_ERROR BIT(0) #define ICM_FLAGS_NO_KEY BIT(1) @@ -139,20 +139,20 @@ struct icm_pkg_header { struct icm_pkg_driver_ready { struct icm_pkg_header hdr; -} __packed; +}; struct icm_pkg_driver_ready_response { struct icm_pkg_header hdr; u8 romver; u8 ramver; u16 security_level; -} __packed; +}; /* Falcon Ridge & Alpine Ridge common messages */ struct icm_fr_pkg_get_topology { struct icm_pkg_header hdr; -} __packed; +}; #define ICM_GET_TOPOLOGY_PACKETS 14 @@ -167,7 +167,7 @@ struct icm_fr_pkg_get_topology_response { u32 reserved[2]; u32 ports[16]; u32 port_hop_info[16]; -} __packed; +}; #define ICM_SWITCH_USED BIT(0) #define ICM_SWITCH_UPSTREAM_PORT_MASK GENMASK(7, 1) @@ -184,7 +184,7 @@ struct icm_fr_event_device_connected { u8 connection_id; u16 link_info; u32 ep_name[55]; -} __packed; +}; #define ICM_LINK_INFO_LINK_MASK 0x7 #define ICM_LINK_INFO_DEPTH_SHIFT 4 @@ -197,13 +197,13 @@ struct icm_fr_pkg_approve_device { u8 connection_key; u8 connection_id; u16 reserved; -} __packed; +}; struct icm_fr_event_device_disconnected { struct icm_pkg_header hdr; u16 reserved; u16 link_info; -} __packed; +}; struct icm_fr_pkg_add_device_key { struct icm_pkg_header hdr; @@ -212,7 +212,7 @@ struct icm_fr_pkg_add_device_key { u8 connection_id; u16 reserved; u32 key[8]; -} __packed; +}; struct icm_fr_pkg_add_device_key_response { struct icm_pkg_header hdr; @@ -220,7 +220,7 @@ struct icm_fr_pkg_add_device_key_response { u8 connection_key; u8 connection_id; u16 reserved; -} __packed; +}; struct icm_fr_pkg_challenge_device { struct icm_pkg_header hdr; @@ -229,7 +229,7 @@ struct icm_fr_pkg_challenge_device { u8 connection_id; u16 reserved; u32 challenge[8]; -} __packed; +}; struct icm_fr_pkg_challenge_device_response { struct icm_pkg_header hdr; @@ -239,7 +239,7 @@ struct icm_fr_pkg_challenge_device_response { u16 reserved; u32 challenge[8]; u32 response[8]; -} __packed; +}; /* Alpine Ridge only messages */ @@ -247,7 +247,7 @@ struct icm_ar_pkg_get_route { struct icm_pkg_header hdr; u16 reserved; u16 link_info; -} __packed; +}; struct icm_ar_pkg_get_route_response { struct icm_pkg_header hdr; @@ -255,6 +255,6 @@ struct icm_ar_pkg_get_route_response { u16 link_info; u32 route_hi; u32 route_lo; -} __packed; +}; #endif From cdae7c07e3e3509eaabc18c1640a55dc5b99c179 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:30 +0300 Subject: [PATCH 0457/2177] thunderbolt: Add support for XDomain properties Thunderbolt XDomain discovery protocol uses directories which contain properties and other directories to exchange information about what capabilities the remote host supports. This also includes identification information like device ID and name. This adds support for parsing and formatting these properties and establishes an API drivers can use in addition to the core Thunderbolt driver. This API is exposed in a new header: include/linux/thunderbolt.h. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. 
Miller --- drivers/thunderbolt/Makefile | 2 +- drivers/thunderbolt/property.c | 670 +++++++++++++++++++++++++++++++++ include/linux/thunderbolt.h | 89 +++++ 3 files changed, 760 insertions(+), 1 deletion(-) create mode 100644 drivers/thunderbolt/property.c create mode 100644 include/linux/thunderbolt.h diff --git a/drivers/thunderbolt/Makefile b/drivers/thunderbolt/Makefile index 4900febc6c8a..7afd21f5383a 100644 --- a/drivers/thunderbolt/Makefile +++ b/drivers/thunderbolt/Makefile @@ -1,3 +1,3 @@ obj-${CONFIG_THUNDERBOLT} := thunderbolt.o thunderbolt-objs := nhi.o ctl.o tb.o switch.o cap.o path.o tunnel_pci.o eeprom.o -thunderbolt-objs += domain.o dma_port.o icm.o +thunderbolt-objs += domain.o dma_port.o icm.o property.o diff --git a/drivers/thunderbolt/property.c b/drivers/thunderbolt/property.c new file mode 100644 index 000000000000..8fe913a95b4a --- /dev/null +++ b/drivers/thunderbolt/property.c @@ -0,0 +1,670 @@ +/* + * Thunderbolt XDomain property support + * + * Copyright (C) 2017, Intel Corporation + * Authors: Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +struct tb_property_entry { + u32 key_hi; + u32 key_lo; + u16 length; + u8 reserved; + u8 type; + u32 value; +}; + +struct tb_property_rootdir_entry { + u32 magic; + u32 length; + struct tb_property_entry entries[]; +}; + +struct tb_property_dir_entry { + u32 uuid[4]; + struct tb_property_entry entries[]; +}; + +#define TB_PROPERTY_ROOTDIR_MAGIC 0x55584401 + +static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, + size_t block_len, unsigned int dir_offset, size_t dir_len, + bool is_root); + +static inline void parse_dwdata(void *dst, const void *src, size_t dwords) +{ + be32_to_cpu_array(dst, src, dwords); +} + +static inline void format_dwdata(void *dst, const void *src, size_t dwords) +{ + cpu_to_be32_array(dst, src, dwords); +} + +static bool tb_property_entry_valid(const struct tb_property_entry *entry, + size_t block_len) +{ + switch (entry->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + case TB_PROPERTY_TYPE_DATA: + case TB_PROPERTY_TYPE_TEXT: + if (entry->length > block_len) + return false; + if (entry->value + entry->length > block_len) + return false; + break; + + case TB_PROPERTY_TYPE_VALUE: + if (entry->length != 1) + return false; + break; + } + + return true; +} + +static bool tb_property_key_valid(const char *key) +{ + return key && strlen(key) <= TB_PROPERTY_KEY_SIZE; +} + +static struct tb_property * +tb_property_alloc(const char *key, enum tb_property_type type) +{ + struct tb_property *property; + + property = kzalloc(sizeof(*property), GFP_KERNEL); + if (!property) + return NULL; + + strcpy(property->key, key); + property->type = type; + INIT_LIST_HEAD(&property->list); + + return property; +} + +static struct tb_property *tb_property_parse(const u32 *block, size_t block_len, + const struct tb_property_entry *entry) +{ + char key[TB_PROPERTY_KEY_SIZE + 1]; + struct tb_property *property; + struct tb_property_dir *dir; + + if (!tb_property_entry_valid(entry, block_len)) + return NULL; + + parse_dwdata(key, entry, 2); + key[TB_PROPERTY_KEY_SIZE] = '\0'; + + property = tb_property_alloc(key, entry->type); + if (!property) + return NULL; + + property->length = entry->length; + + switch (property->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + dir = 
__tb_property_parse_dir(block, block_len, entry->value, + entry->length, false); + if (!dir) { + kfree(property); + return NULL; + } + property->value.dir = dir; + break; + + case TB_PROPERTY_TYPE_DATA: + property->value.data = kcalloc(property->length, sizeof(u32), + GFP_KERNEL); + if (!property->value.data) { + kfree(property); + return NULL; + } + parse_dwdata(property->value.data, block + entry->value, + entry->length); + break; + + case TB_PROPERTY_TYPE_TEXT: + property->value.text = kcalloc(property->length, sizeof(u32), + GFP_KERNEL); + if (!property->value.text) { + kfree(property); + return NULL; + } + parse_dwdata(property->value.text, block + entry->value, + entry->length); + /* Force null termination */ + property->value.text[property->length * 4 - 1] = '\0'; + break; + + case TB_PROPERTY_TYPE_VALUE: + property->value.immediate = entry->value; + break; + + default: + property->type = TB_PROPERTY_TYPE_UNKNOWN; + break; + } + + return property; +} + +static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, + size_t block_len, unsigned int dir_offset, size_t dir_len, bool is_root) +{ + const struct tb_property_entry *entries; + size_t i, content_len, nentries; + unsigned int content_offset; + struct tb_property_dir *dir; + + dir = kzalloc(sizeof(*dir), GFP_KERNEL); + if (!dir) + return NULL; + + if (is_root) { + content_offset = dir_offset + 2; + content_len = dir_len; + } else { + dir->uuid = kmemdup(&block[dir_offset], sizeof(*dir->uuid), + GFP_KERNEL); + content_offset = dir_offset + 4; + content_len = dir_len - 4; /* Length includes UUID */ + } + + entries = (const struct tb_property_entry *)&block[content_offset]; + nentries = content_len / (sizeof(*entries) / 4); + + INIT_LIST_HEAD(&dir->properties); + + for (i = 0; i < nentries; i++) { + struct tb_property *property; + + property = tb_property_parse(block, block_len, &entries[i]); + if (!property) { + tb_property_free_dir(dir); + return NULL; + } + + list_add_tail(&property->list, &dir->properties); + } + + return dir; +} + +/** + * tb_property_parse_dir() - Parses properties from given property block + * @block: Property block to parse + * @block_len: Number of dword elements in the property block + * + * This function parses the XDomain properties data block into format that + * can be traversed using the helper functions provided by this module. + * Upon success returns the parsed directory. In case of error returns + * %NULL. The resulting &struct tb_property_dir needs to be released by + * calling tb_property_free_dir() when not needed anymore. + * + * The @block is expected to be root directory. + */ +struct tb_property_dir *tb_property_parse_dir(const u32 *block, + size_t block_len) +{ + const struct tb_property_rootdir_entry *rootdir = + (const struct tb_property_rootdir_entry *)block; + + if (rootdir->magic != TB_PROPERTY_ROOTDIR_MAGIC) + return NULL; + if (rootdir->length > block_len) + return NULL; + + return __tb_property_parse_dir(block, block_len, 0, rootdir->length, + true); +} + +/** + * tb_property_create_dir() - Creates new property directory + * @uuid: UUID used to identify the particular directory + * + * Creates new, empty property directory. If @uuid is %NULL then the + * directory is assumed to be root directory. 
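Taken together, a directory either starts empty via tb_property_create_dir() or comes from parsing a block received from the remote host. A minimal sketch of the receive side, assuming block/block_len hold the raw dwords and with error handling trimmed (the helper name is invented for illustration):

#include <linux/errno.h>
#include <linux/thunderbolt.h>

static int example_parse_remote_block(const u32 *block, size_t block_len)
{
	struct tb_property_dir *dir;

	dir = tb_property_parse_dir(block, block_len);
	if (!dir)
		return -EINVAL;

	/* ... walk the directory, e.g. with tb_property_find() below ... */

	tb_property_free_dir(dir);
	return 0;
}
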
+ */ +struct tb_property_dir *tb_property_create_dir(const uuid_t *uuid) +{ + struct tb_property_dir *dir; + + dir = kzalloc(sizeof(*dir), GFP_KERNEL); + if (!dir) + return NULL; + + INIT_LIST_HEAD(&dir->properties); + if (uuid) { + dir->uuid = kmemdup(uuid, sizeof(*dir->uuid), GFP_KERNEL); + if (!dir->uuid) { + kfree(dir); + return NULL; + } + } + + return dir; +} +EXPORT_SYMBOL_GPL(tb_property_create_dir); + +static void tb_property_free(struct tb_property *property) +{ + switch (property->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + tb_property_free_dir(property->value.dir); + break; + + case TB_PROPERTY_TYPE_DATA: + kfree(property->value.data); + break; + + case TB_PROPERTY_TYPE_TEXT: + kfree(property->value.text); + break; + + default: + break; + } + + kfree(property); +} + +/** + * tb_property_free_dir() - Release memory allocated for property directory + * @dir: Directory to release + * + * This will release all the memory the directory occupies including all + * descendants. It is OK to pass %NULL @dir, then the function does + * nothing. + */ +void tb_property_free_dir(struct tb_property_dir *dir) +{ + struct tb_property *property, *tmp; + + if (!dir) + return; + + list_for_each_entry_safe(property, tmp, &dir->properties, list) { + list_del(&property->list); + tb_property_free(property); + } + kfree(dir->uuid); + kfree(dir); +} +EXPORT_SYMBOL_GPL(tb_property_free_dir); + +static size_t tb_property_dir_length(const struct tb_property_dir *dir, + bool recurse, size_t *data_len) +{ + const struct tb_property *property; + size_t len = 0; + + if (dir->uuid) + len += sizeof(*dir->uuid) / 4; + else + len += sizeof(struct tb_property_rootdir_entry) / 4; + + list_for_each_entry(property, &dir->properties, list) { + len += sizeof(struct tb_property_entry) / 4; + + switch (property->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + if (recurse) { + len += tb_property_dir_length( + property->value.dir, recurse, data_len); + } + /* Reserve dword padding after each directory */ + if (data_len) + *data_len += 1; + break; + + case TB_PROPERTY_TYPE_DATA: + case TB_PROPERTY_TYPE_TEXT: + if (data_len) + *data_len += property->length; + break; + + default: + break; + } + } + + return len; +} + +static ssize_t __tb_property_format_dir(const struct tb_property_dir *dir, + u32 *block, unsigned int start_offset, size_t block_len) +{ + unsigned int data_offset, dir_end; + const struct tb_property *property; + struct tb_property_entry *entry; + size_t dir_len, data_len = 0; + int ret; + + /* + * The structure of property block looks like following. Leaf + * data/text is included right after the directory and each + * directory follows each other (even nested ones). + * + * +----------+ <-- start_offset + * | header | <-- root directory header + * +----------+ --- + * | entry 0 | -^--------------------. + * +----------+ | | + * | entry 1 | -|--------------------|--. + * +----------+ | | | + * | entry 2 | -|-----------------. | | + * +----------+ | | | | + * : : | dir_len | | | + * . . | | | | + * : : | | | | + * +----------+ | | | | + * | entry n | v | | | + * +----------+ <-- data_offset | | | + * | data 0 | <------------------|--' | + * +----------+ | | + * | data 1 | <------------------|-----' + * +----------+ | + * | 00000000 | padding | + * +----------+ <-- dir_end <------' + * | UUID | <-- directory UUID (child directory) + * +----------+ + * | entry 0 | + * +----------+ + * | entry 1 | + * +----------+ + * : : + * . . 
+ * : : + * +----------+ + * | entry n | + * +----------+ + * | data 0 | + * +----------+ + * + * We use dir_end to hold pointer to the end of the directory. It + * will increase as we add directories and each directory should be + * added starting from previous dir_end. + */ + dir_len = tb_property_dir_length(dir, false, &data_len); + data_offset = start_offset + dir_len; + dir_end = start_offset + data_len + dir_len; + + if (data_offset > dir_end) + return -EINVAL; + if (dir_end > block_len) + return -EINVAL; + + /* Write headers first */ + if (dir->uuid) { + struct tb_property_dir_entry *pe; + + pe = (struct tb_property_dir_entry *)&block[start_offset]; + memcpy(pe->uuid, dir->uuid, sizeof(pe->uuid)); + entry = pe->entries; + } else { + struct tb_property_rootdir_entry *re; + + re = (struct tb_property_rootdir_entry *)&block[start_offset]; + re->magic = TB_PROPERTY_ROOTDIR_MAGIC; + re->length = dir_len - sizeof(*re) / 4; + entry = re->entries; + } + + list_for_each_entry(property, &dir->properties, list) { + const struct tb_property_dir *child; + + format_dwdata(entry, property->key, 2); + entry->type = property->type; + + switch (property->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + child = property->value.dir; + ret = __tb_property_format_dir(child, block, dir_end, + block_len); + if (ret < 0) + return ret; + entry->length = tb_property_dir_length(child, false, + NULL); + entry->value = dir_end; + dir_end = ret; + break; + + case TB_PROPERTY_TYPE_DATA: + format_dwdata(&block[data_offset], property->value.data, + property->length); + entry->length = property->length; + entry->value = data_offset; + data_offset += entry->length; + break; + + case TB_PROPERTY_TYPE_TEXT: + format_dwdata(&block[data_offset], property->value.text, + property->length); + entry->length = property->length; + entry->value = data_offset; + data_offset += entry->length; + break; + + case TB_PROPERTY_TYPE_VALUE: + entry->length = property->length; + entry->value = property->value.immediate; + break; + + default: + break; + } + + entry++; + } + + return dir_end; +} + +/** + * tb_property_format_dir() - Formats directory to the packed XDomain format + * @dir: Directory to format + * @block: Property block where the packed data is placed + * @block_len: Length of the property block + * + * This function formats the directory to the packed format that can be + * then send over the thunderbolt fabric to receiving host. Returns %0 in + * case of success and negative errno on faulure. Passing %NULL in @block + * returns number of entries the block takes. + */ +ssize_t tb_property_format_dir(const struct tb_property_dir *dir, u32 *block, + size_t block_len) +{ + ssize_t ret; + + if (!block) { + size_t dir_len, data_len = 0; + + dir_len = tb_property_dir_length(dir, true, &data_len); + return dir_len + data_len; + } + + ret = __tb_property_format_dir(dir, block, 0, block_len); + return ret < 0 ? 
ret : 0; +} + +/** + * tb_property_add_immediate() - Add immediate property to directory + * @parent: Directory to add the property + * @key: Key for the property + * @value: Immediate value to store with the property + */ +int tb_property_add_immediate(struct tb_property_dir *parent, const char *key, + u32 value) +{ + struct tb_property *property; + + if (!tb_property_key_valid(key)) + return -EINVAL; + + property = tb_property_alloc(key, TB_PROPERTY_TYPE_VALUE); + if (!property) + return -ENOMEM; + + property->length = 1; + property->value.immediate = value; + + list_add_tail(&property->list, &parent->properties); + return 0; +} +EXPORT_SYMBOL_GPL(tb_property_add_immediate); + +/** + * tb_property_add_data() - Adds arbitrary data property to directory + * @parent: Directory to add the property + * @key: Key for the property + * @buf: Data buffer to add + * @buflen: Number of bytes in the data buffer + * + * Function takes a copy of @buf and adds it to the directory. + */ +int tb_property_add_data(struct tb_property_dir *parent, const char *key, + const void *buf, size_t buflen) +{ + /* Need to pad to dword boundary */ + size_t size = round_up(buflen, 4); + struct tb_property *property; + + if (!tb_property_key_valid(key)) + return -EINVAL; + + property = tb_property_alloc(key, TB_PROPERTY_TYPE_DATA); + if (!property) + return -ENOMEM; + + property->length = size / 4; + property->value.data = kzalloc(size, GFP_KERNEL); + memcpy(property->value.data, buf, buflen); + + list_add_tail(&property->list, &parent->properties); + return 0; +} +EXPORT_SYMBOL_GPL(tb_property_add_data); + +/** + * tb_property_add_text() - Adds string property to directory + * @parent: Directory to add the property + * @key: Key for the property + * @text: String to add + * + * Function takes a copy of @text and adds it to the directory. + */ +int tb_property_add_text(struct tb_property_dir *parent, const char *key, + const char *text) +{ + /* Need to pad to dword boundary */ + size_t size = round_up(strlen(text) + 1, 4); + struct tb_property *property; + + if (!tb_property_key_valid(key)) + return -EINVAL; + + property = tb_property_alloc(key, TB_PROPERTY_TYPE_TEXT); + if (!property) + return -ENOMEM; + + property->length = size / 4; + property->value.data = kzalloc(size, GFP_KERNEL); + strcpy(property->value.text, text); + + list_add_tail(&property->list, &parent->properties); + return 0; +} +EXPORT_SYMBOL_GPL(tb_property_add_text); + +/** + * tb_property_add_dir() - Adds a directory to the parent directory + * @parent: Directory to add the property + * @key: Key for the property + * @dir: Directory to add + */ +int tb_property_add_dir(struct tb_property_dir *parent, const char *key, + struct tb_property_dir *dir) +{ + struct tb_property *property; + + if (!tb_property_key_valid(key)) + return -EINVAL; + + property = tb_property_alloc(key, TB_PROPERTY_TYPE_DIRECTORY); + if (!property) + return -ENOMEM; + + property->value.dir = dir; + + list_add_tail(&property->list, &parent->properties); + return 0; +} +EXPORT_SYMBOL_GPL(tb_property_add_dir); + +/** + * tb_property_remove() - Removes property from a parent directory + * @property: Property to remove + * + * Note memory for @property is released as well so it is not allowed to + * touch the object after call to this function. 
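Combining the add helpers with the formatting entry point gives the transmit-side counterpart of the parse sketch above. The helper name and key strings below are illustrative only (keys must fit within TB_PROPERTY_KEY_SIZE):

#include <linux/slab.h>
#include <linux/thunderbolt.h>

static int example_build_property_block(u32 **out_block, size_t *out_dwords)
{
	struct tb_property_dir *dir;
	ssize_t dwords;
	u32 *block;
	int ret;

	dir = tb_property_create_dir(NULL);	/* NULL UUID -> root directory */
	if (!dir)
		return -ENOMEM;

	ret = tb_property_add_immediate(dir, "vendorid", 0x1234);
	if (!ret)
		ret = tb_property_add_text(dir, "deviceid", "Example host");
	if (ret)
		goto out_free;

	/* Passing a NULL block queries how many dwords are needed */
	dwords = tb_property_format_dir(dir, NULL, 0);
	block = kcalloc(dwords, sizeof(u32), GFP_KERNEL);
	if (!block) {
		ret = -ENOMEM;
		goto out_free;
	}

	ret = tb_property_format_dir(dir, block, dwords);
	if (ret) {
		kfree(block);
		goto out_free;
	}

	*out_block = block;
	*out_dwords = dwords;
out_free:
	tb_property_free_dir(dir);
	return ret;
}
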
+ */ +void tb_property_remove(struct tb_property *property) +{ + list_del(&property->list); + kfree(property); +} +EXPORT_SYMBOL_GPL(tb_property_remove); + +/** + * tb_property_find() - Find a property from a directory + * @dir: Directory where the property is searched + * @key: Key to look for + * @type: Type of the property + * + * Finds and returns property from the given directory. Does not recurse + * into sub-directories. Returns %NULL if the property was not found. + */ +struct tb_property *tb_property_find(struct tb_property_dir *dir, + const char *key, enum tb_property_type type) +{ + struct tb_property *property; + + list_for_each_entry(property, &dir->properties, list) { + if (property->type == type && !strcmp(property->key, key)) + return property; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(tb_property_find); + +/** + * tb_property_get_next() - Get next property from directory + * @dir: Directory holding properties + * @prev: Previous property in the directory (%NULL returns the first) + */ +struct tb_property *tb_property_get_next(struct tb_property_dir *dir, + struct tb_property *prev) +{ + if (prev) { + if (list_is_last(&prev->list, &dir->properties)) + return NULL; + return list_next_entry(prev, list); + } + return list_first_entry_or_null(&dir->properties, struct tb_property, + list); +} +EXPORT_SYMBOL_GPL(tb_property_get_next); diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h new file mode 100644 index 000000000000..96561c1265ae --- /dev/null +++ b/include/linux/thunderbolt.h @@ -0,0 +1,89 @@ +/* + * Thunderbolt service API + * + * Copyright (C) 2017, Intel Corporation + * Authors: Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef THUNDERBOLT_H_ +#define THUNDERBOLT_H_ + +#include +#include + +/** + * struct tb_property_dir - XDomain property directory + * @uuid: Directory UUID or %NULL if root directory + * @properties: List of properties in this directory + * + * User needs to provide serialization if needed. + */ +struct tb_property_dir { + const uuid_t *uuid; + struct list_head properties; +}; + +enum tb_property_type { + TB_PROPERTY_TYPE_UNKNOWN = 0x00, + TB_PROPERTY_TYPE_DIRECTORY = 0x44, + TB_PROPERTY_TYPE_DATA = 0x64, + TB_PROPERTY_TYPE_TEXT = 0x74, + TB_PROPERTY_TYPE_VALUE = 0x76, +}; + +#define TB_PROPERTY_KEY_SIZE 8 + +/** + * struct tb_property - XDomain property + * @list: Used to link properties together in a directory + * @key: Key for the property (always terminated). + * @type: Type of the property + * @length: Length of the property data in dwords + * @value: Property value + * + * Users use @type to determine which field in @value is filled. 
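For instance, a consumer that looks up a couple of keys in a parsed directory picks the union member that matches the type it asked for (the key names below are only examples):

#include <linux/printk.h>
#include <linux/thunderbolt.h>

static void example_read_properties(struct tb_property_dir *dir)
{
	struct tb_property *p;

	p = tb_property_find(dir, "prtcvers", TB_PROPERTY_TYPE_VALUE);
	if (p)
		pr_info("protocol version %u\n", p->value.immediate);

	p = tb_property_find(dir, "deviceid", TB_PROPERTY_TYPE_TEXT);
	if (p)
		pr_info("device: %s\n", p->value.text);
}
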
+ */ +struct tb_property { + struct list_head list; + char key[TB_PROPERTY_KEY_SIZE + 1]; + enum tb_property_type type; + size_t length; + union { + struct tb_property_dir *dir; + u8 *data; + char *text; + u32 immediate; + } value; +}; + +struct tb_property_dir *tb_property_parse_dir(const u32 *block, + size_t block_len); +ssize_t tb_property_format_dir(const struct tb_property_dir *dir, u32 *block, + size_t block_len); +struct tb_property_dir *tb_property_create_dir(const uuid_t *uuid); +void tb_property_free_dir(struct tb_property_dir *dir); +int tb_property_add_immediate(struct tb_property_dir *parent, const char *key, + u32 value); +int tb_property_add_data(struct tb_property_dir *parent, const char *key, + const void *buf, size_t buflen); +int tb_property_add_text(struct tb_property_dir *parent, const char *key, + const char *text); +int tb_property_add_dir(struct tb_property_dir *parent, const char *key, + struct tb_property_dir *dir); +void tb_property_remove(struct tb_property *tb_property); +struct tb_property *tb_property_find(struct tb_property_dir *dir, + const char *key, enum tb_property_type type); +struct tb_property *tb_property_get_next(struct tb_property_dir *dir, + struct tb_property *prev); + +#define tb_property_for_each(dir, property) \ + for (property = tb_property_get_next(dir, NULL); \ + property; \ + property = tb_property_get_next(dir, property)) + +#endif /* THUNDERBOLT_H_ */ From eaf8ff35a345449207ad116e2574c19780ec9a98 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:31 +0300 Subject: [PATCH 0458/2177] thunderbolt: Move enum tb_cfg_pkg_type to thunderbolt.h These will be needed by Thunderbolt services when sending and receiving XDomain control messages. While there change TB_CFG_PKG_PREPARE_TO_SLEEP value to be decimal in order to be consistent with other members. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. 
Miller --- drivers/thunderbolt/ctl.h | 1 + drivers/thunderbolt/tb_msgs.h | 17 ----------------- include/linux/thunderbolt.h | 17 +++++++++++++++++ 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/thunderbolt/ctl.h b/drivers/thunderbolt/ctl.h index 36fd28b1c1c5..d0f21e1e0b8b 100644 --- a/drivers/thunderbolt/ctl.h +++ b/drivers/thunderbolt/ctl.h @@ -8,6 +8,7 @@ #define _TB_CFG #include +#include #include "nhi.h" #include "tb_msgs.h" diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h index f3adf58a40ce..f2b2550cd97c 100644 --- a/drivers/thunderbolt/tb_msgs.h +++ b/drivers/thunderbolt/tb_msgs.h @@ -15,23 +15,6 @@ #include #include -enum tb_cfg_pkg_type { - TB_CFG_PKG_READ = 1, - TB_CFG_PKG_WRITE = 2, - TB_CFG_PKG_ERROR = 3, - TB_CFG_PKG_NOTIFY_ACK = 4, - TB_CFG_PKG_EVENT = 5, - TB_CFG_PKG_XDOMAIN_REQ = 6, - TB_CFG_PKG_XDOMAIN_RESP = 7, - TB_CFG_PKG_OVERRIDE = 8, - TB_CFG_PKG_RESET = 9, - TB_CFG_PKG_ICM_EVENT = 10, - TB_CFG_PKG_ICM_CMD = 11, - TB_CFG_PKG_ICM_RESP = 12, - TB_CFG_PKG_PREPARE_TO_SLEEP = 0xd, - -}; - enum tb_cfg_space { TB_CFG_HOPS = 0, TB_CFG_PORT = 1, diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 96561c1265ae..b512b1e2b4f2 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -1,6 +1,7 @@ /* * Thunderbolt service API * + * Copyright (C) 2014 Andreas Noever * Copyright (C) 2017, Intel Corporation * Authors: Michael Jamet * Mika Westerberg @@ -16,6 +17,22 @@ #include #include +enum tb_cfg_pkg_type { + TB_CFG_PKG_READ = 1, + TB_CFG_PKG_WRITE = 2, + TB_CFG_PKG_ERROR = 3, + TB_CFG_PKG_NOTIFY_ACK = 4, + TB_CFG_PKG_EVENT = 5, + TB_CFG_PKG_XDOMAIN_REQ = 6, + TB_CFG_PKG_XDOMAIN_RESP = 7, + TB_CFG_PKG_OVERRIDE = 8, + TB_CFG_PKG_RESET = 9, + TB_CFG_PKG_ICM_EVENT = 10, + TB_CFG_PKG_ICM_CMD = 11, + TB_CFG_PKG_ICM_RESP = 12, + TB_CFG_PKG_PREPARE_TO_SLEEP = 13, +}; + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory From 9e99b9f4d5c36340dabda6d14053195b2a43796b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:32 +0300 Subject: [PATCH 0459/2177] thunderbolt: Move thunderbolt domain structure to thunderbolt.h These are needed by Thunderbolt services so move them to thunderbolt.h to make sure they are available outside of drivers/thunderbolt. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. 
Miller --- drivers/thunderbolt/tb.h | 42 ---------------------------------- include/linux/thunderbolt.h | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index e0deee4f1eb0..2fefe76621ca 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -39,20 +39,6 @@ struct tb_switch_nvm { bool authenticating; }; -/** - * enum tb_security_level - Thunderbolt security level - * @TB_SECURITY_NONE: No security, legacy mode - * @TB_SECURITY_USER: User approval required at minimum - * @TB_SECURITY_SECURE: One time saved key required at minimum - * @TB_SECURITY_DPONLY: Only tunnel Display port (and USB) - */ -enum tb_security_level { - TB_SECURITY_NONE, - TB_SECURITY_USER, - TB_SECURITY_SECURE, - TB_SECURITY_DPONLY, -}; - #define TB_SWITCH_KEY_SIZE 32 /* Each physical port contains 2 links on modern controllers */ #define TB_SWITCH_LINKS_PER_PHY_PORT 2 @@ -223,33 +209,6 @@ struct tb_cm_ops { int (*disconnect_pcie_paths)(struct tb *tb); }; -/** - * struct tb - main thunderbolt bus structure - * @dev: Domain device - * @lock: Big lock. Must be held when accessing any struct - * tb_switch / struct tb_port. - * @nhi: Pointer to the NHI structure - * @ctl: Control channel for this domain - * @wq: Ordered workqueue for all domain specific work - * @root_switch: Root switch of this domain - * @cm_ops: Connection manager specific operations vector - * @index: Linux assigned domain number - * @security_level: Current security level - * @privdata: Private connection manager specific data - */ -struct tb { - struct device dev; - struct mutex lock; - struct tb_nhi *nhi; - struct tb_ctl *ctl; - struct workqueue_struct *wq; - struct tb_switch *root_switch; - const struct tb_cm_ops *cm_ops; - int index; - enum tb_security_level security_level; - unsigned long privdata[0]; -}; - static inline void *tb_priv(struct tb *tb) { return (void *)tb->privdata; @@ -368,7 +327,6 @@ static inline int tb_port_write(struct tb_port *port, const void *buffer, struct tb *icm_probe(struct tb_nhi *nhi); struct tb *tb_probe(struct tb_nhi *nhi); -extern struct bus_type tb_bus_type; extern struct device_type tb_domain_type; extern struct device_type tb_switch_type; diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index b512b1e2b4f2..910b1bf92112 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -14,7 +14,9 @@ #ifndef THUNDERBOLT_H_ #define THUNDERBOLT_H_ +#include #include +#include #include enum tb_cfg_pkg_type { @@ -33,6 +35,49 @@ enum tb_cfg_pkg_type { TB_CFG_PKG_PREPARE_TO_SLEEP = 13, }; +/** + * enum tb_security_level - Thunderbolt security level + * @TB_SECURITY_NONE: No security, legacy mode + * @TB_SECURITY_USER: User approval required at minimum + * @TB_SECURITY_SECURE: One time saved key required at minimum + * @TB_SECURITY_DPONLY: Only tunnel Display port (and USB) + */ +enum tb_security_level { + TB_SECURITY_NONE, + TB_SECURITY_USER, + TB_SECURITY_SECURE, + TB_SECURITY_DPONLY, +}; + +/** + * struct tb - main thunderbolt bus structure + * @dev: Domain device + * @lock: Big lock. Must be held when accessing any struct + * tb_switch / struct tb_port. 
+ * @nhi: Pointer to the NHI structure + * @ctl: Control channel for this domain + * @wq: Ordered workqueue for all domain specific work + * @root_switch: Root switch of this domain + * @cm_ops: Connection manager specific operations vector + * @index: Linux assigned domain number + * @security_level: Current security level + * @privdata: Private connection manager specific data + */ +struct tb { + struct device dev; + struct mutex lock; + struct tb_nhi *nhi; + struct tb_ctl *ctl; + struct workqueue_struct *wq; + struct tb_switch *root_switch; + const struct tb_cm_ops *cm_ops; + int index; + enum tb_security_level security_level; + unsigned long privdata[0]; +}; + +extern struct bus_type tb_bus_type; + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory From e69b71f8458b78a2ef44e3d07374a8f46e45123d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:33 +0300 Subject: [PATCH 0460/2177] thunderbolt: Move tb_switch_phy_port_from_link() to thunderbolt.h A Thunderbolt service might need to find the physical port from a link the cable is connected to. For instance networking driver uses this information to generate MAC address according the Apple ThunderboltIP protocol. Move this function to thunderbolt.h and rename it to tb_phy_port_from_link() to reflect the fact that it does not take switch as parameter. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/icm.c | 2 +- drivers/thunderbolt/tb.h | 7 ------- include/linux/thunderbolt.h | 7 +++++++ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index 53250fc057e1..8c22b91ed040 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -89,7 +89,7 @@ static inline struct tb *icm_to_tb(struct icm *icm) static inline u8 phy_port_from_route(u64 route, u8 depth) { - return tb_switch_phy_port_from_link(route >> ((depth - 1) * 8)); + return tb_phy_port_from_link(route >> ((depth - 1) * 8)); } static inline u8 dual_link_from_link(u8 link) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 2fefe76621ca..ea21d927bd09 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -40,8 +40,6 @@ struct tb_switch_nvm { }; #define TB_SWITCH_KEY_SIZE 32 -/* Each physical port contains 2 links on modern controllers */ -#define TB_SWITCH_LINKS_PER_PHY_PORT 2 /** * struct tb_switch - a thunderbolt switch @@ -367,11 +365,6 @@ struct tb_switch *tb_switch_find_by_link_depth(struct tb *tb, u8 link, u8 depth); struct tb_switch *tb_switch_find_by_uuid(struct tb *tb, const uuid_t *uuid); -static inline unsigned int tb_switch_phy_port_from_link(unsigned int link) -{ - return (link - 1) / TB_SWITCH_LINKS_PER_PHY_PORT; -} - static inline void tb_switch_put(struct tb_switch *sw) { put_device(&sw->dev); diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 910b1bf92112..43b8d1e09341 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -78,6 +78,13 @@ struct tb { extern struct bus_type tb_bus_type; +#define TB_LINKS_PER_PHY_PORT 2 + +static inline unsigned int tb_phy_port_from_link(unsigned int link) +{ + return (link - 1) / TB_LINKS_PER_PHY_PORT; +} + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory From d1ff70241a275133e1a0258b7c23588b122276c8 Mon Sep 17 00:00:00 
2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:34 +0300 Subject: [PATCH 0461/2177] thunderbolt: Add support for XDomain discovery protocol When two hosts are connected over a Thunderbolt cable, there is a protocol they can use to communicate capabilities supported by the host. The discovery protocol uses automatically configured control channel (ring 0) and is build on top of request/response transactions using special XDomain primitives provided by the Thunderbolt base protocol. The capabilities consists of a root directory block of basic properties used for identification of the host, and then there can be zero or more directories each describing a Thunderbolt service and its capabilities. Once both sides have discovered what is supported the two hosts can setup high-speed DMA paths and transfer data to the other side using whatever protocol was agreed based on the properties. The software protocol used to communicate which DMA paths to enable is service specific. This patch adds support for the XDomain discovery protocol to the Thunderbolt bus. We model each remote host connection as a Linux XDomain device. For each Thunderbolt service found supported on the XDomain device, we create Linux Thunderbolt service device which Thunderbolt service drivers can then bind to based on the protocol identification information retrieved from the property directory describing the service. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- .../ABI/testing/sysfs-bus-thunderbolt | 48 + drivers/thunderbolt/Makefile | 2 +- drivers/thunderbolt/ctl.c | 11 +- drivers/thunderbolt/ctl.h | 2 +- drivers/thunderbolt/domain.c | 197 ++- drivers/thunderbolt/icm.c | 218 ++- drivers/thunderbolt/nhi.h | 2 + drivers/thunderbolt/switch.c | 7 +- drivers/thunderbolt/tb.h | 39 +- drivers/thunderbolt/tb_msgs.h | 123 ++ drivers/thunderbolt/xdomain.c | 1576 +++++++++++++++++ include/linux/mod_devicetable.h | 26 + include/linux/thunderbolt.h | 242 +++ scripts/mod/devicetable-offsets.c | 7 + scripts/mod/file2alias.c | 25 + 15 files changed, 2507 insertions(+), 18 deletions(-) create mode 100644 drivers/thunderbolt/xdomain.c diff --git a/Documentation/ABI/testing/sysfs-bus-thunderbolt b/Documentation/ABI/testing/sysfs-bus-thunderbolt index 392bef5bd399..93798c02e28b 100644 --- a/Documentation/ABI/testing/sysfs-bus-thunderbolt +++ b/Documentation/ABI/testing/sysfs-bus-thunderbolt @@ -110,3 +110,51 @@ Description: When new NVM image is written to the non-active NVM is directly the status value from the DMA configuration based mailbox before the device is power cycled. Writing 0 here clears the status. + +What: /sys/bus/thunderbolt/devices/./key +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains name of the property directory the XDomain + service exposes. This entry describes the protocol in + question. Following directories are already reserved by + the Apple XDomain specification: + + network: IP/ethernet over Thunderbolt + targetdm: Target disk mode protocol over Thunderbolt + extdisp: External display mode protocol over Thunderbolt + +What: /sys/bus/thunderbolt/devices/./modalias +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: Stores the same MODALIAS value emitted by uevent for + the XDomain service. 
Format: tbtsvc:kSpNvNrN + +What: /sys/bus/thunderbolt/devices/./prtcid +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains XDomain protocol identifier the XDomain + service supports. + +What: /sys/bus/thunderbolt/devices/./prtcvers +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains XDomain protocol version the XDomain + service supports. + +What: /sys/bus/thunderbolt/devices/./prtcrevs +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains XDomain software version the XDomain + service supports. + +What: /sys/bus/thunderbolt/devices/./prtcstns +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains XDomain service specific settings as + bitmask. Format: %x diff --git a/drivers/thunderbolt/Makefile b/drivers/thunderbolt/Makefile index 7afd21f5383a..f2f0de27252b 100644 --- a/drivers/thunderbolt/Makefile +++ b/drivers/thunderbolt/Makefile @@ -1,3 +1,3 @@ obj-${CONFIG_THUNDERBOLT} := thunderbolt.o thunderbolt-objs := nhi.o ctl.o tb.o switch.o cap.o path.o tunnel_pci.o eeprom.o -thunderbolt-objs += domain.o dma_port.o icm.o property.o +thunderbolt-objs += domain.o dma_port.o icm.o property.o xdomain.o diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index e6a4c9458c76..46e393c5fd1d 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -368,10 +368,10 @@ static int tb_ctl_tx(struct tb_ctl *ctl, const void *data, size_t len, /** * tb_ctl_handle_event() - acknowledge a plug event, invoke ctl->callback */ -static void tb_ctl_handle_event(struct tb_ctl *ctl, enum tb_cfg_pkg_type type, +static bool tb_ctl_handle_event(struct tb_ctl *ctl, enum tb_cfg_pkg_type type, struct ctl_pkg *pkg, size_t size) { - ctl->callback(ctl->callback_data, type, pkg->buffer, size); + return ctl->callback(ctl->callback_data, type, pkg->buffer, size); } static void tb_ctl_rx_submit(struct ctl_pkg *pkg) @@ -444,6 +444,8 @@ static void tb_ctl_rx_callback(struct tb_ring *ring, struct ring_frame *frame, break; case TB_CFG_PKG_EVENT: + case TB_CFG_PKG_XDOMAIN_RESP: + case TB_CFG_PKG_XDOMAIN_REQ: if (*(__be32 *)(pkg->buffer + frame->size) != crc32) { tb_ctl_err(pkg->ctl, "RX: checksum mismatch, dropping packet\n"); @@ -451,8 +453,9 @@ static void tb_ctl_rx_callback(struct tb_ring *ring, struct ring_frame *frame, } /* Fall through */ case TB_CFG_PKG_ICM_EVENT: - tb_ctl_handle_event(pkg->ctl, frame->eof, pkg, frame->size); - goto rx; + if (tb_ctl_handle_event(pkg->ctl, frame->eof, pkg, frame->size)) + goto rx; + break; default: break; diff --git a/drivers/thunderbolt/ctl.h b/drivers/thunderbolt/ctl.h index d0f21e1e0b8b..85c49dd301ea 100644 --- a/drivers/thunderbolt/ctl.h +++ b/drivers/thunderbolt/ctl.h @@ -16,7 +16,7 @@ /* control channel */ struct tb_ctl; -typedef void (*event_cb)(void *data, enum tb_cfg_pkg_type type, +typedef bool (*event_cb)(void *data, enum tb_cfg_pkg_type type, const void *buf, size_t size); struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data); diff --git a/drivers/thunderbolt/domain.c b/drivers/thunderbolt/domain.c index 9f2dcd48974d..9b90115319ce 100644 --- a/drivers/thunderbolt/domain.c +++ b/drivers/thunderbolt/domain.c @@ -20,6 +20,98 @@ static DEFINE_IDA(tb_domain_ida); +static bool match_service_id(const struct tb_service_id *id, + const struct tb_service *svc) +{ + if (id->match_flags & TBSVC_MATCH_PROTOCOL_KEY) { + if 
(strcmp(id->protocol_key, svc->key)) + return false; + } + + if (id->match_flags & TBSVC_MATCH_PROTOCOL_ID) { + if (id->protocol_id != svc->prtcid) + return false; + } + + if (id->match_flags & TBSVC_MATCH_PROTOCOL_VERSION) { + if (id->protocol_version != svc->prtcvers) + return false; + } + + if (id->match_flags & TBSVC_MATCH_PROTOCOL_VERSION) { + if (id->protocol_revision != svc->prtcrevs) + return false; + } + + return true; +} + +static const struct tb_service_id *__tb_service_match(struct device *dev, + struct device_driver *drv) +{ + struct tb_service_driver *driver; + const struct tb_service_id *ids; + struct tb_service *svc; + + svc = tb_to_service(dev); + if (!svc) + return NULL; + + driver = container_of(drv, struct tb_service_driver, driver); + if (!driver->id_table) + return NULL; + + for (ids = driver->id_table; ids->match_flags != 0; ids++) { + if (match_service_id(ids, svc)) + return ids; + } + + return NULL; +} + +static int tb_service_match(struct device *dev, struct device_driver *drv) +{ + return !!__tb_service_match(dev, drv); +} + +static int tb_service_probe(struct device *dev) +{ + struct tb_service *svc = tb_to_service(dev); + struct tb_service_driver *driver; + const struct tb_service_id *id; + + driver = container_of(dev->driver, struct tb_service_driver, driver); + id = __tb_service_match(dev, &driver->driver); + + return driver->probe(svc, id); +} + +static int tb_service_remove(struct device *dev) +{ + struct tb_service *svc = tb_to_service(dev); + struct tb_service_driver *driver; + + driver = container_of(dev->driver, struct tb_service_driver, driver); + if (driver->remove) + driver->remove(svc); + + return 0; +} + +static void tb_service_shutdown(struct device *dev) +{ + struct tb_service_driver *driver; + struct tb_service *svc; + + svc = tb_to_service(dev); + if (!svc || !dev->driver) + return; + + driver = container_of(dev->driver, struct tb_service_driver, driver); + if (driver->shutdown) + driver->shutdown(svc); +} + static const char * const tb_security_names[] = { [TB_SECURITY_NONE] = "none", [TB_SECURITY_USER] = "user", @@ -52,6 +144,10 @@ static const struct attribute_group *domain_attr_groups[] = { struct bus_type tb_bus_type = { .name = "thunderbolt", + .match = tb_service_match, + .probe = tb_service_probe, + .remove = tb_service_remove, + .shutdown = tb_service_shutdown, }; static void tb_domain_release(struct device *dev) @@ -128,17 +224,26 @@ err_free: return NULL; } -static void tb_domain_event_cb(void *data, enum tb_cfg_pkg_type type, +static bool tb_domain_event_cb(void *data, enum tb_cfg_pkg_type type, const void *buf, size_t size) { struct tb *tb = data; if (!tb->cm_ops->handle_event) { tb_warn(tb, "domain does not have event handler\n"); - return; + return true; } - tb->cm_ops->handle_event(tb, type, buf, size); + switch (type) { + case TB_CFG_PKG_XDOMAIN_REQ: + case TB_CFG_PKG_XDOMAIN_RESP: + return tb_xdomain_handle_request(tb, type, buf, size); + + default: + tb->cm_ops->handle_event(tb, type, buf, size); + } + + return true; } /** @@ -443,9 +548,92 @@ int tb_domain_disconnect_pcie_paths(struct tb *tb) return tb->cm_ops->disconnect_pcie_paths(tb); } +/** + * tb_domain_approve_xdomain_paths() - Enable DMA paths for XDomain + * @tb: Domain enabling the DMA paths + * @xd: XDomain DMA paths are created to + * + * Calls connection manager specific method to enable DMA paths to the + * XDomain in question. + * + * Return: 0% in case of success and negative errno otherwise. 
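The match/probe plumbing above is what lets a Thunderbolt service driver bind purely on the identification carried in the remote property directory. A hypothetical driver skeleton is sketched below: the "network" key comes from the reserved directory names documented in the ABI section earlier, the protocol id value is a placeholder, and tb_register_service_driver()/tb_unregister_service_driver() are assumed to be the registration helpers this patch exports alongside tb_bus_type:

#include <linux/module.h>
#include <linux/thunderbolt.h>

static int example_svc_probe(struct tb_service *svc,
			     const struct tb_service_id *id)
{
	/* Set up rings / DMA paths for the matched service here. */
	return 0;
}

static void example_svc_remove(struct tb_service *svc)
{
	/* Tear down whatever probe() created. */
}

static const struct tb_service_id example_svc_ids[] = {
	{
		.match_flags = TBSVC_MATCH_PROTOCOL_KEY |
			       TBSVC_MATCH_PROTOCOL_ID,
		.protocol_key = "network",	/* reserved key, see ABI above */
		.protocol_id = 1,		/* placeholder value */
	},
	{ },
};
MODULE_DEVICE_TABLE(tbsvc, example_svc_ids);

static struct tb_service_driver example_svc_driver = {
	.driver = {
		.name = "example-tbsvc",
		.owner = THIS_MODULE,
	},
	.probe = example_svc_probe,
	.remove = example_svc_remove,
	.id_table = example_svc_ids,
};

static int __init example_svc_init(void)
{
	/* Assumed helper: registers the driver on tb_bus_type. */
	return tb_register_service_driver(&example_svc_driver);
}
module_init(example_svc_init);

static void __exit example_svc_exit(void)
{
	tb_unregister_service_driver(&example_svc_driver);
}
module_exit(example_svc_exit);

MODULE_LICENSE("GPL");
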
In + * particular returns %-ENOTSUPP if the connection manager + * implementation does not support XDomains. + */ +int tb_domain_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd) +{ + if (!tb->cm_ops->approve_xdomain_paths) + return -ENOTSUPP; + + return tb->cm_ops->approve_xdomain_paths(tb, xd); +} + +/** + * tb_domain_disconnect_xdomain_paths() - Disable DMA paths for XDomain + * @tb: Domain disabling the DMA paths + * @xd: XDomain whose DMA paths are disconnected + * + * Calls connection manager specific method to disconnect DMA paths to + * the XDomain in question. + * + * Return: 0% in case of success and negative errno otherwise. In + * particular returns %-ENOTSUPP if the connection manager + * implementation does not support XDomains. + */ +int tb_domain_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd) +{ + if (!tb->cm_ops->disconnect_xdomain_paths) + return -ENOTSUPP; + + return tb->cm_ops->disconnect_xdomain_paths(tb, xd); +} + +static int disconnect_xdomain(struct device *dev, void *data) +{ + struct tb_xdomain *xd; + struct tb *tb = data; + int ret = 0; + + xd = tb_to_xdomain(dev); + if (xd && xd->tb == tb) + ret = tb_xdomain_disable_paths(xd); + + return ret; +} + +/** + * tb_domain_disconnect_all_paths() - Disconnect all paths for the domain + * @tb: Domain whose paths are disconnected + * + * This function can be used to disconnect all paths (PCIe, XDomain) for + * example in preparation for host NVM firmware upgrade. After this is + * called the paths cannot be established without resetting the switch. + * + * Return: %0 in case of success and negative errno otherwise. + */ +int tb_domain_disconnect_all_paths(struct tb *tb) +{ + int ret; + + ret = tb_domain_disconnect_pcie_paths(tb); + if (ret) + return ret; + + return bus_for_each_dev(&tb_bus_type, NULL, tb, disconnect_xdomain); +} + int tb_domain_init(void) { - return bus_register(&tb_bus_type); + int ret; + + ret = tb_xdomain_init(); + if (ret) + return ret; + ret = bus_register(&tb_bus_type); + if (ret) + tb_xdomain_exit(); + + return ret; } void tb_domain_exit(void) @@ -453,4 +641,5 @@ void tb_domain_exit(void) bus_unregister(&tb_bus_type); ida_destroy(&tb_domain_ida); tb_switch_exit(); + tb_xdomain_exit(); } diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index 8c22b91ed040..ab02d13f40b7 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -60,6 +60,8 @@ * @get_route: Find a route string for given switch * @device_connected: Handle device connected ICM message * @device_disconnected: Handle device disconnected ICM message + * @xdomain_connected - Handle XDomain connected ICM message + * @xdomain_disconnected - Handle XDomain disconnected ICM message */ struct icm { struct mutex request_lock; @@ -74,6 +76,10 @@ struct icm { const struct icm_pkg_header *hdr); void (*device_disconnected)(struct tb *tb, const struct icm_pkg_header *hdr); + void (*xdomain_connected)(struct tb *tb, + const struct icm_pkg_header *hdr); + void (*xdomain_disconnected)(struct tb *tb, + const struct icm_pkg_header *hdr); }; struct icm_notification { @@ -89,7 +95,10 @@ static inline struct tb *icm_to_tb(struct icm *icm) static inline u8 phy_port_from_route(u64 route, u8 depth) { - return tb_phy_port_from_link(route >> ((depth - 1) * 8)); + u8 link; + + link = depth ? 
route >> ((depth - 1) * 8) : route; + return tb_phy_port_from_link(link); } static inline u8 dual_link_from_link(u8 link) @@ -320,6 +329,51 @@ static int icm_fr_challenge_switch_key(struct tb *tb, struct tb_switch *sw, return 0; } +static int icm_fr_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd) +{ + struct icm_fr_pkg_approve_xdomain_response reply; + struct icm_fr_pkg_approve_xdomain request; + int ret; + + memset(&request, 0, sizeof(request)); + request.hdr.code = ICM_APPROVE_XDOMAIN; + request.link_info = xd->depth << ICM_LINK_INFO_DEPTH_SHIFT | xd->link; + memcpy(&request.remote_uuid, xd->remote_uuid, sizeof(*xd->remote_uuid)); + + request.transmit_path = xd->transmit_path; + request.transmit_ring = xd->transmit_ring; + request.receive_path = xd->receive_path; + request.receive_ring = xd->receive_ring; + + memset(&reply, 0, sizeof(reply)); + ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), + 1, ICM_TIMEOUT); + if (ret) + return ret; + + if (reply.hdr.flags & ICM_FLAGS_ERROR) + return -EIO; + + return 0; +} + +static int icm_fr_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd) +{ + u8 phy_port; + u8 cmd; + + phy_port = tb_phy_port_from_link(xd->link); + if (phy_port == 0) + cmd = NHI_MAILBOX_DISCONNECT_PA; + else + cmd = NHI_MAILBOX_DISCONNECT_PB; + + nhi_mailbox_cmd(tb->nhi, cmd, 1); + usleep_range(10, 50); + nhi_mailbox_cmd(tb->nhi, cmd, 2); + return 0; +} + static void remove_switch(struct tb_switch *sw) { struct tb_switch *parent_sw; @@ -475,6 +529,141 @@ icm_fr_device_disconnected(struct tb *tb, const struct icm_pkg_header *hdr) tb_switch_put(sw); } +static void remove_xdomain(struct tb_xdomain *xd) +{ + struct tb_switch *sw; + + sw = tb_to_switch(xd->dev.parent); + tb_port_at(xd->route, sw)->xdomain = NULL; + tb_xdomain_remove(xd); +} + +static void +icm_fr_xdomain_connected(struct tb *tb, const struct icm_pkg_header *hdr) +{ + const struct icm_fr_event_xdomain_connected *pkg = + (const struct icm_fr_event_xdomain_connected *)hdr; + struct tb_xdomain *xd; + struct tb_switch *sw; + u8 link, depth; + bool approved; + u64 route; + + /* + * After NVM upgrade adding root switch device fails because we + * initiated reset. During that time ICM might still send + * XDomain connected message which we ignore here. + */ + if (!tb->root_switch) + return; + + link = pkg->link_info & ICM_LINK_INFO_LINK_MASK; + depth = (pkg->link_info & ICM_LINK_INFO_DEPTH_MASK) >> + ICM_LINK_INFO_DEPTH_SHIFT; + approved = pkg->link_info & ICM_LINK_INFO_APPROVED; + + if (link > ICM_MAX_LINK || depth > ICM_MAX_DEPTH) { + tb_warn(tb, "invalid topology %u.%u, ignoring\n", link, depth); + return; + } + + route = get_route(pkg->local_route_hi, pkg->local_route_lo); + + xd = tb_xdomain_find_by_uuid(tb, &pkg->remote_uuid); + if (xd) { + u8 xd_phy_port, phy_port; + + xd_phy_port = phy_port_from_route(xd->route, xd->depth); + phy_port = phy_port_from_route(route, depth); + + if (xd->depth == depth && xd_phy_port == phy_port) { + xd->link = link; + xd->route = route; + xd->is_unplugged = false; + tb_xdomain_put(xd); + return; + } + + /* + * If we find an existing XDomain connection remove it + * now. We need to go through login handshake and + * everything anyway to be able to re-establish the + * connection. + */ + remove_xdomain(xd); + tb_xdomain_put(xd); + } + + /* + * Look if there already exists an XDomain in the same place + * than the new one and in that case remove it because it is + * most likely another host that got disconnected. 
+ */ + xd = tb_xdomain_find_by_link_depth(tb, link, depth); + if (!xd) { + u8 dual_link; + + dual_link = dual_link_from_link(link); + if (dual_link) + xd = tb_xdomain_find_by_link_depth(tb, dual_link, + depth); + } + if (xd) { + remove_xdomain(xd); + tb_xdomain_put(xd); + } + + /* + * If the user disconnected a switch during suspend and + * connected another host to the same port, remove the switch + * first. + */ + sw = get_switch_at_route(tb->root_switch, route); + if (sw) + remove_switch(sw); + + sw = tb_switch_find_by_link_depth(tb, link, depth); + if (!sw) { + tb_warn(tb, "no switch exists at %u.%u, ignoring\n", link, + depth); + return; + } + + xd = tb_xdomain_alloc(sw->tb, &sw->dev, route, + &pkg->local_uuid, &pkg->remote_uuid); + if (!xd) { + tb_switch_put(sw); + return; + } + + xd->link = link; + xd->depth = depth; + + tb_port_at(route, sw)->xdomain = xd; + + tb_xdomain_add(xd); + tb_switch_put(sw); +} + +static void +icm_fr_xdomain_disconnected(struct tb *tb, const struct icm_pkg_header *hdr) +{ + const struct icm_fr_event_xdomain_disconnected *pkg = + (const struct icm_fr_event_xdomain_disconnected *)hdr; + struct tb_xdomain *xd; + + /* + * If the connection is through one or multiple devices, the + * XDomain device is removed along with them so it is fine if we + * cannot find it here. + */ + xd = tb_xdomain_find_by_uuid(tb, &pkg->remote_uuid); + if (xd) { + remove_xdomain(xd); + tb_xdomain_put(xd); + } +} + static struct pci_dev *get_upstream_port(struct pci_dev *pdev) { struct pci_dev *parent; @@ -594,6 +783,12 @@ static void icm_handle_notification(struct work_struct *work) case ICM_EVENT_DEVICE_DISCONNECTED: icm->device_disconnected(tb, n->pkg); break; + case ICM_EVENT_XDOMAIN_CONNECTED: + icm->xdomain_connected(tb, n->pkg); + break; + case ICM_EVENT_XDOMAIN_DISCONNECTED: + icm->xdomain_disconnected(tb, n->pkg); + break; } mutex_unlock(&tb->lock); @@ -927,6 +1122,10 @@ static void icm_unplug_children(struct tb_switch *sw) if (tb_is_upstream_port(port)) continue; + if (port->xdomain) { + port->xdomain->is_unplugged = true; + continue; + } if (!port->remote) continue; @@ -943,6 +1142,13 @@ static void icm_free_unplugged_children(struct tb_switch *sw) if (tb_is_upstream_port(port)) continue; + + if (port->xdomain && port->xdomain->is_unplugged) { + tb_xdomain_remove(port->xdomain); + port->xdomain = NULL; + continue; + } + if (!port->remote) continue; @@ -1009,8 +1215,10 @@ static int icm_start(struct tb *tb) tb->root_switch->no_nvm_upgrade = x86_apple_machine; ret = tb_switch_add(tb->root_switch); - if (ret) + if (ret) { tb_switch_put(tb->root_switch); + tb->root_switch = NULL; + } return ret; } @@ -1042,6 +1250,8 @@ static const struct tb_cm_ops icm_fr_ops = { .add_switch_key = icm_fr_add_switch_key, .challenge_switch_key = icm_fr_challenge_switch_key, .disconnect_pcie_paths = icm_disconnect_pcie_paths, + .approve_xdomain_paths = icm_fr_approve_xdomain_paths, + .disconnect_xdomain_paths = icm_fr_disconnect_xdomain_paths, }; struct tb *icm_probe(struct tb_nhi *nhi) @@ -1064,6 +1274,8 @@ struct tb *icm_probe(struct tb_nhi *nhi) icm->get_route = icm_fr_get_route; icm->device_connected = icm_fr_device_connected; icm->device_disconnected = icm_fr_device_disconnected; + icm->xdomain_connected = icm_fr_xdomain_connected; + icm->xdomain_disconnected = icm_fr_xdomain_disconnected; tb->cm_ops = &icm_fr_ops; break; @@ -1077,6 +1289,8 @@ struct tb *icm_probe(struct tb_nhi *nhi) icm->get_route = icm_ar_get_route; icm->device_connected = icm_fr_device_connected; icm->device_disconnected = 
icm_fr_device_disconnected; + icm->xdomain_connected = icm_fr_xdomain_connected; + icm->xdomain_disconnected = icm_fr_xdomain_disconnected; tb->cm_ops = &icm_fr_ops; break; } diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 5b5bb2c436be..0e05828983db 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -157,6 +157,8 @@ enum nhi_mailbox_cmd { NHI_MAILBOX_SAVE_DEVS = 0x05, NHI_MAILBOX_DISCONNECT_PCIE_PATHS = 0x06, NHI_MAILBOX_DRV_UNLOADS = 0x07, + NHI_MAILBOX_DISCONNECT_PA = 0x10, + NHI_MAILBOX_DISCONNECT_PB = 0x11, NHI_MAILBOX_ALLOW_ALL_DEVS = 0x23, }; diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 53f40c57df59..dfc357d33e1e 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -171,11 +171,11 @@ static int nvm_authenticate_host(struct tb_switch *sw) /* * Root switch NVM upgrade requires that we disconnect the - * existing PCIe paths first (in case it is not in safe mode + * existing paths first (in case it is not in safe mode * already). */ if (!sw->safe_mode) { - ret = tb_domain_disconnect_pcie_paths(sw->tb); + ret = tb_domain_disconnect_all_paths(sw->tb); if (ret) return ret; /* @@ -1363,6 +1363,9 @@ void tb_switch_remove(struct tb_switch *sw) if (sw->ports[i].remote) tb_switch_remove(sw->ports[i].remote->sw); sw->ports[i].remote = NULL; + if (sw->ports[i].xdomain) + tb_xdomain_remove(sw->ports[i].xdomain); + sw->ports[i].xdomain = NULL; } if (!sw->is_unplugged) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index ea21d927bd09..74af9d4929ab 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -9,6 +9,7 @@ #include #include +#include #include #include "tb_regs.h" @@ -109,14 +110,25 @@ struct tb_switch { /** * struct tb_port - a thunderbolt port, part of a tb_switch + * @config: Cached port configuration read from registers + * @sw: Switch the port belongs to + * @remote: Remote port (%NULL if not connected) + * @xdomain: Remote host (%NULL if not connected) + * @cap_phy: Offset, zero if not found + * @port: Port number on switch + * @disabled: Disabled by eeprom + * @dual_link_port: If the switch is connected using two ports, points + * to the other port. + * @link_nr: Is this primary or secondary port on the dual_link. 
*/ struct tb_port { struct tb_regs_port_header config; struct tb_switch *sw; - struct tb_port *remote; /* remote port, NULL if not connected */ - int cap_phy; /* offset, zero if not found */ - u8 port; /* port number on switch */ - bool disabled; /* disabled by eeprom */ + struct tb_port *remote; + struct tb_xdomain *xdomain; + int cap_phy; + u8 port; + bool disabled; struct tb_port *dual_link_port; u8 link_nr:1; }; @@ -189,6 +201,8 @@ struct tb_path { * @add_switch_key: Add key to switch * @challenge_switch_key: Challenge switch using key * @disconnect_pcie_paths: Disconnects PCIe paths before NVM update + * @approve_xdomain_paths: Approve (establish) XDomain DMA paths + * @disconnect_xdomain_paths: Disconnect XDomain DMA paths */ struct tb_cm_ops { int (*driver_ready)(struct tb *tb); @@ -205,6 +219,8 @@ struct tb_cm_ops { int (*challenge_switch_key)(struct tb *tb, struct tb_switch *sw, const u8 *challenge, u8 *response); int (*disconnect_pcie_paths)(struct tb *tb); + int (*approve_xdomain_paths)(struct tb *tb, struct tb_xdomain *xd); + int (*disconnect_xdomain_paths)(struct tb *tb, struct tb_xdomain *xd); }; static inline void *tb_priv(struct tb *tb) @@ -331,6 +347,8 @@ extern struct device_type tb_switch_type; int tb_domain_init(void); void tb_domain_exit(void); void tb_switch_exit(void); +int tb_xdomain_init(void); +void tb_xdomain_exit(void); struct tb *tb_domain_alloc(struct tb_nhi *nhi, size_t privsize); int tb_domain_add(struct tb *tb); @@ -343,6 +361,9 @@ int tb_domain_approve_switch(struct tb *tb, struct tb_switch *sw); int tb_domain_approve_switch_key(struct tb *tb, struct tb_switch *sw); int tb_domain_challenge_switch_key(struct tb *tb, struct tb_switch *sw); int tb_domain_disconnect_pcie_paths(struct tb *tb); +int tb_domain_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd); +int tb_domain_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd); +int tb_domain_disconnect_all_paths(struct tb *tb); static inline void tb_domain_put(struct tb *tb) { @@ -422,4 +443,14 @@ static inline u64 tb_downstream_route(struct tb_port *port) | ((u64) port->port << (port->sw->config.depth * 8)); } +bool tb_xdomain_handle_request(struct tb *tb, enum tb_cfg_pkg_type type, + const void *buf, size_t size); +struct tb_xdomain *tb_xdomain_alloc(struct tb *tb, struct device *parent, + u64 route, const uuid_t *local_uuid, + const uuid_t *remote_uuid); +void tb_xdomain_add(struct tb_xdomain *xd); +void tb_xdomain_remove(struct tb_xdomain *xd); +struct tb_xdomain *tb_xdomain_find_by_link_depth(struct tb *tb, u8 link, + u8 depth); + #endif diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h index f2b2550cd97c..b0a092baa605 100644 --- a/drivers/thunderbolt/tb_msgs.h +++ b/drivers/thunderbolt/tb_msgs.h @@ -101,11 +101,14 @@ enum icm_pkg_code { ICM_CHALLENGE_DEVICE = 0x5, ICM_ADD_DEVICE_KEY = 0x6, ICM_GET_ROUTE = 0xa, + ICM_APPROVE_XDOMAIN = 0x10, }; enum icm_event_code { ICM_EVENT_DEVICE_CONNECTED = 3, ICM_EVENT_DEVICE_DISCONNECTED = 4, + ICM_EVENT_XDOMAIN_CONNECTED = 6, + ICM_EVENT_XDOMAIN_DISCONNECTED = 7, }; struct icm_pkg_header { @@ -188,6 +191,25 @@ struct icm_fr_event_device_disconnected { u16 link_info; }; +struct icm_fr_event_xdomain_connected { + struct icm_pkg_header hdr; + u16 reserved; + u16 link_info; + uuid_t remote_uuid; + uuid_t local_uuid; + u32 local_route_hi; + u32 local_route_lo; + u32 remote_route_hi; + u32 remote_route_lo; +}; + +struct icm_fr_event_xdomain_disconnected { + struct icm_pkg_header hdr; + u16 reserved; + u16 link_info; + uuid_t 
remote_uuid; +}; + struct icm_fr_pkg_add_device_key { struct icm_pkg_header hdr; uuid_t ep_uuid; @@ -224,6 +246,28 @@ struct icm_fr_pkg_challenge_device_response { u32 response[8]; }; +struct icm_fr_pkg_approve_xdomain { + struct icm_pkg_header hdr; + u16 reserved; + u16 link_info; + uuid_t remote_uuid; + u16 transmit_path; + u16 transmit_ring; + u16 receive_path; + u16 receive_ring; +}; + +struct icm_fr_pkg_approve_xdomain_response { + struct icm_pkg_header hdr; + u16 reserved; + u16 link_info; + uuid_t remote_uuid; + u16 transmit_path; + u16 transmit_ring; + u16 receive_path; + u16 receive_ring; +}; + /* Alpine Ridge only messages */ struct icm_ar_pkg_get_route { @@ -240,4 +284,83 @@ struct icm_ar_pkg_get_route_response { u32 route_lo; }; +/* XDomain messages */ + +struct tb_xdomain_header { + u32 route_hi; + u32 route_lo; + u32 length_sn; +}; + +#define TB_XDOMAIN_LENGTH_MASK GENMASK(5, 0) +#define TB_XDOMAIN_SN_MASK GENMASK(28, 27) +#define TB_XDOMAIN_SN_SHIFT 27 + +enum tb_xdp_type { + UUID_REQUEST_OLD = 1, + UUID_RESPONSE = 2, + PROPERTIES_REQUEST, + PROPERTIES_RESPONSE, + PROPERTIES_CHANGED_REQUEST, + PROPERTIES_CHANGED_RESPONSE, + ERROR_RESPONSE, + UUID_REQUEST = 12, +}; + +struct tb_xdp_header { + struct tb_xdomain_header xd_hdr; + uuid_t uuid; + u32 type; +}; + +struct tb_xdp_properties { + struct tb_xdp_header hdr; + uuid_t src_uuid; + uuid_t dst_uuid; + u16 offset; + u16 reserved; +}; + +struct tb_xdp_properties_response { + struct tb_xdp_header hdr; + uuid_t src_uuid; + uuid_t dst_uuid; + u16 offset; + u16 data_length; + u32 generation; + u32 data[0]; +}; + +/* + * Max length of data array single XDomain property response is allowed + * to carry. + */ +#define TB_XDP_PROPERTIES_MAX_DATA_LENGTH \ + (((256 - 4 - sizeof(struct tb_xdp_properties_response))) / 4) + +/* Maximum size of the total property block in dwords we allow */ +#define TB_XDP_PROPERTIES_MAX_LENGTH 500 + +struct tb_xdp_properties_changed { + struct tb_xdp_header hdr; + uuid_t src_uuid; +}; + +struct tb_xdp_properties_changed_response { + struct tb_xdp_header hdr; +}; + +enum tb_xdp_error { + ERROR_SUCCESS, + ERROR_UNKNOWN_PACKET, + ERROR_UNKNOWN_DOMAIN, + ERROR_NOT_SUPPORTED, + ERROR_NOT_READY, +}; + +struct tb_xdp_error_response { + struct tb_xdp_header hdr; + u32 error; +}; + #endif diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c new file mode 100644 index 000000000000..f2d06f6f7be9 --- /dev/null +++ b/drivers/thunderbolt/xdomain.c @@ -0,0 +1,1576 @@ +/* + * Thunderbolt XDomain discovery protocol support + * + * Copyright (C) 2017, Intel Corporation + * Authors: Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "tb.h" + +#define XDOMAIN_DEFAULT_TIMEOUT 5000 /* ms */ +#define XDOMAIN_PROPERTIES_RETRIES 60 +#define XDOMAIN_PROPERTIES_CHANGED_RETRIES 10 + +struct xdomain_request_work { + struct work_struct work; + struct tb_xdp_header *pkg; + struct tb *tb; +}; + +/* Serializes access to the properties and protocol handlers below */ +static DEFINE_MUTEX(xdomain_lock); + +/* Properties exposed to the remote domains */ +static struct tb_property_dir *xdomain_property_dir; +static u32 *xdomain_property_block; +static u32 xdomain_property_block_len; +static u32 xdomain_property_block_gen; + +/* Additional protocol handlers */ +static LIST_HEAD(protocol_handlers); + +/* UUID for XDomain discovery protocol: b638d70e-42ff-40bb-97c2-90e2c0b2ff07 */ +static const uuid_t tb_xdp_uuid = + UUID_INIT(0xb638d70e, 0x42ff, 0x40bb, + 0x97, 0xc2, 0x90, 0xe2, 0xc0, 0xb2, 0xff, 0x07); + +static bool tb_xdomain_match(const struct tb_cfg_request *req, + const struct ctl_pkg *pkg) +{ + switch (pkg->frame.eof) { + case TB_CFG_PKG_ERROR: + return true; + + case TB_CFG_PKG_XDOMAIN_RESP: { + const struct tb_xdp_header *res_hdr = pkg->buffer; + const struct tb_xdp_header *req_hdr = req->request; + u8 req_seq, res_seq; + + if (pkg->frame.size < req->response_size / 4) + return false; + + /* Make sure route matches */ + if ((res_hdr->xd_hdr.route_hi & ~BIT(31)) != + req_hdr->xd_hdr.route_hi) + return false; + if ((res_hdr->xd_hdr.route_lo) != req_hdr->xd_hdr.route_lo) + return false; + + /* Then check that the sequence number matches */ + res_seq = res_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK; + res_seq >>= TB_XDOMAIN_SN_SHIFT; + req_seq = req_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK; + req_seq >>= TB_XDOMAIN_SN_SHIFT; + if (res_seq != req_seq) + return false; + + /* Check that the XDomain protocol matches */ + if (!uuid_equal(&res_hdr->uuid, &req_hdr->uuid)) + return false; + + return true; + } + + default: + return false; + } +} + +static bool tb_xdomain_copy(struct tb_cfg_request *req, + const struct ctl_pkg *pkg) +{ + memcpy(req->response, pkg->buffer, req->response_size); + req->result.err = 0; + return true; +} + +static void response_ready(void *data) +{ + tb_cfg_request_put(data); +} + +static int __tb_xdomain_response(struct tb_ctl *ctl, const void *response, + size_t size, enum tb_cfg_pkg_type type) +{ + struct tb_cfg_request *req; + + req = tb_cfg_request_alloc(); + if (!req) + return -ENOMEM; + + req->match = tb_xdomain_match; + req->copy = tb_xdomain_copy; + req->request = response; + req->request_size = size; + req->request_type = type; + + return tb_cfg_request(ctl, req, response_ready, req); +} + +/** + * tb_xdomain_response() - Send a XDomain response message + * @xd: XDomain to send the message + * @response: Response to send + * @size: Size of the response + * @type: PDF type of the response + * + * This can be used to send a XDomain response message to the other + * domain. No response for the message is expected. 
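As a rough usage sketch (illustrative only, not part of the diff): a service driver that already holds a struct tb_xdomain pointer could send a reply belonging to its own protocol as below. The my_svc_* names and the packet layout are invented, and a real packet would also carry the protocol's own header (route, sequence number, protocol UUID) at the start of the buffer.

struct my_svc_reply {
        /* hypothetical wire format; a real one starts with a protocol header */
        __le32 status;
};

static int my_svc_send_reply(struct tb_xdomain *xd, u32 status)
{
        struct my_svc_reply reply = {
                .status = cpu_to_le32(status),
        };

        /* Fire and forget: no answer is expected for a response packet */
        return tb_xdomain_response(xd, &reply, sizeof(reply),
                                   TB_CFG_PKG_XDOMAIN_RESP);
}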
+ * + * Return: %0 in case of success and negative errno in case of failure + */ +int tb_xdomain_response(struct tb_xdomain *xd, const void *response, + size_t size, enum tb_cfg_pkg_type type) +{ + return __tb_xdomain_response(xd->tb->ctl, response, size, type); +} +EXPORT_SYMBOL_GPL(tb_xdomain_response); + +static int __tb_xdomain_request(struct tb_ctl *ctl, const void *request, + size_t request_size, enum tb_cfg_pkg_type request_type, void *response, + size_t response_size, enum tb_cfg_pkg_type response_type, + unsigned int timeout_msec) +{ + struct tb_cfg_request *req; + struct tb_cfg_result res; + + req = tb_cfg_request_alloc(); + if (!req) + return -ENOMEM; + + req->match = tb_xdomain_match; + req->copy = tb_xdomain_copy; + req->request = request; + req->request_size = request_size; + req->request_type = request_type; + req->response = response; + req->response_size = response_size; + req->response_type = response_type; + + res = tb_cfg_request_sync(ctl, req, timeout_msec); + + tb_cfg_request_put(req); + + return res.err == 1 ? -EIO : res.err; +} + +/** + * tb_xdomain_request() - Send a XDomain request + * @xd: XDomain to send the request + * @request: Request to send + * @request_size: Size of the request in bytes + * @request_type: PDF type of the request + * @response: Response is copied here + * @response_size: Expected size of the response in bytes + * @response_type: Expected PDF type of the response + * @timeout_msec: Timeout in milliseconds to wait for the response + * + * This function can be used to send XDomain control channel messages to + * the other domain. The function waits until the response is received + * or when timeout triggers. Whichever comes first. + * + * Return: %0 in case of success and negative errno in case of failure + */ +int tb_xdomain_request(struct tb_xdomain *xd, const void *request, + size_t request_size, enum tb_cfg_pkg_type request_type, + void *response, size_t response_size, + enum tb_cfg_pkg_type response_type, unsigned int timeout_msec) +{ + return __tb_xdomain_request(xd->tb->ctl, request, request_size, + request_type, response, response_size, + response_type, timeout_msec); +} +EXPORT_SYMBOL_GPL(tb_xdomain_request); + +static inline void tb_xdp_fill_header(struct tb_xdp_header *hdr, u64 route, + u8 sequence, enum tb_xdp_type type, size_t size) +{ + u32 length_sn; + + length_sn = (size - sizeof(hdr->xd_hdr)) / 4; + length_sn |= (sequence << TB_XDOMAIN_SN_SHIFT) & TB_XDOMAIN_SN_MASK; + + hdr->xd_hdr.route_hi = upper_32_bits(route); + hdr->xd_hdr.route_lo = lower_32_bits(route); + hdr->xd_hdr.length_sn = length_sn; + hdr->type = type; + memcpy(&hdr->uuid, &tb_xdp_uuid, sizeof(tb_xdp_uuid)); +} + +static int tb_xdp_handle_error(const struct tb_xdp_header *hdr) +{ + const struct tb_xdp_error_response *error; + + if (hdr->type != ERROR_RESPONSE) + return 0; + + error = (const struct tb_xdp_error_response *)hdr; + + switch (error->error) { + case ERROR_UNKNOWN_PACKET: + case ERROR_UNKNOWN_DOMAIN: + return -EIO; + case ERROR_NOT_SUPPORTED: + return -ENOTSUPP; + case ERROR_NOT_READY: + return -EAGAIN; + default: + break; + } + + return 0; +} + +static int tb_xdp_error_response(struct tb_ctl *ctl, u64 route, u8 sequence, + enum tb_xdp_error error) +{ + struct tb_xdp_error_response res; + + memset(&res, 0, sizeof(res)); + tb_xdp_fill_header(&res.hdr, route, sequence, ERROR_RESPONSE, + sizeof(res)); + res.error = error; + + return __tb_xdomain_response(ctl, &res, sizeof(res), + TB_CFG_PKG_XDOMAIN_RESP); +} + +static int tb_xdp_properties_request(struct 
tb_ctl *ctl, u64 route, + const uuid_t *src_uuid, const uuid_t *dst_uuid, int retry, + u32 **block, u32 *generation) +{ + struct tb_xdp_properties_response *res; + struct tb_xdp_properties req; + u16 data_len, len; + size_t total_size; + u32 *data = NULL; + int ret; + + total_size = sizeof(*res) + TB_XDP_PROPERTIES_MAX_DATA_LENGTH * 4; + res = kzalloc(total_size, GFP_KERNEL); + if (!res) + return -ENOMEM; + + memset(&req, 0, sizeof(req)); + tb_xdp_fill_header(&req.hdr, route, retry % 4, PROPERTIES_REQUEST, + sizeof(req)); + memcpy(&req.src_uuid, src_uuid, sizeof(*src_uuid)); + memcpy(&req.dst_uuid, dst_uuid, sizeof(*dst_uuid)); + + len = 0; + data_len = 0; + + do { + ret = __tb_xdomain_request(ctl, &req, sizeof(req), + TB_CFG_PKG_XDOMAIN_REQ, res, + total_size, TB_CFG_PKG_XDOMAIN_RESP, + XDOMAIN_DEFAULT_TIMEOUT); + if (ret) + goto err; + + ret = tb_xdp_handle_error(&res->hdr); + if (ret) + goto err; + + /* + * Package length includes the whole payload without the + * XDomain header. Validate first that the package is at + * least size of the response structure. + */ + len = res->hdr.xd_hdr.length_sn & TB_XDOMAIN_LENGTH_MASK; + if (len < sizeof(*res) / 4) { + ret = -EINVAL; + goto err; + } + + len += sizeof(res->hdr.xd_hdr) / 4; + len -= sizeof(*res) / 4; + + if (res->offset != req.offset) { + ret = -EINVAL; + goto err; + } + + /* + * First time allocate block that has enough space for + * the whole properties block. + */ + if (!data) { + data_len = res->data_length; + if (data_len > TB_XDP_PROPERTIES_MAX_LENGTH) { + ret = -E2BIG; + goto err; + } + + data = kcalloc(data_len, sizeof(u32), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto err; + } + } + + memcpy(data + req.offset, res->data, len * 4); + req.offset += len; + } while (!data_len || req.offset < data_len); + + *block = data; + *generation = res->generation; + + kfree(res); + + return data_len; + +err: + kfree(data); + kfree(res); + + return ret; +} + +static int tb_xdp_properties_response(struct tb *tb, struct tb_ctl *ctl, + u64 route, u8 sequence, const uuid_t *src_uuid, + const struct tb_xdp_properties *req) +{ + struct tb_xdp_properties_response *res; + size_t total_size; + u16 len; + int ret; + + /* + * Currently we expect all requests to be directed to us. The + * protocol supports forwarding, though which we might add + * support later on. 
+ */ + if (!uuid_equal(src_uuid, &req->dst_uuid)) { + tb_xdp_error_response(ctl, route, sequence, + ERROR_UNKNOWN_DOMAIN); + return 0; + } + + mutex_lock(&xdomain_lock); + + if (req->offset >= xdomain_property_block_len) { + mutex_unlock(&xdomain_lock); + return -EINVAL; + } + + len = xdomain_property_block_len - req->offset; + len = min_t(u16, len, TB_XDP_PROPERTIES_MAX_DATA_LENGTH); + total_size = sizeof(*res) + len * 4; + + res = kzalloc(total_size, GFP_KERNEL); + if (!res) { + mutex_unlock(&xdomain_lock); + return -ENOMEM; + } + + tb_xdp_fill_header(&res->hdr, route, sequence, PROPERTIES_RESPONSE, + total_size); + res->generation = xdomain_property_block_gen; + res->data_length = xdomain_property_block_len; + res->offset = req->offset; + uuid_copy(&res->src_uuid, src_uuid); + uuid_copy(&res->dst_uuid, &req->src_uuid); + memcpy(res->data, &xdomain_property_block[req->offset], len * 4); + + mutex_unlock(&xdomain_lock); + + ret = __tb_xdomain_response(ctl, res, total_size, + TB_CFG_PKG_XDOMAIN_RESP); + + kfree(res); + return ret; +} + +static int tb_xdp_properties_changed_request(struct tb_ctl *ctl, u64 route, + int retry, const uuid_t *uuid) +{ + struct tb_xdp_properties_changed_response res; + struct tb_xdp_properties_changed req; + int ret; + + memset(&req, 0, sizeof(req)); + tb_xdp_fill_header(&req.hdr, route, retry % 4, + PROPERTIES_CHANGED_REQUEST, sizeof(req)); + uuid_copy(&req.src_uuid, uuid); + + memset(&res, 0, sizeof(res)); + ret = __tb_xdomain_request(ctl, &req, sizeof(req), + TB_CFG_PKG_XDOMAIN_REQ, &res, sizeof(res), + TB_CFG_PKG_XDOMAIN_RESP, + XDOMAIN_DEFAULT_TIMEOUT); + if (ret) + return ret; + + return tb_xdp_handle_error(&res.hdr); +} + +static int +tb_xdp_properties_changed_response(struct tb_ctl *ctl, u64 route, u8 sequence) +{ + struct tb_xdp_properties_changed_response res; + + memset(&res, 0, sizeof(res)); + tb_xdp_fill_header(&res.hdr, route, sequence, + PROPERTIES_CHANGED_RESPONSE, sizeof(res)); + return __tb_xdomain_response(ctl, &res, sizeof(res), + TB_CFG_PKG_XDOMAIN_RESP); +} + +/** + * tb_register_protocol_handler() - Register protocol handler + * @handler: Handler to register + * + * This allows XDomain service drivers to hook into incoming XDomain + * messages. After this function is called the service driver needs to + * be able to handle calls to callback whenever a package with the + * registered protocol is received. + */ +int tb_register_protocol_handler(struct tb_protocol_handler *handler) +{ + if (!handler->uuid || !handler->callback) + return -EINVAL; + if (uuid_equal(handler->uuid, &tb_xdp_uuid)) + return -EINVAL; + + mutex_lock(&xdomain_lock); + list_add_tail(&handler->list, &protocol_handlers); + mutex_unlock(&xdomain_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(tb_register_protocol_handler); + +/** + * tb_unregister_protocol_handler() - Unregister protocol handler + * @handler: Handler to unregister + * + * Removes the previously registered protocol handler. 
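A minimal registration sketch (illustrative only; the UUID value, the my_proto_* names and the callback body are invented):

static const uuid_t my_proto_uuid =
        UUID_INIT(0x11111111, 0x2222, 0x3333,
                  0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb);

static int my_proto_callback(const void *buf, size_t size, void *data)
{
        /* Validate and handle the packet; returning 1 marks it consumed */
        return 1;
}

static struct tb_protocol_handler my_proto_handler = {
        .uuid     = &my_proto_uuid,
        .callback = my_proto_callback,
};

static int my_proto_register(void *driver_data)
{
        my_proto_handler.data = driver_data;
        return tb_register_protocol_handler(&my_proto_handler);
        /* ...and tb_unregister_protocol_handler(&my_proto_handler) on teardown */
}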
+ */ +void tb_unregister_protocol_handler(struct tb_protocol_handler *handler) +{ + mutex_lock(&xdomain_lock); + list_del_init(&handler->list); + mutex_unlock(&xdomain_lock); +} +EXPORT_SYMBOL_GPL(tb_unregister_protocol_handler); + +static void tb_xdp_handle_request(struct work_struct *work) +{ + struct xdomain_request_work *xw = container_of(work, typeof(*xw), work); + const struct tb_xdp_header *pkg = xw->pkg; + const struct tb_xdomain_header *xhdr = &pkg->xd_hdr; + struct tb *tb = xw->tb; + struct tb_ctl *ctl = tb->ctl; + const uuid_t *uuid; + int ret = 0; + u8 sequence; + u64 route; + + route = ((u64)xhdr->route_hi << 32 | xhdr->route_lo) & ~BIT_ULL(63); + sequence = xhdr->length_sn & TB_XDOMAIN_SN_MASK; + sequence >>= TB_XDOMAIN_SN_SHIFT; + + mutex_lock(&tb->lock); + if (tb->root_switch) + uuid = tb->root_switch->uuid; + else + uuid = NULL; + mutex_unlock(&tb->lock); + + if (!uuid) { + tb_xdp_error_response(ctl, route, sequence, ERROR_NOT_READY); + goto out; + } + + switch (pkg->type) { + case PROPERTIES_REQUEST: + ret = tb_xdp_properties_response(tb, ctl, route, sequence, uuid, + (const struct tb_xdp_properties *)pkg); + break; + + case PROPERTIES_CHANGED_REQUEST: { + const struct tb_xdp_properties_changed *xchg = + (const struct tb_xdp_properties_changed *)pkg; + struct tb_xdomain *xd; + + ret = tb_xdp_properties_changed_response(ctl, route, sequence); + + /* + * Since the properties have been changed, let's update + * the xdomain related to this connection as well in + * case there is a change in services it offers. + */ + xd = tb_xdomain_find_by_uuid_locked(tb, &xchg->src_uuid); + if (xd) { + queue_delayed_work(tb->wq, &xd->get_properties_work, + msecs_to_jiffies(50)); + tb_xdomain_put(xd); + } + + break; + } + + default: + break; + } + + if (ret) { + tb_warn(tb, "failed to send XDomain response for %#x\n", + pkg->type); + } + +out: + kfree(xw->pkg); + kfree(xw); +} + +static void +tb_xdp_schedule_request(struct tb *tb, const struct tb_xdp_header *hdr, + size_t size) +{ + struct xdomain_request_work *xw; + + xw = kmalloc(sizeof(*xw), GFP_KERNEL); + if (!xw) + return; + + INIT_WORK(&xw->work, tb_xdp_handle_request); + xw->pkg = kmemdup(hdr, size, GFP_KERNEL); + xw->tb = tb; + + queue_work(tb->wq, &xw->work); +} + +/** + * tb_register_service_driver() - Register XDomain service driver + * @drv: Driver to register + * + * Registers new service driver from @drv to the bus. + */ +int tb_register_service_driver(struct tb_service_driver *drv) +{ + drv->driver.bus = &tb_bus_type; + return driver_register(&drv->driver); +} +EXPORT_SYMBOL_GPL(tb_register_service_driver); + +/** + * tb_unregister_service_driver() - Unregister XDomain service driver + * @xdrv: Driver to unregister + * + * Unregisters XDomain service driver from the bus. + */ +void tb_unregister_service_driver(struct tb_service_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(tb_unregister_service_driver); + +static ssize_t key_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + /* + * It should be null terminated but anything else is pretty much + * allowed. 
+ */ + return sprintf(buf, "%*pEp\n", (int)strlen(svc->key), svc->key); +} +static DEVICE_ATTR_RO(key); + +static int get_modalias(struct tb_service *svc, char *buf, size_t size) +{ + return snprintf(buf, size, "tbsvc:k%sp%08Xv%08Xr%08X", svc->key, + svc->prtcid, svc->prtcvers, svc->prtcrevs); +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + /* Full buffer size except new line and null termination */ + get_modalias(svc, buf, PAGE_SIZE - 2); + return sprintf(buf, "%s\n", buf); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t prtcid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + return sprintf(buf, "%u\n", svc->prtcid); +} +static DEVICE_ATTR_RO(prtcid); + +static ssize_t prtcvers_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + return sprintf(buf, "%u\n", svc->prtcvers); +} +static DEVICE_ATTR_RO(prtcvers); + +static ssize_t prtcrevs_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + return sprintf(buf, "%u\n", svc->prtcrevs); +} +static DEVICE_ATTR_RO(prtcrevs); + +static ssize_t prtcstns_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + return sprintf(buf, "0x%08x\n", svc->prtcstns); +} +static DEVICE_ATTR_RO(prtcstns); + +static struct attribute *tb_service_attrs[] = { + &dev_attr_key.attr, + &dev_attr_modalias.attr, + &dev_attr_prtcid.attr, + &dev_attr_prtcvers.attr, + &dev_attr_prtcrevs.attr, + &dev_attr_prtcstns.attr, + NULL, +}; + +static struct attribute_group tb_service_attr_group = { + .attrs = tb_service_attrs, +}; + +static const struct attribute_group *tb_service_attr_groups[] = { + &tb_service_attr_group, + NULL, +}; + +static int tb_service_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + char modalias[64]; + + get_modalias(svc, modalias, sizeof(modalias)); + return add_uevent_var(env, "MODALIAS=%s", modalias); +} + +static void tb_service_release(struct device *dev) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + struct tb_xdomain *xd = tb_service_parent(svc); + + ida_simple_remove(&xd->service_ids, svc->id); + kfree(svc->key); + kfree(svc); +} + +struct device_type tb_service_type = { + .name = "thunderbolt_service", + .groups = tb_service_attr_groups, + .uevent = tb_service_uevent, + .release = tb_service_release, +}; +EXPORT_SYMBOL_GPL(tb_service_type); + +static int remove_missing_service(struct device *dev, void *data) +{ + struct tb_xdomain *xd = data; + struct tb_service *svc; + + svc = tb_to_service(dev); + if (!svc) + return 0; + + if (!tb_property_find(xd->properties, svc->key, + TB_PROPERTY_TYPE_DIRECTORY)) + device_unregister(dev); + + return 0; +} + +static int find_service(struct device *dev, void *data) +{ + const struct tb_property *p = data; + struct tb_service *svc; + + svc = tb_to_service(dev); + if (!svc) + return 0; + + return !strcmp(svc->key, p->key); +} + +static int populate_service(struct tb_service *svc, + struct tb_property *property) +{ + struct tb_property_dir *dir = property->value.dir; + struct tb_property *p; + + /* Fill in 
standard properties */ + p = tb_property_find(dir, "prtcid", TB_PROPERTY_TYPE_VALUE); + if (p) + svc->prtcid = p->value.immediate; + p = tb_property_find(dir, "prtcvers", TB_PROPERTY_TYPE_VALUE); + if (p) + svc->prtcvers = p->value.immediate; + p = tb_property_find(dir, "prtcrevs", TB_PROPERTY_TYPE_VALUE); + if (p) + svc->prtcrevs = p->value.immediate; + p = tb_property_find(dir, "prtcstns", TB_PROPERTY_TYPE_VALUE); + if (p) + svc->prtcstns = p->value.immediate; + + svc->key = kstrdup(property->key, GFP_KERNEL); + if (!svc->key) + return -ENOMEM; + + return 0; +} + +static void enumerate_services(struct tb_xdomain *xd) +{ + struct tb_service *svc; + struct tb_property *p; + struct device *dev; + + /* + * First remove all services that are not available anymore in + * the updated property block. + */ + device_for_each_child_reverse(&xd->dev, xd, remove_missing_service); + + /* Then re-enumerate properties creating new services as we go */ + tb_property_for_each(xd->properties, p) { + if (p->type != TB_PROPERTY_TYPE_DIRECTORY) + continue; + + /* If the service exists already we are fine */ + dev = device_find_child(&xd->dev, p, find_service); + if (dev) { + put_device(dev); + continue; + } + + svc = kzalloc(sizeof(*svc), GFP_KERNEL); + if (!svc) + break; + + if (populate_service(svc, p)) { + kfree(svc); + break; + } + + svc->id = ida_simple_get(&xd->service_ids, 0, 0, GFP_KERNEL); + svc->dev.bus = &tb_bus_type; + svc->dev.type = &tb_service_type; + svc->dev.parent = &xd->dev; + dev_set_name(&svc->dev, "%s.%d", dev_name(&xd->dev), svc->id); + + if (device_register(&svc->dev)) { + put_device(&svc->dev); + break; + } + } +} + +static int populate_properties(struct tb_xdomain *xd, + struct tb_property_dir *dir) +{ + const struct tb_property *p; + + /* Required properties */ + p = tb_property_find(dir, "deviceid", TB_PROPERTY_TYPE_VALUE); + if (!p) + return -EINVAL; + xd->device = p->value.immediate; + + p = tb_property_find(dir, "vendorid", TB_PROPERTY_TYPE_VALUE); + if (!p) + return -EINVAL; + xd->vendor = p->value.immediate; + + kfree(xd->device_name); + xd->device_name = NULL; + kfree(xd->vendor_name); + xd->vendor_name = NULL; + + /* Optional properties */ + p = tb_property_find(dir, "deviceid", TB_PROPERTY_TYPE_TEXT); + if (p) + xd->device_name = kstrdup(p->value.text, GFP_KERNEL); + p = tb_property_find(dir, "vendorid", TB_PROPERTY_TYPE_TEXT); + if (p) + xd->vendor_name = kstrdup(p->value.text, GFP_KERNEL); + + return 0; +} + +/* Called with @xd->lock held */ +static void tb_xdomain_restore_paths(struct tb_xdomain *xd) +{ + if (!xd->resume) + return; + + xd->resume = false; + if (xd->transmit_path) { + dev_dbg(&xd->dev, "re-establishing DMA path\n"); + tb_domain_approve_xdomain_paths(xd->tb, xd); + } +} + +static void tb_xdomain_get_properties(struct work_struct *work) +{ + struct tb_xdomain *xd = container_of(work, typeof(*xd), + get_properties_work.work); + struct tb_property_dir *dir; + struct tb *tb = xd->tb; + bool update = false; + u32 *block = NULL; + u32 gen = 0; + int ret; + + ret = tb_xdp_properties_request(tb->ctl, xd->route, xd->local_uuid, + xd->remote_uuid, xd->properties_retries, + &block, &gen); + if (ret < 0) { + if (xd->properties_retries-- > 0) { + queue_delayed_work(xd->tb->wq, &xd->get_properties_work, + msecs_to_jiffies(1000)); + } else { + /* Give up now */ + dev_err(&xd->dev, + "failed read XDomain properties from %pUb\n", + xd->remote_uuid); + } + return; + } + + xd->properties_retries = XDOMAIN_PROPERTIES_RETRIES; + + mutex_lock(&xd->lock); + + /* Only accept 
newer generation properties */ + if (xd->properties && gen <= xd->property_block_gen) { + /* + * On resume it is likely that the properties block is + * not changed (unless the other end added or removed + * services). However, we need to make sure the existing + * DMA paths are restored properly. + */ + tb_xdomain_restore_paths(xd); + goto err_free_block; + } + + dir = tb_property_parse_dir(block, ret); + if (!dir) { + dev_err(&xd->dev, "failed to parse XDomain properties\n"); + goto err_free_block; + } + + ret = populate_properties(xd, dir); + if (ret) { + dev_err(&xd->dev, "missing XDomain properties in response\n"); + goto err_free_dir; + } + + /* Release the existing one */ + if (xd->properties) { + tb_property_free_dir(xd->properties); + update = true; + } + + xd->properties = dir; + xd->property_block_gen = gen; + + tb_xdomain_restore_paths(xd); + + mutex_unlock(&xd->lock); + + kfree(block); + + /* + * Now the device should be ready enough so we can add it to the + * bus and let userspace know about it. If the device is already + * registered, we notify the userspace that it has changed. + */ + if (!update) { + if (device_add(&xd->dev)) { + dev_err(&xd->dev, "failed to add XDomain device\n"); + return; + } + } else { + kobject_uevent(&xd->dev.kobj, KOBJ_CHANGE); + } + + enumerate_services(xd); + return; + +err_free_dir: + tb_property_free_dir(dir); +err_free_block: + kfree(block); + mutex_unlock(&xd->lock); +} + +static void tb_xdomain_properties_changed(struct work_struct *work) +{ + struct tb_xdomain *xd = container_of(work, typeof(*xd), + properties_changed_work.work); + int ret; + + ret = tb_xdp_properties_changed_request(xd->tb->ctl, xd->route, + xd->properties_changed_retries, xd->local_uuid); + if (ret) { + if (xd->properties_changed_retries-- > 0) + queue_delayed_work(xd->tb->wq, + &xd->properties_changed_work, + msecs_to_jiffies(1000)); + return; + } + + xd->properties_changed_retries = XDOMAIN_PROPERTIES_CHANGED_RETRIES; +} + +static ssize_t device_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + + return sprintf(buf, "%#x\n", xd->device); +} +static DEVICE_ATTR_RO(device); + +static ssize_t +device_name_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + int ret; + + if (mutex_lock_interruptible(&xd->lock)) + return -ERESTARTSYS; + ret = sprintf(buf, "%s\n", xd->device_name ? xd->device_name : ""); + mutex_unlock(&xd->lock); + + return ret; +} +static DEVICE_ATTR_RO(device_name); + +static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + + return sprintf(buf, "%#x\n", xd->vendor); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t +vendor_name_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + int ret; + + if (mutex_lock_interruptible(&xd->lock)) + return -ERESTARTSYS; + ret = sprintf(buf, "%s\n", xd->vendor_name ? 
xd->vendor_name : ""); + mutex_unlock(&xd->lock); + + return ret; +} +static DEVICE_ATTR_RO(vendor_name); + +static ssize_t unique_id_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + + return sprintf(buf, "%pUb\n", xd->remote_uuid); +} +static DEVICE_ATTR_RO(unique_id); + +static struct attribute *xdomain_attrs[] = { + &dev_attr_device.attr, + &dev_attr_device_name.attr, + &dev_attr_unique_id.attr, + &dev_attr_vendor.attr, + &dev_attr_vendor_name.attr, + NULL, +}; + +static struct attribute_group xdomain_attr_group = { + .attrs = xdomain_attrs, +}; + +static const struct attribute_group *xdomain_attr_groups[] = { + &xdomain_attr_group, + NULL, +}; + +static void tb_xdomain_release(struct device *dev) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + + put_device(xd->dev.parent); + + tb_property_free_dir(xd->properties); + ida_destroy(&xd->service_ids); + + kfree(xd->local_uuid); + kfree(xd->remote_uuid); + kfree(xd->device_name); + kfree(xd->vendor_name); + kfree(xd); +} + +static void start_handshake(struct tb_xdomain *xd) +{ + xd->properties_retries = XDOMAIN_PROPERTIES_RETRIES; + xd->properties_changed_retries = XDOMAIN_PROPERTIES_CHANGED_RETRIES; + + /* Start exchanging properties with the other host */ + queue_delayed_work(xd->tb->wq, &xd->properties_changed_work, + msecs_to_jiffies(100)); + queue_delayed_work(xd->tb->wq, &xd->get_properties_work, + msecs_to_jiffies(1000)); +} + +static void stop_handshake(struct tb_xdomain *xd) +{ + xd->properties_retries = 0; + xd->properties_changed_retries = 0; + + cancel_delayed_work_sync(&xd->get_properties_work); + cancel_delayed_work_sync(&xd->properties_changed_work); +} + +static int __maybe_unused tb_xdomain_suspend(struct device *dev) +{ + stop_handshake(tb_to_xdomain(dev)); + return 0; +} + +static int __maybe_unused tb_xdomain_resume(struct device *dev) +{ + struct tb_xdomain *xd = tb_to_xdomain(dev); + + /* + * Ask tb_xdomain_get_properties() restore any existing DMA + * paths after properties are re-read. + */ + xd->resume = true; + start_handshake(xd); + + return 0; +} + +static const struct dev_pm_ops tb_xdomain_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(tb_xdomain_suspend, tb_xdomain_resume) +}; + +struct device_type tb_xdomain_type = { + .name = "thunderbolt_xdomain", + .release = tb_xdomain_release, + .pm = &tb_xdomain_pm_ops, +}; +EXPORT_SYMBOL_GPL(tb_xdomain_type); + +/** + * tb_xdomain_alloc() - Allocate new XDomain object + * @tb: Domain where the XDomain belongs + * @parent: Parent device (the switch through the connection to the + * other domain is reached). + * @route: Route string used to reach the other domain + * @local_uuid: Our local domain UUID + * @remote_uuid: UUID of the other domain + * + * Allocates new XDomain structure and returns pointer to that. The + * object must be released by calling tb_xdomain_put(). 
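As a usage note (condensed from the ICM handler earlier in this patch, with error paths and the duplicate-connection checks trimmed), a connection manager typically does:

        xd = tb_xdomain_alloc(sw->tb, &sw->dev, route, local_uuid, remote_uuid);
        if (!xd)
                return;

        xd->link = link;
        xd->depth = depth;
        tb_port_at(route, sw)->xdomain = xd;

        /* Starts the property handshake; the device is added to the bus
         * once properties have been exchanged */
        tb_xdomain_add(xd);

and on unplug it clears the port pointer and calls tb_xdomain_remove(), which stops the handshake and releases the device.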
+ */ +struct tb_xdomain *tb_xdomain_alloc(struct tb *tb, struct device *parent, + u64 route, const uuid_t *local_uuid, + const uuid_t *remote_uuid) +{ + struct tb_xdomain *xd; + + xd = kzalloc(sizeof(*xd), GFP_KERNEL); + if (!xd) + return NULL; + + xd->tb = tb; + xd->route = route; + ida_init(&xd->service_ids); + mutex_init(&xd->lock); + INIT_DELAYED_WORK(&xd->get_properties_work, tb_xdomain_get_properties); + INIT_DELAYED_WORK(&xd->properties_changed_work, + tb_xdomain_properties_changed); + + xd->local_uuid = kmemdup(local_uuid, sizeof(uuid_t), GFP_KERNEL); + if (!xd->local_uuid) + goto err_free; + + xd->remote_uuid = kmemdup(remote_uuid, sizeof(uuid_t), GFP_KERNEL); + if (!xd->remote_uuid) + goto err_free_local_uuid; + + device_initialize(&xd->dev); + xd->dev.parent = get_device(parent); + xd->dev.bus = &tb_bus_type; + xd->dev.type = &tb_xdomain_type; + xd->dev.groups = xdomain_attr_groups; + dev_set_name(&xd->dev, "%u-%llx", tb->index, route); + + return xd; + +err_free_local_uuid: + kfree(xd->local_uuid); +err_free: + kfree(xd); + + return NULL; +} + +/** + * tb_xdomain_add() - Add XDomain to the bus + * @xd: XDomain to add + * + * This function starts XDomain discovery protocol handshake and + * eventually adds the XDomain to the bus. After calling this function + * the caller needs to call tb_xdomain_remove() in order to remove and + * release the object regardless whether the handshake succeeded or not. + */ +void tb_xdomain_add(struct tb_xdomain *xd) +{ + /* Start exchanging properties with the other host */ + start_handshake(xd); +} + +static int unregister_service(struct device *dev, void *data) +{ + device_unregister(dev); + return 0; +} + +/** + * tb_xdomain_remove() - Remove XDomain from the bus + * @xd: XDomain to remove + * + * This will stop all ongoing configuration work and remove the XDomain + * along with any services from the bus. When the last reference to @xd + * is released the object will be released as well. + */ +void tb_xdomain_remove(struct tb_xdomain *xd) +{ + stop_handshake(xd); + + device_for_each_child_reverse(&xd->dev, xd, unregister_service); + + if (!device_is_registered(&xd->dev)) + put_device(&xd->dev); + else + device_unregister(&xd->dev); +} + +/** + * tb_xdomain_enable_paths() - Enable DMA paths for XDomain connection + * @xd: XDomain connection + * @transmit_path: HopID of the transmit path the other end is using to + * send packets + * @transmit_ring: DMA ring used to receive packets from the other end + * @receive_path: HopID of the receive path the other end is using to + * receive packets + * @receive_ring: DMA ring used to send packets to the other end + * + * The function enables DMA paths accordingly so that after successful + * return the caller can send and receive packets using high-speed DMA + * path. + * + * Return: %0 in case of success and negative errno in case of error + */ +int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, + u16 transmit_ring, u16 receive_path, + u16 receive_ring) +{ + int ret; + + mutex_lock(&xd->lock); + + if (xd->transmit_path) { + ret = xd->transmit_path == transmit_path ? 
0 : -EBUSY; + goto exit_unlock; + } + + xd->transmit_path = transmit_path; + xd->transmit_ring = transmit_ring; + xd->receive_path = receive_path; + xd->receive_ring = receive_ring; + + ret = tb_domain_approve_xdomain_paths(xd->tb, xd); + +exit_unlock: + mutex_unlock(&xd->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(tb_xdomain_enable_paths); + +/** + * tb_xdomain_disable_paths() - Disable DMA paths for XDomain connection + * @xd: XDomain connection + * + * This does the opposite of tb_xdomain_enable_paths(). After call to + * this the caller is not expected to use the rings anymore. + * + * Return: %0 in case of success and negative errno in case of error + */ +int tb_xdomain_disable_paths(struct tb_xdomain *xd) +{ + int ret = 0; + + mutex_lock(&xd->lock); + if (xd->transmit_path) { + xd->transmit_path = 0; + xd->transmit_ring = 0; + xd->receive_path = 0; + xd->receive_ring = 0; + + ret = tb_domain_disconnect_xdomain_paths(xd->tb, xd); + } + mutex_unlock(&xd->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(tb_xdomain_disable_paths); + +struct tb_xdomain_lookup { + const uuid_t *uuid; + u8 link; + u8 depth; +}; + +static struct tb_xdomain *switch_find_xdomain(struct tb_switch *sw, + const struct tb_xdomain_lookup *lookup) +{ + int i; + + for (i = 1; i <= sw->config.max_port_number; i++) { + struct tb_port *port = &sw->ports[i]; + struct tb_xdomain *xd; + + if (tb_is_upstream_port(port)) + continue; + + if (port->xdomain) { + xd = port->xdomain; + + if (lookup->uuid) { + if (uuid_equal(xd->remote_uuid, lookup->uuid)) + return xd; + } else if (lookup->link == xd->link && + lookup->depth == xd->depth) { + return xd; + } + } else if (port->remote) { + xd = switch_find_xdomain(port->remote->sw, lookup); + if (xd) + return xd; + } + } + + return NULL; +} + +/** + * tb_xdomain_find_by_uuid() - Find an XDomain by UUID + * @tb: Domain where the XDomain belongs to + * @uuid: UUID to look for + * + * Finds XDomain by walking through the Thunderbolt topology below @tb. + * The returned XDomain will have its reference count increased so the + * caller needs to call tb_xdomain_put() when it is done with the + * object. + * + * This will find all XDomains including the ones that are not yet added + * to the bus (handshake is still in progress). + * + * The caller needs to hold @tb->lock. + */ +struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid) +{ + struct tb_xdomain_lookup lookup; + struct tb_xdomain *xd; + + memset(&lookup, 0, sizeof(lookup)); + lookup.uuid = uuid; + + xd = switch_find_xdomain(tb->root_switch, &lookup); + if (xd) { + get_device(&xd->dev); + return xd; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(tb_xdomain_find_by_uuid); + +/** + * tb_xdomain_find_by_link_depth() - Find an XDomain by link and depth + * @tb: Domain where the XDomain belongs to + * @link: Root switch link number + * @depth: Depth in the link + * + * Finds XDomain by walking through the Thunderbolt topology below @tb. + * The returned XDomain will have its reference count increased so the + * caller needs to call tb_xdomain_put() when it is done with the + * object. + * + * This will find all XDomains including the ones that are not yet added + * to the bus (handshake is still in progress). + * + * The caller needs to hold @tb->lock. 
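For callers outside the core that do not already hold @tb->lock, the header added by this patch also provides the tb_xdomain_find_by_uuid_locked() wrapper for the UUID-based lookup above; a minimal sketch:

        struct tb_xdomain *xd;

        xd = tb_xdomain_find_by_uuid_locked(tb, uuid);
        if (xd) {
                /* ... use xd ... */
                tb_xdomain_put(xd);
        }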
+ */ +struct tb_xdomain *tb_xdomain_find_by_link_depth(struct tb *tb, u8 link, + u8 depth) +{ + struct tb_xdomain_lookup lookup; + struct tb_xdomain *xd; + + memset(&lookup, 0, sizeof(lookup)); + lookup.link = link; + lookup.depth = depth; + + xd = switch_find_xdomain(tb->root_switch, &lookup); + if (xd) { + get_device(&xd->dev); + return xd; + } + + return NULL; +} + +bool tb_xdomain_handle_request(struct tb *tb, enum tb_cfg_pkg_type type, + const void *buf, size_t size) +{ + const struct tb_protocol_handler *handler, *tmp; + const struct tb_xdp_header *hdr = buf; + unsigned int length; + int ret = 0; + + /* We expect the packet is at least size of the header */ + length = hdr->xd_hdr.length_sn & TB_XDOMAIN_LENGTH_MASK; + if (length != size / 4 - sizeof(hdr->xd_hdr) / 4) + return true; + if (length < sizeof(*hdr) / 4 - sizeof(hdr->xd_hdr) / 4) + return true; + + /* + * Handle XDomain discovery protocol packets directly here. For + * other protocols (based on their UUID) we call registered + * handlers in turn. + */ + if (uuid_equal(&hdr->uuid, &tb_xdp_uuid)) { + if (type == TB_CFG_PKG_XDOMAIN_REQ) { + tb_xdp_schedule_request(tb, hdr, size); + return true; + } + return false; + } + + mutex_lock(&xdomain_lock); + list_for_each_entry_safe(handler, tmp, &protocol_handlers, list) { + if (!uuid_equal(&hdr->uuid, handler->uuid)) + continue; + + mutex_unlock(&xdomain_lock); + ret = handler->callback(buf, size, handler->data); + mutex_lock(&xdomain_lock); + + if (ret) + break; + } + mutex_unlock(&xdomain_lock); + + return ret > 0; +} + +static int rebuild_property_block(void) +{ + u32 *block, len; + int ret; + + ret = tb_property_format_dir(xdomain_property_dir, NULL, 0); + if (ret < 0) + return ret; + + len = ret; + + block = kcalloc(len, sizeof(u32), GFP_KERNEL); + if (!block) + return -ENOMEM; + + ret = tb_property_format_dir(xdomain_property_dir, block, len); + if (ret) { + kfree(block); + return ret; + } + + kfree(xdomain_property_block); + xdomain_property_block = block; + xdomain_property_block_len = len; + xdomain_property_block_gen++; + + return 0; +} + +static int update_xdomain(struct device *dev, void *data) +{ + struct tb_xdomain *xd; + + xd = tb_to_xdomain(dev); + if (xd) { + queue_delayed_work(xd->tb->wq, &xd->properties_changed_work, + msecs_to_jiffies(50)); + } + + return 0; +} + +static void update_all_xdomains(void) +{ + bus_for_each_dev(&tb_bus_type, NULL, NULL, update_xdomain); +} + +static bool remove_directory(const char *key, const struct tb_property_dir *dir) +{ + struct tb_property *p; + + p = tb_property_find(xdomain_property_dir, key, + TB_PROPERTY_TYPE_DIRECTORY); + if (p && p->value.dir == dir) { + tb_property_remove(p); + return true; + } + return false; +} + +/** + * tb_register_property_dir() - Register property directory to the host + * @key: Key (name) of the directory to add + * @dir: Directory to add + * + * Service drivers can use this function to add new property directory + * to the host available properties. The other connected hosts are + * notified so they can re-read properties of this host if they are + * interested. 
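A sketch of how a service driver might publish its directory (the "example" key and the property values are invented; a real driver picks its own key of at most 8 characters):

static struct tb_property_dir *my_dir;

static int my_publish_properties(void)
{
        int ret;

        my_dir = tb_property_create_dir(NULL);
        if (!my_dir)
                return -ENOMEM;

        tb_property_add_immediate(my_dir, "prtcid", 1);
        tb_property_add_immediate(my_dir, "prtcvers", 1);
        tb_property_add_immediate(my_dir, "prtcrevs", 1);
        tb_property_add_immediate(my_dir, "prtcstns", 0);

        ret = tb_register_property_dir("example", my_dir);
        if (ret)
                tb_property_free_dir(my_dir);
        return ret;
}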
+ * + * Return: %0 on success and negative errno on failure + */ +int tb_register_property_dir(const char *key, struct tb_property_dir *dir) +{ + int ret; + + if (!key || strlen(key) > 8) + return -EINVAL; + + mutex_lock(&xdomain_lock); + if (tb_property_find(xdomain_property_dir, key, + TB_PROPERTY_TYPE_DIRECTORY)) { + ret = -EEXIST; + goto err_unlock; + } + + ret = tb_property_add_dir(xdomain_property_dir, key, dir); + if (ret) + goto err_unlock; + + ret = rebuild_property_block(); + if (ret) { + remove_directory(key, dir); + goto err_unlock; + } + + mutex_unlock(&xdomain_lock); + update_all_xdomains(); + return 0; + +err_unlock: + mutex_unlock(&xdomain_lock); + return ret; +} +EXPORT_SYMBOL_GPL(tb_register_property_dir); + +/** + * tb_unregister_property_dir() - Removes property directory from host + * @key: Key (name) of the directory + * @dir: Directory to remove + * + * This will remove the existing directory from this host and notify the + * connected hosts about the change. + */ +void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir) +{ + int ret = 0; + + mutex_lock(&xdomain_lock); + if (remove_directory(key, dir)) + ret = rebuild_property_block(); + mutex_unlock(&xdomain_lock); + + if (!ret) + update_all_xdomains(); +} +EXPORT_SYMBOL_GPL(tb_unregister_property_dir); + +int tb_xdomain_init(void) +{ + int ret; + + xdomain_property_dir = tb_property_create_dir(NULL); + if (!xdomain_property_dir) + return -ENOMEM; + + /* + * Initialize standard set of properties without any service + * directories. Those will be added by service drivers + * themselves when they are loaded. + */ + tb_property_add_immediate(xdomain_property_dir, "vendorid", + PCI_VENDOR_ID_INTEL); + tb_property_add_text(xdomain_property_dir, "vendorid", "Intel Corp."); + tb_property_add_immediate(xdomain_property_dir, "deviceid", 0x1); + tb_property_add_text(xdomain_property_dir, "deviceid", + utsname()->nodename); + tb_property_add_immediate(xdomain_property_dir, "devicerv", 0x80000100); + + ret = rebuild_property_block(); + if (ret) { + tb_property_free_dir(xdomain_property_dir); + xdomain_property_dir = NULL; + } + + return ret; +} + +void tb_xdomain_exit(void) +{ + kfree(xdomain_property_block); + tb_property_free_dir(xdomain_property_dir); +} diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 694cebb50f72..7625c3b81f84 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -683,5 +683,31 @@ struct fsl_mc_device_id { const char obj_type[16]; }; +/** + * struct tb_service_id - Thunderbolt service identifiers + * @match_flags: Flags used to match the structure + * @protocol_key: Protocol key the service supports + * @protocol_id: Protocol id the service supports + * @protocol_version: Version of the protocol + * @protocol_revision: Revision of the protocol software + * @driver_data: Driver specific data + * + * Thunderbolt XDomain services are exposed as devices where each device + * carries the protocol information the service supports. Thunderbolt + * XDomain service drivers match against that information. 
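An id table sketch using the TB_SERVICE() helper that this patch adds to include/linux/thunderbolt.h (the "network" key and protocol id 1 are illustrative values, not a defined service):

static const struct tb_service_id my_service_ids[] = {
        { TB_SERVICE("network", 1) },
        { },
};
MODULE_DEVICE_TABLE(tbsvc, my_service_ids);

With these values the file2alias change below would emit an alias along the lines of tbsvc:knetworkp00000001v*r*, which is matched against the MODALIAS uevent produced by tb_service_uevent().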
+ */ +struct tb_service_id { + __u32 match_flags; + char protocol_key[8 + 1]; + __u32 protocol_id; + __u32 protocol_version; + __u32 protocol_revision; + kernel_ulong_t driver_data; +}; + +#define TBSVC_MATCH_PROTOCOL_KEY 0x0001 +#define TBSVC_MATCH_PROTOCOL_ID 0x0002 +#define TBSVC_MATCH_PROTOCOL_VERSION 0x0004 +#define TBSVC_MATCH_PROTOCOL_REVISION 0x0008 #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 43b8d1e09341..18c0e3d5e85c 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -17,6 +17,7 @@ #include #include #include +#include #include enum tb_cfg_pkg_type { @@ -77,6 +78,8 @@ struct tb { }; extern struct bus_type tb_bus_type; +extern struct device_type tb_service_type; +extern struct device_type tb_xdomain_type; #define TB_LINKS_PER_PHY_PORT 2 @@ -155,4 +158,243 @@ struct tb_property *tb_property_get_next(struct tb_property_dir *dir, property; \ property = tb_property_get_next(dir, property)) +int tb_register_property_dir(const char *key, struct tb_property_dir *dir); +void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); + +/** + * struct tb_xdomain - Cross-domain (XDomain) connection + * @dev: XDomain device + * @tb: Pointer to the domain + * @remote_uuid: UUID of the remote domain (host) + * @local_uuid: Cached local UUID + * @route: Route string the other domain can be reached + * @vendor: Vendor ID of the remote domain + * @device: Device ID of the demote domain + * @lock: Lock to serialize access to the following fields of this structure + * @vendor_name: Name of the vendor (or %NULL if not known) + * @device_name: Name of the device (or %NULL if not known) + * @is_unplugged: The XDomain is unplugged + * @resume: The XDomain is being resumed + * @transmit_path: HopID which the remote end expects us to transmit + * @transmit_ring: Local ring (hop) where outgoing packets are pushed + * @receive_path: HopID which we expect the remote end to transmit + * @receive_ring: Local ring (hop) where incoming packets arrive + * @service_ids: Used to generate IDs for the services + * @properties: Properties exported by the remote domain + * @property_block_gen: Generation of @properties + * @properties_lock: Lock protecting @properties. + * @get_properties_work: Work used to get remote domain properties + * @properties_retries: Number of times left to read properties + * @properties_changed_work: Work used to notify the remote domain that + * our properties have changed + * @properties_changed_retries: Number of times left to send properties + * changed notification + * @link: Root switch link the remote domain is connected (ICM only) + * @depth: Depth in the chain the remote domain is connected (ICM only) + * + * This structure represents connection across two domains (hosts). + * Each XDomain contains zero or more services which are exposed as + * &struct tb_service objects. + * + * Service drivers may access this structure if they need to enumerate + * non-standard properties but they need hold @lock when doing so + * because properties can be changed asynchronously in response to + * changes in the remote domain. 
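For example (sketch only; "maxframe" is an invented non-standard property key), such an access would look like:

static u32 my_read_maxframe(struct tb_xdomain *xd)
{
        const struct tb_property *p;
        u32 val = 0;

        mutex_lock(&xd->lock);
        if (xd->properties) {
                p = tb_property_find(xd->properties, "maxframe",
                                     TB_PROPERTY_TYPE_VALUE);
                if (p)
                        val = p->value.immediate;
        }
        mutex_unlock(&xd->lock);

        return val;
}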
+ */ +struct tb_xdomain { + struct device dev; + struct tb *tb; + uuid_t *remote_uuid; + const uuid_t *local_uuid; + u64 route; + u16 vendor; + u16 device; + struct mutex lock; + const char *vendor_name; + const char *device_name; + bool is_unplugged; + bool resume; + u16 transmit_path; + u16 transmit_ring; + u16 receive_path; + u16 receive_ring; + struct ida service_ids; + struct tb_property_dir *properties; + u32 property_block_gen; + struct delayed_work get_properties_work; + int properties_retries; + struct delayed_work properties_changed_work; + int properties_changed_retries; + u8 link; + u8 depth; +}; + +int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, + u16 transmit_ring, u16 receive_path, + u16 receive_ring); +int tb_xdomain_disable_paths(struct tb_xdomain *xd); +struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid); + +static inline struct tb_xdomain * +tb_xdomain_find_by_uuid_locked(struct tb *tb, const uuid_t *uuid) +{ + struct tb_xdomain *xd; + + mutex_lock(&tb->lock); + xd = tb_xdomain_find_by_uuid(tb, uuid); + mutex_unlock(&tb->lock); + + return xd; +} + +static inline struct tb_xdomain *tb_xdomain_get(struct tb_xdomain *xd) +{ + if (xd) + get_device(&xd->dev); + return xd; +} + +static inline void tb_xdomain_put(struct tb_xdomain *xd) +{ + if (xd) + put_device(&xd->dev); +} + +static inline bool tb_is_xdomain(const struct device *dev) +{ + return dev->type == &tb_xdomain_type; +} + +static inline struct tb_xdomain *tb_to_xdomain(struct device *dev) +{ + if (tb_is_xdomain(dev)) + return container_of(dev, struct tb_xdomain, dev); + return NULL; +} + +int tb_xdomain_response(struct tb_xdomain *xd, const void *response, + size_t size, enum tb_cfg_pkg_type type); +int tb_xdomain_request(struct tb_xdomain *xd, const void *request, + size_t request_size, enum tb_cfg_pkg_type request_type, + void *response, size_t response_size, + enum tb_cfg_pkg_type response_type, + unsigned int timeout_msec); + +/** + * tb_protocol_handler - Protocol specific handler + * @uuid: XDomain messages with this UUID are dispatched to this handler + * @callback: Callback called with the XDomain message. Returning %1 + * here tells the XDomain core that the message was handled + * by this handler and should not be forwared to other + * handlers. + * @data: Data passed with the callback + * @list: Handlers are linked using this + * + * Thunderbolt services can hook into incoming XDomain requests by + * registering protocol handler. Only limitation is that the XDomain + * discovery protocol UUID cannot be registered since it is handled by + * the core XDomain code. + * + * The @callback must check that the message is really directed to the + * service the driver implements. 
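Tying the pieces together, a service driver skeleton might look like the following. All my_svc_* names are invented, the id table is the one from the earlier sketch, and the DMA path/ring numbers a real driver would pass to tb_xdomain_enable_paths() are omitted:

#include <linux/module.h>
#include <linux/thunderbolt.h>

static int my_svc_probe(struct tb_service *svc, const struct tb_service_id *id)
{
        struct tb_xdomain *xd = tb_service_parent(svc);

        /* Allocate rings here, then call tb_xdomain_enable_paths(xd, ...) */
        tb_service_set_drvdata(svc, xd);
        return 0;
}

static void my_svc_remove(struct tb_service *svc)
{
        struct tb_xdomain *xd = tb_service_get_drvdata(svc);

        tb_xdomain_disable_paths(xd);
}

static struct tb_service_driver my_svc_driver = {
        .driver.name = "my-svc",
        .probe       = my_svc_probe,
        .remove      = my_svc_remove,
        .id_table    = my_service_ids,  /* table from the earlier sketch */
};

static int __init my_svc_init(void)
{
        return tb_register_service_driver(&my_svc_driver);
}
module_init(my_svc_init);

static void __exit my_svc_exit(void)
{
        tb_unregister_service_driver(&my_svc_driver);
}
module_exit(my_svc_exit);

MODULE_LICENSE("GPL");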
+ */ +struct tb_protocol_handler { + const uuid_t *uuid; + int (*callback)(const void *buf, size_t size, void *data); + void *data; + struct list_head list; +}; + +int tb_register_protocol_handler(struct tb_protocol_handler *handler); +void tb_unregister_protocol_handler(struct tb_protocol_handler *handler); + +/** + * struct tb_service - Thunderbolt service + * @dev: XDomain device + * @id: ID of the service (shown in sysfs) + * @key: Protocol key from the properties directory + * @prtcid: Protocol ID from the properties directory + * @prtcvers: Protocol version from the properties directory + * @prtcrevs: Protocol software revision from the properties directory + * @prtcstns: Protocol settings mask from the properties directory + * + * Each domain exposes set of services it supports as collection of + * properties. For each service there will be one corresponding + * &struct tb_service. Service drivers are bound to these. + */ +struct tb_service { + struct device dev; + int id; + const char *key; + u32 prtcid; + u32 prtcvers; + u32 prtcrevs; + u32 prtcstns; +}; + +static inline struct tb_service *tb_service_get(struct tb_service *svc) +{ + if (svc) + get_device(&svc->dev); + return svc; +} + +static inline void tb_service_put(struct tb_service *svc) +{ + if (svc) + put_device(&svc->dev); +} + +static inline bool tb_is_service(const struct device *dev) +{ + return dev->type == &tb_service_type; +} + +static inline struct tb_service *tb_to_service(struct device *dev) +{ + if (tb_is_service(dev)) + return container_of(dev, struct tb_service, dev); + return NULL; +} + +/** + * tb_service_driver - Thunderbolt service driver + * @driver: Driver structure + * @probe: Called when the driver is probed + * @remove: Called when the driver is removed (optional) + * @shutdown: Called at shutdown time to stop the service (optional) + * @id_table: Table of service identifiers the driver supports + */ +struct tb_service_driver { + struct device_driver driver; + int (*probe)(struct tb_service *svc, const struct tb_service_id *id); + void (*remove)(struct tb_service *svc); + void (*shutdown)(struct tb_service *svc); + const struct tb_service_id *id_table; +}; + +#define TB_SERVICE(key, id) \ + .match_flags = TBSVC_MATCH_PROTOCOL_KEY | \ + TBSVC_MATCH_PROTOCOL_ID, \ + .protocol_key = (key), \ + .protocol_id = (id) + +int tb_register_service_driver(struct tb_service_driver *drv); +void tb_unregister_service_driver(struct tb_service_driver *drv); + +static inline void *tb_service_get_drvdata(const struct tb_service *svc) +{ + return dev_get_drvdata(&svc->dev); +} + +static inline void tb_service_set_drvdata(struct tb_service *svc, void *data) +{ + dev_set_drvdata(&svc->dev, data); +} + +static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) +{ + return tb_to_xdomain(svc->dev.parent); +} + #endif /* THUNDERBOLT_H_ */ diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e4d90e50f6fe..57263f2f8f2f 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -206,5 +206,12 @@ int main(void) DEVID_FIELD(fsl_mc_device_id, vendor); DEVID_FIELD(fsl_mc_device_id, obj_type); + DEVID(tb_service_id); + DEVID_FIELD(tb_service_id, match_flags); + DEVID_FIELD(tb_service_id, protocol_key); + DEVID_FIELD(tb_service_id, protocol_id); + DEVID_FIELD(tb_service_id, protocol_version); + DEVID_FIELD(tb_service_id, protocol_revision); + return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 29d6699d5a06..6ef6e63f96fd 
100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1301,6 +1301,31 @@ static int do_fsl_mc_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("fslmc", fsl_mc_device_id, do_fsl_mc_entry); +/* Looks like: tbsvc:kSpNvNrN */ +static int do_tbsvc_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, tb_service_id, match_flags); + DEF_FIELD_ADDR(symval, tb_service_id, protocol_key); + DEF_FIELD(symval, tb_service_id, protocol_id); + DEF_FIELD(symval, tb_service_id, protocol_version); + DEF_FIELD(symval, tb_service_id, protocol_revision); + + strcpy(alias, "tbsvc:"); + if (match_flags & TBSVC_MATCH_PROTOCOL_KEY) + sprintf(alias + strlen(alias), "k%s", *protocol_key); + else + strcat(alias + strlen(alias), "k*"); + ADD(alias, "p", match_flags & TBSVC_MATCH_PROTOCOL_ID, protocol_id); + ADD(alias, "v", match_flags & TBSVC_MATCH_PROTOCOL_VERSION, + protocol_version); + ADD(alias, "r", match_flags & TBSVC_MATCH_PROTOCOL_REVISION, + protocol_revision); + + add_wildcard(alias); + return 1; +} +ADD_TO_DEVTABLE("tbsvc", tb_service_id, do_tbsvc_entry); + /* Does namelen bytes of name exactly match the symbol? */ static bool sym_is(const char *name, unsigned namelen, const char *symbol) { From 8c6bba10fb9262d7b3a11e86a40621d5b37810a6 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:35 +0300 Subject: [PATCH 0462/2177] thunderbolt: Configure interrupt throttling for all interrupts This will keep the interrupt delivery rate reasonable. The value used here (128 us) is a recommendation from the hardware people. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi.c | 23 ++++++++++++++++++++--- drivers/thunderbolt/nhi_regs.h | 2 ++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 05af126a2435..8a7a3d0133f9 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -651,6 +651,22 @@ static int nhi_suspend_noirq(struct device *dev) return tb_domain_suspend_noirq(tb); } +static void nhi_enable_int_throttling(struct tb_nhi *nhi) +{ + /* Throttling is specified in 256ns increments */ + u32 throttle = DIV_ROUND_UP(128 * NSEC_PER_USEC, 256); + unsigned int i; + + /* + * Configure interrupt throttling for all vectors even if we + * only use few. + */ + for (i = 0; i < MSIX_MAX_VECS; i++) { + u32 reg = REG_INT_THROTTLING_RATE + i * 4; + iowrite32(throttle, nhi->iobase + reg); + } +} + static int nhi_resume_noirq(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -663,6 +679,8 @@ static int nhi_resume_noirq(struct device *dev) */ if (!pci_device_is_present(pdev)) tb->nhi->going_away = true; + else + nhi_enable_int_throttling(tb->nhi); return tb_domain_resume_noirq(tb); } @@ -717,6 +735,8 @@ static int nhi_init_msi(struct tb_nhi *nhi) /* In case someone left them on. */ nhi_disable_interrupts(nhi); + nhi_enable_int_throttling(nhi); + ida_init(&nhi->msix_ida); /* @@ -796,9 +816,6 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); - /* magic value - clock related? 
*/ - iowrite32(3906250 / 10000, nhi->iobase + 0x38c00); - tb = icm_probe(nhi); if (!tb) tb = tb_probe(nhi); diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h index 09ed574e92ff..46eff69b19ad 100644 --- a/drivers/thunderbolt/nhi_regs.h +++ b/drivers/thunderbolt/nhi_regs.h @@ -95,6 +95,8 @@ struct ring_desc { #define REG_RING_INTERRUPT_BASE 0x38200 #define RING_INTERRUPT_REG_COUNT(nhi) ((31 + 2 * nhi->hop_count) / 32) +#define REG_INT_THROTTLING_RATE 0x38c00 + /* Interrupt Vector Allocation */ #define REG_INT_VEC_ALLOC_BASE 0x38c40 #define REG_INT_VEC_ALLOC_BITS 4 From 9fb1e654dcf781e71a0ea7c5bdfea3ba85d1d06d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:36 +0300 Subject: [PATCH 0463/2177] thunderbolt: Add support for frame mode When high-speed DMA paths are used to transfer arbitrary data over a Thunderbolt link, DMA rings should be in frame mode instead of raw mode. The latter is used by the control channel (ring 0). In frame mode each data frame can hold up to 4kB payload. This patch modifies the DMA ring code to allow configuring a ring to be in frame mode by passing a new flag (RING_FLAG_FRAME) to the ring when it is allocated. In addition there might be need to enable end-to-end (E2E) workaround for the ring to prevent losing Rx frames in certain situations. We add another flag (RING_FLAG_E2E) that can be used for this purpose. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/ctl.c | 3 +- drivers/thunderbolt/nhi.c | 76 +++++++++++++++++++++------------- drivers/thunderbolt/nhi.h | 10 ++++- drivers/thunderbolt/nhi_regs.h | 2 + 4 files changed, 61 insertions(+), 30 deletions(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index 46e393c5fd1d..05400b77dcd7 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -618,7 +618,8 @@ struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data) if (!ctl->tx) goto err; - ctl->rx = ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND); + ctl->rx = ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff, + 0xffff); if (!ctl->rx) goto err; diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 8a7a3d0133f9..bebcad3d2c1f 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -21,6 +21,12 @@ #define RING_TYPE(ring) ((ring)->is_tx ? "TX ring" : "RX ring") +/* + * Used to enable end-to-end workaround for missing RX packets. Do not + * use this ring for anything else. + */ +#define RING_E2E_UNUSED_HOPID 2 + /* * Minimal number of vectors when we use MSI-X. Two for control channel * Rx/Tx and the rest four are for cross domain DMA paths. 
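To make the new flags concrete, a caller that wants a receive ring in frame mode with the E2E workaround enabled would, with the ring_alloc_rx() signature as of this patch, do something along these lines (the hop, ring size and PDF masks are illustrative values):

static struct tb_ring *example_alloc_frame_ring(struct tb_nhi *nhi)
{
	/* 0xffff accepts any start-of-frame / end-of-frame PDF value */
	struct tb_ring *rx = ring_alloc_rx(nhi, 8, 256,
					   RING_FLAG_FRAME | RING_FLAG_E2E,
					   0xffff, 0xffff);

	if (rx)
		ring_start(rx);	/* programs frame mode and the E2E hop */
	return rx;
}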
@@ -229,23 +235,6 @@ static void ring_work(struct work_struct *work) frame->eof = ring->descriptors[ring->tail].eof; frame->sof = ring->descriptors[ring->tail].sof; frame->flags = ring->descriptors[ring->tail].flags; - if (frame->sof != 0) - dev_WARN(&ring->nhi->pdev->dev, - "%s %d got unexpected SOF: %#x\n", - RING_TYPE(ring), ring->hop, - frame->sof); - /* - * known flags: - * raw not enabled, interupt not set: 0x2=0010 - * raw enabled: 0xa=1010 - * raw not enabled: 0xb=1011 - * partial frame (>MAX_FRAME_SIZE): 0xe=1110 - */ - if (frame->flags != 0xa) - dev_WARN(&ring->nhi->pdev->dev, - "%s %d got unexpected flags: %#x\n", - RING_TYPE(ring), ring->hop, - frame->flags); } ring->tail = (ring->tail + 1) % ring->size; } @@ -321,12 +310,17 @@ static void ring_release_msix(struct tb_ring *ring) } static struct tb_ring *ring_alloc(struct tb_nhi *nhi, u32 hop, int size, - bool transmit, unsigned int flags) + bool transmit, unsigned int flags, + u16 sof_mask, u16 eof_mask) { struct tb_ring *ring = NULL; dev_info(&nhi->pdev->dev, "allocating %s ring %d of size %d\n", transmit ? "TX" : "RX", hop, size); + /* Tx Ring 2 is reserved for E2E workaround */ + if (transmit && hop == RING_E2E_UNUSED_HOPID) + return NULL; + mutex_lock(&nhi->lock); if (hop >= nhi->hop_count) { dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop); @@ -353,6 +347,8 @@ static struct tb_ring *ring_alloc(struct tb_nhi *nhi, u32 hop, int size, ring->is_tx = transmit; ring->size = size; ring->flags = flags; + ring->sof_mask = sof_mask; + ring->eof_mask = eof_mask; ring->head = 0; ring->tail = 0; ring->running = false; @@ -384,13 +380,13 @@ err: struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags) { - return ring_alloc(nhi, hop, size, true, flags); + return ring_alloc(nhi, hop, size, true, flags, 0, 0); } struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags) + unsigned int flags, u16 sof_mask, u16 eof_mask) { - return ring_alloc(nhi, hop, size, false, flags); + return ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask); } /** @@ -400,6 +396,9 @@ struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, */ void ring_start(struct tb_ring *ring) { + u16 frame_size; + u32 flags; + mutex_lock(&ring->nhi->lock); mutex_lock(&ring->lock); if (ring->nhi->going_away) @@ -411,18 +410,39 @@ void ring_start(struct tb_ring *ring) dev_info(&ring->nhi->pdev->dev, "starting %s %d\n", RING_TYPE(ring), ring->hop); + if (ring->flags & RING_FLAG_FRAME) { + /* Means 4096 */ + frame_size = 0; + flags = RING_FLAG_ENABLE; + } else { + frame_size = TB_FRAME_SIZE; + flags = RING_FLAG_ENABLE | RING_FLAG_RAW; + } + + if (ring->flags & RING_FLAG_E2E && !ring->is_tx) { + u32 hop; + + /* + * In order not to lose Rx packets we enable end-to-end + * workaround which transfers Rx credits to an unused Tx + * HopID. + */ + hop = RING_E2E_UNUSED_HOPID << REG_RX_OPTIONS_E2E_HOP_SHIFT; + hop &= REG_RX_OPTIONS_E2E_HOP_MASK; + flags |= hop | RING_FLAG_E2E_FLOW_CONTROL; + } + ring_iowrite64desc(ring, ring->descriptors_dma, 0); if (ring->is_tx) { ring_iowrite32desc(ring, ring->size, 12); ring_iowrite32options(ring, 0, 4); /* time releated ? 
*/ - ring_iowrite32options(ring, - RING_FLAG_ENABLE | RING_FLAG_RAW, 0); + ring_iowrite32options(ring, flags, 0); } else { - ring_iowrite32desc(ring, - (TB_FRAME_SIZE << 16) | ring->size, 12); - ring_iowrite32options(ring, 0xffffffff, 4); /* SOF EOF mask */ - ring_iowrite32options(ring, - RING_FLAG_ENABLE | RING_FLAG_RAW, 0); + u32 sof_eof_mask = ring->sof_mask << 16 | ring->eof_mask; + + ring_iowrite32desc(ring, (frame_size << 16) | ring->size, 12); + ring_iowrite32options(ring, sof_eof_mask, 4); + ring_iowrite32options(ring, flags, 0); } ring_interrupt_active(ring, true); ring->running = true; diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 0e05828983db..4503ddbeccb3 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -56,6 +56,8 @@ struct tb_nhi { * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) * @flags: Ring specific flags + * @sof_mask: Bit mask used to detect start of frame PDF + * @eof_mask: Bit mask used to detect end of frame PDF */ struct tb_ring { struct mutex lock; @@ -74,10 +76,16 @@ struct tb_ring { int irq; u8 vector; unsigned int flags; + u16 sof_mask; + u16 eof_mask; }; /* Leave ring interrupt enabled on suspend */ #define RING_FLAG_NO_SUSPEND BIT(0) +/* Configure the ring to be in frame mode */ +#define RING_FLAG_FRAME BIT(1) +/* Enable end-to-end flow control */ +#define RING_FLAG_E2E BIT(2) struct ring_frame; typedef void (*ring_cb)(struct tb_ring*, struct ring_frame*, bool canceled); @@ -100,7 +108,7 @@ struct ring_frame { struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags); struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags); + unsigned int flags, u16 sof_mask, u16 eof_mask); void ring_start(struct tb_ring *ring); void ring_stop(struct tb_ring *ring); void ring_free(struct tb_ring *ring); diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h index 46eff69b19ad..491a4c0c18fc 100644 --- a/drivers/thunderbolt/nhi_regs.h +++ b/drivers/thunderbolt/nhi_regs.h @@ -77,6 +77,8 @@ struct ring_desc { * ..: unknown */ #define REG_RX_OPTIONS_BASE 0x29800 +#define REG_RX_OPTIONS_E2E_HOP_MASK GENMASK(22, 12) +#define REG_RX_OPTIONS_E2E_HOP_SHIFT 12 /* * three bitfields: tx, rx, rx overflow From 3b3d9f4da96493e4f68d0a80ab210763a24f8b33 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:37 +0300 Subject: [PATCH 0464/2177] thunderbolt: Export ring handling functions to modules These are used by Thunderbolt services to send and receive frames over the high-speed DMA rings. We also put the functions to tb_ namespace to make sure we do not collide with others and add missing kernel-doc comments for the exported functions. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. 
Miller --- drivers/thunderbolt/ctl.c | 20 ++--- drivers/thunderbolt/nhi.c | 70 ++++++++++------ drivers/thunderbolt/nhi.h | 147 +-------------------------------- include/linux/thunderbolt.h | 158 ++++++++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+), 180 deletions(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index 05400b77dcd7..dd10789e1dbb 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -359,7 +359,7 @@ static int tb_ctl_tx(struct tb_ctl *ctl, const void *data, size_t len, cpu_to_be32_array(pkg->buffer, data, len / 4); *(__be32 *) (pkg->buffer + len) = tb_crc(pkg->buffer, len); - res = ring_tx(ctl->tx, &pkg->frame); + res = tb_ring_tx(ctl->tx, &pkg->frame); if (res) /* ring is stopped */ tb_ctl_pkg_free(pkg); return res; @@ -376,7 +376,7 @@ static bool tb_ctl_handle_event(struct tb_ctl *ctl, enum tb_cfg_pkg_type type, static void tb_ctl_rx_submit(struct ctl_pkg *pkg) { - ring_rx(pkg->ctl->rx, &pkg->frame); /* + tb_ring_rx(pkg->ctl->rx, &pkg->frame); /* * We ignore failures during stop. * All rx packets are referenced * from ctl->rx_packets, so we do @@ -614,11 +614,11 @@ struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data) if (!ctl->frame_pool) goto err; - ctl->tx = ring_alloc_tx(nhi, 0, 10, RING_FLAG_NO_SUSPEND); + ctl->tx = tb_ring_alloc_tx(nhi, 0, 10, RING_FLAG_NO_SUSPEND); if (!ctl->tx) goto err; - ctl->rx = ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff, + ctl->rx = tb_ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff, 0xffff); if (!ctl->rx) goto err; @@ -652,9 +652,9 @@ void tb_ctl_free(struct tb_ctl *ctl) return; if (ctl->rx) - ring_free(ctl->rx); + tb_ring_free(ctl->rx); if (ctl->tx) - ring_free(ctl->tx); + tb_ring_free(ctl->tx); /* free RX packets */ for (i = 0; i < TB_CTL_RX_PKG_COUNT; i++) @@ -673,8 +673,8 @@ void tb_ctl_start(struct tb_ctl *ctl) { int i; tb_ctl_info(ctl, "control channel starting...\n"); - ring_start(ctl->tx); /* is used to ack hotplug packets, start first */ - ring_start(ctl->rx); + tb_ring_start(ctl->tx); /* is used to ack hotplug packets, start first */ + tb_ring_start(ctl->rx); for (i = 0; i < TB_CTL_RX_PKG_COUNT; i++) tb_ctl_rx_submit(ctl->rx_packets[i]); @@ -695,8 +695,8 @@ void tb_ctl_stop(struct tb_ctl *ctl) ctl->running = false; mutex_unlock(&ctl->request_queue_lock); - ring_stop(ctl->rx); - ring_stop(ctl->tx); + tb_ring_stop(ctl->rx); + tb_ring_stop(ctl->tx); if (!list_empty(&ctl->request_queue)) tb_ctl_WARN(ctl, "dangling request in request_queue\n"); diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index bebcad3d2c1f..e0a47f7581cb 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -253,7 +253,7 @@ invoke_callback: } } -int __ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) +int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) { int ret = 0; mutex_lock(&ring->lock); @@ -266,6 +266,7 @@ int __ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) mutex_unlock(&ring->lock); return ret; } +EXPORT_SYMBOL_GPL(__tb_ring_enqueue); static irqreturn_t ring_msix(int irq, void *data) { @@ -309,9 +310,9 @@ static void ring_release_msix(struct tb_ring *ring) ring->irq = 0; } -static struct tb_ring *ring_alloc(struct tb_nhi *nhi, u32 hop, int size, - bool transmit, unsigned int flags, - u16 sof_mask, u16 eof_mask) +static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, + bool transmit, unsigned int flags, + u16 sof_mask, u16 eof_mask) { struct tb_ring *ring = NULL; 
dev_info(&nhi->pdev->dev, "allocating %s ring %d of size %d\n", @@ -377,24 +378,42 @@ err: return NULL; } -struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags) +/** + * tb_ring_alloc_tx() - Allocate DMA ring for transmit + * @nhi: Pointer to the NHI the ring is to be allocated + * @hop: HopID (ring) to allocate + * @size: Number of entries in the ring + * @flags: Flags for the ring + */ +struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags) { - return ring_alloc(nhi, hop, size, true, flags, 0, 0); -} - -struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, u16 eof_mask) -{ - return ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask); + return tb_ring_alloc(nhi, hop, size, true, flags, 0, 0); } +EXPORT_SYMBOL_GPL(tb_ring_alloc_tx); /** - * ring_start() - enable a ring - * - * Must not be invoked in parallel with ring_stop(). + * tb_ring_alloc_rx() - Allocate DMA ring for receive + * @nhi: Pointer to the NHI the ring is to be allocated + * @hop: HopID (ring) to allocate + * @size: Number of entries in the ring + * @flags: Flags for the ring + * @sof_mask: Mask of PDF values that start a frame + * @eof_mask: Mask of PDF values that end a frame */ -void ring_start(struct tb_ring *ring) +struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags, u16 sof_mask, u16 eof_mask) +{ + return tb_ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask); +} +EXPORT_SYMBOL_GPL(tb_ring_alloc_rx); + +/** + * tb_ring_start() - enable a ring + * + * Must not be invoked in parallel with tb_ring_stop(). + */ +void tb_ring_start(struct tb_ring *ring) { u16 frame_size; u32 flags; @@ -450,21 +469,22 @@ err: mutex_unlock(&ring->lock); mutex_unlock(&ring->nhi->lock); } - +EXPORT_SYMBOL_GPL(tb_ring_start); /** - * ring_stop() - shutdown a ring + * tb_ring_stop() - shutdown a ring * * Must not be invoked from a callback. * - * This method will disable the ring. Further calls to ring_tx/ring_rx will - * return -ESHUTDOWN until ring_stop has been called. + * This method will disable the ring. Further calls to + * tb_ring_tx/tb_ring_rx will return -ESHUTDOWN until ring_stop has been + * called. * * All enqueued frames will be canceled and their callbacks will be executed * with frame->canceled set to true (on the callback thread). This method * returns only after all callback invocations have finished. */ -void ring_stop(struct tb_ring *ring) +void tb_ring_stop(struct tb_ring *ring) { mutex_lock(&ring->nhi->lock); mutex_lock(&ring->lock); @@ -497,9 +517,10 @@ err: schedule_work(&ring->work); flush_work(&ring->work); } +EXPORT_SYMBOL_GPL(tb_ring_stop); /* - * ring_free() - free ring + * tb_ring_free() - free ring * * When this method returns all invocations of ring->callback will have * finished. @@ -508,7 +529,7 @@ err: * * Must NOT be called from ring_frame->callback! 
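With the symbols exported and renamed into the tb_ namespace, a Thunderbolt service module can drive a ring directly; a minimal, hypothetical lifecycle (hop number chosen only for illustration) looks like this:

static struct tb_ring *example_open_tx(struct tb_nhi *nhi)
{
	struct tb_ring *tx;

	tx = tb_ring_alloc_tx(nhi, 3, 256, 0);	/* hop 3 is illustrative */
	if (!tx)
		return NULL;
	tb_ring_start(tx);
	return tx;
}

static void example_close_tx(struct tb_ring *tx)
{
	tb_ring_stop(tx);	/* cancels queued frames, runs their callbacks */
	tb_ring_free(tx);	/* must not be called from a frame callback */
}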
*/ -void ring_free(struct tb_ring *ring) +void tb_ring_free(struct tb_ring *ring) { mutex_lock(&ring->nhi->lock); /* @@ -550,6 +571,7 @@ void ring_free(struct tb_ring *ring) mutex_destroy(&ring->lock); kfree(ring); } +EXPORT_SYMBOL_GPL(tb_ring_free); /** * nhi_mailbox_cmd() - Send a command through NHI mailbox diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 4503ddbeccb3..771d09ca5dc5 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -7,152 +7,7 @@ #ifndef DSL3510_H_ #define DSL3510_H_ -#include -#include -#include - -/** - * struct tb_nhi - thunderbolt native host interface - * @lock: Must be held during ring creation/destruction. Is acquired by - * interrupt_work when dispatching interrupts to individual rings. - * @pdev: Pointer to the PCI device - * @iobase: MMIO space of the NHI - * @tx_rings: All Tx rings available on this host controller - * @rx_rings: All Rx rings available on this host controller - * @msix_ida: Used to allocate MSI-X vectors for rings - * @going_away: The host controller device is about to disappear so when - * this flag is set, avoid touching the hardware anymore. - * @interrupt_work: Work scheduled to handle ring interrupt when no - * MSI-X is used. - * @hop_count: Number of rings (end point hops) supported by NHI. - */ -struct tb_nhi { - struct mutex lock; - struct pci_dev *pdev; - void __iomem *iobase; - struct tb_ring **tx_rings; - struct tb_ring **rx_rings; - struct ida msix_ida; - bool going_away; - struct work_struct interrupt_work; - u32 hop_count; -}; - -/** - * struct tb_ring - thunderbolt TX or RX ring associated with a NHI - * @lock: Lock serializing actions to this ring. Must be acquired after - * nhi->lock. - * @nhi: Pointer to the native host controller interface - * @size: Size of the ring - * @hop: Hop (DMA channel) associated with this ring - * @head: Head of the ring (write next descriptor here) - * @tail: Tail of the ring (complete next descriptor here) - * @descriptors: Allocated descriptors for this ring - * @queue: Queue holding frames to be transferred over this ring - * @in_flight: Queue holding frames that are currently in flight - * @work: Interrupt work structure - * @is_tx: Is the ring Tx or Rx - * @running: Is the ring running - * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. 
- * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) - * @flags: Ring specific flags - * @sof_mask: Bit mask used to detect start of frame PDF - * @eof_mask: Bit mask used to detect end of frame PDF - */ -struct tb_ring { - struct mutex lock; - struct tb_nhi *nhi; - int size; - int hop; - int head; - int tail; - struct ring_desc *descriptors; - dma_addr_t descriptors_dma; - struct list_head queue; - struct list_head in_flight; - struct work_struct work; - bool is_tx:1; - bool running:1; - int irq; - u8 vector; - unsigned int flags; - u16 sof_mask; - u16 eof_mask; -}; - -/* Leave ring interrupt enabled on suspend */ -#define RING_FLAG_NO_SUSPEND BIT(0) -/* Configure the ring to be in frame mode */ -#define RING_FLAG_FRAME BIT(1) -/* Enable end-to-end flow control */ -#define RING_FLAG_E2E BIT(2) - -struct ring_frame; -typedef void (*ring_cb)(struct tb_ring*, struct ring_frame*, bool canceled); - -/** - * struct ring_frame - for use with ring_rx/ring_tx - */ -struct ring_frame { - dma_addr_t buffer_phy; - ring_cb callback; - struct list_head list; - u32 size:12; /* TX: in, RX: out*/ - u32 flags:12; /* RX: out */ - u32 eof:4; /* TX:in, RX: out */ - u32 sof:4; /* TX:in, RX: out */ -}; - -#define TB_FRAME_SIZE 0x100 /* minimum size for ring_rx */ - -struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags); -struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, u16 eof_mask); -void ring_start(struct tb_ring *ring); -void ring_stop(struct tb_ring *ring); -void ring_free(struct tb_ring *ring); - -int __ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); - -/** - * ring_rx() - enqueue a frame on an RX ring - * - * frame->buffer, frame->buffer_phy and frame->callback have to be set. The - * buffer must contain at least TB_FRAME_SIZE bytes. - * - * frame->callback will be invoked with frame->size, frame->flags, frame->eof, - * frame->sof set once the frame has been received. - * - * If ring_stop is called after the packet has been enqueued frame->callback - * will be called with canceled set to true. - * - * Return: Returns ESHUTDOWN if ring_stop has been called. Zero otherwise. - */ -static inline int ring_rx(struct tb_ring *ring, struct ring_frame *frame) -{ - WARN_ON(ring->is_tx); - return __ring_enqueue(ring, frame); -} - -/** - * ring_tx() - enqueue a frame on an TX ring - * - * frame->buffer, frame->buffer_phy, frame->callback, frame->size, frame->eof - * and frame->sof have to be set. - * - * frame->callback will be invoked with once the frame has been transmitted. - * - * If ring_stop is called after the packet has been enqueued frame->callback - * will be called with canceled set to true. - * - * Return: Returns ESHUTDOWN if ring_stop has been called. Zero otherwise. 
- */ -static inline int ring_tx(struct tb_ring *ring, struct ring_frame *frame) -{ - WARN_ON(!ring->is_tx); - return __ring_enqueue(ring, frame); -} +#include enum nhi_fw_mode { NHI_FW_SAFE_MODE, diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 18c0e3d5e85c..9ddb83ad890f 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -15,10 +15,12 @@ #define THUNDERBOLT_H_ #include +#include #include #include #include #include +#include enum tb_cfg_pkg_type { TB_CFG_PKG_READ = 1, @@ -397,4 +399,160 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) return tb_to_xdomain(svc->dev.parent); } +/** + * struct tb_nhi - thunderbolt native host interface + * @lock: Must be held during ring creation/destruction. Is acquired by + * interrupt_work when dispatching interrupts to individual rings. + * @pdev: Pointer to the PCI device + * @iobase: MMIO space of the NHI + * @tx_rings: All Tx rings available on this host controller + * @rx_rings: All Rx rings available on this host controller + * @msix_ida: Used to allocate MSI-X vectors for rings + * @going_away: The host controller device is about to disappear so when + * this flag is set, avoid touching the hardware anymore. + * @interrupt_work: Work scheduled to handle ring interrupt when no + * MSI-X is used. + * @hop_count: Number of rings (end point hops) supported by NHI. + */ +struct tb_nhi { + struct mutex lock; + struct pci_dev *pdev; + void __iomem *iobase; + struct tb_ring **tx_rings; + struct tb_ring **rx_rings; + struct ida msix_ida; + bool going_away; + struct work_struct interrupt_work; + u32 hop_count; +}; + +/** + * struct tb_ring - thunderbolt TX or RX ring associated with a NHI + * @lock: Lock serializing actions to this ring. Must be acquired after + * nhi->lock. + * @nhi: Pointer to the native host controller interface + * @size: Size of the ring + * @hop: Hop (DMA channel) associated with this ring + * @head: Head of the ring (write next descriptor here) + * @tail: Tail of the ring (complete next descriptor here) + * @descriptors: Allocated descriptors for this ring + * @queue: Queue holding frames to be transferred over this ring + * @in_flight: Queue holding frames that are currently in flight + * @work: Interrupt work structure + * @is_tx: Is the ring Tx or Rx + * @running: Is the ring running + * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. 
+ * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) + * @flags: Ring specific flags + * @sof_mask: Bit mask used to detect start of frame PDF + * @eof_mask: Bit mask used to detect end of frame PDF + */ +struct tb_ring { + struct mutex lock; + struct tb_nhi *nhi; + int size; + int hop; + int head; + int tail; + struct ring_desc *descriptors; + dma_addr_t descriptors_dma; + struct list_head queue; + struct list_head in_flight; + struct work_struct work; + bool is_tx:1; + bool running:1; + int irq; + u8 vector; + unsigned int flags; + u16 sof_mask; + u16 eof_mask; +}; + +/* Leave ring interrupt enabled on suspend */ +#define RING_FLAG_NO_SUSPEND BIT(0) +/* Configure the ring to be in frame mode */ +#define RING_FLAG_FRAME BIT(1) +/* Enable end-to-end flow control */ +#define RING_FLAG_E2E BIT(2) + +struct ring_frame; +typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); + +/** + * struct ring_frame - For use with ring_rx/ring_tx + * @buffer_phy: DMA mapped address of the frame + * @callback: Callback called when the frame is finished + * @list: Frame is linked to a queue using this + * @size: Size of the frame in bytes (%0 means %4096) + * @flags: Flags for the frame (see &enum ring_desc_flags) + * @eof: End of frame protocol defined field + * @sof: Start of frame protocol defined field + */ +struct ring_frame { + dma_addr_t buffer_phy; + ring_cb callback; + struct list_head list; + u32 size:12; + u32 flags:12; + u32 eof:4; + u32 sof:4; +}; + +/* Minimum size for ring_rx */ +#define TB_FRAME_SIZE 0x100 + +struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags); +struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags, u16 sof_mask, + u16 eof_mask); +void tb_ring_start(struct tb_ring *ring); +void tb_ring_stop(struct tb_ring *ring); +void tb_ring_free(struct tb_ring *ring); + +int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); + +/** + * tb_ring_rx() - enqueue a frame on an RX ring + * @ring: Ring to enqueue the frame + * @frame: Frame to enqueue + * + * @frame->buffer, @frame->buffer_phy and @frame->callback have to be set. The + * buffer must contain at least %TB_FRAME_SIZE bytes. + * + * @frame->callback will be invoked with @frame->size, @frame->flags, + * @frame->eof, @frame->sof set once the frame has been received. + * + * If ring_stop() is called after the packet has been enqueued + * @frame->callback will be called with canceled set to true. + * + * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + */ +static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) +{ + WARN_ON(ring->is_tx); + return __tb_ring_enqueue(ring, frame); +} + +/** + * tb_ring_tx() - enqueue a frame on an TX ring + * @ring: Ring the enqueue the frame + * @frame: Frame to enqueue + * + * @frame->buffer, @frame->buffer_phy, @frame->callback, @frame->size, + * @frame->eof and @frame->sof have to be set. + * + * @frame->callback will be invoked with once the frame has been transmitted. + * + * If ring_stop() is called after the packet has been enqueued @frame->callback + * will be called with canceled set to true. + * + * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. 
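As a usage sketch for the exported enqueue path (all names are illustrative and buffer management is simplified; the caller is assumed to have DMA-mapped the buffer already):

static void example_tx_done(struct tb_ring *ring, struct ring_frame *frame,
			    bool canceled)
{
	/* unmap the buffer and release the frame here; @canceled is true
	 * when the ring was stopped before the frame went out
	 */
}

static int example_send(struct tb_ring *tx, dma_addr_t dma, size_t len)
{
	struct ring_frame *frame = kzalloc(sizeof(*frame), GFP_KERNEL);

	if (!frame)
		return -ENOMEM;

	frame->buffer_phy = dma;	/* DMA-mapped by the caller */
	frame->callback = example_tx_done;
	frame->size = len;		/* 12-bit field, 0 means 4096 bytes */
	frame->sof = 0x1;		/* protocol defined PDF values */
	frame->eof = 0x1;

	return tb_ring_tx(tx, frame);	/* -ESHUTDOWN if ring is stopped */
}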
+ */ +static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) +{ + WARN_ON(!ring->is_tx); + return __tb_ring_enqueue(ring, frame); +} + #endif /* THUNDERBOLT_H_ */ From 2a91ec63f8a11e70d4b958dd4df867fec0247179 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:38 +0300 Subject: [PATCH 0465/2177] thunderbolt: Move ring descriptor flags to thunderbolt.h A Thunderbolt service driver might need to check if there was an error with the descriptor when in frame mode. We also add two Rx specific error flags RING_DESC_CRC_ERROR and RING_DESC_BUFFER_OVERRUN. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi_regs.h | 7 ------- include/linux/thunderbolt.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h index 491a4c0c18fc..5ed6934e31e7 100644 --- a/drivers/thunderbolt/nhi_regs.h +++ b/drivers/thunderbolt/nhi_regs.h @@ -17,13 +17,6 @@ enum ring_flags { RING_FLAG_ENABLE = 1 << 31, }; -enum ring_desc_flags { - RING_DESC_ISOCH = 0x1, /* TX only? */ - RING_DESC_COMPLETED = 0x2, /* set by NHI */ - RING_DESC_POSTED = 0x4, /* always set this */ - RING_DESC_INTERRUPT = 0x8, /* request an interrupt on completion */ -}; - /** * struct ring_desc - TX/RX ring entry * diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 9ddb83ad890f..e3b9af7be0ad 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -478,6 +478,24 @@ struct tb_ring { struct ring_frame; typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); +/** + * enum ring_desc_flags - Flags for DMA ring descriptor + * %RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) + * %RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) + * %RING_DESC_COMPLETED: Descriptor completed (set by NHI) + * %RING_DESC_POSTED: Always set this + * %RING_DESC_BUFFER_OVERRUN: RX buffer overrun + * %RING_DESC_INTERRUPT: Request an interrupt on completion + */ +enum ring_desc_flags { + RING_DESC_ISOCH = 0x1, + RING_DESC_CRC_ERROR = 0x1, + RING_DESC_COMPLETED = 0x2, + RING_DESC_POSTED = 0x4, + RING_DESC_BUFFER_OVERRUN = 0x04, + RING_DESC_INTERRUPT = 0x8, +}; + /** * struct ring_frame - For use with ring_rx/ring_tx * @buffer_phy: DMA mapped address of the frame From 22b7de1000e66d739c431d6be4e7e97c69fa7c98 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:39 +0300 Subject: [PATCH 0466/2177] thunderbolt: Use spinlock in ring serialization This makes it possible to enqueue frames also from atomic context which is needed for example, when networking packets are sent over a Thunderbolt cable. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. 
Miller --- drivers/thunderbolt/nhi.c | 26 ++++++++++++++------------ include/linux/thunderbolt.h | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index e0a47f7581cb..7d1891ec3c47 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -212,8 +212,10 @@ static void ring_work(struct work_struct *work) struct tb_ring *ring = container_of(work, typeof(*ring), work); struct ring_frame *frame; bool canceled = false; + unsigned long flags; LIST_HEAD(done); - mutex_lock(&ring->lock); + + spin_lock_irqsave(&ring->lock, flags); if (!ring->running) { /* Move all frames to done and mark them as canceled. */ @@ -241,7 +243,8 @@ static void ring_work(struct work_struct *work) ring_write_descriptors(ring); invoke_callback: - mutex_unlock(&ring->lock); /* allow callbacks to schedule new work */ + /* allow callbacks to schedule new work */ + spin_unlock_irqrestore(&ring->lock, flags); while (!list_empty(&done)) { frame = list_first_entry(&done, typeof(*frame), list); /* @@ -255,15 +258,17 @@ invoke_callback: int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) { + unsigned long flags; int ret = 0; - mutex_lock(&ring->lock); + + spin_lock_irqsave(&ring->lock, flags); if (ring->running) { list_add_tail(&frame->list, &ring->queue); ring_write_descriptors(ring); } else { ret = -ESHUTDOWN; } - mutex_unlock(&ring->lock); + spin_unlock_irqrestore(&ring->lock, flags); return ret; } EXPORT_SYMBOL_GPL(__tb_ring_enqueue); @@ -338,7 +343,7 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, if (!ring) goto err; - mutex_init(&ring->lock); + spin_lock_init(&ring->lock); INIT_LIST_HEAD(&ring->queue); INIT_LIST_HEAD(&ring->in_flight); INIT_WORK(&ring->work, ring_work); @@ -371,8 +376,6 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, return ring; err: - if (ring) - mutex_destroy(&ring->lock); kfree(ring); mutex_unlock(&nhi->lock); return NULL; @@ -419,7 +422,7 @@ void tb_ring_start(struct tb_ring *ring) u32 flags; mutex_lock(&ring->nhi->lock); - mutex_lock(&ring->lock); + spin_lock_irq(&ring->lock); if (ring->nhi->going_away) goto err; if (ring->running) { @@ -466,7 +469,7 @@ void tb_ring_start(struct tb_ring *ring) ring_interrupt_active(ring, true); ring->running = true; err: - mutex_unlock(&ring->lock); + spin_unlock_irq(&ring->lock); mutex_unlock(&ring->nhi->lock); } EXPORT_SYMBOL_GPL(tb_ring_start); @@ -487,7 +490,7 @@ EXPORT_SYMBOL_GPL(tb_ring_start); void tb_ring_stop(struct tb_ring *ring) { mutex_lock(&ring->nhi->lock); - mutex_lock(&ring->lock); + spin_lock_irq(&ring->lock); dev_info(&ring->nhi->pdev->dev, "stopping %s %d\n", RING_TYPE(ring), ring->hop); if (ring->nhi->going_away) @@ -508,7 +511,7 @@ void tb_ring_stop(struct tb_ring *ring) ring->running = false; err: - mutex_unlock(&ring->lock); + spin_unlock_irq(&ring->lock); mutex_unlock(&ring->nhi->lock); /* @@ -568,7 +571,6 @@ void tb_ring_free(struct tb_ring *ring) * to finish before freeing the ring. 
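Since the per-ring lock is now a spinlock taken with irqsave, enqueueing no longer sleeps, so frames can be pushed from softirq context; a hypothetical transmit helper on a networking hot path could simply be:

/* May be called from softirq (for example an ndo_start_xmit path);
 * after this change tb_ring_tx() only takes spinlocks internally.
 */
static int example_xmit_frame(struct tb_ring *tx_ring,
			      struct ring_frame *frame)
{
	int ret = tb_ring_tx(tx_ring, frame);

	if (ret == -ESHUTDOWN)
		pr_debug("TX ring already stopped, frame dropped\n");
	return ret;
}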
*/ flush_work(&ring->work); - mutex_destroy(&ring->lock); kfree(ring); } EXPORT_SYMBOL_GPL(tb_ring_free); diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index e3b9af7be0ad..cf9e42db780f 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -448,7 +448,7 @@ struct tb_nhi { * @eof_mask: Bit mask used to detect end of frame PDF */ struct tb_ring { - struct mutex lock; + spinlock_t lock; struct tb_nhi *nhi; int size; int hop; From 59120e06101db72442acf4c8b364a0c76d8faa68 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:40 +0300 Subject: [PATCH 0467/2177] thunderbolt: Use spinlock in NHI serialization This is needed because ring polling functionality can be called from atomic contexts when networking and other high-speed traffic is transferred over a Thunderbolt cable. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi.c | 75 ++++++++++++++++++++----------------- include/linux/thunderbolt.h | 2 +- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 7d1891ec3c47..0b3c0640048b 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -327,21 +327,9 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, if (transmit && hop == RING_E2E_UNUSED_HOPID) return NULL; - mutex_lock(&nhi->lock); - if (hop >= nhi->hop_count) { - dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop); - goto err; - } - if (transmit && nhi->tx_rings[hop]) { - dev_WARN(&nhi->pdev->dev, "TX hop %d already allocated\n", hop); - goto err; - } else if (!transmit && nhi->rx_rings[hop]) { - dev_WARN(&nhi->pdev->dev, "RX hop %d already allocated\n", hop); - goto err; - } ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) - goto err; + return NULL; spin_lock_init(&ring->lock); INIT_LIST_HEAD(&ring->queue); @@ -359,25 +347,45 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, ring->tail = 0; ring->running = false; - if (ring_request_msix(ring, flags & RING_FLAG_NO_SUSPEND)) - goto err; - ring->descriptors = dma_alloc_coherent(&ring->nhi->pdev->dev, size * sizeof(*ring->descriptors), &ring->descriptors_dma, GFP_KERNEL | __GFP_ZERO); if (!ring->descriptors) - goto err; + goto err_free_ring; + if (ring_request_msix(ring, flags & RING_FLAG_NO_SUSPEND)) + goto err_free_descs; + + spin_lock_irq(&nhi->lock); + if (hop >= nhi->hop_count) { + dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop); + goto err_release_msix; + } + if (transmit && nhi->tx_rings[hop]) { + dev_WARN(&nhi->pdev->dev, "TX hop %d already allocated\n", hop); + goto err_release_msix; + } else if (!transmit && nhi->rx_rings[hop]) { + dev_WARN(&nhi->pdev->dev, "RX hop %d already allocated\n", hop); + goto err_release_msix; + } if (transmit) nhi->tx_rings[hop] = ring; else nhi->rx_rings[hop] = ring; - mutex_unlock(&nhi->lock); + spin_unlock_irq(&nhi->lock); + return ring; -err: +err_release_msix: + spin_unlock_irq(&nhi->lock); + ring_release_msix(ring); +err_free_descs: + dma_free_coherent(&ring->nhi->pdev->dev, + ring->size * sizeof(*ring->descriptors), + ring->descriptors, ring->descriptors_dma); +err_free_ring: kfree(ring); - mutex_unlock(&nhi->lock); + return NULL; } @@ -421,8 +429,8 @@ void tb_ring_start(struct tb_ring *ring) u16 frame_size; u32 flags; - mutex_lock(&ring->nhi->lock); - spin_lock_irq(&ring->lock); + 
spin_lock_irq(&ring->nhi->lock); + spin_lock(&ring->lock); if (ring->nhi->going_away) goto err; if (ring->running) { @@ -469,8 +477,8 @@ void tb_ring_start(struct tb_ring *ring) ring_interrupt_active(ring, true); ring->running = true; err: - spin_unlock_irq(&ring->lock); - mutex_unlock(&ring->nhi->lock); + spin_unlock(&ring->lock); + spin_unlock_irq(&ring->nhi->lock); } EXPORT_SYMBOL_GPL(tb_ring_start); @@ -489,8 +497,8 @@ EXPORT_SYMBOL_GPL(tb_ring_start); */ void tb_ring_stop(struct tb_ring *ring) { - mutex_lock(&ring->nhi->lock); - spin_lock_irq(&ring->lock); + spin_lock_irq(&ring->nhi->lock); + spin_lock(&ring->lock); dev_info(&ring->nhi->pdev->dev, "stopping %s %d\n", RING_TYPE(ring), ring->hop); if (ring->nhi->going_away) @@ -511,8 +519,8 @@ void tb_ring_stop(struct tb_ring *ring) ring->running = false; err: - spin_unlock_irq(&ring->lock); - mutex_unlock(&ring->nhi->lock); + spin_unlock(&ring->lock); + spin_unlock_irq(&ring->nhi->lock); /* * schedule ring->work to invoke callbacks on all remaining frames. @@ -534,7 +542,7 @@ EXPORT_SYMBOL_GPL(tb_ring_stop); */ void tb_ring_free(struct tb_ring *ring) { - mutex_lock(&ring->nhi->lock); + spin_lock_irq(&ring->nhi->lock); /* * Dissociate the ring from the NHI. This also ensures that * nhi_interrupt_work cannot reschedule ring->work. @@ -564,7 +572,7 @@ void tb_ring_free(struct tb_ring *ring) RING_TYPE(ring), ring->hop); - mutex_unlock(&ring->nhi->lock); + spin_unlock_irq(&ring->nhi->lock); /** * ring->work can no longer be scheduled (it is scheduled only * by nhi_interrupt_work, ring_stop and ring_msix). Wait for it @@ -639,7 +647,7 @@ static void nhi_interrupt_work(struct work_struct *work) int type = 0; /* current interrupt type 0: TX, 1: RX, 2: RX overflow */ struct tb_ring *ring; - mutex_lock(&nhi->lock); + spin_lock_irq(&nhi->lock); /* * Starting at REG_RING_NOTIFY_BASE there are three status bitfields @@ -677,7 +685,7 @@ static void nhi_interrupt_work(struct work_struct *work) /* we do not check ring->running, this is done in ring->work */ schedule_work(&ring->work); } - mutex_unlock(&nhi->lock); + spin_unlock_irq(&nhi->lock); } static irqreturn_t nhi_msi(int irq, void *data) @@ -767,7 +775,6 @@ static void nhi_shutdown(struct tb_nhi *nhi) devm_free_irq(&nhi->pdev->dev, nhi->pdev->irq, nhi); flush_work(&nhi->interrupt_work); } - mutex_destroy(&nhi->lock); ida_destroy(&nhi->msix_ida); } @@ -856,7 +863,7 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id) return res; } - mutex_init(&nhi->lock); + spin_lock_init(&nhi->lock); pci_set_master(pdev); diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index cf9e42db780f..d59e3f9a35c4 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -415,7 +415,7 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) * @hop_count: Number of rings (end point hops) supported by NHI. */ struct tb_nhi { - struct mutex lock; + spinlock_t lock; struct pci_dev *pdev; void __iomem *iobase; struct tb_ring **tx_rings; From 4ffe722eefcb07c76701f03e0d759fbaecedf79f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:41 +0300 Subject: [PATCH 0468/2177] thunderbolt: Add polling mode for rings In order to support things like networking over Thunderbolt cable, there needs to be a way to switch the ring to a mode where it can be polled with the interrupt masked. We implement such mode so that the caller can allocate a ring by passing pointer to a function that is then called when an interrupt is triggered. 
Completed frames can be fetched using tb_ring_poll() and the interrupt can be re-enabled when the caller is finished with polling by using tb_ring_poll_complete(). Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/ctl.c | 2 +- drivers/thunderbolt/nhi.c | 126 +++++++++++++++++++++++++++++++++--- include/linux/thunderbolt.h | 23 +++++-- 3 files changed, 134 insertions(+), 17 deletions(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index dd10789e1dbb..d079dbba2c03 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -619,7 +619,7 @@ struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data) goto err; ctl->rx = tb_ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff, - 0xffff); + 0xffff, NULL, NULL); if (!ctl->rx) goto err; diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 0b3c0640048b..af0a80ddf594 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -252,7 +252,8 @@ invoke_callback: * Do not hold on to it. */ list_del_init(&frame->list); - frame->callback(ring, frame, canceled); + if (frame->callback) + frame->callback(ring, frame, canceled); } } @@ -273,11 +274,106 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) } EXPORT_SYMBOL_GPL(__tb_ring_enqueue); +/** + * tb_ring_poll() - Poll one completed frame from the ring + * @ring: Ring to poll + * + * This function can be called when @start_poll callback of the @ring + * has been called. It will read one completed frame from the ring and + * return it to the caller. Returns %NULL if there is no more completed + * frames. + */ +struct ring_frame *tb_ring_poll(struct tb_ring *ring) +{ + struct ring_frame *frame = NULL; + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + if (!ring->running) + goto unlock; + if (ring_empty(ring)) + goto unlock; + + if (ring->descriptors[ring->tail].flags & RING_DESC_COMPLETED) { + frame = list_first_entry(&ring->in_flight, typeof(*frame), + list); + list_del_init(&frame->list); + + if (!ring->is_tx) { + frame->size = ring->descriptors[ring->tail].length; + frame->eof = ring->descriptors[ring->tail].eof; + frame->sof = ring->descriptors[ring->tail].sof; + frame->flags = ring->descriptors[ring->tail].flags; + } + + ring->tail = (ring->tail + 1) % ring->size; + } + +unlock: + spin_unlock_irqrestore(&ring->lock, flags); + return frame; +} +EXPORT_SYMBOL_GPL(tb_ring_poll); + +static void __ring_interrupt_mask(struct tb_ring *ring, bool mask) +{ + int idx = ring_interrupt_index(ring); + int reg = REG_RING_INTERRUPT_BASE + idx / 32 * 4; + int bit = idx % 32; + u32 val; + + val = ioread32(ring->nhi->iobase + reg); + if (mask) + val &= ~BIT(bit); + else + val |= BIT(bit); + iowrite32(val, ring->nhi->iobase + reg); +} + +/* Both @nhi->lock and @ring->lock should be held */ +static void __ring_interrupt(struct tb_ring *ring) +{ + if (!ring->running) + return; + + if (ring->start_poll) { + __ring_interrupt_mask(ring, false); + ring->start_poll(ring->poll_data); + } else { + schedule_work(&ring->work); + } +} + +/** + * tb_ring_poll_complete() - Re-start interrupt for the ring + * @ring: Ring to re-start the interrupt + * + * This will re-start (unmask) the ring interrupt once the user is done + * with polling. 
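A consumer of the polling mode typically defers the real work from @start_poll and then drains the ring roughly as below (the surrounding context structure is hypothetical and its poll_work is assumed to be set up with INIT_WORK elsewhere):

static void example_start_poll(void *data)
{
	struct example_ctx *ctx = data;

	/* The ring interrupt is masked at this point; defer the work */
	schedule_work(&ctx->poll_work);
}

static void example_poll_work(struct work_struct *work)
{
	struct example_ctx *ctx = container_of(work, typeof(*ctx), poll_work);
	struct ring_frame *frame;

	/* ctx->rx_ring was allocated with example_start_poll as the
	 * start_poll callback, e.g.
	 * tb_ring_alloc_rx(nhi, 8, 256, 0, 0xffff, 0xffff,
	 *		    example_start_poll, ctx);
	 */
	while ((frame = tb_ring_poll(ctx->rx_ring)) != NULL) {
		/* process frame->size bytes of received data ... */
	}

	/* Done for now; unmask the ring interrupt again */
	tb_ring_poll_complete(ctx->rx_ring);
}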
+ */ +void tb_ring_poll_complete(struct tb_ring *ring) +{ + unsigned long flags; + + spin_lock_irqsave(&ring->nhi->lock, flags); + spin_lock(&ring->lock); + if (ring->start_poll) + __ring_interrupt_mask(ring, false); + spin_unlock(&ring->lock); + spin_unlock_irqrestore(&ring->nhi->lock, flags); +} +EXPORT_SYMBOL_GPL(tb_ring_poll_complete); + static irqreturn_t ring_msix(int irq, void *data) { struct tb_ring *ring = data; - schedule_work(&ring->work); + spin_lock(&ring->nhi->lock); + spin_lock(&ring->lock); + __ring_interrupt(ring); + spin_unlock(&ring->lock); + spin_unlock(&ring->nhi->lock); + return IRQ_HANDLED; } @@ -317,7 +413,9 @@ static void ring_release_msix(struct tb_ring *ring) static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, bool transmit, unsigned int flags, - u16 sof_mask, u16 eof_mask) + u16 sof_mask, u16 eof_mask, + void (*start_poll)(void *), + void *poll_data) { struct tb_ring *ring = NULL; dev_info(&nhi->pdev->dev, "allocating %s ring %d of size %d\n", @@ -346,6 +444,8 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, ring->head = 0; ring->tail = 0; ring->running = false; + ring->start_poll = start_poll; + ring->poll_data = poll_data; ring->descriptors = dma_alloc_coherent(&ring->nhi->pdev->dev, size * sizeof(*ring->descriptors), @@ -399,7 +499,7 @@ err_free_ring: struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags) { - return tb_ring_alloc(nhi, hop, size, true, flags, 0, 0); + return tb_ring_alloc(nhi, hop, size, true, flags, 0, 0, NULL, NULL); } EXPORT_SYMBOL_GPL(tb_ring_alloc_tx); @@ -411,11 +511,17 @@ EXPORT_SYMBOL_GPL(tb_ring_alloc_tx); * @flags: Flags for the ring * @sof_mask: Mask of PDF values that start a frame * @eof_mask: Mask of PDF values that end a frame + * @start_poll: If not %NULL the ring will call this function when an + * interrupt is triggered and masked, instead of callback + * in each Rx frame. + * @poll_data: Optional data passed to @start_poll */ struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, u16 eof_mask) + unsigned int flags, u16 sof_mask, u16 eof_mask, + void (*start_poll)(void *), void *poll_data) { - return tb_ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask); + return tb_ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask, + start_poll, poll_data); } EXPORT_SYMBOL_GPL(tb_ring_alloc_rx); @@ -556,6 +662,7 @@ void tb_ring_free(struct tb_ring *ring) dev_WARN(&ring->nhi->pdev->dev, "%s %d still running\n", RING_TYPE(ring), ring->hop); } + spin_unlock_irq(&ring->nhi->lock); ring_release_msix(ring); @@ -572,7 +679,6 @@ void tb_ring_free(struct tb_ring *ring) RING_TYPE(ring), ring->hop); - spin_unlock_irq(&ring->nhi->lock); /** * ring->work can no longer be scheduled (it is scheduled only * by nhi_interrupt_work, ring_stop and ring_msix). 
Wait for it @@ -682,8 +788,10 @@ static void nhi_interrupt_work(struct work_struct *work) hop); continue; } - /* we do not check ring->running, this is done in ring->work */ - schedule_work(&ring->work); + + spin_lock(&ring->lock); + __ring_interrupt(ring); + spin_unlock(&ring->lock); } spin_unlock_irq(&nhi->lock); } diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index d59e3f9a35c4..36925e3aec7c 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -446,6 +446,9 @@ struct tb_nhi { * @flags: Ring specific flags * @sof_mask: Bit mask used to detect start of frame PDF * @eof_mask: Bit mask used to detect end of frame PDF + * @start_poll: Called when ring interrupt is triggered to start + * polling. Passing %NULL keeps the ring in interrupt mode. + * @poll_data: Data passed to @start_poll */ struct tb_ring { spinlock_t lock; @@ -466,6 +469,8 @@ struct tb_ring { unsigned int flags; u16 sof_mask; u16 eof_mask; + void (*start_poll)(void *data); + void *poll_data; }; /* Leave ring interrupt enabled on suspend */ @@ -499,7 +504,7 @@ enum ring_desc_flags { /** * struct ring_frame - For use with ring_rx/ring_tx * @buffer_phy: DMA mapped address of the frame - * @callback: Callback called when the frame is finished + * @callback: Callback called when the frame is finished (optional) * @list: Frame is linked to a queue using this * @size: Size of the frame in bytes (%0 means %4096) * @flags: Flags for the frame (see &enum ring_desc_flags) @@ -522,8 +527,8 @@ struct ring_frame { struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags); struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, - u16 eof_mask); + unsigned int flags, u16 sof_mask, u16 eof_mask, + void (*start_poll)(void *), void *poll_data); void tb_ring_start(struct tb_ring *ring); void tb_ring_stop(struct tb_ring *ring); void tb_ring_free(struct tb_ring *ring); @@ -535,8 +540,8 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); * @ring: Ring to enqueue the frame * @frame: Frame to enqueue * - * @frame->buffer, @frame->buffer_phy and @frame->callback have to be set. The - * buffer must contain at least %TB_FRAME_SIZE bytes. + * @frame->buffer, @frame->buffer_phy have to be set. The buffer must + * contain at least %TB_FRAME_SIZE bytes. * * @frame->callback will be invoked with @frame->size, @frame->flags, * @frame->eof, @frame->sof set once the frame has been received. @@ -557,8 +562,8 @@ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) * @ring: Ring the enqueue the frame * @frame: Frame to enqueue * - * @frame->buffer, @frame->buffer_phy, @frame->callback, @frame->size, - * @frame->eof and @frame->sof have to be set. + * @frame->buffer, @frame->buffer_phy, @frame->size, @frame->eof and + * @frame->sof have to be set. * * @frame->callback will be invoked with once the frame has been transmitted. 
* @@ -573,4 +578,8 @@ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) return __tb_ring_enqueue(ring, frame); } +/* Used only when the ring is in polling mode */ +struct ring_frame *tb_ring_poll(struct tb_ring *ring); +void tb_ring_poll_complete(struct tb_ring *ring); + #endif /* THUNDERBOLT_H_ */ From 3304559e353f098d7e0ed5ca981e26c406513e12 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:42 +0300 Subject: [PATCH 0469/2177] thunderbolt: Add function to retrieve DMA device for the ring This is needed when Thunderbolt service drivers need to DMA map memory before it is passed down to the ring. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 36925e3aec7c..7b69853188b1 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -582,4 +583,16 @@ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) struct ring_frame *tb_ring_poll(struct tb_ring *ring); void tb_ring_poll_complete(struct tb_ring *ring); +/** + * tb_ring_dma_device() - Return device used for DMA mapping + * @ring: Ring whose DMA device is retrieved + * + * Use this function when you are mapping DMA for buffers that are + * passed to the ring for sending/receiving. + */ +static inline struct device *tb_ring_dma_device(struct tb_ring *ring) +{ + return &ring->nhi->pdev->dev; +} + #endif /* THUNDERBOLT_H_ */ From 9a01c7c26cf7cbd1f58d06319e798833e85ff550 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:43 +0300 Subject: [PATCH 0470/2177] thunderbolt: Allocate ring HopID automatically if requested Thunderbolt services should not care which HopID (ring) they use for sending and receiving packets over the high-speed DMA path, so make tb_ring_alloc_rx() and tb_ring_alloc_tx() accept negative HopID. This means that the NHI will allocate next available HopID for the caller automatically. These HopIDs will be allocated from the range which is not reserved for the Thunderbolt protocol (8 .. hop_count - 1). The allocated HopID can be retrieved from ring->hop field after the ring has been allocated successfully if needed. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi.c | 78 ++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index af0a80ddf594..0e79eebfcbb7 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -26,6 +26,8 @@ * use this ring for anything else. */ #define RING_E2E_UNUSED_HOPID 2 +/* HopIDs 0-7 are reserved by the Thunderbolt protocol */ +#define RING_FIRST_USABLE_HOPID 8 /* * Minimal number of vectors when we use MSI-X. Two for control channel @@ -411,6 +413,62 @@ static void ring_release_msix(struct tb_ring *ring) ring->irq = 0; } +static int nhi_alloc_hop(struct tb_nhi *nhi, struct tb_ring *ring) +{ + int ret = 0; + + spin_lock_irq(&nhi->lock); + + if (ring->hop < 0) { + unsigned int i; + + /* + * Automatically allocate HopID from the non-reserved + * range 8 .. hop_count - 1. 
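Combining the last two changes, a service driver can let the NHI pick the HopID and use tb_ring_dma_device() for its buffer mapping. The sketch below reuses example_start_poll() from the polling sketch above; the context structure, buffer and sizes are illustrative assumptions.

static int example_setup_rx(struct tb_nhi *nhi, struct example_ctx *ctx)
{
	struct device *dma_dev;

	/* Pass -1 so the NHI allocates a HopID from 8 .. hop_count - 1 */
	ctx->rx_ring = tb_ring_alloc_rx(nhi, -1, 256, RING_FLAG_FRAME,
					0xffff, 0xffff,
					example_start_poll, ctx);
	if (!ctx->rx_ring)
		return -ENOMEM;

	/* ctx->buf is assumed to be a 4 kB receive buffer allocated earlier */
	dma_dev = tb_ring_dma_device(ctx->rx_ring);
	ctx->frame.buffer_phy = dma_map_single(dma_dev, ctx->buf, 4096,
					       DMA_FROM_DEVICE);
	if (dma_mapping_error(dma_dev, ctx->frame.buffer_phy)) {
		tb_ring_free(ctx->rx_ring);
		return -ENOMEM;
	}

	tb_ring_start(ctx->rx_ring);
	return tb_ring_rx(ctx->rx_ring, &ctx->frame);
}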
+ */ + for (i = RING_FIRST_USABLE_HOPID; i < nhi->hop_count; i++) { + if (ring->is_tx) { + if (!nhi->tx_rings[i]) { + ring->hop = i; + break; + } + } else { + if (!nhi->rx_rings[i]) { + ring->hop = i; + break; + } + } + } + } + + if (ring->hop < 0 || ring->hop >= nhi->hop_count) { + dev_warn(&nhi->pdev->dev, "invalid hop: %d\n", ring->hop); + ret = -EINVAL; + goto err_unlock; + } + if (ring->is_tx && nhi->tx_rings[ring->hop]) { + dev_warn(&nhi->pdev->dev, "TX hop %d already allocated\n", + ring->hop); + ret = -EBUSY; + goto err_unlock; + } else if (!ring->is_tx && nhi->rx_rings[ring->hop]) { + dev_warn(&nhi->pdev->dev, "RX hop %d already allocated\n", + ring->hop); + ret = -EBUSY; + goto err_unlock; + } + + if (ring->is_tx) + nhi->tx_rings[ring->hop] = ring; + else + nhi->rx_rings[ring->hop] = ring; + +err_unlock: + spin_unlock_irq(&nhi->lock); + + return ret; +} + static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, bool transmit, unsigned int flags, u16 sof_mask, u16 eof_mask, @@ -456,28 +514,12 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, if (ring_request_msix(ring, flags & RING_FLAG_NO_SUSPEND)) goto err_free_descs; - spin_lock_irq(&nhi->lock); - if (hop >= nhi->hop_count) { - dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop); + if (nhi_alloc_hop(nhi, ring)) goto err_release_msix; - } - if (transmit && nhi->tx_rings[hop]) { - dev_WARN(&nhi->pdev->dev, "TX hop %d already allocated\n", hop); - goto err_release_msix; - } else if (!transmit && nhi->rx_rings[hop]) { - dev_WARN(&nhi->pdev->dev, "RX hop %d already allocated\n", hop); - goto err_release_msix; - } - if (transmit) - nhi->tx_rings[hop] = ring; - else - nhi->rx_rings[hop] = ring; - spin_unlock_irq(&nhi->lock); return ring; err_release_msix: - spin_unlock_irq(&nhi->lock); ring_release_msix(ring); err_free_descs: dma_free_coherent(&ring->nhi->pdev->dev, @@ -506,7 +548,7 @@ EXPORT_SYMBOL_GPL(tb_ring_alloc_tx); /** * tb_ring_alloc_rx() - Allocate DMA ring for receive * @nhi: Pointer to the NHI the ring is to be allocated - * @hop: HopID (ring) to allocate + * @hop: HopID (ring) to allocate. Pass %-1 for automatic allocation. * @size: Number of entries in the ring * @flags: Flags for the ring * @sof_mask: Mask of PDF values that start a frame From 467cd25bf2aec14c2e2990512248b72f46f94c53 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:44 +0300 Subject: [PATCH 0471/2177] MAINTAINERS: Add thunderbolt.h to the Thunderbolt driver entry The new API header (include/linux/thunderbolt.h) is maintained by the Thunderbolt driver maintainers. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6671f375f7fc..c1c90d962012 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13278,6 +13278,7 @@ M: Mika Westerberg M: Yehezkel Bernat S: Maintained F: drivers/thunderbolt/ +F: include/linux/thunderbolt.h THUNDERX GPIO DRIVER M: David Daney From e69b6c02b4c3b8d03be7136f90dd9551ad5a5a5e Mon Sep 17 00:00:00 2001 From: Amir Levy Date: Mon, 2 Oct 2017 13:38:45 +0300 Subject: [PATCH 0472/2177] net: Add support for networking over Thunderbolt cable ThunderboltIP is a protocol created by Apple to tunnel IP/ethernet traffic over a Thunderbolt cable. 
The protocol consists of configuration phase where each side sends ThunderboltIP login packets (the protocol is determined by UUID in the XDomain packet header) over the configuration channel. Once both sides get positive acknowledgment to their login packet, they configure high-speed DMA path accordingly. This DMA path is then used to transmit and receive networking traffic. This patch creates a virtual ethernet interface the host software can use in the same way as any other networking interface. Once the interface is brought up successfully network packets get tunneled over the Thunderbolt cable to the remote host and back. The connection is terminated by sending a ThunderboltIP logout packet over the configuration channel. We do this when the network interface is brought down by user or the driver is unloaded. Signed-off-by: Amir Levy Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- Documentation/admin-guide/thunderbolt.rst | 24 + drivers/net/Kconfig | 12 + drivers/net/Makefile | 3 + drivers/net/thunderbolt.c | 1362 +++++++++++++++++++++ 4 files changed, 1401 insertions(+) create mode 100644 drivers/net/thunderbolt.c diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst index 6a4cd1f159ca..5c62d11d77e8 100644 --- a/Documentation/admin-guide/thunderbolt.rst +++ b/Documentation/admin-guide/thunderbolt.rst @@ -197,3 +197,27 @@ information is missing. To recover from this mode, one needs to flash a valid NVM image to the host host controller in the same way it is done in the previous chapter. + +Networking over Thunderbolt cable +--------------------------------- +Thunderbolt technology allows software communication across two hosts +connected by a Thunderbolt cable. + +It is possible to tunnel any kind of traffic over Thunderbolt link but +currently we only support Apple ThunderboltIP protocol. + +If the other host is running Windows or macOS only thing you need to +do is to connect Thunderbolt cable between the two hosts, the +``thunderbolt-net`` is loaded automatically. If the other host is also +Linux you should load ``thunderbolt-net`` manually on one host (it does +not matter which one):: + + # modprobe thunderbolt-net + +This triggers module load on the other host automatically. If the driver +is built-in to the kernel image, there is no need to do anything. + +The driver will create one virtual ethernet interface per Thunderbolt +port which are named like ``thunderbolt0`` and so on. From this point +you can either use standard userspace tools like ``ifconfig`` to +configure the interface or let your GUI to handle it automatically. diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index aba0d652095b..0936da592e12 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -483,6 +483,18 @@ config FUJITSU_ES This driver provides support for Extended Socket network device on Extended Partitioning of FUJITSU PRIMEQUEST 2000 E2 series. +config THUNDERBOLT_NET + tristate "Networking over Thunderbolt cable" + depends on THUNDERBOLT && INET + help + Select this if you want to create network between two + computers over a Thunderbolt cable. The driver supports Apple + ThunderboltIP protocol and allows communication with any host + supporting the same protocol including Windows and macOS. + + To compile this driver a module, choose M here. The module will be + called thunderbolt-net. 
+ source "drivers/net/hyperv/Kconfig" endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 8dff900085d6..7c8f4dd3a7c5 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -74,3 +74,6 @@ obj-$(CONFIG_HYPERV_NET) += hyperv/ obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o obj-$(CONFIG_FUJITSU_ES) += fjes/ + +thunderbolt-net-y += thunderbolt.o +obj-$(CONFIG_THUNDERBOLT_NET) += thunderbolt-net.o diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c new file mode 100644 index 000000000000..1a7bc0bf4598 --- /dev/null +++ b/drivers/net/thunderbolt.c @@ -0,0 +1,1362 @@ +/* + * Networking over Thunderbolt cable using Apple ThunderboltIP protocol + * + * Copyright (C) 2017, Intel Corporation + * Authors: Amir Levy + * Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Protocol timeouts in ms */ +#define TBNET_LOGIN_DELAY 4500 +#define TBNET_LOGIN_TIMEOUT 500 +#define TBNET_LOGOUT_TIMEOUT 100 + +#define TBNET_RING_SIZE 256 +#define TBNET_LOCAL_PATH 0xf +#define TBNET_LOGIN_RETRIES 60 +#define TBNET_LOGOUT_RETRIES 5 +#define TBNET_MATCH_FRAGS_ID BIT(1) +#define TBNET_MAX_MTU SZ_64K +#define TBNET_FRAME_SIZE SZ_4K +#define TBNET_MAX_PAYLOAD_SIZE \ + (TBNET_FRAME_SIZE - sizeof(struct thunderbolt_ip_frame_header)) +/* Rx packets need to hold space for skb_shared_info */ +#define TBNET_RX_MAX_SIZE \ + (TBNET_FRAME_SIZE + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define TBNET_RX_PAGE_ORDER get_order(TBNET_RX_MAX_SIZE) +#define TBNET_RX_PAGE_SIZE (PAGE_SIZE << TBNET_RX_PAGE_ORDER) + +#define TBNET_L0_PORT_NUM(route) ((route) & GENMASK(5, 0)) + +/** + * struct thunderbolt_ip_frame_header - Header for each Thunderbolt frame + * @frame_size: size of the data with the frame + * @frame_index: running index on the frames + * @frame_id: ID of the frame to match frames to specific packet + * @frame_count: how many frames assembles a full packet + * + * Each data frame passed to the high-speed DMA ring has this header. If + * the XDomain network directory announces that %TBNET_MATCH_FRAGS_ID is + * supported then @frame_id is filled, otherwise it stays %0. 
+ */ +struct thunderbolt_ip_frame_header { + u32 frame_size; + u16 frame_index; + u16 frame_id; + u32 frame_count; +}; + +enum thunderbolt_ip_frame_pdf { + TBIP_PDF_FRAME_START = 1, + TBIP_PDF_FRAME_END, +}; + +enum thunderbolt_ip_type { + TBIP_LOGIN, + TBIP_LOGIN_RESPONSE, + TBIP_LOGOUT, + TBIP_STATUS, +}; + +struct thunderbolt_ip_header { + u32 route_hi; + u32 route_lo; + u32 length_sn; + uuid_t uuid; + uuid_t initiator_uuid; + uuid_t target_uuid; + u32 type; + u32 command_id; +}; + +#define TBIP_HDR_LENGTH_MASK GENMASK(5, 0) +#define TBIP_HDR_SN_MASK GENMASK(28, 27) +#define TBIP_HDR_SN_SHIFT 27 + +struct thunderbolt_ip_login { + struct thunderbolt_ip_header hdr; + u32 proto_version; + u32 transmit_path; + u32 reserved[4]; +}; + +#define TBIP_LOGIN_PROTO_VERSION 1 + +struct thunderbolt_ip_login_response { + struct thunderbolt_ip_header hdr; + u32 status; + u32 receiver_mac[2]; + u32 receiver_mac_len; + u32 reserved[4]; +}; + +struct thunderbolt_ip_logout { + struct thunderbolt_ip_header hdr; +}; + +struct thunderbolt_ip_status { + struct thunderbolt_ip_header hdr; + u32 status; +}; + +struct tbnet_stats { + u64 tx_packets; + u64 rx_packets; + u64 tx_bytes; + u64 rx_bytes; + u64 rx_errors; + u64 tx_errors; + u64 rx_length_errors; + u64 rx_over_errors; + u64 rx_crc_errors; + u64 rx_missed_errors; +}; + +struct tbnet_frame { + struct net_device *dev; + struct page *page; + struct ring_frame frame; +}; + +struct tbnet_ring { + struct tbnet_frame frames[TBNET_RING_SIZE]; + unsigned int cons; + unsigned int prod; + struct tb_ring *ring; +}; + +/** + * struct tbnet - ThunderboltIP network driver private data + * @svc: XDomain service the driver is bound to + * @xd: XDomain the service blongs to + * @handler: ThunderboltIP configuration protocol handler + * @dev: Networking device + * @napi: NAPI structure for Rx polling + * @stats: Network statistics + * @skb: Network packet that is currently processed on Rx path + * @command_id: ID used for next configuration protocol packet + * @login_sent: ThunderboltIP login message successfully sent + * @login_received: ThunderboltIP login message received from the remote + * host + * @transmit_path: HopID the other end needs to use building the + * opposite side path. + * @connection_lock: Lock serializing access to @login_sent, + * @login_received and @transmit_path. + * @login_retries: Number of login retries currently done + * @login_work: Worker to send ThunderboltIP login packets + * @connected_work: Worker that finalizes the ThunderboltIP connection + * setup and enables DMA paths for high speed data + * transfers + * @rx_hdr: Copy of the currently processed Rx frame. Used when a + * network packet consists of multiple Thunderbolt frames. + * In host byte order. 
+ * @rx_ring: Software ring holding Rx frames + * @frame_id: Frame ID use for next Tx packet + * (if %TBNET_MATCH_FRAGS_ID is supported in both ends) + * @tx_ring: Software ring holding Tx frames + */ +struct tbnet { + const struct tb_service *svc; + struct tb_xdomain *xd; + struct tb_protocol_handler handler; + struct net_device *dev; + struct napi_struct napi; + struct tbnet_stats stats; + struct sk_buff *skb; + atomic_t command_id; + bool login_sent; + bool login_received; + u32 transmit_path; + struct mutex connection_lock; + int login_retries; + struct delayed_work login_work; + struct work_struct connected_work; + struct thunderbolt_ip_frame_header rx_hdr; + struct tbnet_ring rx_ring; + atomic_t frame_id; + struct tbnet_ring tx_ring; +}; + +/* Network property directory UUID: c66189ca-1cce-4195-bdb8-49592e5f5a4f */ +static const uuid_t tbnet_dir_uuid = + UUID_INIT(0xc66189ca, 0x1cce, 0x4195, + 0xbd, 0xb8, 0x49, 0x59, 0x2e, 0x5f, 0x5a, 0x4f); + +/* ThunderboltIP protocol UUID: 798f589e-3616-8a47-97c6-5664a920c8dd */ +static const uuid_t tbnet_svc_uuid = + UUID_INIT(0x798f589e, 0x3616, 0x8a47, + 0x97, 0xc6, 0x56, 0x64, 0xa9, 0x20, 0xc8, 0xdd); + +static struct tb_property_dir *tbnet_dir; + +static void tbnet_fill_header(struct thunderbolt_ip_header *hdr, u64 route, + u8 sequence, const uuid_t *initiator_uuid, const uuid_t *target_uuid, + enum thunderbolt_ip_type type, size_t size, u32 command_id) +{ + u32 length_sn; + + /* Length does not include route_hi/lo and length_sn fields */ + length_sn = (size - 3 * 4) / 4; + length_sn |= (sequence << TBIP_HDR_SN_SHIFT) & TBIP_HDR_SN_MASK; + + hdr->route_hi = upper_32_bits(route); + hdr->route_lo = lower_32_bits(route); + hdr->length_sn = length_sn; + uuid_copy(&hdr->uuid, &tbnet_svc_uuid); + uuid_copy(&hdr->initiator_uuid, initiator_uuid); + uuid_copy(&hdr->target_uuid, target_uuid); + hdr->type = type; + hdr->command_id = command_id; +} + +static int tbnet_login_response(struct tbnet *net, u64 route, u8 sequence, + u32 command_id) +{ + struct thunderbolt_ip_login_response reply; + struct tb_xdomain *xd = net->xd; + + memset(&reply, 0, sizeof(reply)); + tbnet_fill_header(&reply.hdr, route, sequence, xd->local_uuid, + xd->remote_uuid, TBIP_LOGIN_RESPONSE, sizeof(reply), + command_id); + memcpy(reply.receiver_mac, net->dev->dev_addr, ETH_ALEN); + reply.receiver_mac_len = ETH_ALEN; + + return tb_xdomain_response(xd, &reply, sizeof(reply), + TB_CFG_PKG_XDOMAIN_RESP); +} + +static int tbnet_login_request(struct tbnet *net, u8 sequence) +{ + struct thunderbolt_ip_login_response reply; + struct thunderbolt_ip_login request; + struct tb_xdomain *xd = net->xd; + + memset(&request, 0, sizeof(request)); + tbnet_fill_header(&request.hdr, xd->route, sequence, xd->local_uuid, + xd->remote_uuid, TBIP_LOGIN, sizeof(request), + atomic_inc_return(&net->command_id)); + + request.proto_version = TBIP_LOGIN_PROTO_VERSION; + request.transmit_path = TBNET_LOCAL_PATH; + + return tb_xdomain_request(xd, &request, sizeof(request), + TB_CFG_PKG_XDOMAIN_RESP, &reply, + sizeof(reply), TB_CFG_PKG_XDOMAIN_RESP, + TBNET_LOGIN_TIMEOUT); +} + +static int tbnet_logout_response(struct tbnet *net, u64 route, u8 sequence, + u32 command_id) +{ + struct thunderbolt_ip_status reply; + struct tb_xdomain *xd = net->xd; + + memset(&reply, 0, sizeof(reply)); + tbnet_fill_header(&reply.hdr, route, sequence, xd->local_uuid, + xd->remote_uuid, TBIP_STATUS, sizeof(reply), + atomic_inc_return(&net->command_id)); + return tb_xdomain_response(xd, &reply, sizeof(reply), + 
TB_CFG_PKG_XDOMAIN_RESP); +} + +static int tbnet_logout_request(struct tbnet *net) +{ + struct thunderbolt_ip_logout request; + struct thunderbolt_ip_status reply; + struct tb_xdomain *xd = net->xd; + + memset(&request, 0, sizeof(request)); + tbnet_fill_header(&request.hdr, xd->route, 0, xd->local_uuid, + xd->remote_uuid, TBIP_LOGOUT, sizeof(request), + atomic_inc_return(&net->command_id)); + + return tb_xdomain_request(xd, &request, sizeof(request), + TB_CFG_PKG_XDOMAIN_RESP, &reply, + sizeof(reply), TB_CFG_PKG_XDOMAIN_RESP, + TBNET_LOGOUT_TIMEOUT); +} + +static void start_login(struct tbnet *net) +{ + mutex_lock(&net->connection_lock); + net->login_sent = false; + net->login_received = false; + mutex_unlock(&net->connection_lock); + + queue_delayed_work(system_long_wq, &net->login_work, + msecs_to_jiffies(1000)); +} + +static void stop_login(struct tbnet *net) +{ + cancel_delayed_work_sync(&net->login_work); + cancel_work_sync(&net->connected_work); +} + +static inline unsigned int tbnet_frame_size(const struct tbnet_frame *tf) +{ + return tf->frame.size ? : TBNET_FRAME_SIZE; +} + +static void tbnet_free_buffers(struct tbnet_ring *ring) +{ + unsigned int i; + + for (i = 0; i < TBNET_RING_SIZE; i++) { + struct device *dma_dev = tb_ring_dma_device(ring->ring); + struct tbnet_frame *tf = &ring->frames[i]; + enum dma_data_direction dir; + unsigned int order; + size_t size; + + if (!tf->page) + continue; + + if (ring->ring->is_tx) { + dir = DMA_TO_DEVICE; + order = 0; + size = tbnet_frame_size(tf); + } else { + dir = DMA_FROM_DEVICE; + order = TBNET_RX_PAGE_ORDER; + size = TBNET_RX_PAGE_SIZE; + } + + if (tf->frame.buffer_phy) + dma_unmap_page(dma_dev, tf->frame.buffer_phy, size, + dir); + + __free_pages(tf->page, order); + tf->page = NULL; + } + + ring->cons = 0; + ring->prod = 0; +} + +static void tbnet_tear_down(struct tbnet *net, bool send_logout) +{ + netif_carrier_off(net->dev); + netif_stop_queue(net->dev); + + stop_login(net); + + mutex_lock(&net->connection_lock); + + if (net->login_sent && net->login_received) { + int retries = TBNET_LOGOUT_RETRIES; + + while (send_logout && retries-- > 0) { + int ret = tbnet_logout_request(net); + if (ret != -ETIMEDOUT) + break; + } + + tb_ring_stop(net->rx_ring.ring); + tb_ring_stop(net->tx_ring.ring); + tbnet_free_buffers(&net->rx_ring); + tbnet_free_buffers(&net->tx_ring); + + if (tb_xdomain_disable_paths(net->xd)) + netdev_warn(net->dev, "failed to disable DMA paths\n"); + } + + net->login_retries = 0; + net->login_sent = false; + net->login_received = false; + + mutex_unlock(&net->connection_lock); +} + +static int tbnet_handle_packet(const void *buf, size_t size, void *data) +{ + const struct thunderbolt_ip_login *pkg = buf; + struct tbnet *net = data; + u32 command_id; + int ret = 0; + u8 sequence; + u64 route; + + /* Make sure the packet is for us */ + if (size < sizeof(struct thunderbolt_ip_header)) + return 0; + if (!uuid_equal(&pkg->hdr.initiator_uuid, net->xd->remote_uuid)) + return 0; + if (!uuid_equal(&pkg->hdr.target_uuid, net->xd->local_uuid)) + return 0; + + route = ((u64)pkg->hdr.route_hi << 32) | pkg->hdr.route_lo; + route &= ~BIT_ULL(63); + if (route != net->xd->route) + return 0; + + sequence = pkg->hdr.length_sn & TBIP_HDR_SN_MASK; + sequence >>= TBIP_HDR_SN_SHIFT; + command_id = pkg->hdr.command_id; + + switch (pkg->hdr.type) { + case TBIP_LOGIN: + if (!netif_running(net->dev)) + break; + + ret = tbnet_login_response(net, route, sequence, + pkg->hdr.command_id); + if (!ret) { + mutex_lock(&net->connection_lock); + 
net->login_received = true; + net->transmit_path = pkg->transmit_path; + + /* If we reached the number of max retries or + * previous logout, schedule another round of + * login retries + */ + if (net->login_retries >= TBNET_LOGIN_RETRIES || + !net->login_sent) { + net->login_retries = 0; + queue_delayed_work(system_long_wq, + &net->login_work, 0); + } + mutex_unlock(&net->connection_lock); + + queue_work(system_long_wq, &net->connected_work); + } + break; + + case TBIP_LOGOUT: + ret = tbnet_logout_response(net, route, sequence, command_id); + if (!ret) + tbnet_tear_down(net, false); + break; + + default: + return 0; + } + + if (ret) + netdev_warn(net->dev, "failed to send ThunderboltIP response\n"); + + return 1; +} + +static unsigned int tbnet_available_buffers(const struct tbnet_ring *ring) +{ + return ring->prod - ring->cons; +} + +static int tbnet_alloc_rx_buffers(struct tbnet *net, unsigned int nbuffers) +{ + struct tbnet_ring *ring = &net->rx_ring; + int ret; + + while (nbuffers--) { + struct device *dma_dev = tb_ring_dma_device(ring->ring); + unsigned int index = ring->prod & (TBNET_RING_SIZE - 1); + struct tbnet_frame *tf = &ring->frames[index]; + dma_addr_t dma_addr; + + if (tf->page) + break; + + /* Allocate page (order > 0) so that it can hold maximum + * ThunderboltIP frame (4kB) and the additional room for + * SKB shared info required by build_skb(). + */ + tf->page = dev_alloc_pages(TBNET_RX_PAGE_ORDER); + if (!tf->page) { + ret = -ENOMEM; + goto err_free; + } + + dma_addr = dma_map_page(dma_dev, tf->page, 0, + TBNET_RX_PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) { + ret = -ENOMEM; + goto err_free; + } + + tf->frame.buffer_phy = dma_addr; + tf->dev = net->dev; + + tb_ring_rx(ring->ring, &tf->frame); + + ring->prod++; + } + + return 0; + +err_free: + tbnet_free_buffers(ring); + return ret; +} + +static struct tbnet_frame *tbnet_get_tx_buffer(struct tbnet *net) +{ + struct tbnet_ring *ring = &net->tx_ring; + struct tbnet_frame *tf; + unsigned int index; + + if (!tbnet_available_buffers(ring)) + return NULL; + + index = ring->cons++ & (TBNET_RING_SIZE - 1); + + tf = &ring->frames[index]; + tf->frame.size = 0; + tf->frame.buffer_phy = 0; + + return tf; +} + +static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame, + bool canceled) +{ + struct tbnet_frame *tf = container_of(frame, typeof(*tf), frame); + struct device *dma_dev = tb_ring_dma_device(ring); + struct tbnet *net = netdev_priv(tf->dev); + + dma_unmap_page(dma_dev, tf->frame.buffer_phy, tbnet_frame_size(tf), + DMA_TO_DEVICE); + + /* Return buffer to the ring */ + net->tx_ring.prod++; + + if (tbnet_available_buffers(&net->tx_ring) >= TBNET_RING_SIZE / 2) + netif_wake_queue(net->dev); +} + +static int tbnet_alloc_tx_buffers(struct tbnet *net) +{ + struct tbnet_ring *ring = &net->tx_ring; + unsigned int i; + + for (i = 0; i < TBNET_RING_SIZE; i++) { + struct tbnet_frame *tf = &ring->frames[i]; + + tf->page = alloc_page(GFP_KERNEL); + if (!tf->page) { + tbnet_free_buffers(ring); + return -ENOMEM; + } + + tf->dev = net->dev; + tf->frame.callback = tbnet_tx_callback; + tf->frame.sof = TBIP_PDF_FRAME_START; + tf->frame.eof = TBIP_PDF_FRAME_END; + } + + ring->cons = 0; + ring->prod = TBNET_RING_SIZE - 1; + + return 0; +} + +static void tbnet_connected_work(struct work_struct *work) +{ + struct tbnet *net = container_of(work, typeof(*net), connected_work); + bool connected; + int ret; + + if (netif_carrier_ok(net->dev)) + return; + + mutex_lock(&net->connection_lock); + connected 
= net->login_sent && net->login_received; + mutex_unlock(&net->connection_lock); + + if (!connected) + return; + + /* Both logins successful so enable the high-speed DMA paths and + * start the network device queue. + */ + ret = tb_xdomain_enable_paths(net->xd, TBNET_LOCAL_PATH, + net->rx_ring.ring->hop, + net->transmit_path, + net->tx_ring.ring->hop); + if (ret) { + netdev_err(net->dev, "failed to enable DMA paths\n"); + return; + } + + tb_ring_start(net->tx_ring.ring); + tb_ring_start(net->rx_ring.ring); + + ret = tbnet_alloc_rx_buffers(net, TBNET_RING_SIZE); + if (ret) + goto err_stop_rings; + + ret = tbnet_alloc_tx_buffers(net); + if (ret) + goto err_free_rx_buffers; + + netif_carrier_on(net->dev); + netif_start_queue(net->dev); + return; + +err_free_rx_buffers: + tbnet_free_buffers(&net->rx_ring); +err_stop_rings: + tb_ring_stop(net->rx_ring.ring); + tb_ring_stop(net->tx_ring.ring); +} + +static void tbnet_login_work(struct work_struct *work) +{ + struct tbnet *net = container_of(work, typeof(*net), login_work.work); + unsigned long delay = msecs_to_jiffies(TBNET_LOGIN_DELAY); + int ret; + + if (netif_carrier_ok(net->dev)) + return; + + ret = tbnet_login_request(net, net->login_retries % 4); + if (ret) { + if (net->login_retries++ < TBNET_LOGIN_RETRIES) { + queue_delayed_work(system_long_wq, &net->login_work, + delay); + } else { + netdev_info(net->dev, "ThunderboltIP login timed out\n"); + } + } else { + net->login_retries = 0; + + mutex_lock(&net->connection_lock); + net->login_sent = true; + mutex_unlock(&net->connection_lock); + + queue_work(system_long_wq, &net->connected_work); + } +} + +static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf, + const struct thunderbolt_ip_frame_header *hdr) +{ + u32 frame_id, frame_count, frame_size, frame_index; + unsigned int size; + + if (tf->frame.flags & RING_DESC_CRC_ERROR) { + net->stats.rx_crc_errors++; + return false; + } else if (tf->frame.flags & RING_DESC_BUFFER_OVERRUN) { + net->stats.rx_over_errors++; + return false; + } + + /* Should be greater than just header i.e. contains data */ + size = tbnet_frame_size(tf); + if (size <= sizeof(*hdr)) { + net->stats.rx_length_errors++; + return false; + } + + frame_count = le32_to_cpu(hdr->frame_count); + frame_size = le32_to_cpu(hdr->frame_size); + frame_index = le16_to_cpu(hdr->frame_index); + frame_id = le16_to_cpu(hdr->frame_id); + + if ((frame_size > size - sizeof(*hdr)) || !frame_size) { + net->stats.rx_length_errors++; + return false; + } + + /* In case we're in the middle of packet, validate the frame + * header based on first fragment of the packet. + */ + if (net->skb && net->rx_hdr.frame_count) { + /* Check the frame count fits the count field */ + if (frame_count != net->rx_hdr.frame_count) { + net->stats.rx_length_errors++; + return false; + } + + /* Check the frame identifiers are incremented correctly, + * and id is matching. 
+ */ + if (frame_index != net->rx_hdr.frame_index + 1 || + frame_id != net->rx_hdr.frame_id) { + net->stats.rx_missed_errors++; + return false; + } + + if (net->skb->len + frame_size > TBNET_MAX_MTU) { + net->stats.rx_length_errors++; + return false; + } + + return true; + } + + /* Start of packet, validate the frame header */ + if (frame_count == 0 || frame_count > TBNET_RING_SIZE / 4) { + net->stats.rx_length_errors++; + return false; + } + if (frame_index != 0) { + net->stats.rx_missed_errors++; + return false; + } + + return true; +} + +static int tbnet_poll(struct napi_struct *napi, int budget) +{ + struct tbnet *net = container_of(napi, struct tbnet, napi); + unsigned int cleaned_count = tbnet_available_buffers(&net->rx_ring); + struct device *dma_dev = tb_ring_dma_device(net->rx_ring.ring); + unsigned int rx_packets = 0; + + while (rx_packets < budget) { + const struct thunderbolt_ip_frame_header *hdr; + unsigned int hdr_size = sizeof(*hdr); + struct sk_buff *skb = NULL; + struct ring_frame *frame; + struct tbnet_frame *tf; + struct page *page; + bool last = true; + u32 frame_size; + + /* Return some buffers to hardware, one at a time is too + * slow so allocate MAX_SKB_FRAGS buffers at the same + * time. + */ + if (cleaned_count >= MAX_SKB_FRAGS) { + tbnet_alloc_rx_buffers(net, cleaned_count); + cleaned_count = 0; + } + + frame = tb_ring_poll(net->rx_ring.ring); + if (!frame) + break; + + dma_unmap_page(dma_dev, frame->buffer_phy, + TBNET_RX_PAGE_SIZE, DMA_FROM_DEVICE); + + tf = container_of(frame, typeof(*tf), frame); + + page = tf->page; + tf->page = NULL; + net->rx_ring.cons++; + cleaned_count++; + + hdr = page_address(page); + if (!tbnet_check_frame(net, tf, hdr)) { + __free_pages(page, TBNET_RX_PAGE_ORDER); + dev_kfree_skb_any(net->skb); + net->skb = NULL; + continue; + } + + frame_size = le32_to_cpu(hdr->frame_size); + + skb = net->skb; + if (!skb) { + skb = build_skb(page_address(page), + TBNET_RX_PAGE_SIZE); + if (!skb) { + __free_pages(page, TBNET_RX_PAGE_ORDER); + net->stats.rx_errors++; + break; + } + + skb_reserve(skb, hdr_size); + skb_put(skb, frame_size); + + net->skb = skb; + } else { + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + page, hdr_size, frame_size, + TBNET_RX_PAGE_SIZE - hdr_size); + } + + net->rx_hdr.frame_size = frame_size; + net->rx_hdr.frame_count = le32_to_cpu(hdr->frame_count); + net->rx_hdr.frame_index = le16_to_cpu(hdr->frame_index); + net->rx_hdr.frame_id = le16_to_cpu(hdr->frame_id); + last = net->rx_hdr.frame_index == net->rx_hdr.frame_count - 1; + + rx_packets++; + net->stats.rx_bytes += frame_size; + + if (last) { + skb->protocol = eth_type_trans(skb, net->dev); + napi_gro_receive(&net->napi, skb); + net->skb = NULL; + } + } + + net->stats.rx_packets += rx_packets; + + if (cleaned_count) + tbnet_alloc_rx_buffers(net, cleaned_count); + + if (rx_packets >= budget) + return budget; + + napi_complete_done(napi, rx_packets); + /* Re-enable the ring interrupt */ + tb_ring_poll_complete(net->rx_ring.ring); + + return rx_packets; +} + +static void tbnet_start_poll(void *data) +{ + struct tbnet *net = data; + + napi_schedule(&net->napi); +} + +static int tbnet_open(struct net_device *dev) +{ + struct tbnet *net = netdev_priv(dev); + struct tb_xdomain *xd = net->xd; + u16 sof_mask, eof_mask; + struct tb_ring *ring; + + netif_carrier_off(dev); + + ring = tb_ring_alloc_tx(xd->tb->nhi, -1, TBNET_RING_SIZE, + RING_FLAG_FRAME); + if (!ring) { + netdev_err(dev, "failed to allocate Tx ring\n"); + return -ENOMEM; + } + net->tx_ring.ring = ring; + + sof_mask 
= BIT(TBIP_PDF_FRAME_START); + eof_mask = BIT(TBIP_PDF_FRAME_END); + + ring = tb_ring_alloc_rx(xd->tb->nhi, -1, TBNET_RING_SIZE, + RING_FLAG_FRAME | RING_FLAG_E2E, sof_mask, + eof_mask, tbnet_start_poll, net); + if (!ring) { + netdev_err(dev, "failed to allocate Rx ring\n"); + tb_ring_free(net->tx_ring.ring); + net->tx_ring.ring = NULL; + return -ENOMEM; + } + net->rx_ring.ring = ring; + + napi_enable(&net->napi); + start_login(net); + + return 0; +} + +static int tbnet_stop(struct net_device *dev) +{ + struct tbnet *net = netdev_priv(dev); + + napi_disable(&net->napi); + + tbnet_tear_down(net, true); + + tb_ring_free(net->rx_ring.ring); + net->rx_ring.ring = NULL; + tb_ring_free(net->tx_ring.ring); + net->tx_ring.ring = NULL; + + return 0; +} + +static bool tbnet_xmit_map(struct device *dma_dev, struct tbnet_frame *tf) +{ + dma_addr_t dma_addr; + + dma_addr = dma_map_page(dma_dev, tf->page, 0, tbnet_frame_size(tf), + DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) + return false; + + tf->frame.buffer_phy = dma_addr; + return true; +} + +static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, + struct tbnet_frame **frames, u32 frame_count) +{ + struct thunderbolt_ip_frame_header *hdr = page_address(frames[0]->page); + struct device *dma_dev = tb_ring_dma_device(net->tx_ring.ring); + __wsum wsum = htonl(skb->len - skb_transport_offset(skb)); + unsigned int i, len, offset = skb_transport_offset(skb); + __be16 protocol = skb->protocol; + void *data = skb->data; + void *dest = hdr + 1; + __sum16 *tucso; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* No need to calculate checksum so we just update the + * total frame count and map the frames for DMA. + */ + for (i = 0; i < frame_count; i++) { + hdr = page_address(frames[i]->page); + hdr->frame_count = cpu_to_le32(frame_count); + if (!tbnet_xmit_map(dma_dev, frames[i])) + goto err_unmap; + } + + return true; + } + + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_hdr *vhdr, vh; + + vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(vh), &vh); + if (!vhdr) + return false; + + protocol = vhdr->h_vlan_encapsulated_proto; + } + + /* Data points on the beginning of packet. + * Check is the checksum absolute place in the packet. + * ipcso will update IP checksum. + * tucso will update TCP/UPD checksum. + */ + if (protocol == htons(ETH_P_IP)) { + __sum16 *ipcso = dest + ((void *)&(ip_hdr(skb)->check) - data); + + *ipcso = 0; + *ipcso = ip_fast_csum(dest + skb_network_offset(skb), + ip_hdr(skb)->ihl); + + if (ip_hdr(skb)->protocol == IPPROTO_TCP) + tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data); + else if (ip_hdr(skb)->protocol == IPPROTO_UDP) + tucso = dest + ((void *)&(udp_hdr(skb)->check) - data); + else + return false; + + *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, + ip_hdr(skb)->protocol, 0); + } else if (skb_is_gso_v6(skb)) { + tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data); + *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + return false; + } else if (protocol == htons(ETH_P_IPV6)) { + tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset; + *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, + ipv6_hdr(skb)->nexthdr, 0); + } else { + return false; + } + + /* First frame was headers, rest of the frames contain data. + * Calculate checksum over each frame. 
+ */ + for (i = 0; i < frame_count; i++) { + hdr = page_address(frames[i]->page); + dest = (void *)(hdr + 1) + offset; + len = le32_to_cpu(hdr->frame_size) - offset; + wsum = csum_partial(dest, len, wsum); + hdr->frame_count = cpu_to_le32(frame_count); + + offset = 0; + } + + *tucso = csum_fold(wsum); + + /* Checksum is finally calculated and we don't touch the memory + * anymore, so DMA map the frames now. + */ + for (i = 0; i < frame_count; i++) { + if (!tbnet_xmit_map(dma_dev, frames[i])) + goto err_unmap; + } + + return true; + +err_unmap: + while (i--) + dma_unmap_page(dma_dev, frames[i]->frame.buffer_phy, + tbnet_frame_size(frames[i]), DMA_TO_DEVICE); + + return false; +} + +static void *tbnet_kmap_frag(struct sk_buff *skb, unsigned int frag_num, + unsigned int *len) +{ + const skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_num]; + + *len = skb_frag_size(frag); + return kmap_atomic(skb_frag_page(frag)) + frag->page_offset; +} + +static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct tbnet *net = netdev_priv(dev); + struct tbnet_frame *frames[MAX_SKB_FRAGS]; + u16 frame_id = atomic_read(&net->frame_id); + struct thunderbolt_ip_frame_header *hdr; + unsigned int len = skb_headlen(skb); + unsigned int data_len = skb->len; + unsigned int nframes, i; + unsigned int frag = 0; + void *src = skb->data; + u32 frame_index = 0; + bool unmap = false; + void *dest; + + nframes = DIV_ROUND_UP(data_len, TBNET_MAX_PAYLOAD_SIZE); + if (tbnet_available_buffers(&net->tx_ring) < nframes) { + netif_stop_queue(net->dev); + return NETDEV_TX_BUSY; + } + + frames[frame_index] = tbnet_get_tx_buffer(net); + if (!frames[frame_index]) + goto err_drop; + + hdr = page_address(frames[frame_index]->page); + dest = hdr + 1; + + /* If overall packet is bigger than the frame data size */ + while (data_len > TBNET_MAX_PAYLOAD_SIZE) { + unsigned int size_left = TBNET_MAX_PAYLOAD_SIZE; + + hdr->frame_size = cpu_to_le32(TBNET_MAX_PAYLOAD_SIZE); + hdr->frame_index = cpu_to_le16(frame_index); + hdr->frame_id = cpu_to_le16(frame_id); + + do { + if (len > size_left) { + /* Copy data onto Tx buffer data with + * full frame size then break and go to + * next frame + */ + memcpy(dest, src, size_left); + len -= size_left; + dest += size_left; + src += size_left; + break; + } + + memcpy(dest, src, len); + size_left -= len; + dest += len; + + if (unmap) { + kunmap_atomic(src); + unmap = false; + } + + /* Ensure all fragments have been processed */ + if (frag < skb_shinfo(skb)->nr_frags) { + /* Map and then unmap quickly */ + src = tbnet_kmap_frag(skb, frag++, &len); + unmap = true; + } else if (unlikely(size_left > 0)) { + goto err_drop; + } + } while (size_left > 0); + + data_len -= TBNET_MAX_PAYLOAD_SIZE; + frame_index++; + + frames[frame_index] = tbnet_get_tx_buffer(net); + if (!frames[frame_index]) + goto err_drop; + + hdr = page_address(frames[frame_index]->page); + dest = hdr + 1; + } + + hdr->frame_size = cpu_to_le32(data_len); + hdr->frame_index = cpu_to_le16(frame_index); + hdr->frame_id = cpu_to_le16(frame_id); + + frames[frame_index]->frame.size = data_len + sizeof(*hdr); + + /* In case the remaining data_len is smaller than a frame */ + while (len < data_len) { + memcpy(dest, src, len); + data_len -= len; + dest += len; + + if (unmap) { + kunmap_atomic(src); + unmap = false; + } + + if (frag < skb_shinfo(skb)->nr_frags) { + src = tbnet_kmap_frag(skb, frag++, &len); + unmap = true; + } else if (unlikely(data_len > 0)) { + goto err_drop; + } + } + + memcpy(dest, src, data_len); + + 
if (unmap) + kunmap_atomic(src); + + if (!tbnet_xmit_csum_and_map(net, skb, frames, frame_index + 1)) + goto err_drop; + + for (i = 0; i < frame_index + 1; i++) + tb_ring_tx(net->tx_ring.ring, &frames[i]->frame); + + if (net->svc->prtcstns & TBNET_MATCH_FRAGS_ID) + atomic_inc(&net->frame_id); + + net->stats.tx_packets++; + net->stats.tx_bytes += skb->len; + + dev_consume_skb_any(skb); + + return NETDEV_TX_OK; + +err_drop: + /* We can re-use the buffers */ + net->tx_ring.cons -= frame_index; + + dev_kfree_skb_any(skb); + net->stats.tx_errors++; + + return NETDEV_TX_OK; +} + +static void tbnet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct tbnet *net = netdev_priv(dev); + + stats->tx_packets = net->stats.tx_packets; + stats->rx_packets = net->stats.rx_packets; + stats->tx_bytes = net->stats.tx_bytes; + stats->rx_bytes = net->stats.rx_bytes; + stats->rx_errors = net->stats.rx_errors + net->stats.rx_length_errors + + net->stats.rx_over_errors + net->stats.rx_crc_errors + + net->stats.rx_missed_errors; + stats->tx_errors = net->stats.tx_errors; + stats->rx_length_errors = net->stats.rx_length_errors; + stats->rx_over_errors = net->stats.rx_over_errors; + stats->rx_crc_errors = net->stats.rx_crc_errors; + stats->rx_missed_errors = net->stats.rx_missed_errors; +} + +static const struct net_device_ops tbnet_netdev_ops = { + .ndo_open = tbnet_open, + .ndo_stop = tbnet_stop, + .ndo_start_xmit = tbnet_start_xmit, + .ndo_get_stats64 = tbnet_get_stats64, +}; + +static void tbnet_generate_mac(struct net_device *dev) +{ + const struct tbnet *net = netdev_priv(dev); + const struct tb_xdomain *xd = net->xd; + u8 phy_port; + u32 hash; + + phy_port = tb_phy_port_from_link(TBNET_L0_PORT_NUM(xd->route)); + + /* Unicast and locally administered MAC */ + dev->dev_addr[0] = phy_port << 4 | 0x02; + hash = jhash2((u32 *)xd->local_uuid, 4, 0); + memcpy(dev->dev_addr + 1, &hash, sizeof(hash)); + hash = jhash2((u32 *)xd->local_uuid, 4, hash); + dev->dev_addr[5] = hash & 0xff; +} + +static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id) +{ + struct tb_xdomain *xd = tb_service_parent(svc); + struct net_device *dev; + struct tbnet *net; + int ret; + + dev = alloc_etherdev(sizeof(*net)); + if (!dev) + return -ENOMEM; + + SET_NETDEV_DEV(dev, &svc->dev); + + net = netdev_priv(dev); + INIT_DELAYED_WORK(&net->login_work, tbnet_login_work); + INIT_WORK(&net->connected_work, tbnet_connected_work); + mutex_init(&net->connection_lock); + atomic_set(&net->command_id, 0); + atomic_set(&net->frame_id, 0); + net->svc = svc; + net->dev = dev; + net->xd = xd; + + tbnet_generate_mac(dev); + + strcpy(dev->name, "thunderbolt%d"); + dev->netdev_ops = &tbnet_netdev_ops; + + /* ThunderboltIP takes advantage of TSO packets but instead of + * segmenting them we just split the packet into Thunderbolt + * frames (maximum payload size of each frame is 4084 bytes) and + * calculate checksum over the whole packet here. + * + * The receiving side does the opposite if the host OS supports + * LRO, otherwise it needs to split the large packet into MTU + * sized smaller packets. + * + * In order to receive large packets from the networking stack, + * we need to announce support for most of the offloading + * features here. 
+ */ + dev->hw_features = NETIF_F_SG | NETIF_F_ALL_TSO | NETIF_F_GRO | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->features = dev->hw_features | NETIF_F_HIGHDMA; + dev->hard_header_len += sizeof(struct thunderbolt_ip_frame_header); + + netif_napi_add(dev, &net->napi, tbnet_poll, NAPI_POLL_WEIGHT); + + /* MTU range: 68 - 65522 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = TBNET_MAX_MTU - ETH_HLEN; + + net->handler.uuid = &tbnet_svc_uuid; + net->handler.callback = tbnet_handle_packet, + net->handler.data = net; + tb_register_protocol_handler(&net->handler); + + tb_service_set_drvdata(svc, net); + + ret = register_netdev(dev); + if (ret) { + tb_unregister_protocol_handler(&net->handler); + free_netdev(dev); + return ret; + } + + return 0; +} + +static void tbnet_remove(struct tb_service *svc) +{ + struct tbnet *net = tb_service_get_drvdata(svc); + + unregister_netdev(net->dev); + tb_unregister_protocol_handler(&net->handler); + free_netdev(net->dev); +} + +static void tbnet_shutdown(struct tb_service *svc) +{ + tbnet_tear_down(tb_service_get_drvdata(svc), true); +} + +static int __maybe_unused tbnet_suspend(struct device *dev) +{ + struct tb_service *svc = tb_to_service(dev); + struct tbnet *net = tb_service_get_drvdata(svc); + + stop_login(net); + if (netif_running(net->dev)) { + netif_device_detach(net->dev); + tb_ring_stop(net->rx_ring.ring); + tb_ring_stop(net->tx_ring.ring); + tbnet_free_buffers(&net->rx_ring); + tbnet_free_buffers(&net->tx_ring); + } + + return 0; +} + +static int __maybe_unused tbnet_resume(struct device *dev) +{ + struct tb_service *svc = tb_to_service(dev); + struct tbnet *net = tb_service_get_drvdata(svc); + + netif_carrier_off(net->dev); + if (netif_running(net->dev)) { + netif_device_attach(net->dev); + start_login(net); + } + + return 0; +} + +static const struct dev_pm_ops tbnet_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(tbnet_suspend, tbnet_resume) +}; + +static const struct tb_service_id tbnet_ids[] = { + { TB_SERVICE("network", 1) }, + { }, +}; +MODULE_DEVICE_TABLE(tbsvc, tbnet_ids); + +static struct tb_service_driver tbnet_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "thunderbolt-net", + .pm = &tbnet_pm_ops, + }, + .probe = tbnet_probe, + .remove = tbnet_remove, + .shutdown = tbnet_shutdown, + .id_table = tbnet_ids, +}; + +static int __init tbnet_init(void) +{ + int ret; + + tbnet_dir = tb_property_create_dir(&tbnet_dir_uuid); + if (!tbnet_dir) + return -ENOMEM; + + tb_property_add_immediate(tbnet_dir, "prtcid", 1); + tb_property_add_immediate(tbnet_dir, "prtcvers", 1); + tb_property_add_immediate(tbnet_dir, "prtcrevs", 1); + tb_property_add_immediate(tbnet_dir, "prtcstns", + TBNET_MATCH_FRAGS_ID); + + ret = tb_register_property_dir("network", tbnet_dir); + if (ret) { + tb_property_free_dir(tbnet_dir); + return ret; + } + + return tb_register_service_driver(&tbnet_driver); +} +module_init(tbnet_init); + +static void __exit tbnet_exit(void) +{ + tb_unregister_service_driver(&tbnet_driver); + tb_unregister_property_dir("network", tbnet_dir); + tb_property_free_dir(tbnet_dir); +} +module_exit(tbnet_exit); + +MODULE_AUTHOR("Amir Levy "); +MODULE_AUTHOR("Michael Jamet "); +MODULE_AUTHOR("Mika Westerberg "); +MODULE_DESCRIPTION("Thunderbolt network driver"); +MODULE_LICENSE("GPL v2"); From c024297e7be34b6f0e578afc76c5b9f7708a5832 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:46 +0300 Subject: [PATCH 0473/2177] MAINTAINERS: Add entry for Thunderbolt network driver I will be maintaining the Thunderbolt network driver along 
with Michael and Yehezkel. Signed-off-by: Michael Jamet Signed-off-by: Yehezkel Bernat Signed-off-by: Mika Westerberg Signed-off-by: David S. Miller --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c1c90d962012..5231392cf4bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13280,6 +13280,14 @@ S: Maintained F: drivers/thunderbolt/ F: include/linux/thunderbolt.h +THUNDERBOLT NETWORK DRIVER +M: Michael Jamet +M: Mika Westerberg +M: Yehezkel Bernat +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/thunderbolt.c + THUNDERX GPIO DRIVER M: David Daney S: Maintained From e50d5751c807853cd0fce0b5c46479cc6274014f Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 24 Jul 2017 18:17:42 -0700 Subject: [PATCH 0474/2177] i40e: limit lan queue count in large CPU count machine When a machine has more CPUs than queue pairs, e.g. 512 cores, the counting gets a little funky and turns off Flow Director with the message: not enough queues for Flow Director. Flow Director feature is disabled This patch limits the number of lan queues initially allocated to be sure we have some left for FD and other features. Signed-off-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 47f71d7c3ae0..387f0863f794 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11093,6 +11093,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) static void i40e_determine_queue_usage(struct i40e_pf *pf) { int queues_left; + int q_max; pf->num_lan_qps = 0; @@ -11139,10 +11140,12 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_DCB_ENABLED); dev_info(&pf->pdev->dev, "not enough queues for DCB. 
DCB is disabled.\n"); } - pf->num_lan_qps = max_t(int, pf->rss_size_max, - num_online_cpus()); - pf->num_lan_qps = min_t(int, pf->num_lan_qps, - pf->hw.func_caps.num_tx_qp); + + /* limit lan qps to the smaller of qps, cpus or msix */ + q_max = max_t(int, pf->rss_size_max, num_online_cpus()); + q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp); + q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors); + pf->num_lan_qps = q_max; queues_left -= pf->num_lan_qps; } From 5872866e166c38ad1c1028fb9cf7dd756c0ef43e Mon Sep 17 00:00:00 2001 From: Lihong Yang Date: Thu, 27 Jul 2017 03:17:09 -0700 Subject: [PATCH 0475/2177] i40e: remove logically dead code This patch removes the !vf condition check that cannot be true in i40e_ndo_set_vf_trust function Detected by CoverityScan, CID 1397531 Logically dead code Signed-off-by: Lihong Yang Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index a75396c157d9..e6b95e1e1a33 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3354,8 +3354,6 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) vf = &pf->vf[vf_id]; - if (!vf) - return -EINVAL; if (setting == vf->trusted) goto out; From 54902349ee95045b67e2f0c39b75f5418540064b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 6 Aug 2017 23:37:01 +0200 Subject: [PATCH 0476/2177] i40e: Fix a potential NULL pointer dereference If 'kzalloc()' fails, a NULL pointer will be dereferenced. Return an error code (-ENOMEM) instead. Signed-off-by: Christophe JAILLET Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index e6b95e1e1a33..9e3667fc7f6a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -423,6 +423,9 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, (sizeof(struct virtchnl_iwarp_qv_info) * (qvlist_info->num_vectors - 1)); vf->qvlist_info = kzalloc(size, GFP_KERNEL); + if (!vf->qvlist_info) + return -ENOMEM; + vf->qvlist_info->num_vectors = qvlist_info->num_vectors; msix_vf = pf->hw.func_caps.num_msix_vectors_vf; From d60bcc798000e015940fb47eb23b79dd2fda5c9e Mon Sep 17 00:00:00 2001 From: Filip Sadowski Date: Tue, 22 Aug 2017 06:57:43 -0400 Subject: [PATCH 0477/2177] i40e: Fix reporting of supported link modes This patch fixes incorrect reporting of supported link modes on some NICs. 
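Since firmware API 1.7 the get_link_status response carries the supported PHY types as four link_type bytes plus a link_type_ext byte, and the hunk below folds them into the 64-bit hw->phy.phy_types mask with le32_to_cpu(). A minimal standalone sketch of that combine is shown here; the helper name is hypothetical and, unlike the driver, it skips the endianness conversion, so it assumes a little-endian host.

#include <stdint.h>
#include <string.h>

/*
 * Illustrative only: mirror how i40e_aq_get_link_info() builds the PHY
 * type bitmask from the five bytes reported by firmware API >= 1.7.
 */
static uint64_t phy_types_from_link_status(const uint8_t link_type[4],
					   uint8_t link_type_ext)
{
	uint32_t low;
	uint64_t types;

	memcpy(&low, link_type, sizeof(low));	/* le32_to_cpu() in the driver */
	types = low;				/* PHY types 0..31 */
	types |= (uint64_t)link_type_ext << 32;	/* PHY types 32..39 */
	return types;
}

The extension byte is what makes room for PHY types with enum values above 31, such as I40E_PHY_TYPE_25GBASE_SR (0x21), which cannot be represented in the original 32-bit field alone.
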
Signed-off-by: Filip Sadowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 20 +++++++++++++++++-- drivers/net/ethernet/intel/i40e/i40e_common.c | 11 +++++++++- .../ethernet/intel/i40evf/i40e_adminq_cmd.h | 20 +++++++++++++++++-- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index e2a9ec80a623..5d0291c1337e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1734,6 +1734,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB, I40E_PHY_TYPE_10GBASE_AOC = 0xC, I40E_PHY_TYPE_40GBASE_AOC = 0xD, + I40E_PHY_TYPE_UNRECOGNIZED = 0xE, + I40E_PHY_TYPE_UNSUPPORTED = 0xF, I40E_PHY_TYPE_100BASE_TX = 0x11, I40E_PHY_TYPE_1000BASE_T = 0x12, I40E_PHY_TYPE_10GBASE_T = 0x13, @@ -1752,6 +1754,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_EMPTY = 0xFE, + I40E_PHY_TYPE_DEFAULT = 0xFF, I40E_PHY_TYPE_MAX }; @@ -1942,19 +1946,31 @@ struct i40e_aqc_get_link_status { #define I40E_AQ_25G_SERDES_UCODE_ERR 0X04 #define I40E_AQ_25G_NIMB_UCODE_ERR 0X05 u8 loopback; /* use defines from i40e_aqc_set_lb_mode */ +/* Since firmware API 1.7 loopback field keeps power class info as well */ +#define I40E_AQ_LOOPBACK_MASK 0x07 +#define I40E_AQ_PWR_CLASS_SHIFT_LB 6 +#define I40E_AQ_PWR_CLASS_MASK_LB (0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB) __le16 max_frame_size; u8 config; #define I40E_AQ_CONFIG_FEC_KR_ENA 0x01 #define I40E_AQ_CONFIG_FEC_RS_ENA 0x02 #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 power_desc; + union { + struct { + u8 power_desc; #define I40E_AQ_LINK_POWER_CLASS_1 0x00 #define I40E_AQ_LINK_POWER_CLASS_2 0x01 #define I40E_AQ_LINK_POWER_CLASS_3 0x02 #define I40E_AQ_LINK_POWER_CLASS_4 0x03 #define I40E_AQ_PWR_CLASS_MASK 0x03 - u8 reserved[4]; + u8 reserved[4]; + }; + struct { + u8 link_type[4]; + u8 link_type_ext; + }; + }; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 7346d8850c8e..64c15f4c9d2b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1821,7 +1821,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA | I40E_AQ_CONFIG_FEC_RS_ENA); hw_link_info->ext_info = resp->ext_info; - hw_link_info->loopback = resp->loopback; + hw_link_info->loopback = resp->loopback & I40E_AQ_LOOPBACK_MASK; hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size); hw_link_info->pacing = resp->config & I40E_AQ_CONFIG_PACING_MASK; @@ -1852,6 +1852,15 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; + if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= 7) { + __le32 tmp; + + memcpy(&tmp, resp->link_type, sizeof(tmp)); + hw->phy.phy_types = le32_to_cpu(tmp); + hw->phy.phy_types |= ((u64)resp->link_type_ext << 32); + } + /* save link status information */ if (link) *link = *hw_link_info; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index f9f48d1900b0..709d114fc305 100644 --- 
a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1730,6 +1730,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB, I40E_PHY_TYPE_10GBASE_AOC = 0xC, I40E_PHY_TYPE_40GBASE_AOC = 0xD, + I40E_PHY_TYPE_UNRECOGNIZED = 0xE, + I40E_PHY_TYPE_UNSUPPORTED = 0xF, I40E_PHY_TYPE_100BASE_TX = 0x11, I40E_PHY_TYPE_1000BASE_T = 0x12, I40E_PHY_TYPE_10GBASE_T = 0x13, @@ -1748,6 +1750,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_EMPTY = 0xFE, + I40E_PHY_TYPE_DEFAULT = 0xFF, I40E_PHY_TYPE_MAX }; @@ -1938,19 +1942,31 @@ struct i40e_aqc_get_link_status { #define I40E_AQ_25G_SERDES_UCODE_ERR 0X04 #define I40E_AQ_25G_NIMB_UCODE_ERR 0X05 u8 loopback; /* use defines from i40e_aqc_set_lb_mode */ +/* Since firmware API 1.7 loopback field keeps power class info as well */ +#define I40E_AQ_LOOPBACK_MASK 0x07 +#define I40E_AQ_PWR_CLASS_SHIFT_LB 6 +#define I40E_AQ_PWR_CLASS_MASK_LB (0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB) __le16 max_frame_size; u8 config; #define I40E_AQ_CONFIG_FEC_KR_ENA 0x01 #define I40E_AQ_CONFIG_FEC_RS_ENA 0x02 #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 power_desc; + union { + struct { + u8 power_desc; #define I40E_AQ_LINK_POWER_CLASS_1 0x00 #define I40E_AQ_LINK_POWER_CLASS_2 0x01 #define I40E_AQ_LINK_POWER_CLASS_3 0x02 #define I40E_AQ_LINK_POWER_CLASS_4 0x03 #define I40E_AQ_PWR_CLASS_MASK 0x03 - u8 reserved[4]; + u8 reserved[4]; + }; + struct { + u8 link_type[4]; + u8 link_type_ext; + }; + }; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); From 9c0e5caf6398d6b892dc5046c421890e32ab5fa3 Mon Sep 17 00:00:00 2001 From: Filip Sadowski Date: Tue, 22 Aug 2017 06:57:44 -0400 Subject: [PATCH 0478/2177] i40e: Add support for 'ethtool -m' This patch adds support for 'ethtool -m' command which displays information about (Q)SFP+ module plugged into NIC's cage. 
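Once these callbacks are wired up, the module data can be read from userspace with ethtool -m <interface>. For SFP modules the EEPROM is split across two I2C device addresses, 0xA0 for the first 256 bytes (ETH_MODULE_SFF_8079_LEN) and 0xA2 for the SFF-8472 diagnostic page, and i40e_get_module_eeprom() below translates the linear ethtool offset accordingly. A standalone sketch of that mapping follows; the helper name is hypothetical and the constants mirror the I40E_I2C_EEPROM_DEV_ADDR* defines added by this patch.

#include <stdint.h>

#define EEPROM_DEV_ADDR		0xA0	/* I40E_I2C_EEPROM_DEV_ADDR */
#define EEPROM_DEV_ADDR2	0xA2	/* I40E_I2C_EEPROM_DEV_ADDR2 */
#define SFF_8079_LEN		256	/* ETH_MODULE_SFF_8079_LEN */

/*
 * Sketch only: map a linear EEPROM offset from the ethtool request to the
 * I2C device address and in-page offset used for an SFP module, mirroring
 * the SFP branch of i40e_get_module_eeprom().
 */
static void sfp_eeprom_locate(uint32_t linear, uint8_t *dev_addr,
			      uint32_t *offset)
{
	if (linear >= SFF_8079_LEN) {
		*dev_addr = EEPROM_DEV_ADDR2;	/* diagnostics page */
		*offset = linear - SFF_8079_LEN;
	} else {
		*dev_addr = EEPROM_DEV_ADDR;	/* serial ID page */
		*offset = linear;
	}
}

QSFP+ modules are paged differently; the QSFP branch in the patch instead steps the offset down by ETH_MODULE_SFF_8436_LEN / 2 per upper page.
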
Signed-off-by: Filip Sadowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 18 ++ drivers/net/ethernet/intel/i40e/i40e_common.c | 69 ++++++++ .../net/ethernet/intel/i40e/i40e_ethtool.c | 154 ++++++++++++++++++ .../net/ethernet/intel/i40e/i40e_prototype.h | 9 + drivers/net/ethernet/intel/i40e/i40e_type.h | 13 ++ .../ethernet/intel/i40evf/i40e_adminq_cmd.h | 18 ++ .../net/ethernet/intel/i40evf/i40e_common.c | 69 ++++++++ .../ethernet/intel/i40evf/i40e_prototype.h | 9 + drivers/net/ethernet/intel/i40evf/i40e_type.h | 12 ++ 9 files changed, 371 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 5d0291c1337e..ed7bbe14bc6e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -244,6 +244,8 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, i40e_aqc_opc_run_phy_activity = 0x0626, + i40e_aqc_opc_set_phy_register = 0x0628, + i40e_aqc_opc_get_phy_register = 0x0629, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -2053,6 +2055,22 @@ struct i40e_aqc_run_phy_activity { I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); +/* Set PHY Register command (0x0628) */ +/* Get PHY Register command (0x0629) */ +struct i40e_aqc_phy_register_access { + u8 phy_interface; +#define I40E_AQ_PHY_REG_ACCESS_INTERNAL 0 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2 + u8 dev_address; + u8 reserved1[2]; + __le32 reg_address; + __le32 reg_value; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 64c15f4c9d2b..fada03799850 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -5062,6 +5062,75 @@ do_retry: wr32(hw, reg_addr, reg_val); } +/** + * i40e_aq_set_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: new register value + * @cmd_details: pointer to command details structure or NULL + * + * Write the external PHY register. + **/ +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + cmd->reg_value = cpu_to_le32(reg_val); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_aq_get_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: read register value + * @cmd_details: pointer to command details structure or NULL + * + * Read the external PHY register. 
+ **/ +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_get_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + if (!status) + *reg_val = le32_to_cpu(cmd->reg_value); + + return status; +} + /** * i40e_aq_write_ppp - Write pipeline personalization profile (ppp) * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 05e89864f781..1136d02e2e95 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4196,6 +4196,158 @@ flags_complete: return 0; } +/** + * i40e_get_module_info - get (Q)SFP+ module type info + * @netdev: network interface device structure + * @modinfo: module EEPROM size and layout information structure + **/ +static int i40e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + u32 sff8472_comp = 0; + u32 sff8472_swap = 0; + u32 sff8636_rev = 0; + i40e_status status; + u32 type = 0; + + /* Check if firmware supports reading module EEPROM. */ + if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) { + netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n"); + return -EINVAL; + } + + status = i40e_update_link_info(hw); + if (status) + return -EIO; + + if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) { + netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n"); + return -EINVAL; + } + + type = hw->phy.link_info.module_type[0]; + + switch (type) { + case I40E_MODULE_TYPE_SFP: + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + I40E_I2C_EEPROM_DEV_ADDR, + I40E_MODULE_SFF_8472_COMP, + &sff8472_comp, NULL); + if (status) + return -EIO; + + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + I40E_I2C_EEPROM_DEV_ADDR, + I40E_MODULE_SFF_8472_SWAP, + &sff8472_swap, NULL); + if (status) + return -EIO; + + /* Check if the module requires address swap to access + * the other EEPROM memory page. + */ + if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) { + netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n"); + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else if (sff8472_comp == 0x00) { + /* Module is not SFF-8472 compliant */ + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + break; + case I40E_MODULE_TYPE_QSFP_PLUS: + /* Read from memory page 0. 
*/ + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + 0, + I40E_MODULE_REVISION_ADDR, + &sff8636_rev, NULL); + if (status) + return -EIO; + /* Determine revision compliance byte */ + if (sff8636_rev > 0x02) { + /* Module is SFF-8636 compliant */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + } + break; + case I40E_MODULE_TYPE_QSFP28: + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + break; + default: + netdev_err(vsi->netdev, "Module type unrecognized\n"); + return -EINVAL; + } + return 0; +} + +/** + * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents + * @netdev: network interface device structure + * @ee: EEPROM dump request structure + * @data: buffer to be filled with EEPROM contents + **/ +static int i40e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + bool is_sfp = false; + i40e_status status; + u32 value = 0; + int i; + + if (!ee || !ee->len || !data) + return -EINVAL; + + if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP) + is_sfp = true; + + for (i = 0; i < ee->len; i++) { + u32 offset = i + ee->offset; + u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0; + + /* Check if we need to access the other memory page */ + if (is_sfp) { + if (offset >= ETH_MODULE_SFF_8079_LEN) { + offset -= ETH_MODULE_SFF_8079_LEN; + addr = I40E_I2C_EEPROM_DEV_ADDR2; + } + } else { + while (offset >= ETH_MODULE_SFF_8436_LEN) { + /* Compute memory page number and offset. 
*/ + offset -= ETH_MODULE_SFF_8436_LEN / 2; + addr++; + } + } + + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + addr, offset, &value, NULL); + if (status) + return -EIO; + data[i] = value; + } + return 0; +} + static const struct ethtool_ops i40e_ethtool_ops = { .get_drvinfo = i40e_get_drvinfo, .get_regs_len = i40e_get_regs_len, @@ -4228,6 +4380,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .set_rxfh = i40e_set_rxfh, .get_channels = i40e_get_channels, .set_channels = i40e_set_channels, + .get_module_info = i40e_get_module_info, + .get_module_eeprom = i40e_get_module_eeprom, .get_ts_info = i40e_get_ts_info, .get_priv_flags = i40e_get_priv_flags, .set_priv_flags = i40e_set_priv_flags, diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index a39b13197891..01502561035c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -360,6 +360,15 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); + i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw, u16 reg, u8 phy_addr, u16 *value); i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw, diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index fd4bbdd88b57..8b0b9f826b7f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -428,6 +428,18 @@ struct i40e_nvm_access { u8 data[1]; }; +/* (Q)SFP module access definitions */ +#define I40E_I2C_EEPROM_DEV_ADDR 0xA0 +#define I40E_I2C_EEPROM_DEV_ADDR2 0xA2 +#define I40E_MODULE_TYPE_ADDR 0x00 +#define I40E_MODULE_REVISION_ADDR 0x01 +#define I40E_MODULE_SFF_8472_COMP 0x5E +#define I40E_MODULE_SFF_8472_SWAP 0x5C +#define I40E_MODULE_SFF_ADDR_MODE 0x04 +#define I40E_MODULE_TYPE_QSFP_PLUS 0x0D +#define I40E_MODULE_TYPE_QSFP28 0x11 +#define I40E_MODULE_QSFP_MAX_LEN 640 + /* PCI bus types */ enum i40e_bus_type { i40e_bus_type_unknown = 0, @@ -598,6 +610,7 @@ struct i40e_hw { struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) +#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) u64 flags; /* debug mask */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 709d114fc305..eee7ece42b39 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -244,6 +244,8 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, i40e_aqc_opc_run_phy_activity = 0x0626, + i40e_aqc_opc_set_phy_register = 0x0628, + i40e_aqc_opc_get_phy_register = 0x0629, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -2046,6 +2048,22 @@ struct i40e_aqc_run_phy_activity { I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); +/* Set PHY Register command (0x0628) */ +/* Get PHY Register command (0x0629) */ +struct 
i40e_aqc_phy_register_access { + u8 phy_interface; +#define I40E_AQ_PHY_REG_ACCESS_INTERNAL 0 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2 + u8 dev_address; + u8 reserved1[2]; + __le32 reg_address; + __le32 reg_value; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 8d3a2bfe186a..7d70bf69b249 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -1041,6 +1041,75 @@ do_retry: wr32(hw, reg_addr, reg_val); } +/** + * i40evf_aq_set_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: new register value + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. + **/ +i40e_status i40evf_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + cmd->reg_value = cpu_to_le32(reg_val); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40evf_aq_get_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: read register value + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. 
+ **/ +i40e_status i40evf_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_get_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + if (!status) + *reg_val = le32_to_cpu(cmd->reg_value); + + return status; +} + /** * i40e_aq_send_msg_to_pf * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h index c9836bba487d..b624b5994075 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h @@ -111,6 +111,15 @@ i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); + i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg, u8 phy_addr, u16 *value); i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 2ea919d9cdcf..b53584e3d580 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -401,6 +401,18 @@ struct i40e_nvm_access { u8 data[1]; }; +/* (Q)SFP module access definitions */ +#define I40E_I2C_EEPROM_DEV_ADDR 0xA0 +#define I40E_I2C_EEPROM_DEV_ADDR2 0xA2 +#define I40E_MODULE_TYPE_ADDR 0x00 +#define I40E_MODULE_REVISION_ADDR 0x01 +#define I40E_MODULE_SFF_8472_COMP 0x5E +#define I40E_MODULE_SFF_8472_SWAP 0x5C +#define I40E_MODULE_SFF_ADDR_MODE 0x04 +#define I40E_MODULE_TYPE_QSFP_PLUS 0x0D +#define I40E_MODULE_TYPE_QSFP28 0x11 +#define I40E_MODULE_QSFP_MAX_LEN 640 + /* PCI bus types */ enum i40e_bus_type { i40e_bus_type_unknown = 0, From 00f6c2f5e20bbdb638e58c50c6e6b1d8b796d6f6 Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Tue, 22 Aug 2017 06:57:45 -0400 Subject: [PATCH 0479/2177] i40e: use admin queue for setting LEDs behavior Instead of accessing register directly, use newly added AQC in order to blink LEDs. Introduce and utilize a new flag to prevent excessive API version checking. 
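The idea of checking the firmware API version once and caching the result as a capability flag, so that later call sites only test the flag, can be sketched in plain userspace C. Everything below (the struct, field and flag names) is invented for illustration and is not the driver's own code:

#include <stdbool.h>
#include <stdio.h>

#define CAP_PHY_ACCESS (1u << 2)       /* hypothetical capability bit */

struct fake_hw {
        unsigned int api_major;
        unsigned int api_minor;
        unsigned int caps;             /* capability flags cached at init */
};

static void fake_hw_init(struct fake_hw *hw)
{
        /* Decide once, at init time, whether the firmware is new enough. */
        if (hw->api_major > 1 || (hw->api_major == 1 && hw->api_minor >= 7))
                hw->caps |= CAP_PHY_ACCESS;
}

static void blink_led(struct fake_hw *hw)
{
        /* Later call sites test the cached flag instead of re-comparing
         * version numbers on every call.
         */
        if (hw->caps & CAP_PHY_ACCESS)
                printf("blink LED via admin queue command\n");
        else
                printf("blink LED via direct register access\n");
}

int main(void)
{
        struct fake_hw hw = { .api_major = 1, .api_minor = 7, .caps = 0 };

        fake_hw_init(&hw);
        blink_led(&hw);
        return 0;
}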
Signed-off-by: Mariusz Stachura Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 6 + drivers/net/ethernet/intel/i40e/i40e_common.c | 113 ++++++++++++++---- drivers/net/ethernet/intel/i40evf/i40e_type.h | 2 + 3 files changed, 98 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index ba04988e0598..08f63226105a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -607,6 +607,12 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) &oem_lo); hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo; + if (hw->mac.type == I40E_MAC_XL710 && + hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { + hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; + } + if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) { ret_code = I40E_ERR_FIRMWARE_API_VERSION; goto init_adminq_free_arq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index fada03799850..a4838779de5d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -4836,6 +4836,74 @@ phy_blinking_end: return status; } +/** + * i40e_led_get_reg - read LED register + * @hw: pointer to the HW structure + * @led_addr: LED register address + * @reg_val: read register value + **/ +static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr, + u32 *reg_val) +{ + enum i40e_status_code status; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + *reg_val = 0; + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + reg_val, NULL); + } else { + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + (u16 *)reg_val); + } + return status; +} + +/** + * i40e_led_set_reg - write LED register + * @hw: pointer to the HW structure + * @led_addr: LED register address + * @reg_val: register value to write + **/ +static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr, + u32 reg_val) +{ + enum i40e_status_code status; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_set_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + reg_val, NULL); + } else { + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + (u16)reg_val); + } + + return status; +} + /** * i40e_led_get_phy - return current on/off mode * @hw: pointer to the hw struct @@ -4853,7 +4921,19 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, u16 temp_addr; u8 port_num; u32 i; + u32 reg_val_aq; + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + ®_val_aq, NULL); + if (status == I40E_SUCCESS) + *val = (u16)reg_val_aq; + return status; + } temp_addr = 
I40E_PHY_LED_PROV_REG_1; i = rd32(hw, I40E_PFGEN_PORTNUM); port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); @@ -4888,51 +4968,38 @@ i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, u16 led_addr, u32 mode) { i40e_status status = 0; - u16 led_ctl = 0; - u16 led_reg = 0; - u8 phy_addr = 0; - u8 port_num; - u32 i; + u32 led_ctl = 0; + u32 led_reg = 0; - i = rd32(hw, I40E_PFGEN_PORTNUM); - port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); - phy_addr = i40e_get_phy_address(hw, port_num); - status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_led_get_reg(hw, led_addr, &led_reg); if (status) return status; led_ctl = led_reg; if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { led_reg = 0; - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - led_reg); + status = i40e_led_set_reg(hw, led_addr, led_reg); if (status) return status; } - status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_led_get_reg(hw, led_addr, &led_reg); if (status) goto restore_config; if (on) led_reg = I40E_PHY_LED_MANUAL_ON; else led_reg = 0; - status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_reg); + + status = i40e_led_set_reg(hw, led_addr, led_reg); if (status) goto restore_config; if (mode & I40E_PHY_LED_MODE_ORIG) { led_ctl = (mode & I40E_PHY_LED_MODE_MASK); - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_ctl); + status = i40e_led_set_reg(hw, led_addr, led_ctl); } return status; + restore_config: - status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_ctl); + status = i40e_led_set_reg(hw, led_addr, led_ctl); return status; } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index b53584e3d580..48eacf5e73e4 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -568,6 +568,8 @@ struct i40e_hw { /* LLDP/DCBX Status */ u16 dcbx_status; +#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) + /* DCBX info */ struct i40e_dcbx_config local_dcbx_config; /* Oper/Local Cfg */ struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ From ba4e003d29c1d32776f156695fb00adf7df86ee2 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Aug 2017 06:57:46 -0400 Subject: [PATCH 0480/2177] i40e: don't hold spinlock while resetting VF When we refactored handling of the PVID in commit 9af52f60b2d9 ("i40e: use (add|rm)_vlan_all_mac helper functions when changing PVID") we introduced a scheduling while atomic regression. This occurred because we now held the spinlock across a call to i40e_reset_vf(), which results in a usleep_range() call that triggers a scheduling while atomic bug. This was rare as it only occurred if the user configured a VLAN on a VF and also attempted to reconfigure the VF from the host system with a port VLAN. We do need to hold the lock while calling i40e_is_vsi_in_vlan(), but we should not be holding it while we reset the VF. We'll fix this by introducing a separate helper function i40e_vsi_has_vlans which checks whether we have a PVID and whether the VSI has configured VLANs. This helper function will manage its own need for the mac_filter_hash_lock. Then, we can move the acquiring of the spinlock until after we reset the VF, which ensures that we do not sleep while holding the lock. 
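A minimal userspace analogue of the lock scoping just described, using a pthread mutex in place of the kernel spinlock and with all names invented for the illustration: the lock is held only long enough to inspect the shared state, and the operation that may sleep runs with no lock held.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t filter_lock = PTHREAD_MUTEX_INITIALIZER;
static int vlan_filter_count = 1;      /* shared state guarded by filter_lock */

static bool has_vlans(void)
{
        bool ret;

        pthread_mutex_lock(&filter_lock);      /* short critical section */
        ret = vlan_filter_count > 0;
        pthread_mutex_unlock(&filter_lock);

        return ret;
}

static void slow_reset(void)
{
        /* Stand-in for a reset path that sleeps; it must not run under the lock. */
        usleep(10000);
        printf("reset done\n");
}

int main(void)
{
        if (has_vlans())
                printf("VLAN filters configured, refusing port VLAN override\n");

        slow_reset();                          /* called with no locks held */
        return 0;
}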
Using a separate function like this makes the code more clear and is easier to read than attempting to release and re-acquire the spinlock when we reset the VF. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 9e3667fc7f6a..53ead127b293 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2925,6 +2925,34 @@ error_param: return ret; } +/** + * i40e_vsi_has_vlans - True if VSI has configured VLANs + * @vsi: pointer to the vsi + * + * Check if a VSI has configured any VLANs. False if we have a port VLAN or if + * we have no configured VLANs. Do not call while holding the + * mac_filter_hash_lock. + */ +static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi) +{ + bool have_vlans; + + /* If we have a port VLAN, then the VSI cannot have any VLANs + * configured, as all MAC/VLAN filters will be assigned to the PVID. + */ + if (vsi->info.pvid) + return false; + + /* Since we don't have a PVID, we know that if the device is in VLAN + * mode it must be because of a VLAN filter configured on this VSI. + */ + spin_lock_bh(&vsi->mac_filter_hash_lock); + have_vlans = i40e_is_vsi_in_vlan(vsi); + spin_unlock_bh(&vsi->mac_filter_hash_lock); + + return have_vlans; +} + /** * i40e_ndo_set_vf_port_vlan * @netdev: network interface device structure @@ -2977,10 +3005,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; - /* Locked once because multiple functions below iterate list */ - spin_lock_bh(&vsi->mac_filter_hash_lock); - - if (le16_to_cpu(vsi->info.pvid) == 0 && i40e_is_vsi_in_vlan(vsi)) { + if (i40e_vsi_has_vlans(vsi)) { dev_err(&pf->pdev->dev, "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n", vf_id); @@ -2993,6 +3018,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, vsi = pf->vsi[vf->lan_vsi_idx]; } + /* Locked once because multiple functions below iterate list */ + spin_lock_bh(&vsi->mac_filter_hash_lock); + /* Check for condition where there was already a port VLAN ID * filter set and now it is being deleted by setting it to zero. * Additionally check for the condition where there was a port From eeeddbb80640ef63466a54bc118f66c81487bc42 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Aug 2017 06:57:47 -0400 Subject: [PATCH 0481/2177] i40e: drop i40e_pf *pf from i40e_vc_disable_vf() It's never used, and the vf structure could get back to the PF if necessary. Lets just drop the extra unneeded parameter. 
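As a small illustration (with invented types, not the driver's structures), a back-pointer in the per-VF structure is what makes a separate PF argument unnecessary:

#include <stdio.h>

struct pf { int id; };

struct vf {
        int id;
        struct pf *pf;  /* back-pointer: helpers only need the VF */
};

static void disable_vf(struct vf *vf)
{
        printf("disabling VF %d on PF %d\n", vf->id, vf->pf->id);
}

int main(void)
{
        struct pf pf = { .id = 0 };
        struct vf vf = { .id = 3, .pf = &pf };

        disable_vf(&vf);
        return 0;
}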
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 53ead127b293..70a79864177a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -154,12 +154,11 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) /** * i40e_vc_disable_vf - * @pf: pointer to the PF info * @vf: pointer to the VF info * * Disable the VF through a SW reset **/ -static inline void i40e_vc_disable_vf(struct i40e_pf *pf, struct i40e_vf *vf) +static inline void i40e_vc_disable_vf(struct i40e_vf *vf) { i40e_vc_notify_vf_reset(vf); i40e_reset_vf(vf, false); @@ -2918,7 +2917,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) } /* Force the VF driver stop so it has to reload with new MAC address */ - i40e_vc_disable_vf(pf, vf); + i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n"); error_param: @@ -3013,7 +3012,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, * the right thing by reconfiguring his network correctly * and then reloading the VF driver. */ - i40e_vc_disable_vf(pf, vf); + i40e_vc_disable_vf(vf); /* During reset the VF got a new VSI, so refresh the pointer. */ vsi = pf->vsi[vf->lan_vsi_idx]; } From f18d20218a14d11d8fd6ed32e66ad199c8c93280 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Aug 2017 06:57:48 -0400 Subject: [PATCH 0482/2177] i40e: make use of i40e_vc_disable_vf Replace i40e_vc_notify_vf_reset and i40e_reset_vf with a call to i40e_vc_disable_vf which does this exact thing. This matches similar code patterns throughout the driver. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 70a79864177a..94ee243f110e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3388,8 +3388,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; vf->trusted = setting; - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); out: From d43d60e5eb9504aa6f8f390aa0313cc8e3816b82 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Aug 2017 06:57:49 -0400 Subject: [PATCH 0483/2177] i40e: ensure reset occurs when disabling VF It is possible although rare that we may not reset when i40e_vc_disable_vf() is called. This can lead to some weird circumstances with some values not being properly set. Modify i40e_reset_vf() to return a code indicating whether it reset or not. Now, i40e_vc_disable_vf() can wait until a reset actually occurs. If it fails to free up within a reasonable time frame we'll display a warning message. 
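The bounded retry described above can be sketched in plain userspace C; the helper names and the fake "busy for a few attempts" behavior below are illustrative only, not the driver's code:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static bool try_reset(void)
{
        static int busy_iterations = 3;

        /* Pretend another flow owns the reset for the first few attempts. */
        return --busy_iterations < 0;
}

static void disable_vf(int vf_id)
{
        int i;

        for (i = 0; i < 20; i++) {
                if (try_reset())
                        return;         /* reset actually started */
                usleep(10000);          /* ~10 ms between attempts */
        }

        fprintf(stderr,
                "Failed to initiate reset for VF %d after 200 milliseconds\n",
                vf_id);
}

int main(void)
{
        disable_vf(0);
        return 0;
}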
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 42 +++++++++++++++---- .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 4 +- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 94ee243f110e..7742cf3d38d9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -156,12 +156,28 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) * i40e_vc_disable_vf * @vf: pointer to the VF info * - * Disable the VF through a SW reset + * Disable the VF through a SW reset. **/ static inline void i40e_vc_disable_vf(struct i40e_vf *vf) { + int i; + i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + + /* We want to ensure that an actual reset occurs initiated after this + * function was called. However, we do not want to wait forever, so + * we'll give a reasonable time and print a message if we failed to + * ensure a reset. + */ + for (i = 0; i < 20; i++) { + if (i40e_reset_vf(vf, false)) + return; + usleep_range(10000, 20000); + } + + dev_warn(&vf->pf->pdev->dev, + "Failed to initiate reset for VF %d after 200 milliseconds\n", + vf->vf_id); } /** @@ -1051,9 +1067,9 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * reset the VF + * Returns true if the VF is reset, false otherwise. **/ -void i40e_reset_vf(struct i40e_vf *vf, bool flr) +bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; @@ -1061,9 +1077,11 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) u32 reg; int i; - /* If VFs have been disabled, there is no need to reset */ + /* If the VFs have been disabled, this means something else is + * resetting the VF, so we shouldn't continue. + */ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) - return; + return false; i40e_trigger_vf_reset(vf, flr); @@ -1100,6 +1118,8 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_flush(hw); clear_bit(__I40E_VF_DISABLE, pf->state); + + return true; } /** @@ -1111,8 +1131,10 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) * VF, then do all the waiting in one chunk, and finally finish restoring each * VF after the wait. This is useful during PF routines which need to reset * all VFs, as otherwise it must perform these resets in a serialized fashion. + * + * Returns true if any VFs were reset, and false otherwise. 
**/ -void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) +bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; @@ -1121,11 +1143,11 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) - return; + return false; /* If VFs have been disabled, there is no need to reset */ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) - return; + return false; /* Begin reset on all VFs at once */ for (v = 0; v < pf->num_alloc_vfs; v++) @@ -1200,6 +1222,8 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) i40e_flush(hw); clear_bit(__I40E_VF_DISABLE, pf->state); + + return true; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5111d05d5f2f..5ea42ad094bc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -122,8 +122,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs); int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen); int i40e_vc_process_vflr_event(struct i40e_pf *pf); -void i40e_reset_vf(struct i40e_vf *vf, bool flr); -void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr); +bool i40e_reset_vf(struct i40e_vf *vf, bool flr); +bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr); void i40e_vc_notify_vf_reset(struct i40e_vf *vf); /* VF configuration related iplink handlers */ From 5b36e8d04b4439c9ceb814bfdfe1284737f9c632 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 22 Aug 2017 06:57:50 -0400 Subject: [PATCH 0484/2177] i40evf: Enable VF to request an alternate queue allocation Currently the VF gets a default number of allocated queues from HW on init and it could choose to enable or disable those allocated queues. This makes it such that the VF can request more or less underlying allocated queues from the PF. First the VF negotiates the number of queues it wants that can be supported by the PF and if successful asks for a reset. During reset the PF will reallocate the HW queues for the VF and will then remap the new queues. 
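A rough userspace sketch of that negotiation, with both sides faked and all names invented: the VF asks for a queue count, compares what was granted against what it requested, and either schedules a reinit or falls back to its current configuration.

#include <stdio.h>

/* Stand-in for the PF side: it grants at most 4 queue pairs. */
static int pf_grant_queues(int requested)
{
        return requested > 4 ? 4 : requested;
}

static void vf_request_queues(int requested)
{
        int granted = pf_grant_queues(requested);

        if (granted == requested)
                printf("granted %d queues, scheduling reset to remap them\n",
                       granted);
        else
                printf("requested %d queues, PF can support %d; keeping current config\n",
                       requested, granted);
}

int main(void)
{
        vf_request_queues(4);   /* succeeds, triggers reinit */
        vf_request_queues(8);   /* falls back */
        return 0;
}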
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf.h | 4 + .../ethernet/intel/i40evf/i40evf_ethtool.c | 38 +++++++- .../net/ethernet/intel/i40evf/i40evf_main.c | 94 ++++++++++++++++++- .../ethernet/intel/i40evf/i40evf_virtchnl.c | 44 ++++++++- 4 files changed, 173 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 82f69031e5cd..5982362c5643 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -102,6 +102,7 @@ struct i40e_vsi { #define I40E_TX_CTXTDESC(R, i) \ (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) #define MAX_QUEUES 16 +#define I40EVF_MAX_REQ_QUEUES 4 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) @@ -200,6 +201,7 @@ struct i40evf_adapter { struct list_head vlan_filter_list; char misc_vector_name[IFNAMSIZ + 9]; int num_active_queues; + int num_req_queues; /* TX */ struct i40e_ring *tx_rings; @@ -235,6 +237,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_PROMISC_ON BIT(18) #define I40EVF_FLAG_ALLMULTI_ON BIT(19) #define I40EVF_FLAG_LEGACY_RX BIT(20) +#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(21) /* duplicates for common code */ #define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED @@ -349,6 +352,7 @@ void i40evf_deconfigure_queues(struct i40evf_adapter *adapter); void i40evf_enable_queues(struct i40evf_adapter *adapter); void i40evf_disable_queues(struct i40evf_adapter *adapter); void i40evf_map_queues(struct i40evf_adapter *adapter); +int i40evf_request_queues(struct i40evf_adapter *adapter, int num); void i40evf_add_ether_addrs(struct i40evf_adapter *adapter); void i40evf_del_ether_addrs(struct i40evf_adapter *adapter); void i40evf_add_vlans(struct i40evf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 65874d6b3ab9..da006fa3fec1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -669,7 +669,7 @@ static void i40evf_get_channels(struct net_device *netdev, struct i40evf_adapter *adapter = netdev_priv(netdev); /* Report maximum channels */ - ch->max_combined = adapter->num_active_queues; + ch->max_combined = I40EVF_MAX_REQ_QUEUES; ch->max_other = NONQ_VECS; ch->other_count = NONQ_VECS; @@ -677,6 +677,41 @@ static void i40evf_get_channels(struct net_device *netdev, ch->combined_count = adapter->num_active_queues; } +/** + * i40evf_set_channels: set the new channel count + * @netdev: network interface device structure + * @ch: channel information structure + * + * Negotiate a new number of channels with the PF then do a reset. During + * reset we'll realloc queues and fix the RSS table. Returns 0 on success, + * negative on failure. + **/ +static int i40evf_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + int num_req = ch->combined_count; + + if (num_req != adapter->num_active_queues && + !(adapter->vf_res->vf_cap_flags & + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) { + dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n"); + return -EINVAL; + } + + /* All of these should have already been checked by ethtool before this + * even gets to us, but just to be sure. 
+ */ + if (num_req <= 0 || num_req > I40EVF_MAX_REQ_QUEUES) + return -EINVAL; + + if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS) + return -EINVAL; + + adapter->num_req_queues = num_req; + return i40evf_request_queues(adapter, num_req); +} + /** * i40evf_get_rxfh_key_size - get the RSS hash key size * @netdev: network interface device structure @@ -785,6 +820,7 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .get_rxfh = i40evf_get_rxfh, .set_rxfh = i40evf_set_rxfh, .get_channels = i40evf_get_channels, + .set_channels = i40evf_set_channels, .get_rxfh_key_size = i40evf_get_rxfh_key_size, .get_link_ksettings = i40evf_get_link_ksettings, }; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 69ef6c1d5364..8c513ce84345 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1189,9 +1189,18 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) { int i, num_active_queues; - num_active_queues = min_t(int, - adapter->vsi_res->num_queue_pairs, - (int)(num_online_cpus())); + /* If we're in reset reallocating queues we don't actually know yet for + * certain the PF gave us the number of queues we asked for but we'll + * assume it did. Once basic reset is finished we'll confirm once we + * start negotiating config with PF. + */ + if (adapter->num_req_queues) + num_active_queues = adapter->num_req_queues; + else + num_active_queues = min_t(int, + adapter->vsi_res->num_queue_pairs, + (int)(num_online_cpus())); + adapter->tx_rings = kcalloc(num_active_queues, sizeof(struct i40e_ring), GFP_KERNEL); @@ -1539,6 +1548,48 @@ static void i40evf_free_rss(struct i40evf_adapter *adapter) adapter->rss_lut = NULL; } +/** + * i40evf_reinit_interrupt_scheme - Reallocate queues and vectors + * @adapter: board private structure + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_reinit_interrupt_scheme(struct i40evf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int err; + + if (netif_running(netdev)) + i40evf_free_traffic_irqs(adapter); + i40evf_free_misc_irq(adapter); + i40evf_reset_interrupt_capability(adapter); + i40evf_free_q_vectors(adapter); + i40evf_free_queues(adapter); + + err = i40evf_init_interrupt_scheme(adapter); + if (err) + goto err; + + netif_tx_stop_all_queues(netdev); + + err = i40evf_request_misc_irq(adapter); + if (err) + goto err; + + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); + + err = i40evf_map_rings_to_vectors(adapter); + if (err) + goto err; + + if (RSS_AQ(adapter)) + adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; + else + err = i40evf_init_rss(adapter); +err: + return err; +} + /** * i40evf_watchdog_timer - Periodic call-back timer * @data: pointer to adapter disguised as unsigned long @@ -1885,8 +1936,15 @@ continue_reset: if (err) dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n", err); + adapter->aq_required = 0; - adapter->aq_required = I40EVF_FLAG_AQ_GET_CONFIG; + if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) { + err = i40evf_reinit_interrupt_scheme(adapter); + if (err) + goto reset_err; + } + + adapter->aq_required |= I40EVF_FLAG_AQ_GET_CONFIG; adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; /* re-add all MAC filters */ @@ -1916,6 +1974,15 @@ continue_reset: if (err) goto reset_err; + if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) { + err = i40evf_request_traffic_irqs(adapter, + netdev->name); + if (err) + goto reset_err; + + adapter->flags 
&= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + } + i40evf_configure(adapter); i40evf_up_complete(adapter); @@ -2431,9 +2498,9 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw) int i40evf_process_config(struct i40evf_adapter *adapter) { struct virtchnl_vf_resource *vfres = adapter->vf_res; + int i, num_req_queues = adapter->num_req_queues; struct net_device *netdev = adapter->netdev; struct i40e_vsi *vsi = &adapter->vsi; - int i; netdev_features_t hw_enc_features; netdev_features_t hw_features; @@ -2447,6 +2514,23 @@ int i40evf_process_config(struct i40evf_adapter *adapter) return -ENODEV; } + if (num_req_queues && + num_req_queues != adapter->vsi_res->num_queue_pairs) { + /* Problem. The PF gave us fewer queues than what we had + * negotiated in our request. Need a reset to see if we can't + * get back to a working state. + */ + dev_err(&adapter->pdev->dev, + "Requested %d queues, but PF only gave us %d.\n", + num_req_queues, + adapter->vsi_res->num_queue_pairs); + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->num_req_queues = adapter->vsi_res->num_queue_pairs; + i40evf_schedule_reset(adapter); + return -ENODEV; + } + adapter->num_req_queues = 0; + hw_enc_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 2bb0fe00361f..2bb81c39d85f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -160,7 +160,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | VIRTCHNL_VF_OFFLOAD_ENCAP | - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM; + VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG; @@ -384,6 +385,32 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) kfree(vimi); } +/** + * i40evf_request_queues + * @adapter: adapter structure + * @num: number of requested queues + * + * We get a default number of queues from the PF. This enables us to request a + * different number. 
Returns 0 on success, negative on failure + **/ +int i40evf_request_queues(struct i40evf_adapter *adapter, int num) +{ + struct virtchnl_vf_res_request vfres; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot request queues, command %d pending\n", + adapter->current_op); + return -EBUSY; + } + + vfres.num_queue_pairs = num; + + adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; + return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES, + (u8 *)&vfres, sizeof(vfres)); +} + /** * i40evf_add_ether_addrs * @adapter: adapter structure @@ -1068,6 +1095,21 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, "Invalid message %d from PF\n", v_opcode); } break; + case VIRTCHNL_OP_REQUEST_QUEUES: { + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + if (vfres->num_queue_pairs == adapter->num_req_queues) { + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + i40evf_schedule_reset(adapter); + } else { + dev_info(&adapter->pdev->dev, + "Requested %d queues, PF can support %d\n", + adapter->num_req_queues, + vfres->num_queue_pairs); + adapter->num_req_queues = 0; + } + } + break; default: if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", From 1b7b7596aeebc21913bad49eb6a2c364c4b2988a Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Tue, 22 Aug 2017 06:57:51 -0400 Subject: [PATCH 0485/2177] i40e: make i40evf_map_rings_to_vectors void This function cannot fail, so why is it returning a value? And why are we checking it? Why shouldn't we just make it void? Why is this commit message made up of only questions? Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 8c513ce84345..f2f1e754c2ce 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -430,12 +430,11 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) * group the rings as "efficiently" as possible. You would add new * mapping configurations in here. **/ -static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) +static void i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) { int rings_remaining = adapter->num_active_queues; int ridx = 0, vidx = 0; int q_vectors; - int err = 0; q_vectors = adapter->num_msix_vectors - NONQ_VECS; @@ -451,8 +450,6 @@ static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) } adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; - - return err; } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1578,9 +1575,7 @@ static int i40evf_reinit_interrupt_scheme(struct i40evf_adapter *adapter) set_bit(__I40E_VSI_DOWN, adapter->vsi.state); - err = i40evf_map_rings_to_vectors(adapter); - if (err) - goto err; + i40evf_map_rings_to_vectors(adapter); if (RSS_AQ(adapter)) adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; From 41d0a4d0c8b144e44d92ea95e975d2434748d806 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 22 Aug 2017 06:57:52 -0400 Subject: [PATCH 0486/2177] i40e: fix handling of vf_states variable Currently we inappropriately clear the vf_states variable with a null assignment. 
This is problematic because we should be using atomic bitops on this variable and we don't actually want to clear all the flags. We should just clear the ones we know we want to clear. Additionally remove the I40E_VF_STATE_FCOEENA bit because it is no longer being used. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 ++++- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 7742cf3d38d9..989a65d60ac9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -879,7 +879,8 @@ static void i40e_free_vf_res(struct i40e_vf *vf) } /* reset some of the state variables keeping track of the resources */ vf->num_queue_pairs = 0; - vf->vf_states = 0; + clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states); + clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states); } /** @@ -1586,6 +1587,8 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_IWARP)) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_IWARP; set_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states); + } else { + clear_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states); } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5ea42ad094bc..5efc4f92bb37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -56,7 +56,6 @@ enum i40e_vf_states { I40E_VF_STATE_INIT = 0, I40E_VF_STATE_ACTIVE, I40E_VF_STATE_IWARPENA, - I40E_VF_STATE_FCOEENA, I40E_VF_STATE_DISABLED, I40E_VF_STATE_MC_PROMISC, I40E_VF_STATE_UC_PROMISC, From c53d11f669c0e7d0daf46a717b6712ad0b09de99 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 22 Aug 2017 06:57:53 -0400 Subject: [PATCH 0487/2177] i40e: fix client notify of VF reset Currently there is a bug in which the PF driver fails to inform clients of a VF reset which then causes clients to leak resources. The bug exists because we were incorrectly checking the I40E_VF_STATE_PRE_ENABLE bit. When a VF is first init we go through a reset to initialize variables and allocate resources but we don't want to inform clients of this first reset since the client isn't fully enabled yet so we set a state bit signifying we're in a "pre-enabled" client state. During the first reset we should be clearing the bit, allowing all following resets to notify the client of the reset when the bit is not set. This patch fixes the issue by negating the 'test_and_clear_bit' check to accurately reflect the behavior we want. 
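The corrected check can be illustrated with a small userspace sketch of test-and-clear semantics (names invented): the first reset finds the pre-enable flag set, clears it and stays silent, and every later reset notifies the client.

#include <stdbool.h>
#include <stdio.h>

static bool pre_enable = true;  /* set during initial VF bring-up */

/* Returns the old value and clears the flag, like test_and_clear_bit(). */
static bool test_and_clear_pre_enable(void)
{
        bool old = pre_enable;

        pre_enable = false;
        return old;
}

static void cleanup_reset(int vf_id)
{
        /* Do not notify the client during the VF's very first reset. */
        if (!test_and_clear_pre_enable())
                printf("notify client: VF %d was reset\n", vf_id);
}

int main(void)
{
        cleanup_reset(0);       /* first reset: flag was set, no notification */
        cleanup_reset(0);       /* later reset: flag clear, client is notified */
        return 0;
}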
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 989a65d60ac9..04568137e029 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1050,8 +1050,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); /* Do not notify the client during VF init */ - if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, - &vf->vf_states)) + if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, + &vf->vf_states)) i40e_notify_client_of_vf_reset(pf, abs_vf_id); vf->num_vlan = 0; } From ab243ec9401d164531cc9bc07fb32231d72d1280 Mon Sep 17 00:00:00 2001 From: Scott Peterson Date: Tue, 22 Aug 2017 06:57:54 -0400 Subject: [PATCH 0488/2177] i40e: Stop dropping 802.1ad tags - eth proto 0x88a8 Enable i40e to pass traffic with VLAN tags using the 802.1ad ethernet protocol ID (0x88a8). This requires NIC firmware providing version 1.7 of the API. With older NIC firmware 802.1ad tagged packets will continue to be dropped. No VLAN offloads nor RSS are supported for 802.1ad VLANs. Signed-off-by: Scott Peterson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 6 ++++++ .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 17 ++++++++++++++++- drivers/net/ethernet/intel/i40e/i40e_common.c | 6 +++++- drivers/net/ethernet/intel/i40e/i40e_main.c | 7 +++++++ drivers/net/ethernet/intel/i40e/i40e_type.h | 6 ++++++ .../net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 17 ++++++++++++++++- drivers/net/ethernet/intel/i40evf/i40e_type.h | 6 ++++++ 7 files changed, 62 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 08f63226105a..9dcb2a961197 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -613,6 +613,12 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; } + /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */ + if (hw->aq.api_maj_ver > 1 || + (hw->aq.api_maj_ver == 1 && + hw->aq.api_min_ver >= 7)) + hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; + if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) { ret_code = I40E_ERR_FIRMWARE_API_VERSION; goto init_adminq_free_arq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index ed7bbe14bc6e..4c85ea9cd89a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -775,7 +775,22 @@ struct i40e_aqc_set_switch_config { #define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 __le16 valid_flags; - u8 reserved[12]; + /* The ethertype in switch_tag is dropped on ingress and used + * internally by the switch. Set this to zero for the default + * of 0x88a8 (802.1ad). Should be zero for firmware API + * versions lower than 1.7. + */ + __le16 switch_tag; + /* The ethertypes in first_tag and second_tag are used to + * match the outer and inner VLAN tags (respectively) when HW + * double VLAN tagging is enabled via the set port parameters + * AQ command. 
Otherwise these are both ignored. Set them to + * zero for their defaults of 0x8100 (802.1Q). Should be zero + * for firmware API versions lower than 1.7. + */ + __le16 first_tag; + __le16 second_tag; + u8 reserved[6]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index a4838779de5d..60542beda7ad 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2402,7 +2402,11 @@ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw, i40e_aqc_opc_set_switch_config); scfg->flags = cpu_to_le16(flags); scfg->valid_flags = cpu_to_le16(valid_flags); - + if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) { + scfg->switch_tag = cpu_to_le16(hw->switch_tag); + scfg->first_tag = cpu_to_le16(hw->first_tag); + scfg->second_tag = cpu_to_le16(hw->second_tag); + } status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); return status; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 387f0863f794..3f9e89b054ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11361,6 +11361,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->bus.bus_id = pdev->bus->number; pf->instance = pfs_found; + /* Select something other than the 802.1ad ethertype for the + * switch to use internally and drop on ingress. + */ + hw->switch_tag = 0xffff; + hw->first_tag = ETH_P_8021AD; + hw->second_tag = ETH_P_8021Q; + INIT_LIST_HEAD(&pf->l3_flex_pit_list); INIT_LIST_HEAD(&pf->l4_flex_pit_list); diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 8b0b9f826b7f..4b32b1d38a66 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -610,9 +610,15 @@ struct i40e_hw { struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) +#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1) #define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) u64 flags; + /* Used in set switch config AQ command */ + u16 switch_tag; + u16 first_tag; + u16 second_tag; + /* debug mask */ u32 debug_mask; char err_str[16]; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index eee7ece42b39..ed5602f4bbcd 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -771,7 +771,22 @@ struct i40e_aqc_set_switch_config { #define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 __le16 valid_flags; - u8 reserved[12]; + /* The ethertype in switch_tag is dropped on ingress and used + * internally by the switch. Set this to zero for the default + * of 0x88a8 (802.1ad). Should be zero for firmware API + * versions lower than 1.7. + */ + __le16 switch_tag; + /* The ethertypes in first_tag and second_tag are used to + * match the outer and inner VLAN tags (respectively) when HW + * double VLAN tagging is enabled via the set port parameters + * AQ command. Otherwise these are both ignored. Set them to + * zero for their defaults of 0x8100 (802.1Q). Should be zero + * for firmware API versions lower than 1.7. 
+ */ + __le16 first_tag; + __le16 second_tag; + u8 reserved[6]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 48eacf5e73e4..9364b67fff9c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -568,6 +568,7 @@ struct i40e_hw { /* LLDP/DCBX Status */ u16 dcbx_status; +#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1) #define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) /* DCBX info */ @@ -575,6 +576,11 @@ struct i40e_hw { struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ + /* Used in set switch config AQ command */ + u16 switch_tag; + u16 first_tag; + u16 second_tag; + /* debug mask */ u32 debug_mask; char err_str[16]; From 0b40f457488d966878eec413a91f27d9b21e6ce5 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:14 -0700 Subject: [PATCH 0489/2177] fm10k: prepare_for_reset() when we lose PCIe Link If we lose PCIe link, such as when an unannounced PFLR event occurs, or when a device is surprise removed, we currently detach the device and close the netdev. This unfortunately leaves a lot of things still active, such as the msix_mbx_pf IRQ, and Tx/Rx resources. This can cause problems because the register reads will return potentially invalid values which may result in unknown driver behavior. Begin the process of resetting using fm10k_prepare_for_reset(), much in the same way as the suspend and resume cycle does. This will attempt to shutdown as much as possible, in order to prevent possible issues. A naive implementation for this has issues, because there are now multiple flows calling the reset logic and setting a reset bit. This would cause problems, because the "re-attach" routine might call fm10k_handle_reset() prior to the reset actually finishing. Instead, we'll add state bits to indicate which flow actually initiated the reset. For the general reset flow, we'll assume that if someone else is resetting that we do not need to handle it at all, so it does not need its own state bit. For the suspend case, we will simply issue a warning indicating that we are attempting to recover from this case when resuming. For the detached subtask, we'll simply refuse to re-attach until we've actually initiated a reset as part of that flow. Finally, we'll stop attempting to manage the mailbox subtask when we're detached, since there's nothing we can do if we don't have a PCIe address. Overall this produces a much cleaner shutdown and recovery cycle for a PCIe surprise remove event. 
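A compact userspace sketch of that coordination, with invented names: a single RESETTING flag decides which flow actually performs the shutdown, the prepare function reports whether it did the work, and the calling flow records its own ownership bit only when it did.

#include <stdbool.h>
#include <stdio.h>

enum state_bit { RESETTING, RESET_DETACHED, NR_BITS };

static bool state[NR_BITS];

/* Returns the previous value, mimicking test_and_set_bit(). */
static bool test_and_set(enum state_bit bit)
{
        bool old = state[bit];

        state[bit] = true;
        return old;
}

static bool prepare_for_reset(void)
{
        /* Nothing to do if another flow is already resetting the device. */
        if (test_and_set(RESETTING))
                return false;

        printf("shutting down queues and interrupts\n");
        return true;
}

static void detach_subtask(void)
{
        /* Record ownership only if we actually started the reset. */
        if (prepare_for_reset())
                state[RESET_DETACHED] = true;
}

int main(void)
{
        detach_subtask();               /* claims the reset */
        if (!prepare_for_reset())       /* a second flow backs off */
                printf("reset already in progress, ignoring request\n");
        return 0;
}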
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 2 + drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 105 ++++++++++++++----- 2 files changed, 80 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 689c413b7782..ba70c58ca920 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -270,6 +270,8 @@ enum fm10k_flags_t { enum fm10k_state_t { __FM10K_RESETTING, + __FM10K_RESET_DETACHED, + __FM10K_RESET_SUSPENDED, __FM10K_DOWN, __FM10K_SERVICE_SCHED, __FM10K_SERVICE_REQUEST, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 9575f7c1862d..4e5e3e64beda 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -153,7 +153,15 @@ static void fm10k_service_timer(unsigned long data) fm10k_service_event_schedule(interface); } -static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) +/** + * fm10k_prepare_for_reset - Prepare the driver and device for a pending reset + * @interface: fm10k private data structure + * + * This function prepares for a device reset by shutting as much down as we + * can. It does nothing and returns false if __FM10K_RESETTING was already set + * prior to calling this function. It returns true if it actually did work. + */ +static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface) { struct net_device *netdev = interface->netdev; @@ -162,8 +170,9 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) /* put off any impending NetWatchDogTimeout */ netif_trans_update(netdev); - while (test_and_set_bit(__FM10K_RESETTING, interface->state)) - usleep_range(1000, 2000); + /* Nothing to do if a reset is already in progress */ + if (test_and_set_bit(__FM10K_RESETTING, interface->state)) + return false; rtnl_lock(); @@ -181,6 +190,8 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) interface->last_reset = jiffies + (10 * HZ); rtnl_unlock(); + + return true; } static int fm10k_handle_reset(struct fm10k_intfc *interface) @@ -189,6 +200,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface) struct fm10k_hw *hw = &interface->hw; int err; + WARN_ON(!test_bit(__FM10K_RESETTING, interface->state)); + rtnl_lock(); pci_set_master(interface->pdev); @@ -267,35 +280,70 @@ static void fm10k_detach_subtask(struct fm10k_intfc *interface) struct net_device *netdev = interface->netdev; u32 __iomem *hw_addr; u32 value; + int err; - /* do nothing if device is still present or hw_addr is set */ + /* do nothing if netdev is still present or hw_addr is set */ if (netif_device_present(netdev) || interface->hw.hw_addr) return; + /* We've lost the PCIe register space, and can no longer access the + * device. Shut everything except the detach subtask down and prepare + * to reset the device in case we recover. If we actually prepare for + * reset, indicate that we're detached. + */ + if (fm10k_prepare_for_reset(interface)) + set_bit(__FM10K_RESET_DETACHED, interface->state); + /* check the real address space to see if we've recovered */ hw_addr = READ_ONCE(interface->uc_addr); value = readl(hw_addr); if (~value) { + /* Make sure the reset was initiated because we detached, + * otherwise we might race with a different reset flow. 
+ */ + if (!test_and_clear_bit(__FM10K_RESET_DETACHED, + interface->state)) + return; + + /* Restore the hardware address */ interface->hw.hw_addr = interface->uc_addr; + + /* PCIe link has been restored, and the device is active + * again. Restore everything and reset the device. + */ + err = fm10k_handle_reset(interface); + if (err) { + netdev_err(netdev, "Unable to reset device: %d\n", err); + interface->hw.hw_addr = NULL; + return; + } + + /* Re-attach the netdev */ netif_device_attach(netdev); - set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); netdev_warn(netdev, "PCIe link restored, device now attached\n"); return; } - - rtnl_lock(); - - if (netif_running(netdev)) - dev_close(netdev); - - rtnl_unlock(); } -static void fm10k_reinit(struct fm10k_intfc *interface) +static void fm10k_reset_subtask(struct fm10k_intfc *interface) { int err; - fm10k_prepare_for_reset(interface); + if (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED, + interface->flags)) + return; + + /* If another thread has already prepared to reset the device, we + * should not attempt to handle a reset here, since we'd race with + * that thread. This may happen if we suspend the device or if the + * PCIe link is lost. In this case, we'll just ignore the RESET + * request, as it will (eventually) be taken care of when the thread + * which actually started the reset is finished. + */ + if (!fm10k_prepare_for_reset(interface)) + return; + + netdev_err(interface->netdev, "Reset interface\n"); err = fm10k_handle_reset(interface); if (err) @@ -303,17 +351,6 @@ static void fm10k_reinit(struct fm10k_intfc *interface) "fm10k_handle_reset failed: %d\n", err); } -static void fm10k_reset_subtask(struct fm10k_intfc *interface) -{ - if (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED, - interface->flags)) - return; - - netdev_err(interface->netdev, "Reset interface\n"); - - fm10k_reinit(interface); -} - /** * fm10k_configure_swpri_map - Configure Receive SWPRI to PC mapping * @interface: board private structure @@ -381,6 +418,10 @@ static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface) **/ static void fm10k_mbx_subtask(struct fm10k_intfc *interface) { + /* If we're resetting, bail out */ + if (test_bit(__FM10K_RESETTING, interface->state)) + return; + /* process upstream mailbox and update device state */ fm10k_watchdog_update_host_state(interface); @@ -630,9 +671,11 @@ static void fm10k_service_task(struct work_struct *work) interface = container_of(work, struct fm10k_intfc, service_task); + /* Check whether we're detached first */ + fm10k_detach_subtask(interface); + /* tasks run even when interface is down */ fm10k_mbx_subtask(interface); - fm10k_detach_subtask(interface); fm10k_reset_subtask(interface); /* tasks only run when interface is up */ @@ -2177,7 +2220,8 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface) */ fm10k_stop_service_event(interface); - fm10k_prepare_for_reset(interface); + if (fm10k_prepare_for_reset(interface)) + set_bit(__FM10K_RESET_SUSPENDED, interface->state); } static int fm10k_handle_resume(struct fm10k_intfc *interface) @@ -2185,6 +2229,13 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) struct fm10k_hw *hw = &interface->hw; int err; + /* Even if we didn't properly prepare for reset in + * fm10k_prepare_suspend, we'll attempt to resume anyways. + */ + if (!test_and_clear_bit(__FM10K_RESET_SUSPENDED, interface->state)) + dev_warn(&interface->pdev->dev, + "Device was shut down as part of suspend... 
Attempting to recover\n"); + /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); From b4fcd43661df0d84cc4e030ab7a26533114889b9 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:15 -0700 Subject: [PATCH 0490/2177] fm10k: use spinlock to implement mailbox lock Lets not re-invent the locking wheel. Remove our bitlock and use a proper spinlock instead. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 15 +++++---------- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 3 +++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index ba70c58ca920..74542e9d63c7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -276,7 +276,6 @@ enum fm10k_state_t { __FM10K_SERVICE_SCHED, __FM10K_SERVICE_REQUEST, __FM10K_SERVICE_DISABLE, - __FM10K_MBX_LOCK, __FM10K_LINK_DOWN, __FM10K_UPDATING_STATS, /* This value must be last and determines the BITMAP size */ @@ -346,6 +345,8 @@ struct fm10k_intfc { struct fm10k_hw_stats stats; struct fm10k_hw hw; + /* Mailbox lock */ + spinlock_t mbx_lock; u32 __iomem *uc_addr; u32 __iomem *sw_addr; u16 msg_enable; @@ -386,23 +387,17 @@ struct fm10k_intfc { static inline void fm10k_mbx_lock(struct fm10k_intfc *interface) { - /* busy loop if we cannot obtain the lock as some calls - * such as ndo_set_rx_mode may be made in atomic context - */ - while (test_and_set_bit(__FM10K_MBX_LOCK, interface->state)) - udelay(20); + spin_lock(&interface->mbx_lock); } static inline void fm10k_mbx_unlock(struct fm10k_intfc *interface) { - /* flush memory to make sure state is correct */ - smp_mb__before_atomic(); - clear_bit(__FM10K_MBX_LOCK, interface->state); + spin_unlock(&interface->mbx_lock); } static inline int fm10k_mbx_trylock(struct fm10k_intfc *interface) { - return !test_and_set_bit(__FM10K_MBX_LOCK, interface->state); + return spin_trylock(&interface->mbx_lock); } /* fm10k_test_staterr - test bits in Rx descriptor status and error fields */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 4e5e3e64beda..240772ad5d69 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1921,6 +1921,9 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, netdev_rss_key_fill(rss_key, sizeof(rss_key)); memcpy(interface->rssrk, rss_key, sizeof(rss_key)); + /* Initialize the mailbox lock */ + spin_lock_init(&interface->mbx_lock); + /* Start off interface as being down */ set_bit(__FM10K_DOWN, interface->state); set_bit(__FM10K_UPDATING_STATS, interface->state); From 8249c47c6ba48cd3eba7c3ca7f8e733ee815c39b Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:16 -0700 Subject: [PATCH 0491/2177] fm10k: use generic PM hooks instead of legacy PCIe power hooks Replace the PCI specific legacy power management hooks with the new generic power management hooks which work properly for both suspend and hibernate. The new generic system is better and properly handles the lower level PCIe power management rather than forcing the driver to handle it. 
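For context, a bare-bones sketch of the generic PM model referred to above (hypothetical names, independent of fm10k): the driver supplies struct device callbacks through a dev_pm_ops table and points the PCI driver's .driver.pm at it, leaving PCI D-state handling and config-space save/restore to the PM core.

        #include <linux/pci.h>
        #include <linux/pm.h>

        /* Sketch only; my_dev_suspend/resume and my_driver are hypothetical. */
        static int __maybe_unused my_dev_suspend(struct device *dev)
        {
                /* Quiesce software state only; the PM core saves config space
                 * and moves the device to a low-power state for us.
                 */
                return 0;
        }

        static int __maybe_unused my_dev_resume(struct device *dev)
        {
                /* Restore software state; no pci_enable_device_mem() or
                 * pci_set_power_state() calls are needed here.
                 */
                return 0;
        }

        static SIMPLE_DEV_PM_OPS(my_pm_ops, my_dev_suspend, my_dev_resume);

        static struct pci_driver my_driver = {
                .name   = "my_driver",
                .driver = {
                        .pm = &my_pm_ops,
                },
                /* .id_table, .probe, .remove, ... */
        };
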
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 67 +++++++------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 240772ad5d69..aef39909e4a2 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2264,36 +2264,19 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) #ifdef CONFIG_PM /** - * fm10k_resume - Restore device to pre-sleep state - * @pdev: PCI device information struct + * fm10k_resume - Generic PM resume hook + * @dev: generic device structure * - * fm10k_resume is called after the system has powered back up from a sleep - * state and is ready to resume operation. This function is meant to restore - * the device back to its pre-sleep state. + * Generic PM hook used when waking the device from a low power state after + * suspend or hibernation. This function does not need to handle lower PCIe + * device state as the stack takes care of that for us. **/ -static int fm10k_resume(struct pci_dev *pdev) +static int fm10k_resume(struct device *dev) { - struct fm10k_intfc *interface = pci_get_drvdata(pdev); + struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; struct fm10k_hw *hw = &interface->hw; - u32 err; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - /* pci_restore_state clears dev->state_saved so call - * pci_save_state to restore it. - */ - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); - return err; - } - pci_set_master(pdev); - - pci_wake_from_d3(pdev, false); + int err; /* refresh hw_addr in case it was dropped */ hw->hw_addr = interface->uc_addr; @@ -2308,36 +2291,27 @@ static int fm10k_resume(struct pci_dev *pdev) } /** - * fm10k_suspend - Prepare the device for a system sleep state - * @pdev: PCI device information struct + * fm10k_suspend - Generic PM suspend hook + * @dev: generic device structure * - * fm10k_suspend is meant to shutdown the device prior to the system entering - * a sleep state. The fm10k hardware does not support wake on lan so the - * driver simply needs to shut down the device so it is in a low power state. + * Generic PM hook used when setting the device into a low power state for + * system suspend or hibernation. This function does not need to handle lower + * PCIe device state as the stack takes care of that for us. 
**/ -static int fm10k_suspend(struct pci_dev *pdev, - pm_message_t __always_unused state) +static int fm10k_suspend(struct device *dev) { - struct fm10k_intfc *interface = pci_get_drvdata(pdev); + struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; - int err = 0; netif_device_detach(netdev); fm10k_prepare_suspend(interface); - err = pci_save_state(pdev); - if (err) - return err; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - return 0; } #endif /* CONFIG_PM */ + /** * fm10k_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device @@ -2447,15 +2421,18 @@ static const struct pci_error_handlers fm10k_err_handler = { .reset_done = fm10k_io_reset_done, }; +static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume); + static struct pci_driver fm10k_driver = { .name = fm10k_driver_name, .id_table = fm10k_pci_tbl, .probe = fm10k_probe, .remove = fm10k_remove, #ifdef CONFIG_PM - .suspend = fm10k_suspend, - .resume = fm10k_resume, -#endif + .driver = { + .pm = &fm10k_pm_ops, + }, +#endif /* CONFIG_PM */ .sriov_configure = fm10k_iov_configure, .err_handler = &fm10k_err_handler }; From fc9173682dcf73cfe3324267424ef17e854bb444 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:17 -0700 Subject: [PATCH 0492/2177] fm10k: introduce a message queue for MAC/VLAN messages Under some circumstances, when dealing with a large number of MAC address or VLAN updates at once, the fm10k driver, particularly the VFs can overload the mailbox with too many messages at once. This results in a mailbox timeout, which causes the driver to initiate a reset. During the reset, we re-send all the same messages that originally caused the timeout. This results in a cycle of resets each triggering a future reset. To fix or avoid this, we introduce a workqueue item which monitors a queue of MAC and VLAN requests. These requests are queued to the end of the list, and we process as a FIFO periodically. Initially we only handle requests for the netdev, but we do handle unicast MAC addresses, multicast MAC addresses, and update VLAN requests. A future patch will add support to use this queue for handling MAC update requests from the VF<->PF mailbox. The MAC/VLAN work item will keep checking to make sure that each request does not overflow the mailbox and cause a timeout. If it might, then the work item will reschedule itself a short time later. This avoids any reset cycle, since we never send the message if the mailbox is not ready. As an alternative, we tried increasing the mailbox message FIFO, but this just delays the problem and results in needless memory waste on the system. Our new message queue is dynamically allocated so only uses as much memory as it needs. Additionally, it need not be contiguous like the Tx and Rx FIFOs. Note that this patch chose to only create a queue for MAC and VLAN messages, since these are the only messages sent in a large enough volume to cause the reset loop. Other messages are very unlikely to overflow the mailbox Tx FIFO so easily. 
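As a rough sketch of the pattern described above (all names hypothetical; my_mailbox_has_room() and my_send_request() stand in for the real mailbox operations, and system_wq stands in for the driver's workqueue): requests are appended to a spinlock-protected FIFO from atomic context, and a delayed work item drains it, putting a request back and rescheduling itself whenever the mailbox is too full to accept another message.

        #include <linux/jiffies.h>
        #include <linux/list.h>
        #include <linux/slab.h>
        #include <linux/spinlock.h>
        #include <linux/workqueue.h>

        struct my_request {
                struct list_head list;
                /* ... MAC/VLAN payload ... */
        };

        /* Stand-ins for the real mailbox calls, provided elsewhere. */
        bool my_mailbox_has_room(void);
        void my_send_request(struct my_request *req);

        static LIST_HEAD(my_requests);
        static DEFINE_SPINLOCK(my_requests_lock);
        static void my_request_task(struct work_struct *work);
        static DECLARE_DELAYED_WORK(my_task, my_request_task);

        /* May be called from atomic context (e.g. under addr_list_lock). */
        static int my_queue_request(struct my_request *req)
        {
                unsigned long flags;

                spin_lock_irqsave(&my_requests_lock, flags);
                list_add_tail(&req->list, &my_requests);        /* FIFO order */
                spin_unlock_irqrestore(&my_requests_lock, flags);

                /* Small delay lets several updates accumulate per run. */
                queue_delayed_work(system_wq, &my_task, msecs_to_jiffies(10));
                return 0;
        }

        static void my_request_task(struct work_struct *work)
        {
                struct my_request *req;
                unsigned long flags;

                for (;;) {
                        spin_lock_irqsave(&my_requests_lock, flags);
                        req = list_first_entry_or_null(&my_requests,
                                                       struct my_request, list);
                        if (req)
                                list_del(&req->list);
                        spin_unlock_irqrestore(&my_requests_lock, flags);
                        if (!req)
                                return;

                        if (!my_mailbox_has_room()) {
                                /* Put the request back and retry shortly,
                                 * rather than overflowing the mailbox.
                                 */
                                spin_lock_irqsave(&my_requests_lock, flags);
                                list_add(&req->list, &my_requests);
                                spin_unlock_irqrestore(&my_requests_lock, flags);
                                queue_delayed_work(system_wq, &my_task,
                                                   msecs_to_jiffies(10));
                                return;
                        }

                        my_send_request(req);
                        kfree(req);
                }
        }
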
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 39 ++++ .../net/ethernet/intel/fm10k/fm10k_netdev.c | 199 +++++++++++++---- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 201 ++++++++++++++++++ 3 files changed, 397 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 74542e9d63c7..40856bc0f3b9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -248,6 +248,29 @@ struct fm10k_udp_port { __be16 port; }; +enum fm10k_macvlan_request_type { + FM10K_UC_MAC_REQUEST, + FM10K_MC_MAC_REQUEST, + FM10K_VLAN_REQUEST +}; + +struct fm10k_macvlan_request { + enum fm10k_macvlan_request_type type; + struct list_head list; + union { + struct fm10k_mac_request { + u8 addr[ETH_ALEN]; + u16 glort; + u16 vid; + } mac; + struct fm10k_vlan_request { + u32 vid; + u8 vsi; + } vlan; + }; + bool set; +}; + /* one work queue for entire driver */ extern struct workqueue_struct *fm10k_workqueue; @@ -276,6 +299,9 @@ enum fm10k_state_t { __FM10K_SERVICE_SCHED, __FM10K_SERVICE_REQUEST, __FM10K_SERVICE_DISABLE, + __FM10K_MACVLAN_SCHED, + __FM10K_MACVLAN_REQUEST, + __FM10K_MACVLAN_DISABLE, __FM10K_LINK_DOWN, __FM10K_UPDATING_STATS, /* This value must be last and determines the BITMAP size */ @@ -368,6 +394,12 @@ struct fm10k_intfc { struct list_head vxlan_port; struct list_head geneve_port; + /* MAC/VLAN update queue */ + struct list_head macvlan_requests; + struct delayed_work macvlan_task; + /* MAC/VLAN update queue lock */ + spinlock_t macvlan_lock; + #ifdef CONFIG_DEBUG_FS struct dentry *dbg_intfc; #endif /* CONFIG_DEBUG_FS */ @@ -487,6 +519,7 @@ void fm10k_up(struct fm10k_intfc *interface); void fm10k_down(struct fm10k_intfc *interface); void fm10k_update_stats(struct fm10k_intfc *interface); void fm10k_service_event_schedule(struct fm10k_intfc *interface); +void fm10k_macvlan_schedule(struct fm10k_intfc *interface); void fm10k_update_rx_drop_en(struct fm10k_intfc *interface); #ifdef CONFIG_NET_POLL_CONTROLLER void fm10k_netpoll(struct net_device *netdev); @@ -507,6 +540,12 @@ void fm10k_reset_rx_state(struct fm10k_intfc *); int fm10k_setup_tc(struct net_device *dev, u8 tc); int fm10k_open(struct net_device *netdev); int fm10k_close(struct net_device *netdev); +int fm10k_queue_vlan_request(struct fm10k_intfc *interface, u32 vid, + u8 vsi, bool set); +int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort, + const unsigned char *addr, u16 vid, bool set); +void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface, + u16 glort, bool vlans); /* Ethtool */ void fm10k_set_ethtool_ops(struct net_device *dev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 77d495fedced..81e4425f0529 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -758,11 +758,132 @@ static bool fm10k_host_mbx_ready(struct fm10k_intfc *interface) return (hw->mac.type == fm10k_mac_vf || interface->host_ready); } +/** + * fm10k_queue_vlan_request - Queue a VLAN update request + * @interface: the fm10k interface structure + * @vid: the VLAN vid + * @vsi: VSI index number + * @set: whether to set or clear + * + * This function queues up a VLAN update. For VFs, this must be sent to the + * managing PF over the mailbox. For PFs, we'll use the same handling so that + * it's similar to the VF. 
This avoids storming the PF<->VF mailbox with too + * many VLAN updates during reset. + */ +int fm10k_queue_vlan_request(struct fm10k_intfc *interface, + u32 vid, u8 vsi, bool set) +{ + struct fm10k_macvlan_request *request; + unsigned long flags; + + /* This must be atomic since we may be called while the netdev + * addr_list_lock is held + */ + request = kzalloc(sizeof(*request), GFP_ATOMIC); + if (!request) + return -ENOMEM; + + request->type = FM10K_VLAN_REQUEST; + request->vlan.vid = vid; + request->vlan.vsi = vsi; + request->set = set; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add_tail(&request->list, &interface->macvlan_requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + fm10k_macvlan_schedule(interface); + + return 0; +} + +/** + * fm10k_queue_mac_request - Queue a MAC update request + * @interface: the fm10k interface structure + * @glort: the target glort for this update + * @addr: the address to update + * @vid: the vid to update + * @sync: whether to add or remove + * + * This function queues up a MAC request for sending to the switch manager. + * A separate thread monitors the queue and sends updates to the switch + * manager. Return 0 on success, and negative error code on failure. + **/ +int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort, + const unsigned char *addr, u16 vid, bool set) +{ + struct fm10k_macvlan_request *request; + unsigned long flags; + + /* This must be atomic since we may be called while the netdev + * addr_list_lock is held + */ + request = kzalloc(sizeof(*request), GFP_ATOMIC); + if (!request) + return -ENOMEM; + + if (is_multicast_ether_addr(addr)) + request->type = FM10K_MC_MAC_REQUEST; + else + request->type = FM10K_UC_MAC_REQUEST; + + ether_addr_copy(request->mac.addr, addr); + request->mac.glort = glort; + request->mac.vid = vid; + request->set = set; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add_tail(&request->list, &interface->macvlan_requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + fm10k_macvlan_schedule(interface); + + return 0; +} + +/** + * fm10k_clear_macvlan_queue - Cancel pending updates for a given glort + * @interface: the fm10k interface structure + * @glort: the target glort to clear + * @vlans: true to clear VLAN messages, false to ignore them + * + * Cancel any outstanding MAC/VLAN requests for a given glort. This is + * expected to be called when a logical port goes down. 
+ **/ +void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface, + u16 glort, bool vlans) + +{ + struct fm10k_macvlan_request *r, *tmp; + unsigned long flags; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + + /* Free any outstanding MAC/VLAN requests for this interface */ + list_for_each_entry_safe(r, tmp, &interface->macvlan_requests, list) { + switch (r->type) { + case FM10K_MC_MAC_REQUEST: + case FM10K_UC_MAC_REQUEST: + /* Don't free requests for other interfaces */ + if (r->mac.glort != glort) + break; + /* fall through */ + case FM10K_VLAN_REQUEST: + if (vlans) { + list_del(&r->list); + kfree(r); + } + break; + } + } + + spin_unlock_irqrestore(&interface->macvlan_lock, flags); +} + static int fm10k_uc_vlan_unsync(struct net_device *netdev, const unsigned char *uc_addr) { struct fm10k_intfc *interface = netdev_priv(netdev); - struct fm10k_hw *hw = &interface->hw; u16 glort = interface->glort; u16 vid = interface->vid; bool set = !!(vid / VLAN_N_VID); @@ -771,10 +892,7 @@ static int fm10k_uc_vlan_unsync(struct net_device *netdev, /* drop any leading bits on the VLAN ID */ vid &= VLAN_N_VID - 1; - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_uc_addr(hw, glort, uc_addr, - vid, set, 0); - + err = fm10k_queue_mac_request(interface, glort, uc_addr, vid, set); if (err) return err; @@ -786,7 +904,6 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev, const unsigned char *mc_addr) { struct fm10k_intfc *interface = netdev_priv(netdev); - struct fm10k_hw *hw = &interface->hw; u16 glort = interface->glort; u16 vid = interface->vid; bool set = !!(vid / VLAN_N_VID); @@ -795,9 +912,7 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev, /* drop any leading bits on the VLAN ID */ vid &= VLAN_N_VID - 1; - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_mc_addr(hw, glort, mc_addr, vid, set); - + err = fm10k_queue_mac_request(interface, glort, mc_addr, vid, set); if (err) return err; @@ -855,18 +970,14 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) /* only need to update the VLAN if not in promiscuous mode */ if (!(netdev->flags & IFF_PROMISC)) { - err = hw->mac.ops.update_vlan(hw, vid, 0, set); + err = fm10k_queue_vlan_request(interface, vid, 0, set); if (err) goto err_out; } - /* update our base MAC address if host's mailbox is ready */ - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_uc_addr(hw, interface->glort, - hw->mac.addr, vid, set, 0); - else - err = -EHOSTDOWN; - + /* Update our base MAC address */ + err = fm10k_queue_mac_request(interface, interface->glort, + hw->mac.addr, vid, set); if (err) goto err_out; @@ -910,7 +1021,6 @@ static u16 fm10k_find_next_vlan(struct fm10k_intfc *interface, u16 vid) static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface) { - struct fm10k_hw *hw = &interface->hw; u32 vid, prev_vid; /* loop through and find any gaps in the table */ @@ -922,7 +1032,7 @@ static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface) /* send request to clear multiple bits at a time */ prev_vid += (vid - prev_vid - 1) << FM10K_VLAN_LENGTH_SHIFT; - hw->mac.ops.update_vlan(hw, prev_vid, 0, false); + fm10k_queue_vlan_request(interface, prev_vid, 0, false); } } @@ -937,15 +1047,11 @@ static int __fm10k_uc_sync(struct net_device *dev, if (!is_valid_ether_addr(addr)) return -EADDRNOTAVAIL; - /* update table with current entries if host's mailbox is ready */ - if (!fm10k_host_mbx_ready(interface)) - return -EHOSTDOWN; - for (vid = hw->mac.default_vid ? 
fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - err = hw->mac.ops.update_uc_addr(hw, glort, addr, - vid, sync, 0); + err = fm10k_queue_mac_request(interface, glort, + addr, vid, sync); if (err) return err; } @@ -1002,15 +1108,18 @@ static int __fm10k_mc_sync(struct net_device *dev, struct fm10k_intfc *interface = netdev_priv(dev); struct fm10k_hw *hw = &interface->hw; u16 vid, glort = interface->glort; + s32 err; - /* update table with current entries if host's mailbox is ready */ - if (!fm10k_host_mbx_ready(interface)) - return 0; + if (!is_multicast_ether_addr(addr)) + return -EADDRNOTAVAIL; for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - hw->mac.ops.update_mc_addr(hw, glort, addr, vid, sync); + err = fm10k_queue_mac_request(interface, glort, + addr, vid, sync); + if (err) + return err; } return 0; @@ -1050,7 +1159,8 @@ static void fm10k_set_rx_mode(struct net_device *dev) if (interface->xcast_mode != xcast_mode) { /* update VLAN table */ if (xcast_mode == FM10K_XCAST_MODE_PROMISC) - hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0, true); + fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, + 0, true); if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC) fm10k_clear_unused_vlans(interface); @@ -1098,22 +1208,20 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) interface->glort_count, true); /* update VLAN table */ - hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0, - xcast_mode == FM10K_XCAST_MODE_PROMISC); + fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0, + xcast_mode == FM10K_XCAST_MODE_PROMISC); /* Add filter for VLAN 0 */ - hw->mac.ops.update_vlan(hw, 0, 0, true); + fm10k_queue_vlan_request(interface, 0, 0, true); /* update table with current entries */ for (vid = hw->mac.default_vid ? 
fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - hw->mac.ops.update_vlan(hw, vid, 0, true); + fm10k_queue_vlan_request(interface, vid, 0, true); - /* Update unicast entries if host's mailbox is ready */ - if (fm10k_host_mbx_ready(interface)) - hw->mac.ops.update_uc_addr(hw, glort, hw->mac.addr, - vid, true, 0); + fm10k_queue_mac_request(interface, glort, + hw->mac.addr, vid, true); } /* update xcast mode before synchronizing addresses if host's mailbox @@ -1140,6 +1248,13 @@ void fm10k_reset_rx_state(struct fm10k_intfc *interface) struct net_device *netdev = interface->netdev; struct fm10k_hw *hw = &interface->hw; + /* Wait for MAC/VLAN work to finish */ + while (test_bit(__FM10K_MACVLAN_SCHED, interface->state)) + usleep_range(1000, 2000); + + /* Cancel pending MAC/VLAN requests */ + fm10k_clear_macvlan_queue(interface, interface->glort, true); + fm10k_mbx_lock(interface); /* clear the logical port state on lower device if host's mailbox is @@ -1374,8 +1489,8 @@ static void *fm10k_dfwd_add_station(struct net_device *dev, if (fm10k_host_mbx_ready(interface)) { hw->mac.ops.update_xcast_mode(hw, glort, FM10K_XCAST_MODE_MULTI); - hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr, - 0, true, 0); + fm10k_queue_mac_request(interface, glort, sdev->dev_addr, + 0, true); } fm10k_mbx_unlock(interface); @@ -1414,8 +1529,8 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv) if (fm10k_host_mbx_ready(interface)) { hw->mac.ops.update_xcast_mode(hw, glort, FM10K_XCAST_MODE_NONE); - hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr, - 0, false, 0); + fm10k_queue_mac_request(interface, glort, sdev->dev_addr, + 0, false); } fm10k_mbx_unlock(interface); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index aef39909e4a2..58538ce997e1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -91,6 +91,76 @@ static int fm10k_hw_ready(struct fm10k_intfc *interface) return FM10K_REMOVED(hw->hw_addr) ? -ENODEV : 0; } +/** + * fm10k_macvlan_schedule - Schedule MAC/VLAN queue task + * @interface: fm10k private interface structure + * + * Schedule the MAC/VLAN queue monitor task. If the MAC/VLAN task cannot be + * started immediately, request that it be restarted when possible. + */ +void fm10k_macvlan_schedule(struct fm10k_intfc *interface) +{ + /* Avoid processing the MAC/VLAN queue when the service task is + * disabled, or when we're resetting the device. + */ + if (!test_bit(__FM10K_MACVLAN_DISABLE, interface->state) && + !test_and_set_bit(__FM10K_MACVLAN_SCHED, interface->state)) { + clear_bit(__FM10K_MACVLAN_REQUEST, interface->state); + /* We delay the actual start of execution in order to allow + * multiple MAC/VLAN updates to accumulate before handling + * them, and to allow some time to let the mailbox drain + * between runs. + */ + queue_delayed_work(fm10k_workqueue, + &interface->macvlan_task, 10); + } else { + set_bit(__FM10K_MACVLAN_REQUEST, interface->state); + } +} + +/** + * fm10k_stop_macvlan_task - Stop the MAC/VLAN queue monitor + * @interface: fm10k private interface structure + * + * Wait until the MAC/VLAN queue task has stopped, and cancel any future + * requests. 
+ */ +static void fm10k_stop_macvlan_task(struct fm10k_intfc *interface) +{ + /* Disable the MAC/VLAN work item */ + set_bit(__FM10K_MACVLAN_DISABLE, interface->state); + + /* Make sure we waited until any current invocations have stopped */ + cancel_delayed_work_sync(&interface->macvlan_task); + + /* We set the __FM10K_MACVLAN_SCHED bit when we schedule the task. + * However, it may not be unset of the MAC/VLAN task never actually + * got a chance to run. Since we've canceled the task here, and it + * cannot be rescheuled right now, we need to ensure the scheduled bit + * gets unset. + */ + clear_bit(__FM10K_MACVLAN_SCHED, interface->state); +} + +/** + * fm10k_resume_macvlan_task - Restart the MAC/VLAN queue monitor + * @interface: fm10k private interface structure + * + * Clear the __FM10K_MACVLAN_DISABLE bit and, if a request occurred, schedule + * the MAC/VLAN work monitor. + */ +static void fm10k_resume_macvlan_task(struct fm10k_intfc *interface) +{ + /* Re-enable the MAC/VLAN work item */ + clear_bit(__FM10K_MACVLAN_DISABLE, interface->state); + + /* We might have received a MAC/VLAN request while disabled. If so, + * kick off the queue now. + */ + if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state)) + fm10k_macvlan_schedule(interface); +} + void fm10k_service_event_schedule(struct fm10k_intfc *interface) { if (!test_bit(__FM10K_SERVICE_DISABLE, interface->state) && @@ -174,6 +244,12 @@ static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface) if (test_and_set_bit(__FM10K_RESETTING, interface->state)) return false; + /* As the MAC/VLAN task will be accessing registers it must not be + * running while we reset. Although the task will not be scheduled + * once we start resetting it may already be running + */ + fm10k_stop_macvlan_task(interface); + rtnl_lock(); fm10k_iov_suspend(interface->pdev); @@ -258,6 +334,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface) rtnl_unlock(); + fm10k_resume_macvlan_task(interface); + clear_bit(__FM10K_RESETTING, interface->state); return err; @@ -686,6 +764,112 @@ static void fm10k_service_task(struct work_struct *work) fm10k_service_event_complete(interface); } +/** + * fm10k_macvlan_task - send queued MAC/VLAN requests to switch manager + * @work: pointer to work_struct containing our data + * + * This work item handles sending MAC/VLAN updates to the switch manager. When + * the interface is up, it will attempt to queue mailbox messages to the + * switch manager requesting updates for MAC/VLAN pairs. If the Tx fifo of the + * mailbox is full, it will reschedule itself to try again in a short while. + * This ensures that the driver does not overload the switch mailbox with too + * many simultaneous requests, causing an unnecessary reset. 
+ **/ +static void fm10k_macvlan_task(struct work_struct *work) +{ + struct fm10k_macvlan_request *item; + struct fm10k_intfc *interface; + struct delayed_work *dwork; + struct list_head *requests; + struct fm10k_hw *hw; + unsigned long flags; + + dwork = to_delayed_work(work); + interface = container_of(dwork, struct fm10k_intfc, macvlan_task); + hw = &interface->hw; + requests = &interface->macvlan_requests; + + do { + /* Pop the first item off the list */ + spin_lock_irqsave(&interface->macvlan_lock, flags); + item = list_first_entry_or_null(requests, + struct fm10k_macvlan_request, + list); + if (item) + list_del_init(&item->list); + + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + /* We have no more items to process */ + if (!item) + goto done; + + fm10k_mbx_lock(interface); + + /* Check that we have plenty of space to send the message. We + * want to ensure that the mailbox stays low enough to avoid a + * change in the host state, otherwise we may see spurious + * link up / link down notifications. + */ + if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU + 5)) { + hw->mbx.ops.process(hw, &hw->mbx); + set_bit(__FM10K_MACVLAN_REQUEST, interface->state); + fm10k_mbx_unlock(interface); + + /* Put the request back on the list */ + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add(&item->list, requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + break; + } + + switch (item->type) { + case FM10K_MC_MAC_REQUEST: + hw->mac.ops.update_mc_addr(hw, + item->mac.glort, + item->mac.addr, + item->mac.vid, + item->set); + break; + case FM10K_UC_MAC_REQUEST: + hw->mac.ops.update_uc_addr(hw, + item->mac.glort, + item->mac.addr, + item->mac.vid, + item->set, + 0); + break; + case FM10K_VLAN_REQUEST: + hw->mac.ops.update_vlan(hw, + item->vlan.vid, + item->vlan.vsi, + item->set); + break; + default: + break; + } + + fm10k_mbx_unlock(interface); + + /* Free the item now that we've sent the update */ + kfree(item); + } while (true); + +done: + WARN_ON(!test_bit(__FM10K_MACVLAN_SCHED, interface->state)); + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__FM10K_MACVLAN_SCHED, interface->state); + + /* If a MAC/VLAN request was scheduled since we started, we should + * re-schedule. However, there is no reason to re-schedule if there is + * no work to do. 
+ */ + if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state)) + fm10k_macvlan_schedule(interface); +} + /** * fm10k_configure_tx_ring - Configure Tx ring after Reset * @interface: board private structure @@ -1918,11 +2102,15 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, INIT_LIST_HEAD(&interface->vxlan_port); INIT_LIST_HEAD(&interface->geneve_port); + /* Initialize the MAC/VLAN queue */ + INIT_LIST_HEAD(&interface->macvlan_requests); + netdev_rss_key_fill(rss_key, sizeof(rss_key)); memcpy(interface->rssrk, rss_key, sizeof(rss_key)); /* Initialize the mailbox lock */ spin_lock_init(&interface->mbx_lock); + spin_lock_init(&interface->macvlan_lock); /* Start off interface as being down */ set_bit(__FM10K_DOWN, interface->state); @@ -2131,6 +2319,9 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) (unsigned long)interface); INIT_WORK(&interface->service_task, fm10k_service_task); + /* Setup the MAC/VLAN queue */ + INIT_DELAYED_WORK(&interface->macvlan_task, fm10k_macvlan_task); + /* kick off service timer now, even when interface is down */ mod_timer(&interface->service_timer, (HZ * 2) + jiffies); @@ -2184,6 +2375,10 @@ static void fm10k_remove(struct pci_dev *pdev) del_timer_sync(&interface->service_timer); fm10k_stop_service_event(interface); + fm10k_stop_macvlan_task(interface); + + /* Remove all pending MAC/VLAN requests */ + fm10k_clear_macvlan_queue(interface, interface->glort, true); /* free netdev, this may bounce the interrupts due to setup_tc */ if (netdev->reg_state == NETREG_REGISTERED) @@ -2220,6 +2415,9 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface) * a surprise remove if the PCIe device is disabled while we're * stopped. We stop the watchdog task until after we resume software * activity. + * + * Note that the MAC/VLAN task will be stopped as part of preparing + * for reset so we don't need to handle it here. */ fm10k_stop_service_event(interface); @@ -2259,6 +2457,9 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) /* restart the service task */ fm10k_start_service_event(interface); + /* Restart the MAC/VLAN request queue in-case of outstanding events */ + fm10k_macvlan_schedule(interface); + return err; } From 1f5c27e52857c9ba8f1ee4ed5093bee1a341f330 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:18 -0700 Subject: [PATCH 0493/2177] fm10k: use the MAC/VLAN queue for VF<->PF MAC/VLAN requests Now that we have a working MAC/VLAN queue for handling MAC/VLAN messages from the netdev, replace the default handler for the VF<->PF messages. This new handler is very similar to the default code, but uses the MAC/VLAN queue instead of sending the message directly. Unfortunately we can't easily re-use the default code, so we'll just replace the entire function. This ensures that a VF requesting a large number of VLANs or MAC addresses does not start a reset cycle, as explained in the commit which introduced the message queue. 
Signed-off-by: Jacob Keller Reviewed-by: Ngai-mint Kwan Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 132 ++++++++++++++++++- drivers/net/ethernet/intel/fm10k/fm10k_pf.c | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_pf.h | 3 +- 3 files changed, 133 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 03897720bf0b..4a17cc903eed 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -35,10 +35,133 @@ static s32 fm10k_iov_msg_error(struct fm10k_hw *hw, u32 **results, return fm10k_tlv_msg_error(hw, results, mbx); } +/** + * fm10k_iov_msg_queue_mac_vlan - Message handler for MAC/VLAN request from VF + * @hw: Pointer to hardware structure + * @results: Pointer array to message, results[0] is pointer to message + * @mbx: Pointer to mailbox information structure + * + * This function is a custom handler for MAC/VLAN requests from the VF. The + * assumption is that it is acceptable to directly hand off the message from + * the VF to the PF's switch manager. However, we use a MAC/VLAN message + * queue to avoid overloading the mailbox when a large number of requests + * come in. + **/ +static s32 fm10k_iov_msg_queue_mac_vlan(struct fm10k_hw *hw, u32 **results, + struct fm10k_mbx_info *mbx) +{ + struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx; + struct fm10k_intfc *interface = hw->back; + u8 mac[ETH_ALEN]; + u32 *result; + int err = 0; + bool set; + u16 vlan; + u32 vid; + + /* we shouldn't be updating rules on a disabled interface */ + if (!FM10K_VF_FLAG_ENABLED(vf_info)) + err = FM10K_ERR_PARAM; + + if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) { + result = results[FM10K_MAC_VLAN_MSG_VLAN]; + + /* record VLAN id requested */ + err = fm10k_tlv_attr_get_u32(result, &vid); + if (err) + return err; + + set = !(vid & FM10K_VLAN_CLEAR); + vid &= ~FM10K_VLAN_CLEAR; + + /* if the length field has been set, this is a multi-bit + * update request. For multi-bit requests, simply disallow + * them when the pf_vid has been set. In this case, the PF + * should have already cleared the VLAN_TABLE, and if we + * allowed them, it could allow a rogue VF to receive traffic + * on a VLAN it was not assigned. In the single-bit case, we + * need to modify requests for VLAN 0 to use the default PF or + * SW vid when assigned. 
+ */ + + if (vid >> 16) { + /* prevent multi-bit requests when PF has + * administratively set the VLAN for this VF + */ + if (vf_info->pf_vid) + return FM10K_ERR_PARAM; + } else { + err = fm10k_iov_select_vid(vf_info, (u16)vid); + if (err < 0) + return err; + + vid = err; + } + + /* update VSI info for VF in regards to VLAN table */ + err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set); + } + + if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) { + result = results[FM10K_MAC_VLAN_MSG_MAC]; + + /* record unicast MAC address requested */ + err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan); + if (err) + return err; + + /* block attempts to set MAC for a locked device */ + if (is_valid_ether_addr(vf_info->mac) && + !ether_addr_equal(mac, vf_info->mac)) + return FM10K_ERR_PARAM; + + set = !(vlan & FM10K_VLAN_CLEAR); + vlan &= ~FM10K_VLAN_CLEAR; + + err = fm10k_iov_select_vid(vf_info, vlan); + if (err < 0) + return err; + + vlan = (u16)err; + + /* Add this request to the MAC/VLAN queue */ + err = fm10k_queue_mac_request(interface, vf_info->glort, + mac, vlan, set); + } + + if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) { + result = results[FM10K_MAC_VLAN_MSG_MULTICAST]; + + /* record multicast MAC address requested */ + err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan); + if (err) + return err; + + /* verify that the VF is allowed to request multicast */ + if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED)) + return FM10K_ERR_PARAM; + + set = !(vlan & FM10K_VLAN_CLEAR); + vlan &= ~FM10K_VLAN_CLEAR; + + err = fm10k_iov_select_vid(vf_info, vlan); + if (err < 0) + return err; + + vlan = (u16)err; + + /* Add this request to the MAC/VLAN queue */ + err = fm10k_queue_mac_request(interface, vf_info->glort, + mac, vlan, set); + } + + return err; +} + static const struct fm10k_msg_data iov_mbx_data[] = { FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test), FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf), - FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_mac_vlan_pf), + FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_queue_mac_vlan), FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_iov_msg_lport_state_pf), FM10K_TLV_MSG_ERROR_HANDLER(fm10k_iov_msg_error), }; @@ -126,8 +249,10 @@ process_mbx: hw->mbx.ops.process(hw, &hw->mbx); /* verify port mapping is valid, if not reset port */ - if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) + if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) { hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, glort, false); + } /* reset VFs that have mailbox timed out */ if (!mbx->timeout) { @@ -190,6 +315,7 @@ void fm10k_iov_suspend(struct pci_dev *pdev) hw->iov.ops.reset_resources(hw, vf_info); hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, vf_info->glort, false); } } @@ -414,6 +540,8 @@ static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface, /* disable LPORT for this VF which clears switch rules */ hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, vf_info->glort, false); + /* assign new MAC+VLAN for this VF */ hw->iov.ops.assign_default_mac_vlan(hw, vf_info); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 9e4fb3a44376..425d814aed4d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1186,7 +1186,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results, * Will report an error if the VLAN ID is out of range. 
For VID = 0, it will * return either the pf_vid or sw_vid depending on which one is set. */ -static s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid) +s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid) { if (!vid) return vf_info->pf_vid ? vf_info->pf_vid : vf_info->sw_vid; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h index 3336d3c10760..e04d41f1a532 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -114,6 +114,7 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[]; #define FM10K_PF_MSG_ERR_HANDLER(msg, func) \ FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_##msg, fm10k_err_msg_attr, func) +s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid); s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); From ef57ab791c81b3a83c75a312b15b42f7440bb425 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:19 -0700 Subject: [PATCH 0494/2177] fm10k: bump version number Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 189d52a8a605..5d56ed5ad7a6 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -28,7 +28,7 @@ #include "fm10k.h" -#define DRV_VERSION "0.21.7-k" +#define DRV_VERSION "0.22.1-k" #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; From c0ad8ef3df091ef179d78dccb810024612dcfa44 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 11 Aug 2017 09:17:15 -0700 Subject: [PATCH 0495/2177] fm10k: Fix misuse of net_ratelimit() Correct the backward logic using !net_ratelimit() Miscellanea: o Add a blank line before the error return label Signed-off-by: Joe Perches Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 5d56ed5ad7a6..dbd69310f263 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -806,9 +806,10 @@ static int fm10k_tso(struct fm10k_ring *tx_ring, tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); return 1; + err_vxlan: tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL; - if (!net_ratelimit()) + if (net_ratelimit()) netdev_err(tx_ring->netdev, "TSO requested for unsupported tunnel, disabling offload\n"); return -1; From 87be98927eb0bfa5484dfbe5ba2f6b7f91dd9187 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 11 Aug 2017 11:14:37 -0700 Subject: [PATCH 0496/2177] fm10k: prefer %s and __func__ for diagnostic prints Don't hard code the function names in the diagnostic output when these reset related routines fail. 
Instead, use %s and __func__ so that future refactors don't need to change the print outs. Additionally, while we are here, add missing function header comments for the new reset_prepare and reset_done function handlers. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 58538ce997e1..1e9ae3197b17 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2588,11 +2588,18 @@ static void fm10k_io_resume(struct pci_dev *pdev) if (err) dev_warn(&pdev->dev, - "fm10k_io_resume failed: %d\n", err); + "%s failed: %d\n", __func__, err); else netif_device_attach(netdev); } +/** + * fm10k_io_reset_prepare - called when PCI function is about to be reset + * @pdev: Pointer to PCI device + * + * This callback is called when the PCI function is about to be reset, + * allowing the device driver to prepare for it. + */ static void fm10k_io_reset_prepare(struct pci_dev *pdev) { /* warn incase we have any active VF devices */ @@ -2602,6 +2609,13 @@ static void fm10k_io_reset_prepare(struct pci_dev *pdev) fm10k_prepare_suspend(pci_get_drvdata(pdev)); } +/** + * fm10k_io_reset_done - called when PCI function has finished resetting + * @pdev: Pointer to PCI device + * + * This callback is called just after the PCI function is reset, such as via + * /sys/class/net//device/reset or similar. + */ static void fm10k_io_reset_done(struct pci_dev *pdev) { struct fm10k_intfc *interface = pci_get_drvdata(pdev); @@ -2609,7 +2623,7 @@ static void fm10k_io_reset_done(struct pci_dev *pdev) if (err) { dev_warn(&pdev->dev, - "fm10k_io_reset_notify failed: %d\n", err); + "%s failed: %d\n", __func__, err); netif_device_detach(interface->netdev); } } From 3e256ac5b1ec307e5dd5a4c99fbdbc651446c738 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 11 Aug 2017 11:14:58 -0700 Subject: [PATCH 0497/2177] fm10k: fix mis-ordered parameters in declaration for .ndo_set_vf_bw We've had support for setting both a minimum and maximum bandwidth via .ndo_set_vf_bw since commit 883a9ccbae56 ("fm10k: Add support for SR-IOV to driver", 2014-09-20). Likely because we do not support minimum rates, the declaration mis-ordered the "unused" parameter, which causes warnings when analyzed with cppcheck. Fix this warning by properly declaring the min_rate and max_rate variables in the declaration and definition (rather than using "unused"). Also rename "rate" to max_rate so as to clarify that we only support setting the maximum rate. 
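To make the problem concrete, a stripped-down illustration (not the driver's actual code) of the kind of mismatch static checkers such as cppcheck complain about: the prototype names the third parameter as the rate and marks the fourth unused, while the definition treats the third as the ignored minimum rate, so the same names end up referring to different arguments. The C compiler accepts this silently, since parameter names in prototypes carry no meaning, which is why only a static checker flags it.

        #include <linux/errno.h>
        #include <linux/netdevice.h>

        /* Prototype as it used to read: third parameter is the rate ... */
        int my_set_vf_bw(struct net_device *netdev, int vf_idx,
                         int rate, int unused);

        /* ... definition: third parameter is the (ignored) minimum rate. */
        int my_set_vf_bw(struct net_device *netdev, int vf_idx,
                         int __always_unused min_rate, int max_rate)
        {
                if (max_rate < 0)
                        return -EINVAL;
                /* program max_rate into hardware ... */
                return 0;
        }
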
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 40856bc0f3b9..46973fb234c5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -562,8 +562,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, - int unused); +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, + int __always_unused min_rate, int max_rate); int fm10k_ndo_get_vf_config(struct net_device *netdev, int vf_idx, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 4a17cc903eed..ea3ab24265ee 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -613,7 +613,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, } int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, - int __always_unused unused, int rate) + int __always_unused min_rate, int max_rate) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -624,14 +624,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, return -EINVAL; /* rate limit cannot be less than 10Mbs or greater than link speed */ - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX)) + if (max_rate && + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX)) return -EINVAL; /* store values */ - iov_data->vf_info[vf_idx].rate = rate; + iov_data->vf_info[vf_idx].rate = max_rate; /* update hardware configuration */ - hw->iov.ops.configure_tc(hw, vf_idx, rate); + hw->iov.ops.configure_tc(hw, vf_idx, max_rate); return 0; } From a047fbae23e1d94da28f81fb0f86fab4e473a094 Mon Sep 17 00:00:00 2001 From: Arjun Vynipadath Date: Tue, 3 Oct 2017 11:43:05 +0530 Subject: [PATCH 0498/2177] cxgb4: Update comment for min_mtu We have lost a comment for minimum mtu value set for netdevice with 'commit d894be57ca92 ("ethernet: use net core MTU range checking in more drivers"). Updating it accordingly. Signed-off-by: Arjun Vynipadath Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 13b636b0af5f..fe4cbe22d5d7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5024,7 +5024,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; /* MTU range: 81 - 9600 */ - netdev->min_mtu = 81; + netdev->min_mtu = 81; /* accommodate SACK */ netdev->max_mtu = MAX_MTU; netdev->netdev_ops = &cxgb4_netdev_ops; From abf4bb6b63d0a54266f8e7eff3720c1974063971 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:06 +0200 Subject: [PATCH 0499/2177] skbuff: Add the offload_mr_fwd_mark field Similarly to the offload_fwd_mark field, the offload_mr_fwd_mark field is used to allow partial offloading of MFC multicast routes. Switchdev drivers can offload MFC multicast routes to the hardware by registering to the FIB notification chain. When one of the route output interfaces is not offload-able, i.e. has different parent ID, the route cannot be fully offloaded by the hardware. Examples to non-offload-able devices are a management NIC, dummy device, pimreg device, etc. Similar problem exists in the bridge module, as one bridge can hold interfaces with different parent IDs. At the bridge, the problem is solved by the offload_fwd_mark skb field. Currently, when a route cannot go through full offload, the only solution for a switchdev driver is not to offload it at all and let the packet go through slow path. Using the offload_mr_fwd_mark field, a driver can indicate that a packet was already forwarded by hardware to all the devices with the same parent ID as the input device. Further patches in this patch-set are going to enhance ipmr to skip multicast forwarding to devices with the same parent ID if a packets is marked with that field. The reason why the already existing "offload_fwd_mark" bit cannot be used is that a switchdev driver would want to make the distinction between a packet that has already gone through L2 forwarding but did not go through multicast forwarding, and a packet that has already gone through both L2 and multicast forwarding. For example: when a packet is ingressing from a switchport enslaved to a bridge, which is configured with multicast forwarding, the following scenarios are possible: - The packet can be trapped to the CPU due to exception while multicast forwarding (for example, MTU error). In that case, it had already gone through L2 forwarding in the hardware, thus A switchdev driver would want to set the skb->offload_fwd_mark and not the skb->offload_mr_fwd_mark. - The packet can also be trapped due to a pimreg/dummy device used as one of the output interfaces. In that case, it can go through both L2 and (partial) multicast forwarding inside the hardware, thus a switchdev driver would want to set both the skb->offload_fwd_mark and skb->offload_mr_fwd_mark. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 19e64bfb1a66..ada821466e88 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -772,6 +772,7 @@ struct sk_buff { __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; + __u8 offload_mr_fwd_mark:1; #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; From 5d8b3e69fc5e5ccafc9db1251bb7c78a8622fddd Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:07 +0200 Subject: [PATCH 0500/2177] ipv4: ipmr: Add the parent ID field to VIF struct In order to allow the ipmr module to do partial multicast forwarding according to the device parent ID, add the device parent ID field to the VIF struct. This way, the forwarding path can use the parent ID field without invoking switchdev calls, which requires the RTNL lock. When a new VIF is added, set the device parent ID field in it by invoking the switchdev_port_attr_get call. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 1 + net/ipv4/ipmr.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index b072a84fbe1c..8242d05df35e 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -57,6 +57,7 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule) struct vif_device { struct net_device *dev; /* Device we are using */ + struct netdev_phys_item_id dev_parent_id; /* Device parent ID */ unsigned long bytes_in,bytes_out; unsigned long pkt_in,pkt_out; /* Statistics */ unsigned long rate_limit; /* Traffic shaping (NI) */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a844738b38bd..1b161ada7ae6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,6 +67,7 @@ #include #include #include +#include struct ipmr_rule { struct fib_rule common; @@ -868,6 +869,9 @@ static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { int vifi = vifc->vifc_vifi; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; @@ -942,6 +946,13 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* Fill in the VIF structures */ + attr.orig_dev = dev; + if (!switchdev_port_attr_get(dev, &attr)) { + memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); + v->dev_parent_id.id_len = attr.u.ppid.id_len; + } else { + v->dev_parent_id.id_len = 0; + } v->rate_limit = vifc->vifc_rate_limit; v->local = vifc->vifc_lcl_addr.s_addr; v->remote = vifc->vifc_rmt_addr.s_addr; From a5bc9294d70fe85729bb343eef281ccbe78ff119 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:08 +0200 Subject: [PATCH 0501/2177] ipv4: ipmr: Don't forward packets already forwarded by hardware Change the ipmr module to not forward packets if: - The packet is marked with the offload_mr_fwd_mark, and - Both input interface and output interface share the same parent ID. This way, a packet can go through partial multicast forwarding in the hardware, where it will be forwarded only to the devices that share the same parent ID (AKA, reside inside the same hardware). The kernel will forward the packet to all other interfaces. 
To do this, add the ipmr_offload_forward helper, which per skb, ingress VIF and egress VIF, returns whether the forwarding was offloaded to hardware. The ipmr_queue_xmit frees the skb and does not forward it if the result is a true value. All the forwarding path code compiles out when the CONFIG_NET_SWITCHDEV is not set. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1b161ada7ae6..b3ee01b0551b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1859,10 +1859,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } +#ifdef CONFIG_NET_SWITCHDEV +static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, + int in_vifi, int out_vifi) +{ + struct vif_device *out_vif = &mrt->vif_table[out_vifi]; + struct vif_device *in_vif = &mrt->vif_table[in_vifi]; + + if (!skb->offload_mr_fwd_mark) + return false; + if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) + return false; + return netdev_phys_item_id_same(&out_vif->dev_parent_id, + &in_vif->dev_parent_id); +} +#else +static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, + int in_vifi, int out_vifi) +{ + return false; +} +#endif + /* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *c, int vifi) + int in_vifi, struct sk_buff *skb, + struct mfc_cache *c, int vifi) { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; @@ -1883,6 +1906,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } + if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) + goto out_free; + if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, vif->remote, vif->local, @@ -2060,8 +2086,8 @@ forward: struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, skb2, cache, - psend); + ipmr_queue_xmit(net, mrt, true_vifi, + skb2, cache, psend); } psend = ct; } @@ -2072,9 +2098,10 @@ last_forward: struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, skb2, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb2, + cache, psend); } else { - ipmr_queue_xmit(net, mrt, skb, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend); return; } } From 267872435515185e2e600a721fdddeea90f96ffa Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:09 +0200 Subject: [PATCH 0502/2177] mlxsw: acl: Introduce ACL trap and forward action Use trap/discard flex action to implement trap and forward. The action will later be used for multicast routing, as the multicast routing mechanism is done using ACL flexible actions in Spectrum hardware. Using that action, it will be possible to implement a trap-and-forward route. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/core_acl_flex_actions.c | 17 +++++++++++++++++ .../mellanox/mlxsw/core_acl_flex_actions.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index bc55d0e76705..6a979a09ab72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -676,6 +676,7 @@ enum mlxsw_afa_trapdisc_trap_action { MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4); enum mlxsw_afa_trapdisc_forward_action { + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, }; @@ -729,6 +730,22 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, + trap_id); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 06b0be432b8f..a8d3314c3a24 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -61,6 +61,8 @@ int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, From a0040c8c935548e1efb1a28f07f15d7ec7918055 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:10 +0200 Subject: [PATCH 0503/2177] mlxsw: spectrum: Add trap for multicast trap-and-forward routes When a multicast route is configured with trap-and-forward action, the packets should be marked with skb->offload_mr_fwd_mark, in order to prevent the packets from being forwarded again by the kernel ipmr module. Due to this, it is not possible to use the already existing multicast trap (MLXSW_TRAP_ID_ACL1) as the packet should be marked differently. Add the MLXSW_TRAP_ID_ACL2 which is for trap-and-forward multicast routes, and set the offload_mr_fwd_mark skb field in its handler. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 13 +++++++++++++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e9b94430afed..3adf237c951a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3312,6 +3312,14 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } +static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) +{ + skb->offload_mr_fwd_mark = 1; + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); +} + static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port, void *priv) { @@ -3355,6 +3363,10 @@ out: MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) +#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \ + _is_ctrl, SP_##_trap_group, DISCARD) + #define MLXSW_SP_EVENTL(_func, _trap_id) \ MLXSW_EVENTL(_func, _trap_id, SP_EVENT) @@ -3425,6 +3437,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false), MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index a98103539f6b..ec6cef8267ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -93,6 +93,8 @@ enum { MLXSW_TRAP_ID_ACL0 = 0x1C0, /* Multicast trap used for routes with trap action */ MLXSW_TRAP_ID_ACL1 = 0x1C1, + /* Multicast trap used for routes with trap-and-forward action */ + MLXSW_TRAP_ID_ACL2 = 0x1C2, MLXSW_TRAP_ID_MAX = 0x1FF }; From 607feadef89ac806df5a0be983afef77247e1541 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:11 +0200 Subject: [PATCH 0504/2177] mlxsw: spectrum: mr_tcam: Add trap-and-forward multicast route In addition to the current multicast route actions, which include trap route action and a forward route action, add the trap-and-forward multicast route action, and implement it in the multicast routing hardware logic. To implement that, add a trap-and-forward ACL action as the last action in the route flexible action set. The used trap is the ACL2 trap, which marks the packets with offload_mr_forward_mark, to prevent the packet from being forwarded again by the kernel. Note: At that stage the offloading logic does not support trap-and-forward multicast routes. This patch adds the support only in the hardware logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h index c851b237d253..5d26a122af49 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -42,6 +42,7 @@ enum mlxsw_sp_mr_route_action { MLXSW_SP_MR_ROUTE_ACTION_FORWARD, MLXSW_SP_MR_ROUTE_ACTION_TRAP, + MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD, }; enum mlxsw_sp_mr_route_prio { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index cda9e9ad10e3..3ffb28dd4057 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -253,6 +253,7 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err; break; + case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD: case MLXSW_SP_MR_ROUTE_ACTION_FORWARD: /* If we are about to append a multicast router action, commit * the erif_list. @@ -266,6 +267,13 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, erif_list->kvdl_index); if (err) goto err; + + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) { + err = mlxsw_afa_block_append_trap_and_forward(afa_block, + MLXSW_TRAP_ID_ACL2); + if (err) + goto err; + } break; default: err = -EINVAL; From f60c254998de80feaec8e4122960ab64e8045214 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:12 +0200 Subject: [PATCH 0505/2177] mlxsw: spectrum: mr: Support trap-and-forward routes Add the support of trap-and-forward route action in the multicast routing offloading logic. A route will be set to trap-and-forward action if one (or more) of its output interfaces is not offload-able, i.e. does not have a valid Spectrum RIF. This way, a route with mixed output VIFs list, which contains both offload-able and un-offload-able devices can go through partial offloading in hardware, and the rest will be done in the kernel ipmr module. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_mr.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 4aaf6ca1be7c..1f84bb8e9135 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -114,9 +114,9 @@ static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif) return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif; } -static bool mlxsw_sp_mr_vif_rif_invalid(const struct mlxsw_sp_mr_vif *vif) +static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif) { - return mlxsw_sp_mr_vif_regular(vif) && vif->dev && !vif->rif; + return vif->dev; } static bool @@ -182,14 +182,13 @@ mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route) if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route)) return MLXSW_SP_MR_ROUTE_ACTION_TRAP; - /* If either one of the eVIFs is not regular (VIF of type pimreg or - * tunnel) or one of the VIFs has no matching RIF, trap the packet. + /* If one of the eVIFs has no RIF, trap-and-forward the route as there + * is some more routing to do in software too. 
*/ - list_for_each_entry(rve, &mr_route->evif_list, route_node) { - if (!mlxsw_sp_mr_vif_regular(rve->mr_vif) || - mlxsw_sp_mr_vif_rif_invalid(rve->mr_vif)) - return MLXSW_SP_MR_ROUTE_ACTION_TRAP; - } + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD; + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD; } From 161ae6b04d5da0de518ea600d8fffe61208e243c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2017 11:39:18 +0100 Subject: [PATCH 0506/2177] net: dsa: lan9303: make functions lan9303_mdio_phy_{read|write} static The functions lan9303_mdio_phy_write and lan9303_mdio_phy_read are local to the source and do not need to be in global scope, so make them static. Cleans up sparse warnings: symbol 'lan9303_mdio_phy_write' was not declared. Should it be static? symbol 'lan9303_mdio_phy_read' was not declared. Should it be static? Signed-off-by: Colin Ian King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303_mdio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index fc16668a487f..0bc56b9900f9 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -67,14 +67,15 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val) return 0; } -int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, u16 val) +static int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, + u16 val) { struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev); return mdiobus_write_nested(sw_dev->device->bus, phy, reg, val); } -int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg) +static int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg) { struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev); From 360cc342c9037ed487adf07212000f69c0ffa14d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2017 11:46:33 +0100 Subject: [PATCH 0507/2177] net: dsa: mt7530: make functions mt7530_phy_write static The function mt7530_phy_write is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warnings: symbol 'mt7530_phy_write' was not declared. Should it be static? Signed-off-by: Colin Ian King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index faa3b88d2206..034241696ce2 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -564,7 +564,8 @@ static int mt7530_phy_read(struct dsa_switch *ds, int port, int regnum) return mdiobus_read_nested(priv->bus, port, regnum); } -int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) +static int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum, + u16 val) { struct mt7530_priv *priv = ds->priv; From b508e0b6e47c85a095ef056f3de6ba9d396c490c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Oct 2017 13:53:03 +0300 Subject: [PATCH 0508/2177] mlxsw: spectrum: Fix check for IS_ERR() instead of NULL mlxsw_afa_block_create() doesn't return error pointers, it returns NULL on error. Fixes: 0e14c7777acb ("mlxsw: spectrum: Add the multicast routing hardware logic") Signed-off-by: Dan Carpenter Acked-by: Yotam Gigi Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index 3ffb28dd4057..3a61896ae4d8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -239,8 +239,8 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, int err; afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); - if (IS_ERR(afa_block)) - return afa_block; + if (!afa_block) + return ERR_PTR(-ENOMEM); err = mlxsw_afa_block_append_counter(afa_block, counter_index); if (err) From b5c7d4e54c9ab830e5c03f92377fe15cbae64d0d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Oct 2017 13:53:41 +0300 Subject: [PATCH 0509/2177] mlxsw: spectrum: Add missing error code on allocation failure We accidentally return success if the kmalloc_array() call fails. Fixes: 0e14c7777acb ("mlxsw: spectrum: Add the multicast routing hardware logic") Signed-off-by: Dan Carpenter Acked-by: Yotam Gigi Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index 3a61896ae4d8..39c21c70ac32 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -771,8 +771,10 @@ mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, sizeof(*parman_prios), GFP_KERNEL); - if (!parman_prios) + if (!parman_prios) { + err = -ENOMEM; goto err_parman_prios_alloc; + } mr_tcam_region->parman_prios = parman_prios; for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) From 4d2c0cda07448ea6980f00102dc3964eb25e241c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 27 Sep 2017 18:03:49 -0700 Subject: [PATCH 0510/2177] bonding: speed/duplex update at NETDEV_UP event Some NIC drivers don't have correct speed/duplex settings at the time they send NETDEV_UP notification and that messes up the bonding state. Especially 802.3ad mode which is very sensitive to these settings. In the current implementation we invoke bond_update_speed_duplex() when we receive NETDEV_UP, however, ignore the return value. If the values we get are invalid (UNKNOWN), then slave gets removed from the aggregator with speed and duplex set to UNKNOWN while link is still marked as UP. This patch fixes this scenario. Also 802.3ad mode is sensitive to these conditions while other modes are not, so making sure that it doesn't change the behavior for other modes. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d2e94b8559f0..b19dc033fb36 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3073,7 +3073,16 @@ static int bond_slave_netdev_event(unsigned long event, break; case NETDEV_UP: case NETDEV_CHANGE: - bond_update_speed_duplex(slave); + /* For 802.3ad mode only: + * Getting invalid Speed/Duplex values here will put slave + * in weird state. 
So mark it as link-down for the time + * being and let link-monitoring (miimon) set it right when + * correct speeds/duplex are available. + */ + if (bond_update_speed_duplex(slave) && + BOND_MODE(bond) == BOND_MODE_8023AD) + slave->link = BOND_LINK_DOWN; + if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_adapter_speed_duplex_changed(slave); /* Fallthrough */ From 6c5570016b972d9b1f0f6c2dca9cc0422b1f92bf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 2 Oct 2017 23:50:05 +0200 Subject: [PATCH 0511/2177] net: core: decouple ifalias get/set from rtnl lock Device alias can be set by either rtnetlink (rtnl is held) or sysfs. rtnetlink hold the rtnl mutex, sysfs acquires it for this purpose. Add an extra mutex for it and use rcu to protect concurrent accesses. This allows the sysfs path to not take rtnl and would later allow to not hold it when dumping ifalias. Based on suggestion from Eric Dumazet. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++- net/core/dev.c | 52 +++++++++++++++++++++++++++++---------- net/core/net-sysfs.c | 17 ++++++------- net/core/rtnetlink.c | 13 ++++++++-- 4 files changed, 65 insertions(+), 25 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e1d6ef130611..d04424cfffba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -826,6 +826,11 @@ struct xfrmdev_ops { }; #endif +struct dev_ifalias { + struct rcu_head rcuhead; + char ifalias[]; +}; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1632,7 +1637,7 @@ enum netdev_priv_flags { struct net_device { char name[IFNAMSIZ]; struct hlist_node name_hlist; - char *ifalias; + struct dev_ifalias __rcu *ifalias; /* * I/O specific fields * FIXME: Merge these and struct ifmap into one @@ -3275,6 +3280,7 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags, unsigned int gchanges); int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); +int dev_get_alias(const struct net_device *, char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); int __dev_set_mtu(struct net_device *, int); int dev_set_mtu(struct net_device *, int); diff --git a/net/core/dev.c b/net/core/dev.c index e350c768d4b5..1770097cfd86 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id); DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +static DEFINE_MUTEX(ifalias_mutex); + /* protects napi_hash addition/deletion and napi_gen_id */ static DEFINE_SPINLOCK(napi_hash_lock); @@ -1265,29 +1267,53 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { - char *new_ifalias; - - ASSERT_RTNL(); + struct dev_ifalias *new_alias = NULL; if (len >= IFALIASZ) return -EINVAL; - if (!len) { - kfree(dev->ifalias); - dev->ifalias = NULL; - return 0; + if (len) { + new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL); + if (!new_alias) + return -ENOMEM; + + memcpy(new_alias->ifalias, alias, len); + new_alias->ifalias[len] = 0; } - new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!new_ifalias) - return -ENOMEM; - dev->ifalias = new_ifalias; - memcpy(dev->ifalias, alias, len); - dev->ifalias[len] = 0; + mutex_lock(&ifalias_mutex); + rcu_swap_protected(dev->ifalias, new_alias, + 
mutex_is_locked(&ifalias_mutex)); + mutex_unlock(&ifalias_mutex); + + if (new_alias) + kfree_rcu(new_alias, rcuhead); return len; } +/** + * dev_get_alias - get ifalias of a device + * @dev: device + * @alias: buffer to store name of ifalias + * @len: size of buffer + * + * get ifalias for a device. Caller must make sure dev cannot go + * away, e.g. rcu read lock or own a reference count to device. + */ +int dev_get_alias(const struct net_device *dev, char *name, size_t len) +{ + const struct dev_ifalias *alias; + int ret = 0; + + rcu_read_lock(); + alias = rcu_dereference(dev->ifalias); + if (alias) + ret = snprintf(name, len, "%s", alias->ifalias); + rcu_read_unlock(); + + return ret; +} /** * netdev_features_change - device changes features diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 927a6dcbad96..51d5836d8fb9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -391,10 +391,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - if (!rtnl_trylock()) - return restart_syscall(); ret = dev_set_alias(netdev, buf, count); - rtnl_unlock(); return ret < 0 ? ret : len; } @@ -403,13 +400,12 @@ static ssize_t ifalias_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct net_device *netdev = to_net_dev(dev); + char tmp[IFALIASZ]; ssize_t ret = 0; - if (!rtnl_trylock()) - return restart_syscall(); - if (netdev->ifalias) - ret = sprintf(buf, "%s\n", netdev->ifalias); - rtnl_unlock(); + ret = dev_get_alias(netdev, tmp, sizeof(tmp)); + if (ret > 0) + ret = sprintf(buf, "%s\n", tmp); return ret; } static DEVICE_ATTR_RW(ifalias); @@ -1488,7 +1484,10 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); - kfree(dev->ifalias); + /* no need to wait for rcu grace period: + * device is dead and about to be freed. + */ + kfree(rcu_access_pointer(dev->ifalias)); netdev_freemem(dev); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e6955da0d58d..3961f87cdc76 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1366,6 +1366,16 @@ static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) return nla_put_u32(skb, IFLA_LINK, ifindex); } +static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, + struct net_device *dev) +{ + char buf[IFALIASZ]; + int ret; + + ret = dev_get_alias(dev, buf, sizeof(buf)); + return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0; +} + static int rtnl_fill_link_netnsid(struct sk_buff *skb, const struct net_device *dev) { @@ -1425,8 +1435,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || - (dev->ifalias && - nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) || + nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_changes)) || nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) From 1ae2eaaa229bc350b6f38fbf4ab9c873532aecfb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:08 -0300 Subject: [PATCH 0512/2177] sctp: silence warns on sctp_stream_init allocations As SCTP supports up to 65535 streams, that can lead to very large allocations in sctp_stream_init(). As Xin Long noticed, systems with small amounts of memory are more prone to not have enough memory and dump warnings on dmesg initiated by user actions. Thus, silence them. 
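For context, a brief illustrative sketch (not taken from the patch): the warnings come from large contiguous allocations sized by the stream count, and OR-ing __GFP_NOWARN into the gfp mask makes a failed attempt return NULL quietly instead of dumping an allocation-failure backtrace to dmesg:

        /* Illustration only: a failure-tolerant per-stream array allocation.
         * outcnt may be as large as 65535, so the request can be big.
         */
        struct sctp_stream_out *out;

        out = kcalloc(outcnt, sizeof(*out), gfp | __GFP_NOWARN);
        if (!out)
                return -ENOMEM; /* fail silently, the caller handles it */
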
Also, if the reallocation of stream->out is not necessary, skip it and keep the memory we already have. Reported-by: Xin Long Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 63ea15503714..1afa95558083 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -40,9 +40,14 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, { int i; + gfp |= __GFP_NOWARN; + /* Initial stream->out size may be very big, so free it and alloc - * a new one with new outcnt to save memory. + * a new one with new outcnt to save memory if needed. */ + if (outcnt == stream->outcnt) + goto in; + kfree(stream->out); stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); @@ -53,6 +58,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; +in: if (!incnt) return 0; From e090abd0d81c7bdbf0c0ba2224624be2b15ded0d Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:09 -0300 Subject: [PATCH 0513/2177] sctp: factor out stream->out allocation There is 1 place allocating it and 2 other reallocating. Move such procedures to a common function. Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 52 +++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 1afa95558083..6d0e997d301f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,6 +35,30 @@ #include #include +static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, + gfp_t gfp) +{ + struct sctp_stream_out *out; + + out = kmalloc_array(outcnt, sizeof(*out), gfp); + if (!out) + return -ENOMEM; + + if (stream->out) { + memcpy(out, stream->out, min(outcnt, stream->outcnt) * + sizeof(*out)); + kfree(stream->out); + } + + if (outcnt > stream->outcnt) + memset(out + stream->outcnt, 0, + (outcnt - stream->outcnt) * sizeof(*out)); + + stream->out = out; + + return 0; +} + int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { @@ -48,11 +72,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, if (outcnt == stream->outcnt) goto in; - kfree(stream->out); - - stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); - if (!stream->out) - return -ENOMEM; + i = sctp_stream_alloc_out(stream, outcnt, gfp); + if (i) + return i; stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) @@ -276,15 +298,9 @@ int sctp_send_add_streams(struct sctp_association *asoc, } if (out) { - struct sctp_stream_out *streamout; - - streamout = krealloc(stream->out, outcnt * sizeof(*streamout), - GFP_KERNEL); - if (!streamout) + retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL); + if (retval) goto out; - - memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); - stream->out = streamout; } chunk = sctp_make_strreset_addstrm(asoc, out, in); @@ -682,10 +698,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( struct sctp_strreset_addstrm *addstrm = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; - struct sctp_stream_out *streamout; struct sctp_chunk *chunk = NULL; __u32 request_seq, outcnt; __u16 out, i; + int ret; request_seq = ntohl(addstrm->request_seq); if 
(TSN_lt(asoc->strreset_inseq, request_seq) || @@ -714,14 +730,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( if (!out || outcnt > SCTP_MAX_STREAM) goto out; - streamout = krealloc(stream->out, outcnt * sizeof(*streamout), - GFP_ATOMIC); - if (!streamout) + ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC); + if (ret) goto out; - memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); - stream->out = streamout; - chunk = sctp_make_strreset_addstrm(asoc, out, 0); if (!chunk) goto out; From 1fdb8d8fefe2e7320ea15a65051758a4c4332f05 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:10 -0300 Subject: [PATCH 0514/2177] sctp: factor out stream->in allocation There is 1 place allocating it and another reallocating. Move such procedures to a common function. v2: updated changelog Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 6d0e997d301f..952437d656cc 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -59,6 +59,31 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, return 0; } +static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, + gfp_t gfp) +{ + struct sctp_stream_in *in; + + in = kmalloc_array(incnt, sizeof(*stream->in), gfp); + + if (!in) + return -ENOMEM; + + if (stream->in) { + memcpy(in, stream->in, min(incnt, stream->incnt) * + sizeof(*in)); + kfree(stream->in); + } + + if (incnt > stream->incnt) + memset(in + stream->incnt, 0, + (incnt - stream->incnt) * sizeof(*in)); + + stream->in = in; + + return 0; +} + int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { @@ -84,8 +109,8 @@ in: if (!incnt) return 0; - stream->in = kcalloc(incnt, sizeof(*stream->in), gfp); - if (!stream->in) { + i = sctp_stream_alloc_in(stream, incnt, gfp); + if (i) { kfree(stream->out); stream->out = NULL; return -ENOMEM; @@ -623,7 +648,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( struct sctp_strreset_addstrm *addstrm = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; - struct sctp_stream_in *streamin; __u32 request_seq, incnt; __u16 in, i; @@ -670,13 +694,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( if (!in || incnt > SCTP_MAX_STREAM) goto out; - streamin = krealloc(stream->in, incnt * sizeof(*streamin), - GFP_ATOMIC); - if (!streamin) + if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC)) goto out; - memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); - stream->in = streamin; stream->incnt = incnt; result = SCTP_STRRESET_PERFORMED; From f952be79cebd49d04154781d99408867a069d375 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:11 -0300 Subject: [PATCH 0515/2177] sctp: introduce struct sctp_stream_out_ext With the stream schedulers, sctp_stream_out will become too big to be allocated by kmalloc and as we need to allocate with BH disabled, we cannot use __vmalloc in sctp_stream_init(). This patch moves out the stats from sctp_stream_out to sctp_stream_out_ext, which will be allocated only when the application tries to sendmsg something on it. Just the introduction of sctp_stream_out_ext would already fix the issue described above by splitting the allocation in two. 
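In other words (illustrative sketch, mirroring the hunk added to sctp_sendmsg() further below), each sctp_stream_out now only carries a pointer, and the larger stats block is allocated lazily, one stream at a time, the first time that stream is actually used:

        /* Sketch of the lazy-allocation pattern on the send path */
        if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
                err = sctp_stream_init_ext(&asoc->stream,
                                           sinfo->sinfo_stream);
                if (err)
                        goto out_free;
        }
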
Moving the stats to it also reduces the pressure on the allocator as we will ask for less memory atomically when creating the socket and we will use GFP_KERNEL later. Then, for stream schedulers, we will just use sctp_stream_out_ext. Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 10 ++++++++-- net/sctp/chunk.c | 6 +++--- net/sctp/outqueue.c | 4 ++-- net/sctp/socket.c | 27 +++++++++++++++++++++------ net/sctp/stream.c | 16 ++++++++++++++++ 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0477945de1a3..9b2b30b3ba4d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -84,6 +84,7 @@ struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; struct sctp_stream; +struct sctp_stream_out; #include @@ -380,6 +381,7 @@ struct sctp_sender_hb_info { int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp); +int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); @@ -1315,11 +1317,15 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_out_ext { + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; +}; + struct sctp_stream_out { __u16 ssn; __u8 state; - __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; - __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + struct sctp_stream_out_ext *ext; }; struct sctp_stream_in { diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 3afac275ee82..7b261afc47b9 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++; } else { chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; - streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; } return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && @@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 2966ff400755..746b07b7937d 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -366,7 +366,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; if (!chk->tsn_gap_acked) { if (chk->transport) @@ -404,7 +404,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, struct sctp_stream_out *streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; - streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; } msg_len -= SCTP_DATA_SNDSIZE(chk) + diff --git a/net/sctp/socket.c 
b/net/sctp/socket.c index d4730ada7f32..d207734326b0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1927,6 +1927,13 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_free; } + /* Allocate sctp_stream_out_ext if not already done */ + if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) { + err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream); + if (err) + goto out_free; + } + if (sctp_wspace(asoc) < msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); @@ -6645,7 +6652,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_stream_out *streamout; + struct sctp_stream_out_ext *streamoute; struct sctp_association *asoc; struct sctp_prstatus params; int retval = -EINVAL; @@ -6668,21 +6675,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) goto out; - streamout = &asoc->stream.out[params.sprstat_sid]; + streamoute = asoc->stream.out[params.sprstat_sid].ext; + if (!streamoute) { + /* Not allocated yet, means all stats are 0 */ + params.sprstat_abandoned_unsent = 0; + params.sprstat_abandoned_sent = 0; + retval = 0; + goto out; + } + if (policy == SCTP_PR_SCTP_NONE) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { params.sprstat_abandoned_unsent += - streamout->abandoned_unsent[policy]; + streamoute->abandoned_unsent[policy]; params.sprstat_abandoned_sent += - streamout->abandoned_sent[policy]; + streamoute->abandoned_sent[policy]; } } else { params.sprstat_abandoned_unsent = - streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; + streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)]; params.sprstat_abandoned_sent = - streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; + streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)]; } if (put_user(len, optlen) || copy_to_user(optval, ¶ms, len)) { diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 952437d656cc..055ca25bbc91 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -121,8 +121,24 @@ in: return 0; } +int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_stream_out_ext *soute; + + soute = kzalloc(sizeof(*soute), GFP_KERNEL); + if (!soute) + return -ENOMEM; + stream->out[sid].ext = soute; + + return 0; +} + void sctp_stream_free(struct sctp_stream *stream) { + int i; + + for (i = 0; i < stream->outcnt; i++) + kfree(stream->out[i].ext); kfree(stream->out); kfree(stream->in); } From 2fc019f790312e703efa1a44204c586112a430dc Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:12 -0300 Subject: [PATCH 0516/2177] sctp: introduce sctp_chunk_stream_no Add a helper to fetch the stream number from a given chunk. Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 9b2b30b3ba4d..c48f7999fe9b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -642,6 +642,11 @@ void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *, union sctp_addr *); const union sctp_addr *sctp_source(const struct sctp_chunk *chunk); +static inline __u16 sctp_chunk_stream_no(struct sctp_chunk *ch) +{ + return ntohs(ch->subh.data_hdr->stream); +} + enum { SCTP_ADDR_NEW, /* new address added to assoc/ep */ SCTP_ADDR_SRC, /* address can be used as source */ From 5bbbbe32a43199c2b9ea5ea66fab6241c64beb51 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:13 -0300 Subject: [PATCH 0517/2177] sctp: introduce stream scheduler foundations This patch introduces the hooks necessary to do stream scheduling, as per RFC Draft ndata. It also introduces the first scheduler, which is what we do today but now factored out: first come first served (FCFS). With stream scheduling now we have to track which chunk was enqueued on which stream and be able to select another other than the in front of the main outqueue. So we introduce a list on sctp_stream_out_ext structure for this purpose. We reuse sctp_chunk->transmitted_list space for the list above, as the chunk cannot belong to the two lists at the same time. By using the union in there, we can have distinct names for these moments. sctp_sched_ops are the operations expected to be implemented by each scheduler. The dequeueing is a bit particular to this implementation but it is to match how we dequeue packets today. We first dequeue and then check if it fits the packet and if not, we requeue it at head. Thus why we don't have a peek operation but have dequeue_done instead, which is called once the chunk can be safely considered as transmitted. The check removed from sctp_outq_flush is now performed by sctp_stream_outq_migrate, which is only called during assoc setup. (sctp_sendmsg() also checks for it) The only operation that is foreseen but not yet added here is a way to signalize that a new packet is starting or that the packet is done, for round robin scheduler per packet, but is intentionally left to the patch that actually implements it. Support for I-DATA chunks, also described in this RFC, with user message interleaving is straightforward as it just requires the schedulers to probe for the feature and ignore datamsg boundaries when dequeueing. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/stream_sched.h | 72 +++++++++ include/net/sctp/structs.h | 15 +- include/uapi/linux/sctp.h | 6 + net/sctp/Makefile | 2 +- net/sctp/outqueue.c | 59 +++---- net/sctp/sm_sideeffect.c | 3 + net/sctp/stream.c | 88 ++++++++++- net/sctp/stream_sched.c | 270 ++++++++++++++++++++++++++++++++ 8 files changed, 477 insertions(+), 38 deletions(-) create mode 100644 include/net/sctp/stream_sched.h create mode 100644 net/sctp/stream_sched.c diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h new file mode 100644 index 000000000000..c676550a4c7d --- /dev/null +++ b/include/net/sctp/stream_sched.h @@ -0,0 +1,72 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 
2017 + * + * These are definitions used by the stream schedulers, defined in RFC + * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11) + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresses: + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#ifndef __sctp_stream_sched_h__ +#define __sctp_stream_sched_h__ + +struct sctp_sched_ops { + /* Property handling for a given stream */ + int (*set)(struct sctp_stream *stream, __u16 sid, __u16 value, + gfp_t gfp); + int (*get)(struct sctp_stream *stream, __u16 sid, __u16 *value); + + /* Init the specific scheduler */ + int (*init)(struct sctp_stream *stream); + /* Init a stream */ + int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* Frees the entire thing */ + void (*free)(struct sctp_stream *stream); + + /* Enqueue a chunk */ + void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg); + /* Dequeue a chunk */ + struct sctp_chunk *(*dequeue)(struct sctp_outq *q); + /* Called only if the chunk fit the packet */ + void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk); + /* Sched all chunks already enqueued */ + void (*sched_all)(struct sctp_stream *steam); + /* Unched all chunks already enqueued */ + void (*unsched_all)(struct sctp_stream *steam); +}; + +int sctp_sched_set_sched(struct sctp_association *asoc, + enum sctp_sched_type sched); +int sctp_sched_get_sched(struct sctp_association *asoc); +int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, + __u16 value, gfp_t gfp); +int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, + __u16 *value); +void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch); + +void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch); +int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp); +struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); + +#endif /* __sctp_stream_sched_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index c48f7999fe9b..3c22a30fd71b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -84,7 +84,6 @@ struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; struct sctp_stream; -struct sctp_stream_out; #include @@ -531,8 +530,12 @@ struct sctp_chunk { /* How many times this chunk have been sent, for prsctp RTX policy */ int sent_count; - /* This is our link to the per-transport transmitted list. */ - struct list_head transmitted_list; + union { + /* This is our link to the per-transport transmitted list. */ + struct list_head transmitted_list; + /* List in specific stream outq */ + struct list_head stream_list; + }; /* This field is used by chunks that hold fragmented data. 
* For the first fragment this is the list that holds the rest of @@ -1019,6 +1022,9 @@ struct sctp_outq { /* Data pending that has never been transmitted. */ struct list_head out_chunk_list; + /* Stream scheduler being used */ + struct sctp_sched_ops *sched; + unsigned int out_qlen; /* Total length of queued data chunks. */ /* Error of send failed, may used in SCTP_SEND_FAILED event. */ @@ -1325,6 +1331,7 @@ struct sctp_inithdr_host { struct sctp_stream_out_ext { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + struct list_head outq; /* chunks enqueued by this stream */ }; struct sctp_stream_out { @@ -1342,6 +1349,8 @@ struct sctp_stream { struct sctp_stream_in *in; __u16 outcnt; __u16 incnt; + /* Current stream being sent, if any */ + struct sctp_stream_out *out_curr; }; #define SCTP_STREAM_CLOSED 0x00 diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6217ff8500a1..4487e7625ddb 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1088,4 +1088,10 @@ struct sctp_add_streams { uint16_t sas_outstrms; }; +/* SCTP Stream schedulers */ +enum sctp_sched_type { + SCTP_SS_FCFS, + SCTP_SS_MAX = SCTP_SS_FCFS +}; + #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 70f1b570bab9..0f6e6d1d69fd 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -12,7 +12,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ - offload.o + offload.o stream_sched.o sctp_probe-y := probe.o diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 746b07b7937d..4db012aa25f7 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -50,6 +50,7 @@ #include #include +#include /* Declare internal functions here. */ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn); @@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, - struct sctp_chunk *ch) + struct sctp_chunk *ch) { + struct sctp_stream_out_ext *oute; + __u16 stream; + list_add(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; + + stream = sctp_chunk_stream_no(ch); + oute = q->asoc->stream.out[stream].ext; + list_add(&ch->stream_list, &oute->outq); } /* Take data from the front of the queue. */ static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q) { - struct sctp_chunk *ch = NULL; - - if (!list_empty(&q->out_chunk_list)) { - struct list_head *entry = q->out_chunk_list.next; - - ch = list_entry(entry, struct sctp_chunk, list); - list_del_init(entry); - q->out_qlen -= ch->skb->len; - } - return ch; + return q->sched->dequeue(q); } + /* Add data chunk to the end of the queue. */ static inline void sctp_outq_tail_data(struct sctp_outq *q, struct sctp_chunk *ch) { + struct sctp_stream_out_ext *oute; + __u16 stream; + list_add_tail(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; + + stream = sctp_chunk_stream_no(ch); + oute = q->asoc->stream.out[stream].ext; + list_add_tail(&ch->stream_list, &oute->outq); } /* @@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); + sctp_sched_set_sched(asoc, SCTP_SS_FCFS); } /* Free the outqueue structure and any related pending chunks. 
@@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q) /* Throw away any leftover data chunks. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { + sctp_sched_dequeue_done(q, chunk); /* Mark as send failure. */ sctp_chunk_fail(chunk, q->error); @@ -391,13 +400,14 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; + q->sched->unsched_all(&asoc->stream); + list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; - list_del_init(&chk->list); - q->out_qlen -= chk->skb->len; + sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { @@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, break; } + q->sched->sched_all(&asoc->stream); + return msg_len; } @@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { __u32 sid = ntohs(chunk->subh.data_hdr->stream); - /* RFC 2960 6.5 Every DATA chunk MUST carry a valid - * stream identifier. - */ - if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) { - - /* Mark as failed send. */ - sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); - if (asoc->peer.prsctp_capable && - SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) - asoc->sent_cnt_removable--; - sctp_chunk_free(chunk); - continue; - } - /* Has this chunk expired? */ if (sctp_chunk_abandoned(chunk)) { + sctp_sched_dequeue_done(q, chunk); sctp_chunk_fail(chunk, 0); sctp_chunk_free(chunk); continue; @@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) new_transport = asoc->peer.active_path; if (new_transport->state == SCTP_UNCONFIRMED) { WARN_ONCE(1, "Attempt to send packet on unconfirmed path."); + sctp_sched_dequeue_done(q, chunk); sctp_chunk_fail(chunk, 0); sctp_chunk_free(chunk); continue; @@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) else asoc->stats.oodchunks++; + /* Only now it's safe to consider this + * chunk as sent, sched-wise. + */ + sctp_sched_dequeue_done(q, chunk); + break; default: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e6a2974e020e..402bfbb888cd 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -50,6 +50,7 @@ #include #include #include +#include static int sctp_cmd_interpreter(enum sctp_event event_type, union sctp_subtype subtype, @@ -1089,6 +1090,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, list_for_each_entry(chunk, &msg->chunks, frag_list) sctp_outq_tail(&asoc->outqueue, chunk, gfp); + + asoc->outqueue.sched->enqueue(&asoc->outqueue, msg); } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 055ca25bbc91..5ea33a2c453b 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -32,8 +32,61 @@ * Xin Long */ +#include #include #include +#include + +/* Migrates chunks from stream queues to new stream queues if needed, + * but not across associations. Also, removes those chunks to streams + * higher than the new max. 
+ */ +static void sctp_stream_outq_migrate(struct sctp_stream *stream, + struct sctp_stream *new, __u16 outcnt) +{ + struct sctp_association *asoc; + struct sctp_chunk *ch, *temp; + struct sctp_outq *outq; + int i; + + asoc = container_of(stream, struct sctp_association, stream); + outq = &asoc->outqueue; + + list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) { + __u16 sid = sctp_chunk_stream_no(ch); + + if (sid < outcnt) + continue; + + sctp_sched_dequeue_common(outq, ch); + /* No need to call dequeue_done here because + * the chunks are not scheduled by now. + */ + + /* Mark as failed send. */ + sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); + if (asoc->peer.prsctp_capable && + SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) + asoc->sent_cnt_removable--; + + sctp_chunk_free(ch); + } + + if (new) { + /* Here we actually move the old ext stuff into the new + * buffer, because we want to keep it. Then + * sctp_stream_update will swap ->out pointers. + */ + for (i = 0; i < outcnt; i++) { + kfree(new->out[i].ext); + new->out[i].ext = stream->out[i].ext; + stream->out[i].ext = NULL; + } + } + + for (i = outcnt; i < stream->outcnt; i++) + kfree(stream->out[i].ext); +} static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, gfp_t gfp) @@ -87,7 +140,8 @@ static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { - int i; + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + int i, ret = 0; gfp |= __GFP_NOWARN; @@ -97,6 +151,11 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, if (outcnt == stream->outcnt) goto in; + /* Filter out chunks queued on streams that won't exist anymore */ + sched->unsched_all(stream); + sctp_stream_outq_migrate(stream, NULL, outcnt); + sched->sched_all(stream); + i = sctp_stream_alloc_out(stream, outcnt, gfp); if (i) return i; @@ -105,20 +164,27 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; + sched->init(stream); + in: if (!incnt) - return 0; + goto out; i = sctp_stream_alloc_in(stream, incnt, gfp); if (i) { - kfree(stream->out); - stream->out = NULL; - return -ENOMEM; + ret = -ENOMEM; + goto free; } stream->incnt = incnt; + goto out; - return 0; +free: + sched->free(stream); + kfree(stream->out); + stream->out = NULL; +out: + return ret; } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) @@ -130,13 +196,15 @@ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) return -ENOMEM; stream->out[sid].ext = soute; - return 0; + return sctp_sched_init_sid(stream, sid, GFP_KERNEL); } void sctp_stream_free(struct sctp_stream *stream) { + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; + sched->free(stream); for (i = 0; i < stream->outcnt; i++) kfree(stream->out[i].ext); kfree(stream->out); @@ -156,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream) void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) { + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + + sched->unsched_all(stream); + sctp_stream_outq_migrate(stream, new, new->outcnt); sctp_stream_free(stream); stream->out = new->out; @@ -163,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) stream->outcnt = new->outcnt; stream->incnt = new->incnt; + sched->sched_all(stream); + new->out = NULL; new->in = 
NULL; } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c new file mode 100644 index 000000000000..40a9a9de2b98 --- /dev/null +++ b/net/sctp/stream_sched.c @@ -0,0 +1,270 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#include +#include +#include +#include + +/* First Come First Serve (a.k.a. FIFO) + * RFC DRAFT ndata Section 3.1 + */ +static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid, + __u16 value, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + *value = 0; + return 0; +} + +static int sctp_sched_fcfs_init(struct sctp_stream *stream) +{ + return 0; +} + +static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + return 0; +} + +static void sctp_sched_fcfs_free(struct sctp_stream *stream) +{ +} + +static void sctp_sched_fcfs_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ +} + +static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_chunk *ch = NULL; + struct list_head *entry; + + if (list_empty(&q->out_chunk_list)) + goto out; + + if (stream->out_curr) { + ch = list_entry(stream->out_curr->ext->outq.next, + struct sctp_chunk, stream_list); + } else { + entry = q->out_chunk_list.next; + ch = list_entry(entry, struct sctp_chunk, list); + } + + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *chunk) +{ +} + +static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream) +{ +} + +static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream) +{ +} + +static struct sctp_sched_ops sctp_sched_fcfs = { + .set = sctp_sched_fcfs_set, + .get = sctp_sched_fcfs_get, + .init = sctp_sched_fcfs_init, + .init_sid = sctp_sched_fcfs_init_sid, + .free = sctp_sched_fcfs_free, + .enqueue = sctp_sched_fcfs_enqueue, + .dequeue = sctp_sched_fcfs_dequeue, + .dequeue_done = sctp_sched_fcfs_dequeue_done, + .sched_all = sctp_sched_fcfs_sched_all, + .unsched_all = sctp_sched_fcfs_unsched_all, +}; + +/* API to other parts of the stack */ + +struct sctp_sched_ops *sctp_sched_ops[] = { + &sctp_sched_fcfs, +}; + +int sctp_sched_set_sched(struct sctp_association *asoc, + enum sctp_sched_type sched) +{ + struct sctp_sched_ops *n = sctp_sched_ops[sched]; + struct sctp_sched_ops *old = asoc->outqueue.sched; + struct sctp_datamsg *msg = NULL; + struct sctp_chunk *ch; + int i, ret = 0; + + if (old 
== n) + return ret; + + if (sched > SCTP_SS_MAX) + return -EINVAL; + + if (old) { + old->free(&asoc->stream); + + /* Give the next scheduler a clean slate. */ + for (i = 0; i < asoc->stream.outcnt; i++) { + void *p = asoc->stream.out[i].ext; + + if (!p) + continue; + + p += offsetofend(struct sctp_stream_out_ext, outq); + memset(p, 0, sizeof(struct sctp_stream_out_ext) - + offsetofend(struct sctp_stream_out_ext, outq)); + } + } + + asoc->outqueue.sched = n; + n->init(&asoc->stream); + for (i = 0; i < asoc->stream.outcnt; i++) { + if (!asoc->stream.out[i].ext) + continue; + + ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); + if (ret) + goto err; + } + + /* We have to requeue all chunks already queued. */ + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + if (ch->msg == msg) + continue; + msg = ch->msg; + n->enqueue(&asoc->outqueue, msg); + } + + return ret; + +err: + n->free(&asoc->stream); + asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */ + + return ret; +} + +int sctp_sched_get_sched(struct sctp_association *asoc) +{ + int i; + + for (i = 0; i <= SCTP_SS_MAX; i++) + if (asoc->outqueue.sched == sctp_sched_ops[i]) + return i; + + return 0; +} + +int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, + __u16 value, gfp_t gfp) +{ + if (sid >= asoc->stream.outcnt) + return -EINVAL; + + if (!asoc->stream.out[sid].ext) { + int ret; + + ret = sctp_stream_init_ext(&asoc->stream, sid); + if (ret) + return ret; + } + + return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp); +} + +int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, + __u16 *value) +{ + if (sid >= asoc->stream.outcnt) + return -EINVAL; + + if (!asoc->stream.out[sid].ext) + return 0; + + return asoc->outqueue.sched->get(&asoc->stream, sid, value); +} + +void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) +{ + if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) { + struct sctp_stream_out *sout; + __u16 sid; + + /* datamsg is not finish, so save it as current one, + * in case application switch scheduler or a higher + * priority stream comes in. + */ + sid = sctp_chunk_stream_no(ch); + sout = &q->asoc->stream.out[sid]; + q->asoc->stream.out_curr = sout; + return; + } + + q->asoc->stream.out_curr = NULL; + q->sched->dequeue_done(q, ch); +} + +/* Auxiliary functions for the schedulers */ +void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch) +{ + list_del_init(&ch->list); + list_del_init(&ch->stream_list); + q->out_qlen -= ch->skb->len; +} + +int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) +{ + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + + INIT_LIST_HEAD(&stream->out[sid].ext->outq); + return sched->init_sid(stream, sid, gfp); +} + +struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + + asoc = container_of(stream, struct sctp_association, stream); + + return asoc->outqueue.sched; +} From 13aa8770fe42d246c6f3a8eb814b85bccb428011 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:14 -0300 Subject: [PATCH 0518/2177] sctp: add sockopt to get/set stream scheduler As defined per RFC Draft ndata Section 4.3.2, named as SCTP_STREAM_SCHEDULER. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/uapi/linux/sctp.h | 1 + net/sctp/socket.c | 75 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 4487e7625ddb..0050f10087d2 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -122,6 +122,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_RESET_ASSOC 120 #define SCTP_ADD_STREAMS 121 #define SCTP_SOCKOPT_PEELOFF_FLAGS 122 +#define SCTP_STREAM_SCHEDULER 123 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d207734326b0..ae35dbf2810f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -79,6 +79,7 @@ #include #include #include +#include /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); @@ -3914,6 +3915,36 @@ out: return retval; } +static int sctp_setsockopt_scheduler(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_assoc_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + if (params.assoc_value > SCTP_SS_MAX) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + goto out; + + retval = sctp_sched_set_sched(asoc, params.assoc_value); + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4095,6 +4126,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_ADD_STREAMS: retval = sctp_setsockopt_add_streams(sk, optval, optlen); break; + case SCTP_STREAM_SCHEDULER: + retval = sctp_setsockopt_scheduler(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6793,6 +6827,43 @@ out: return retval; } +static int sctp_getsockopt_scheduler(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = sctp_sched_get_sched(asoc); + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -6975,6 +7046,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_enable_strreset(sk, len, optval, optlen); break; + case SCTP_STREAM_SCHEDULER: + retval = sctp_getsockopt_scheduler(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; From 0ccdf3c7fdeda511b10def19505178a9d2d3fccd Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:15 -0300 Subject: [PATCH 0519/2177] sctp: add sockopt to get/set stream scheduler parameters As defined per RFC Draft ndata Section 4.3.3, named as SCTP_STREAM_SCHEDULER_VALUE. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/uapi/linux/sctp.h | 7 ++++ net/sctp/socket.c | 77 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 0050f10087d2..00ac417d2c4f 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -123,6 +123,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_ADD_STREAMS 121 #define SCTP_SOCKOPT_PEELOFF_FLAGS 122 #define SCTP_STREAM_SCHEDULER 123 +#define SCTP_STREAM_SCHEDULER_VALUE 124 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -815,6 +816,12 @@ struct sctp_assoc_value { uint32_t assoc_value; }; +struct sctp_stream_value { + sctp_assoc_t assoc_id; + uint16_t stream_id; + uint16_t stream_value; +}; + /* * 7.2.2 Peer Address Information * diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ae35dbf2810f..88c28421ec15 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3945,6 +3945,34 @@ out: return retval; } +static int sctp_setsockopt_scheduler_value(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_stream_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + goto out; + + retval = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4129,6 +4157,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_STREAM_SCHEDULER: retval = sctp_setsockopt_scheduler(sk, optval, optlen); break; + case SCTP_STREAM_SCHEDULER_VALUE: + retval = sctp_setsockopt_scheduler_value(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6864,6 +6895,48 @@ out: return retval; } +static int sctp_getsockopt_scheduler_value(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_stream_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) { + retval = -EINVAL; + goto out; + } + + retval = sctp_sched_get_value(asoc, params.stream_id, + ¶ms.stream_value); + if (retval) + goto out; + + if (put_user(len, optlen)) { + retval = -EFAULT; + goto out; + } + + if (copy_to_user(optval, ¶ms, len)) { + retval = -EFAULT; + goto out; + } + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -7050,6 +7123,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_scheduler(sk, len, optval, optlen); break; + case SCTP_STREAM_SCHEDULER_VALUE: + retval = sctp_getsockopt_scheduler_value(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; From 637784ade221a3c8a7ecd0f583eddd95d6276b9a Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:16 -0300 Subject: [PATCH 0520/2177] sctp: introduce priority based stream scheduler This patch introduces RFC Draft ndata section 3.4 Priority Based Scheduler (SCTP_SS_PRIO). It works by having a struct sctp_stream_priority for each priority configured. 
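In the code the structure is spelled sctp_stream_priorities; the include/net/sctp/structs.h hunk further below adds it, and for orientation it boils down to roughly the following (comments paraphrased from that hunk):

        struct sctp_stream_priorities {
                /* node on the list of priorities currently scheduled */
                struct list_head prio_sched;
                /* streams scheduled under this priority */
                struct list_head active;
                /* the next stream in line within this priority */
                struct sctp_stream_out_ext *next;
                __u16 prio;
        };
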
This struct is then enlisted on a queue ordered per priority if, and only if, there is a stream with data queued, so that dequeueing is very straightforward: either finish current datamsg or simply dequeue from the highest priority queued, which is the next stream pointed, and that's it. If there are multiple streams assigned with the same priority and with data queued, it will do round robin amongst them while respecting datamsgs boundaries (when not using idata chunks), to be reasonably fair. We intentionally don't maintain a list of priorities nor a list of all streams with the same priority to save memory. The first would mean at least 2 other pointers per priority (which, for 1000 priorities, that can mean 16kB) and the second would also mean 2 other pointers but per stream. As SCTP supports up to 65535 streams on a given asoc, that's 1MB. This impacts when giving a priority to some stream, as we have to find out if the new priority is already being used and if we can free the old one, and also when tearing down. The new fields in struct sctp_stream_out_ext and sctp_stream are added under a union because that memory is to be shared with other schedulers. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 24 +++ include/uapi/linux/sctp.h | 3 +- net/sctp/Makefile | 2 +- net/sctp/stream_sched.c | 3 + net/sctp/stream_sched_prio.c | 347 +++++++++++++++++++++++++++++++++++ 5 files changed, 377 insertions(+), 2 deletions(-) create mode 100644 net/sctp/stream_sched_prio.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3c22a30fd71b..40eb8d66a37c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1328,10 +1328,27 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_priorities { + /* List of priorities scheduled */ + struct list_head prio_sched; + /* List of streams scheduled */ + struct list_head active; + /* The next stream stream in line */ + struct sctp_stream_out_ext *next; + __u16 prio; +}; + struct sctp_stream_out_ext { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; struct list_head outq; /* chunks enqueued by this stream */ + union { + struct { + /* Scheduled streams list */ + struct list_head prio_list; + struct sctp_stream_priorities *prio_head; + }; + }; }; struct sctp_stream_out { @@ -1351,6 +1368,13 @@ struct sctp_stream { __u16 incnt; /* Current stream being sent, if any */ struct sctp_stream_out *out_curr; + union { + /* Fields used by priority scheduler */ + struct { + /* List of priorities scheduled */ + struct list_head prio_list; + }; + }; }; #define SCTP_STREAM_CLOSED 0x00 diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 00ac417d2c4f..850fa8b29d7e 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1099,7 +1099,8 @@ struct sctp_add_streams { /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, - SCTP_SS_MAX = SCTP_SS_FCFS + SCTP_SS_PRIO, + SCTP_SS_MAX = SCTP_SS_PRIO }; #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 0f6e6d1d69fd..647c9cfd4e95 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -12,7 +12,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ - offload.o stream_sched.o + 
offload.o stream_sched.o stream_sched_prio.o sctp_probe-y := probe.o diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 40a9a9de2b98..115ddb765169 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -121,8 +121,11 @@ static struct sctp_sched_ops sctp_sched_fcfs = { /* API to other parts of the stack */ +extern struct sctp_sched_ops sctp_sched_prio; + struct sctp_sched_ops *sctp_sched_ops[] = { &sctp_sched_fcfs, + &sctp_sched_prio, }; int sctp_sched_set_sched(struct sctp_association *asoc, diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c new file mode 100644 index 000000000000..384dbf3c8760 --- /dev/null +++ b/net/sctp/stream_sched_prio.c @@ -0,0 +1,347 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#include +#include +#include +#include + +/* Priority handling + * RFC DRAFT ndata section 3.4 + */ + +static void sctp_sched_prio_unsched_all(struct sctp_stream *stream); + +static struct sctp_stream_priorities *sctp_sched_prio_new_head( + struct sctp_stream *stream, int prio, gfp_t gfp) +{ + struct sctp_stream_priorities *p; + + p = kmalloc(sizeof(*p), gfp); + if (!p) + return NULL; + + INIT_LIST_HEAD(&p->prio_sched); + INIT_LIST_HEAD(&p->active); + p->next = NULL; + p->prio = prio; + + return p; +} + +static struct sctp_stream_priorities *sctp_sched_prio_get_head( + struct sctp_stream *stream, int prio, gfp_t gfp) +{ + struct sctp_stream_priorities *p; + int i; + + /* Look into scheduled priorities first, as they are sorted and + * we can find it fast IF it's scheduled. + */ + list_for_each_entry(p, &stream->prio_list, prio_sched) { + if (p->prio == prio) + return p; + if (p->prio > prio) + break; + } + + /* No luck. So we search on all streams now. */ + for (i = 0; i < stream->outcnt; i++) { + if (!stream->out[i].ext) + continue; + + p = stream->out[i].ext->prio_head; + if (!p) + /* Means all other streams won't be initialized + * as well. + */ + break; + if (p->prio == prio) + return p; + } + + /* If not even there, allocate a new one. 
*/ + return sctp_sched_prio_new_head(stream, prio, gfp); +} + +static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p) +{ + struct list_head *pos; + + pos = p->next->prio_list.next; + if (pos == &p->active) + pos = pos->next; + p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list); +} + +static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute) +{ + bool scheduled = false; + + if (!list_empty(&soute->prio_list)) { + struct sctp_stream_priorities *prio_head = soute->prio_head; + + /* Scheduled */ + scheduled = true; + + if (prio_head->next == soute) + /* Try to move to the next stream */ + sctp_sched_prio_next_stream(prio_head); + + list_del_init(&soute->prio_list); + + /* Also unsched the priority if this was the last stream */ + if (list_empty(&prio_head->active)) { + list_del_init(&prio_head->prio_sched); + /* If there is no stream left, clear next */ + prio_head->next = NULL; + } + } + + return scheduled; +} + +static void sctp_sched_prio_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + struct sctp_stream_priorities *prio, *prio_head; + + prio_head = soute->prio_head; + + /* Nothing to do if already scheduled */ + if (!list_empty(&soute->prio_list)) + return; + + /* Schedule the stream. If there is a next, we schedule the new + * one before it, so it's the last in round robin order. + * If there isn't, we also have to schedule the priority. + */ + if (prio_head->next) { + list_add(&soute->prio_list, prio_head->next->prio_list.prev); + return; + } + + list_add(&soute->prio_list, &prio_head->active); + prio_head->next = soute; + + list_for_each_entry(prio, &stream->prio_list, prio_sched) { + if (prio->prio > prio_head->prio) { + list_add(&prio_head->prio_sched, prio->prio_sched.prev); + return; + } + } + + list_add_tail(&prio_head->prio_sched, &stream->prio_list); +} + +static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid, + __u16 prio, gfp_t gfp) +{ + struct sctp_stream_out *sout = &stream->out[sid]; + struct sctp_stream_out_ext *soute = sout->ext; + struct sctp_stream_priorities *prio_head, *old; + bool reschedule = false; + int i; + + prio_head = sctp_sched_prio_get_head(stream, prio, gfp); + if (!prio_head) + return -ENOMEM; + + reschedule = sctp_sched_prio_unsched(soute); + old = soute->prio_head; + soute->prio_head = prio_head; + if (reschedule) + sctp_sched_prio_sched(stream, soute); + + if (!old) + /* Happens when we set the priority for the first time */ + return 0; + + for (i = 0; i < stream->outcnt; i++) { + soute = stream->out[i].ext; + if (soute && soute->prio_head == old) + /* It's still in use, nothing else to do here. */ + return 0; + } + + /* No hits, we are good to free it. */ + kfree(old); + + return 0; +} + +static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + *value = stream->out[sid].ext->prio_head->prio; + return 0; +} + +static int sctp_sched_prio_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->prio_list); + + return 0; +} + +static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + INIT_LIST_HEAD(&stream->out[sid].ext->prio_list); + return sctp_sched_prio_set(stream, sid, 0, gfp); +} + +static void sctp_sched_prio_free(struct sctp_stream *stream) +{ + struct sctp_stream_priorities *prio, *n; + LIST_HEAD(list); + int i; + + /* As we don't keep a list of priorities, to avoid multiple + * frees we have to do it in 3 steps: + * 1. unsched everyone, so the lists are free to use in 2. + * 2. 
build the list of the priorities + * 3. free the list + */ + sctp_sched_prio_unsched_all(stream); + for (i = 0; i < stream->outcnt; i++) { + if (!stream->out[i].ext) + continue; + prio = stream->out[i].ext->prio_head; + if (prio && list_empty(&prio->prio_sched)) + list_add(&prio->prio_sched, &list); + } + list_for_each_entry_safe(prio, n, &list, prio_sched) { + list_del_init(&prio->prio_sched); + kfree(prio); + } +} + +static void sctp_sched_prio_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_prio_sched(stream, stream->out[sid].ext); +} + +static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_priorities *prio; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch = NULL; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + goto out; + + /* Find which chunk is next. It's easy, it's either the current + * one or the first chunk on the next active stream. + */ + if (stream->out_curr) { + soute = stream->out_curr->ext; + } else { + prio = list_entry(stream->prio_list.next, + struct sctp_stream_priorities, prio_sched); + soute = prio->next; + } + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_prio_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream_priorities *prio; + struct sctp_stream_out_ext *soute; + __u16 sid; + + /* Last chunk on that msg, move to the next stream on + * this priority. + */ + sid = sctp_chunk_stream_no(ch); + soute = q->asoc->stream.out[sid].ext; + prio = soute->prio_head; + + sctp_sched_prio_next_stream(prio); + + if (list_empty(&soute->outq)) + sctp_sched_prio_unsched(soute); +} + +static void sctp_sched_prio_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_stream_out *sout; + struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid; + + sid = sctp_chunk_stream_no(ch); + sout = &stream->out[sid]; + if (sout->ext) + sctp_sched_prio_sched(stream, sout->ext); + } +} + +static void sctp_sched_prio_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_priorities *p, *tmp; + struct sctp_stream_out_ext *soute, *souttmp; + + list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched) + list_for_each_entry_safe(soute, souttmp, &p->active, prio_list) + sctp_sched_prio_unsched(soute); +} + +struct sctp_sched_ops sctp_sched_prio = { + .set = sctp_sched_prio_set, + .get = sctp_sched_prio_get, + .init = sctp_sched_prio_init, + .init_sid = sctp_sched_prio_init_sid, + .free = sctp_sched_prio_free, + .enqueue = sctp_sched_prio_enqueue, + .dequeue = sctp_sched_prio_dequeue, + .dequeue_done = sctp_sched_prio_dequeue_done, + .sched_all = sctp_sched_prio_sched_all, + .unsched_all = sctp_sched_prio_unsched_all, +}; From ac1ed8b82cd60ba8e7d84103ac1414b8c577c485 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:17 -0300 Subject: [PATCH 0521/2177] sctp: introduce round robin stream scheduler This patch introduces RFC Draft ndata section 3.2 Priority Based Scheduler (SCTP_SS_RR). 
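For context, SCTP_SS_RR is the round robin entry added to enum sctp_sched_type; after this patch the include/uapi/linux/sctp.h hunk below leaves the enum looking like this (comments added here for orientation only):

        enum sctp_sched_type {
                SCTP_SS_FCFS,           /* first-come, first-served (default) */
                SCTP_SS_PRIO,           /* priority based, added by the previous patch */
                SCTP_SS_RR,             /* round robin, added here */
                SCTP_SS_MAX = SCTP_SS_RR
        };

The round robin scheduler itself: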
Works by maintaining a list of enqueued streams and tracking the last one used to send data. When the datamsg is done, it switches to the next stream. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 11 ++ include/uapi/linux/sctp.h | 3 +- net/sctp/Makefile | 3 +- net/sctp/stream_sched.c | 2 + net/sctp/stream_sched_rr.c | 201 +++++++++++++++++++++++++++++++++++++ 5 files changed, 218 insertions(+), 2 deletions(-) create mode 100644 net/sctp/stream_sched_rr.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 40eb8d66a37c..16f949eef52f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1348,6 +1348,10 @@ struct sctp_stream_out_ext { struct list_head prio_list; struct sctp_stream_priorities *prio_head; }; + /* Fields used by RR scheduler */ + struct { + struct list_head rr_list; + }; }; }; @@ -1374,6 +1378,13 @@ struct sctp_stream { /* List of priorities scheduled */ struct list_head prio_list; }; + /* Fields used by RR scheduler */ + struct { + /* List of streams scheduled */ + struct list_head rr_list; + /* The next stream stream in line */ + struct sctp_stream_out_ext *rr_next; + }; }; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 850fa8b29d7e..6cd7d416ca40 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1100,7 +1100,8 @@ struct sctp_add_streams { enum sctp_sched_type { SCTP_SS_FCFS, SCTP_SS_PRIO, - SCTP_SS_MAX = SCTP_SS_PRIO + SCTP_SS_RR, + SCTP_SS_MAX = SCTP_SS_RR }; #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 647c9cfd4e95..bf90c5397719 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -12,7 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ - offload.o stream_sched.o stream_sched_prio.o + offload.o stream_sched.o stream_sched_prio.o \ + stream_sched_rr.o sctp_probe-y := probe.o diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 115ddb765169..03513a9fa110 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -122,10 +122,12 @@ static struct sctp_sched_ops sctp_sched_fcfs = { /* API to other parts of the stack */ extern struct sctp_sched_ops sctp_sched_prio; +extern struct sctp_sched_ops sctp_sched_rr; struct sctp_sched_ops *sctp_sched_ops[] = { &sctp_sched_fcfs, &sctp_sched_prio, + &sctp_sched_rr, }; int sctp_sched_set_sched(struct sctp_association *asoc, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c new file mode 100644 index 000000000000..7612a438c5b9 --- /dev/null +++ b/net/sctp/stream_sched_rr.c @@ -0,0 +1,201 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#include +#include +#include +#include + +/* Priority handling + * RFC DRAFT ndata section 3.2 + */ +static void sctp_sched_rr_unsched_all(struct sctp_stream *stream); + +static void sctp_sched_rr_next_stream(struct sctp_stream *stream) +{ + struct list_head *pos; + + pos = stream->rr_next->rr_list.next; + if (pos == &stream->rr_list) + pos = pos->next; + stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list); +} + +static void sctp_sched_rr_unsched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + if (stream->rr_next == soute) + /* Try to move to the next stream */ + sctp_sched_rr_next_stream(stream); + + list_del_init(&soute->rr_list); + + /* If we have no other stream queued, clear next */ + if (list_empty(&stream->rr_list)) + stream->rr_next = NULL; +} + +static void sctp_sched_rr_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + if (!list_empty(&soute->rr_list)) + /* Already scheduled. */ + return; + + /* Schedule the stream */ + list_add_tail(&soute->rr_list, &stream->rr_list); + + if (!stream->rr_next) + stream->rr_next = soute; +} + +static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid, + __u16 prio, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + return 0; +} + +static int sctp_sched_rr_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->rr_list); + stream->rr_next = NULL; + + return 0; +} + +static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + INIT_LIST_HEAD(&stream->out[sid].ext->rr_list); + + return 0; +} + +static void sctp_sched_rr_free(struct sctp_stream *stream) +{ + sctp_sched_rr_unsched_all(stream); +} + +static void sctp_sched_rr_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_rr_sched(stream, stream->out[sid].ext); +} + +static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch = NULL; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + goto out; + + /* Find which chunk is next */ + if (stream->out_curr) + soute = stream->out_curr->ext; + else + soute = stream->rr_next; + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_rr_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream_out_ext *soute; + __u16 sid; + + /* Last chunk on that msg, move to the next stream */ + sid = sctp_chunk_stream_no(ch); + soute = q->asoc->stream.out[sid].ext; + + sctp_sched_rr_next_stream(&q->asoc->stream); + + if (list_empty(&soute->outq)) + sctp_sched_rr_unsched(&q->asoc->stream, soute); +} + +static void sctp_sched_rr_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_stream_out_ext *soute; 
+ struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid; + + sid = sctp_chunk_stream_no(ch); + soute = stream->out[sid].ext; + if (soute) + sctp_sched_rr_sched(stream, soute); + } +} + +static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_out_ext *soute, *tmp; + + list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list) + sctp_sched_rr_unsched(stream, soute); +} + +struct sctp_sched_ops sctp_sched_rr = { + .set = sctp_sched_rr_set, + .get = sctp_sched_rr_get, + .init = sctp_sched_rr_init, + .init_sid = sctp_sched_rr_init_sid, + .free = sctp_sched_rr_free, + .enqueue = sctp_sched_rr_enqueue, + .dequeue = sctp_sched_rr_dequeue, + .dequeue_done = sctp_sched_rr_dequeue_done, + .sched_all = sctp_sched_rr_sched_all, + .unsched_all = sctp_sched_rr_unsched_all, +}; From acd669a8f67ed47f5edd385741486cc7a259a446 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Tue, 3 Oct 2017 11:10:53 +0530 Subject: [PATCH 0522/2177] cxgb4: add new T6 pci device id's Add 0x6085 T6 device id. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 37d90d63e4a3..633e9751a25e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -202,6 +202,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6082), /* Custom T6225-CR SFP28 */ CH_PCI_ID_TABLE_FENTRY(0x6083), /* Custom T62100-CR QSFP28 */ CH_PCI_ID_TABLE_FENTRY(0x6084), /* Custom T64100-CR QSFP28 */ + CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */ CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ From eba0f28473b2c20d4212ff19c118723a5a547e20 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 21:40:19 +0100 Subject: [PATCH 0523/2177] ath9k: make const array reg_hole_list static, reduces object code size Don't populate the read-only array reg_hole_list on the stack, instead make it static. Makes the object code smaller by over 200 bytes: Before: text data bss dec hex filename 57518 15248 0 72766 11c3e debug.o After: text data bss dec hex filename 57218 15344 0 72562 11b72 debug.o Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 01fa30117288..5a0a05abd51a 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -916,7 +916,7 @@ static int open_file_regdump(struct inode *inode, struct file *file) u8 *buf; int i, j = 0; unsigned long num_regs, regdump_len, max_reg_offset; - const struct reg_hole { + static const struct reg_hole { u32 start; u32 end; } reg_hole_list[] = { From 522e89d60b62d16075a7bc96dd5eb2f3eb813635 Mon Sep 17 00:00:00 2001 From: simran singhal Date: Mon, 11 Sep 2017 21:52:37 +0200 Subject: [PATCH 0524/2177] netfilter: ipset: Compress return logic Simplify function returns by merging assignment and return into one command line. 
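Schematically, taking the list_set_memsize() hunk below as the example, the pattern changes from assigning to a temporary and then returning it,

        size_t memsize = sizeof(*map) + n * dsize;
        return memsize;

to returning the expression directly:

        return (sizeof(*map) + n * dsize);
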
Signed-off-by: simran singhal Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_list_set.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 178d4eba013b..2fff6b5dc6f0 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -453,7 +453,6 @@ static size_t list_set_memsize(const struct list_set *map, size_t dsize) { struct set_elem *e; - size_t memsize; u32 n = 0; rcu_read_lock(); @@ -461,9 +460,7 @@ list_set_memsize(const struct list_set *map, size_t dsize) n++; rcu_read_unlock(); - memsize = sizeof(*map) + n * dsize; - - return memsize; + return (sizeof(*map) + n * dsize); } static int From 8851e799ffde0d85fab6716075975200c9b5094c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 11 Sep 2017 21:52:38 +0200 Subject: [PATCH 0525/2177] netfilter: ipset: Fix sparse warnings Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5ab1b99a53c2..24bf55860108 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { - u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; From 63c2af90e502fca6cda2855541d43443666e337f Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Mon, 11 Sep 2017 21:52:39 +0200 Subject: [PATCH 0526/2177] netfilter: ipset: deduplicate prefixlen maps The prefixlen maps used here are identical, and have been since introduction. It seems to make sense to use a single large map, that the preprocessor will fill appropriately. Signed-off-by: Aaron Conole Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/pfxlen.c | 395 ++++++++++++----------------------- 1 file changed, 137 insertions(+), 258 deletions(-) diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 1c8a42c1056c..d5be9c25fad6 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -3,6 +3,141 @@ /* Prefixlen maps for fast conversions, by Jan Engelhardt. 
*/ +#ifdef E +#undef E +#endif + +#define PREFIXES_MAP \ + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFE, 
0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \ + 
E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + #define E(a, b, c, d) \ {.ip6 = { \ htonl(a), htonl(b), \ @@ -13,135 +148,7 @@ * just use prefixlen_netmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_netmask_map[] = { - E(0x00000000, 0x00000000, 0x00000000, 0x00000000), - E(0x80000000, 0x00000000, 0x00000000, 0x00000000), - E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFC, 
0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + PREFIXES_MAP }; 
EXPORT_SYMBOL_GPL(ip_set_netmask_map); @@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map); * just use prefixlen_hostmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_hostmask_map[] = { - E(0x00000000, 0x00000000, 0x00000000, 0x00000000), - E(0x80000000, 0x00000000, 0x00000000, 0x00000000), - E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), - 
E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_hostmask_map); 
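For reference, every E() row removed above follows one rule: entry i of the table carries i leading one bits spread across four 32-bit words, indexed by prefix length 0..128. The PREFIXES_MAP macro introduced by this patch is assumed to expand to those same 129 pre-computed entries; its definition is not visible in the quoted hunks, so the following is only an illustrative sketch of the rule, not the patch's code.

/* Illustrative only: recomputes what one E() row of the table encodes. */
#include <stdio.h>
#include <stdint.h>

static void prefix_to_mask(unsigned int prefix, uint32_t words[4])
{
	int i;

	for (i = 0; i < 4; i++) {
		if (prefix >= 32) {
			words[i] = 0xFFFFFFFFu;	/* word fully covered */
			prefix -= 32;
		} else if (prefix) {
			/* partial word: prefix leading one bits */
			words[i] = 0xFFFFFFFFu << (32 - prefix);
			prefix = 0;
		} else {
			words[i] = 0;		/* host part: all zero */
		}
	}
}

int main(void)
{
	uint32_t w[4];

	/* /33 reproduces the E(0xFFFFFFFF, 0x80000000, 0x00000000,
	 * 0x00000000) row seen in the removed table above.
	 */
	prefix_to_mask(33, w);
	printf("E(0x%08X, 0x%08X, 0x%08X, 0x%08X)\n",
	       (unsigned int)w[0], (unsigned int)w[1],
	       (unsigned int)w[2], (unsigned int)w[3]);
	return 0;
}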
From 4d86d38186271438ef002c5ae6e04836f01bf8bf Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 4 Oct 2017 09:54:27 +0200 Subject: [PATCH 0527/2177] ravb: RX checksum offload Add support for RX checksum offload. This is enabled by default and may be disabled and re-enabled using ethtool: # ethtool -K eth0 rx off # ethtool -K eth0 rx on The RAVB provides a simple checksumming scheme which appears to be completely compatible with CHECKSUM_COMPLETE: sum of all packet data after the L2 header is appended to packet data; this may be trivially read by the driver and used to update the skb accordingly. In terms of performance throughput is close to gigabit line-rate both with and without RX checksum offload enabled. Perf output, however, appears to indicate that significantly less time is spent in do_csum(). This is as expected. Test results with RX checksum offload enabled: # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo enable_enobufs failed: getprotobyname Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 937.54 Summary of output of perf report: 18.28% ksoftirqd/0 [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 10.34% ksoftirqd/0 [kernel.kallsyms] [k] __pi_memcpy 9.83% ksoftirqd/0 [kernel.kallsyms] [k] ravb_poll 7.89% ksoftirqd/0 [kernel.kallsyms] [k] skb_put 4.01% ksoftirqd/0 [kernel.kallsyms] [k] dev_gro_receive 3.37% netperf [kernel.kallsyms] [k] __arch_copy_to_user 3.17% swapper [kernel.kallsyms] [k] arch_cpu_idle 2.55% swapper [kernel.kallsyms] [k] tick_nohz_idle_enter 2.04% ksoftirqd/0 [kernel.kallsyms] [k] __pi___inval_dcache_area 2.03% swapper [kernel.kallsyms] [k] _raw_spin_unlock_irq 1.96% ksoftirqd/0 [kernel.kallsyms] [k] __netdev_alloc_skb 1.59% ksoftirqd/0 [kernel.kallsyms] [k] __slab_alloc.isra.83 Test results without RX checksum offload enabled: # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo enable_enobufs failed: getprotobyname Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 940.20 Summary of output of perf report: 17.10% ksoftirqd/0 [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 10.99% ksoftirqd/0 [kernel.kallsyms] [k] __pi_memcpy 8.87% ksoftirqd/0 [kernel.kallsyms] [k] ravb_poll 8.16% ksoftirqd/0 [kernel.kallsyms] [k] skb_put 7.42% ksoftirqd/0 [kernel.kallsyms] [k] do_csum 3.91% ksoftirqd/0 [kernel.kallsyms] [k] dev_gro_receive 2.31% swapper [kernel.kallsyms] [k] arch_cpu_idle 2.16% ksoftirqd/0 [kernel.kallsyms] [k] __pi___inval_dcache_area 2.14% ksoftirqd/0 [kernel.kallsyms] [k] __netdev_alloc_skb 1.93% netperf [kernel.kallsyms] [k] __arch_copy_to_user 1.79% swapper [kernel.kallsyms] [k] tick_nohz_idle_enter 1.63% ksoftirqd/0 [kernel.kallsyms] [k] __slab_alloc.isra.83 Above results collected on an R-Car Gen 3 Salvator-X/r8a7796 ES1.0. Also tested on a R-Car Gen 3 Salvator-X/r8a7795 ES1.0. By inspection this also appears to be compatible with the ravb found on R-Car Gen 2 SoCs, however, this patch is currently untested on such hardware. Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/ravb_main.c | 55 +++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index fdf30bfa403b..a8822a756e08 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -403,8 +403,9 @@ static void ravb_emac_init(struct net_device *ndev) /* Receive frame limit set register */ ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR); - /* PAUSE prohibition */ + /* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */ ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) | + (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) | ECMR_TE | ECMR_RE, ECMR); ravb_set_rate(ndev); @@ -520,6 +521,19 @@ static void ravb_get_tx_tstamp(struct net_device *ndev) } } +static void ravb_rx_csum(struct sk_buff *skb) +{ + u8 *hw_csum; + + /* The hardware checksum is 2 bytes appended to packet data */ + if (unlikely(skb->len < 2)) + return; + hw_csum = skb_tail_pointer(skb) - 2; + skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum)); + skb->ip_summed = CHECKSUM_COMPLETE; + skb_trim(skb, skb->len - 2); +} + /* Packet receive function for Ethernet AVB */ static bool ravb_rx(struct net_device *ndev, int *quota, int q) { @@ -587,8 +601,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) ts.tv_nsec = le32_to_cpu(desc->ts_n); shhwtstamps->hwtstamp = timespec64_to_ktime(ts); } + skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); + if (ndev->features & NETIF_F_RXCSUM) + ravb_rx_csum(skb); napi_gro_receive(&priv->napi[q], skb); stats->rx_packets++; stats->rx_bytes += pkt_len; @@ -1842,6 +1859,38 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) return phy_mii_ioctl(phydev, req, cmd); } +static void ravb_set_rx_csum(struct net_device *ndev, bool enable) +{ + struct ravb_private *priv = netdev_priv(ndev); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + /* Disable TX and RX */ + ravb_rcv_snd_disable(ndev); + + /* Modify RX Checksum setting */ + ravb_modify(ndev, ECMR, ECMR_RCSC, enable ? 
ECMR_RCSC : 0); + + /* Enable TX and RX */ + ravb_rcv_snd_enable(ndev); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int ravb_set_features(struct net_device *ndev, + netdev_features_t features) +{ + netdev_features_t changed = ndev->features ^ features; + + if (changed & NETIF_F_RXCSUM) + ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM); + + ndev->features = features; + + return 0; +} + static const struct net_device_ops ravb_netdev_ops = { .ndo_open = ravb_open, .ndo_stop = ravb_close, @@ -1853,6 +1902,7 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_do_ioctl = ravb_do_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, + .ndo_set_features = ravb_set_features, }; /* MDIO bus init function */ @@ -2004,6 +2054,9 @@ static int ravb_probe(struct platform_device *pdev) if (!ndev) return -ENOMEM; + ndev->features = NETIF_F_RXCSUM; + ndev->hw_features = NETIF_F_RXCSUM; + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); From 20e883204f0268b423799781e5efed786f8a11ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 13:56:50 +0200 Subject: [PATCH 0528/2177] net: core: fix kerneldoc comment net/core/dev.c:1306: warning: No description found for parameter 'name' net/core/dev.c:1306: warning: Excess function parameter 'alias' description in 'dev_get_alias' Fixes: 6c5570016b97 ("net: core: decouple ifalias get/set from rtnl lock") Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1770097cfd86..454f05441546 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1295,7 +1295,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) /** * dev_get_alias - get ifalias of a device * @dev: device - * @alias: buffer to store name of ifalias + * @name: buffer to store name of ifalias * @len: size of buffer * * get ifalias for a device. Caller must make sure dev cannot go From ebf6b13142f947be576b40edce214788dfe1d3e3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 4 Oct 2017 14:20:37 +0100 Subject: [PATCH 0529/2177] cxgb4vf: make a couple of functions static The functions t4vf_link_down_rc_str and t4vf_handle_get_port_info are local to the source and do not need to be in global scope, so make them static. Cleans up sparse warnings: symbol 't4vf_link_down_rc_str' was not declared. Should it be static? symbol 't4vf_handle_get_port_info' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index a8d94963b4d0..67aec59a14e6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -1812,7 +1812,7 @@ int t4vf_eth_eq_free(struct adapter *adapter, unsigned int eqid) * * Returns a string representation of the Link Down Reason Code. */ -const char *t4vf_link_down_rc_str(unsigned char link_down_rc) +static const char *t4vf_link_down_rc_str(unsigned char link_down_rc) { static const char * const reason[] = { "Link Down", @@ -1838,8 +1838,8 @@ const char *t4vf_link_down_rc_str(unsigned char link_down_rc) * * Processes a GET_PORT_INFO FW reply message. 
*/ -void t4vf_handle_get_port_info(struct port_info *pi, - const struct fw_port_cmd *cmd) +static void t4vf_handle_get_port_info(struct port_info *pi, + const struct fw_port_cmd *cmd) { int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16)); struct adapter *adapter = pi->adapter; From e774d96b7d2c3489bfb5bbdc2b65ed41cd68d3d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 15:55:29 +0200 Subject: [PATCH 0530/2177] rtnetlink: remove slave_validate callback no users in the tree. Signed-off-by: Florian Westphal Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 3 --- net/core/rtnetlink.c | 6 ------ 2 files changed, 9 deletions(-) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 21837ca68ecc..6520993ff449 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -93,9 +93,6 @@ struct rtnl_link_ops { int slave_maxtype; const struct nla_policy *slave_policy; - int (*slave_validate)(struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack); int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3961f87cdc76..b63c5759641f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2631,12 +2631,6 @@ replay: return err; slave_data = slave_attr; } - if (m_ops->slave_validate) { - err = m_ops->slave_validate(tb, slave_data, - extack); - if (err < 0) - return err; - } } if (dev) { From 5c45121dc39026ab2139910e57cf933fd57d30f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 15:58:49 +0200 Subject: [PATCH 0531/2177] rtnetlink: remove __rtnl_af_unregister switch the only caller to rtnl_af_unregister. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 2 -- net/core/rtnetlink.c | 14 +------------- net/ipv6/addrconf.c | 4 ++-- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 6520993ff449..e3ca8e2e3103 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -151,8 +151,6 @@ struct rtnl_af_ops { size_t (*get_stats_af_size)(const struct net_device *dev); }; -void __rtnl_af_unregister(struct rtnl_af_ops *ops); - void rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b63c5759641f..3fb1ca33cba4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -475,18 +475,6 @@ void rtnl_af_register(struct rtnl_af_ops *ops) } EXPORT_SYMBOL_GPL(rtnl_af_register); -/** - * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. - * @ops: struct rtnl_af_ops * to unregister - * - * The caller must hold the rtnl_mutex. - */ -void __rtnl_af_unregister(struct rtnl_af_ops *ops) -{ - list_del(&ops->list); -} -EXPORT_SYMBOL_GPL(__rtnl_af_unregister); - /** * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. 
* @ops: struct rtnl_af_ops * to unregister @@ -494,7 +482,7 @@ EXPORT_SYMBOL_GPL(__rtnl_af_unregister); void rtnl_af_unregister(struct rtnl_af_ops *ops) { rtnl_lock(); - __rtnl_af_unregister(ops); + list_del(&ops->list); rtnl_unlock(); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f553f72d0bee..837418ff2d4b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6618,9 +6618,9 @@ void addrconf_cleanup(void) unregister_pernet_subsys(&addrconf_ops); ipv6_addr_label_cleanup(); - rtnl_lock(); + rtnl_af_unregister(&inet6_ops); - __rtnl_af_unregister(&inet6_ops); + rtnl_lock(); /* clean dev list */ for_each_netdev(&init_net, dev) { From e9b871ee098dfb91780e13bfc6e91e82c7b4ad73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 16:22:59 +0200 Subject: [PATCH 0532/2177] selftests: rtnetlink: try concurrent change of ifalias to make sure this is serialized correctly. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 62c87da92770..e8c86c416ed0 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -278,6 +278,12 @@ kci_test_ifalias() ip link show "$devdummy" | grep -q "alias $namewant" check_fail $? + for i in $(seq 1 100); do + uuidgen > "$syspathname" & + done + + wait + # re-add the alias -- kernel should free mem when dummy dev is removed ip link set dev "$devdummy" alias "$namewant" check_err $? From c818fa9e288be5be7e360c33cf4f5e30f9fa206e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Oct 2017 10:48:35 -0700 Subject: [PATCH 0533/2177] net: cache skb_shinfo() in skb_try_coalesce() Compiler does not really know that skb_shinfo(to|from) are constants in skb_try_coalesce(), lets cache their values to shrink code. We might even take care of skb_zcopy() calls later. $ size net/core/skbuff.o.before net/core/skbuff.o text data bss dec hex filename 40727 1298 0 42025 a429 net/core/skbuff.o.before 40631 1298 0 41929 a3c9 net/core/skbuff.o Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d98c2e3ce2bf..822a90e56aea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4767,6 +4767,7 @@ EXPORT_SYMBOL(kfree_skb_partial); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) { + struct skb_shared_info *to_shinfo, *from_shinfo; int i, delta, len = from->len; *fragstolen = false; @@ -4781,7 +4782,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return true; } - if (skb_has_frag_list(to) || skb_has_frag_list(from)) + to_shinfo = skb_shinfo(to); + from_shinfo = skb_shinfo(from); + if (to_shinfo->frag_list || from_shinfo->frag_list) return false; if (skb_zcopy(to) || skb_zcopy(from)) return false; @@ -4790,8 +4793,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, struct page *page; unsigned int offset; - if (skb_shinfo(to)->nr_frags + - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + if (to_shinfo->nr_frags + + from_shinfo->nr_frags >= MAX_SKB_FRAGS) return false; if (skb_head_is_locked(from)) @@ -4802,12 +4805,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); - skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, + skb_fill_page_desc(to, to_shinfo->nr_frags, page, offset, skb_headlen(from)); *fragstolen = true; } else { - if (skb_shinfo(to)->nr_frags + - skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) + if (to_shinfo->nr_frags + + from_shinfo->nr_frags > MAX_SKB_FRAGS) return false; delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); @@ -4815,19 +4818,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, WARN_ON_ONCE(delta < len); - memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, - skb_shinfo(from)->frags, - skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); - skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; + memcpy(to_shinfo->frags + to_shinfo->nr_frags, + from_shinfo->frags, + from_shinfo->nr_frags * sizeof(skb_frag_t)); + to_shinfo->nr_frags += from_shinfo->nr_frags; if (!skb_cloned(from)) - skb_shinfo(from)->nr_frags = 0; + from_shinfo->nr_frags = 0; /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) - skb_frag_ref(from, i); + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i]); to->truesize += delta; to->len += len; From 324bda9e6c5add86ba2e1066476481c48132aca0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:21 -0700 Subject: [PATCH 0534/2177] bpf: multi program support for cgroup+bpf introduce BPF_F_ALLOW_MULTI flag that can be used to attach multiple bpf programs to a cgroup. The difference between three possible flags for BPF_PROG_ATTACH command: - NONE(default): No further bpf programs allowed in the subtree. - BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, the program in this cgroup yields to sub-cgroup program. - BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup. NONE and BPF_F_ALLOW_OVERRIDE existed before. This patch doesn't change their behavior. It only clarifies the semantics in relation to new flag. Only one program is allowed to be attached to a cgroup with NONE or BPF_F_ALLOW_OVERRIDE flag. 
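As a minimal userspace sketch of the BPF_F_ALLOW_MULTI behaviour described in the next paragraph (not part of this patch): prog_fd and cgroup_fd are assumed to be valid file descriptors obtained elsewhere, and BPF_CGROUP_INET_EGRESS is just one example attach type.

#include <linux/bpf.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Attach one program to a cgroup, allowing further programs alongside it. */
static int attach_multi(int cgroup_fd, int prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd	   = cgroup_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type   = BPF_CGROUP_INET_EGRESS;
	attr.attach_flags  = BPF_F_ALLOW_MULTI;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}

/* Calling attach_multi(cgroup_fd, prog_fd1) and then
 * attach_multi(cgroup_fd, prog_fd2) leaves both programs attached; per the
 * semantics below they run in attach order for matching events in that
 * cgroup's subtree.
 */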
Multiple programs are allowed to be attached to a cgroup with BPF_F_ALLOW_MULTI flag. They are executed in FIFO order (those that were attached first, run first) The programs of sub-cgroup are executed first, then programs of this cgroup and then programs of parent cgroup. All eligible programs are executed regardless of return code from earlier programs. To allow efficient execution of multiple programs attached to a cgroup and to avoid penalizing cgroups without any programs attached introduce 'struct bpf_prog_array' which is RCU protected array of pointers to bpf programs. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 46 ++-- include/linux/bpf.h | 32 +++ include/linux/filter.h | 2 +- include/uapi/linux/bpf.h | 42 +++- kernel/bpf/cgroup.c | 465 ++++++++++++++++++++++++++----------- kernel/bpf/core.c | 31 +++ kernel/bpf/syscall.c | 37 ++- kernel/cgroup/cgroup.c | 28 ++- 8 files changed, 515 insertions(+), 168 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d41d40ac3efd..102e56fbb6de 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -14,27 +14,42 @@ struct bpf_sock_ops_kern; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +struct bpf_prog_list { + struct list_head node; + struct bpf_prog *prog; +}; + +struct bpf_prog_array; + struct cgroup_bpf { - /* - * Store two sets of bpf_prog pointers, one for programs that are - * pinned directly to this cgroup, and one for those that are effective - * when this cgroup is accessed. + /* array of effective progs in this cgroup */ + struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; + + /* attached progs to this cgroup and attach flags + * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will + * have either zero or one element + * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS */ - struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; - struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; - bool disallow_override[MAX_BPF_ATTACH_TYPE]; + struct list_head progs[MAX_BPF_ATTACH_TYPE]; + u32 flags[MAX_BPF_ATTACH_TYPE]; + + /* temp storage for effective prog array used by prog_attach/detach */ + struct bpf_prog_array __rcu *inactive; }; void cgroup_bpf_put(struct cgroup *cgrp); -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); +int cgroup_bpf_inherit(struct cgroup *cgrp); -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool overridable); +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); -/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable); +/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, @@ -96,8 +111,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct 
cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} -static inline void cgroup_bpf_inherit(struct cgroup *cgrp, - struct cgroup *parent) {} +static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 252f4bc9eb25..a6964b75f070 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -241,6 +241,38 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +/* an array of programs to be executed under rcu_lock. + * + * Typical usage: + * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); + * + * the structure returned by bpf_prog_array_alloc() should be populated + * with program pointers and the last pointer must be NULL. + * The user has to keep refcnt on the program and make sure the program + * is removed from the array before bpf_prog_put(). + * The 'struct bpf_prog_array *' should only be replaced with xchg() + * since other cpus are walking the array of pointers in parallel. + */ +struct bpf_prog_array { + struct rcu_head rcu; + struct bpf_prog *progs[0]; +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); + +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + ({ \ + struct bpf_prog **_prog; \ + u32 _ret = 1; \ + rcu_read_lock(); \ + _prog = rcu_dereference(array)->progs; \ + for (; *_prog; _prog++) \ + _ret &= func(*_prog, ctx); \ + rcu_read_unlock(); \ + _ret; \ + }) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/filter.h b/include/linux/filter.h index 911d454af107..2d2db394b0ca 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -481,7 +481,7 @@ struct sk_filter { struct bpf_prog *prog; }; -#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi) #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6d2137b4cf38..762f74bc6c47 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -143,11 +143,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. 
They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 546113430049..6b7500bbdb53 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -27,129 +27,361 @@ void cgroup_bpf_put(struct cgroup *cgrp) { unsigned int type; - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) { - struct bpf_prog *prog = cgrp->bpf.prog[type]; + for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { + struct list_head *progs = &cgrp->bpf.progs[type]; + struct bpf_prog_list *pl, *tmp; - if (prog) { - bpf_prog_put(prog); + list_for_each_entry_safe(pl, tmp, progs, node) { + list_del(&pl->node); + bpf_prog_put(pl->prog); + kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } + bpf_prog_array_free(cgrp->bpf.effective[type]); } } +/* count number of elements in the list. + * it's slow but the list cannot be long + */ +static u32 prog_list_length(struct list_head *head) +{ + struct bpf_prog_list *pl; + u32 cnt = 0; + + list_for_each_entry(pl, head, node) { + if (!pl->prog) + continue; + cnt++; + } + return cnt; +} + +/* if parent has non-overridable prog attached, + * disallow attaching new programs to the descendent cgroup. + * if parent has overridable or multi-prog, allow attaching + */ +static bool hierarchy_allows_attach(struct cgroup *cgrp, + enum bpf_attach_type type, + u32 new_flags) +{ + struct cgroup *p; + + p = cgroup_parent(cgrp); + if (!p) + return true; + do { + u32 flags = p->bpf.flags[type]; + u32 cnt; + + if (flags & BPF_F_ALLOW_MULTI) + return true; + cnt = prog_list_length(&p->bpf.progs[type]); + WARN_ON_ONCE(cnt > 1); + if (cnt == 1) + return !!(flags & BPF_F_ALLOW_OVERRIDE); + p = cgroup_parent(p); + } while (p); + return true; +} + +/* compute a chain of effective programs for a given cgroup: + * start from the list of programs in this cgroup and add + * all parent programs. 
+ * Note that parent's F_ALLOW_OVERRIDE-type program is yielding + * to programs in this cgroup + */ +static int compute_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_prog_array __rcu **array) +{ + struct bpf_prog_array __rcu *progs; + struct bpf_prog_list *pl; + struct cgroup *p = cgrp; + int cnt = 0; + + /* count number of effective programs by walking parents */ + do { + if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + cnt += prog_list_length(&p->bpf.progs[type]); + p = cgroup_parent(p); + } while (p); + + progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); + if (!progs) + return -ENOMEM; + + /* populate the array with effective progs */ + cnt = 0; + p = cgrp; + do { + if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + list_for_each_entry(pl, + &p->bpf.progs[type], node) { + if (!pl->prog) + continue; + rcu_dereference_protected(progs, 1)-> + progs[cnt++] = pl->prog; + } + p = cgroup_parent(p); + } while (p); + + *array = progs; + return 0; +} + +static void activate_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_prog_array __rcu *array) +{ + struct bpf_prog_array __rcu *old_array; + + old_array = xchg(&cgrp->bpf.effective[type], array); + /* free prog array after grace period, since __cgroup_bpf_run_*() + * might be still walking the array + */ + bpf_prog_array_free(old_array); +} + /** * cgroup_bpf_inherit() - inherit effective programs from parent * @cgrp: the cgroup to modify - * @parent: the parent to inherit from */ -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) +int cgroup_bpf_inherit(struct cgroup *cgrp) { - unsigned int type; +/* has to use marco instead of const int, since compiler thinks + * that array below is variable length + */ +#define NR ARRAY_SIZE(cgrp->bpf.effective) + struct bpf_prog_array __rcu *arrays[NR] = {}; + int i; - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) { - struct bpf_prog *e; + for (i = 0; i < NR; i++) + INIT_LIST_HEAD(&cgrp->bpf.progs[i]); - e = rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)); - rcu_assign_pointer(cgrp->bpf.effective[type], e); - cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; - } + for (i = 0; i < NR; i++) + if (compute_effective_progs(cgrp, i, &arrays[i])) + goto cleanup; + + for (i = 0; i < NR; i++) + activate_effective_progs(cgrp, i, arrays[i]); + + return 0; +cleanup: + for (i = 0; i < NR; i++) + bpf_prog_array_free(arrays[i]); + return -ENOMEM; } +#define BPF_CGROUP_MAX_PROGS 64 + /** - * __cgroup_bpf_update() - Update the pinned program of a cgroup, and + * __cgroup_bpf_attach() - Attach the program to a cgroup, and * propagate the change to descendants * @cgrp: The cgroup which descendants to traverse - * @parent: The parent of @cgrp, or %NULL if @cgrp is the root - * @prog: A new program to pin - * @type: Type of pinning operation (ingress/egress) - * - * Each cgroup has a set of two pointers for bpf programs; one for eBPF - * programs it owns, and which is effective for execution. - * - * If @prog is not %NULL, this function attaches a new program to the cgroup - * and releases the one that is currently attached, if any. @prog is then made - * the effective program of type @type in that cgroup. - * - * If @prog is %NULL, the currently attached program of type @type is released, - * and the effective program of the parent cgroup (if any) is inherited to - * @cgrp. 
- * - * Then, the descendants of @cgrp are walked and the effective program for - * each of them is set to the effective program of @cgrp unless the - * descendant has its own program attached, in which case the subbranch is - * skipped. This ensures that delegated subcgroups with own programs are left - * untouched. + * @prog: A program to attach + * @type: Type of attach operation * * Must be called with cgroup_mutex held. */ -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool new_overridable) +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) { - struct bpf_prog *old_prog, *effective = NULL; - struct cgroup_subsys_state *pos; - bool overridable = true; + struct list_head *progs = &cgrp->bpf.progs[type]; + struct bpf_prog *old_prog = NULL; + struct cgroup_subsys_state *css; + struct bpf_prog_list *pl; + bool pl_was_allocated; + u32 old_flags; + int err; - if (parent) { - overridable = !parent->bpf.disallow_override[type]; - effective = rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)); - } + if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) + /* invalid combination */ + return -EINVAL; - if (prog && effective && !overridable) - /* if parent has non-overridable prog attached, disallow - * attaching new programs to descendent cgroup + if (!hierarchy_allows_attach(cgrp, type, flags)) + return -EPERM; + + if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) + /* Disallow attaching non-overridable on top + * of existing overridable in this cgroup. + * Disallow attaching multi-prog if overridable or none */ return -EPERM; - if (prog && effective && overridable != new_overridable) - /* if parent has overridable prog attached, only - * allow overridable programs in descendent cgroup - */ - return -EPERM; + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) + return -E2BIG; - old_prog = cgrp->bpf.prog[type]; + if (flags & BPF_F_ALLOW_MULTI) { + list_for_each_entry(pl, progs, node) + if (pl->prog == prog) + /* disallow attaching the same prog twice */ + return -EINVAL; - if (prog) { - overridable = new_overridable; - effective = prog; - if (old_prog && - cgrp->bpf.disallow_override[type] == new_overridable) - /* disallow attaching non-overridable on top - * of existing overridable in this cgroup - * and vice versa - */ - return -EPERM; - } - - if (!prog && !old_prog) - /* report error when trying to detach and nothing is attached */ - return -ENOENT; - - cgrp->bpf.prog[type] = prog; - - css_for_each_descendant_pre(pos, &cgrp->self) { - struct cgroup *desc = container_of(pos, struct cgroup, self); - - /* skip the subtree if the descendant has its own program */ - if (desc->bpf.prog[type] && desc != cgrp) { - pos = css_rightmost_descendant(pos); + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return -ENOMEM; + pl_was_allocated = true; + pl->prog = prog; + list_add_tail(&pl->node, progs); + } else { + if (list_empty(progs)) { + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return -ENOMEM; + pl_was_allocated = true; + list_add_tail(&pl->node, progs); } else { - rcu_assign_pointer(desc->bpf.effective[type], - effective); - desc->bpf.disallow_override[type] = !overridable; + pl = list_first_entry(progs, typeof(*pl), node); + old_prog = pl->prog; + pl_was_allocated = false; } + pl->prog = prog; } - if (prog) - static_branch_inc(&cgroup_bpf_enabled_key); + old_flags = cgrp->bpf.flags[type]; + 
cgrp->bpf.flags[type] = flags; + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + static_branch_inc(&cgroup_bpf_enabled_key); if (old_prog) { bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* and cleanup the prog list */ + pl->prog = old_prog; + if (pl_was_allocated) { + list_del(&pl->node); + kfree(pl); + } + return err; +} + +/** + * __cgroup_bpf_detach() - Detach the program from a cgroup, and + * propagate the change to descendants + * @cgrp: The cgroup which descendants to traverse + * @prog: A program to detach or NULL + * @type: Type of detach operation + * + * Must be called with cgroup_mutex held. + */ +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 unused_flags) +{ + struct list_head *progs = &cgrp->bpf.progs[type]; + u32 flags = cgrp->bpf.flags[type]; + struct bpf_prog *old_prog = NULL; + struct cgroup_subsys_state *css; + struct bpf_prog_list *pl; + int err; + + if (flags & BPF_F_ALLOW_MULTI) { + if (!prog) + /* to detach MULTI prog the user has to specify valid FD + * of the program to be detached + */ + return -EINVAL; + } else { + if (list_empty(progs)) + /* report error when trying to detach and nothing is attached */ + return -ENOENT; + } + + if (flags & BPF_F_ALLOW_MULTI) { + /* find the prog and detach it */ + list_for_each_entry(pl, progs, node) { + if (pl->prog != prog) + continue; + old_prog = prog; + /* mark it deleted, so it's ignored while + * recomputing effective + */ + pl->prog = NULL; + break; + } + if (!old_prog) + return -ENOENT; + } else { + /* to maintain backward compatibility NONE and OVERRIDE cgroups + * allow detaching with invalid FD (prog==NULL) + */ + pl = list_first_entry(progs, typeof(*pl), node); + old_prog = pl->prog; + pl->prog = NULL; + } + + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* now can actually delete it from this cgroup list */ + list_del(&pl->node); + kfree(pl); + if (list_empty(progs)) + /* last program was detached, reset flags to zero */ + cgrp->bpf.flags[type] = 0; + + bpf_prog_put(old_prog); + static_branch_dec(&cgroup_bpf_enabled_key); + return 0; + +cleanup: + /* oom while computing effective. 
Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* and restore back old_prog */ + pl->prog = old_prog; + return err; } /** @@ -171,36 +403,26 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum bpf_attach_type type) { - struct bpf_prog *prog; + unsigned int offset = skb->data - skb_network_header(skb); + struct sock *save_sk; struct cgroup *cgrp; - int ret = 0; + int ret; if (!sk || !sk_fullsock(sk)) return 0; - if (sk->sk_family != AF_INET && - sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) return 0; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) { - unsigned int offset = skb->data - skb_network_header(skb); - struct sock *save_sk = skb->sk; - - skb->sk = sk; - __skb_push(skb, offset); - ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM; - __skb_pull(skb, offset); - skb->sk = save_sk; - } - - rcu_read_unlock(); - - return ret; + save_sk = skb->sk; + skb->sk = sk; + __skb_push(skb, offset); + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, + bpf_prog_run_save_cb); + __skb_pull(skb, offset); + skb->sk = save_sk; + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); @@ -221,19 +443,10 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, enum bpf_attach_type type) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_prog *prog; - int ret = 0; + int ret; - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) - ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM; - - rcu_read_unlock(); - - return ret; + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); @@ -258,18 +471,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, enum bpf_attach_type type) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_prog *prog; - int ret = 0; + int ret; - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) - ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM; - - rcu_read_unlock(); - - return ret; + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, + BPF_PROG_RUN); + return ret == 1 ? 
0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 917cc04a0a94..6b49e1991ae7 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1381,6 +1381,37 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +/* to avoid allocating empty bpf_prog_array for cgroups that + * don't have bpf program attached use one global 'empty_prog_array' + * It will not be modified the caller of bpf_prog_array_alloc() + * (since caller requested prog_cnt == 0) + * that pointer should be 'freed' by bpf_prog_array_free() + */ +static struct { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +} empty_prog_array = { + .null_prog = NULL, +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) +{ + if (prog_cnt) + return kzalloc(sizeof(struct bpf_prog_array) + + sizeof(struct bpf_prog *) * (prog_cnt + 1), + flags); + + return &empty_prog_array.hdr; +} + +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) +{ + if (!progs || + progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr) + return; + kfree_rcu(progs, rcu); +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b927da66f653..51bee695d32c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1168,6 +1168,9 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach) return 0; } +#define BPF_F_ATTACH_MASK \ + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) + static int bpf_prog_attach(const union bpf_attr *attr) { enum bpf_prog_type ptype; @@ -1181,7 +1184,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_ATTACH)) return -EINVAL; - if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + if (attr->attach_flags & ~BPF_F_ATTACH_MASK) return -EINVAL; switch (attr->attach_type) { @@ -1212,8 +1215,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) return PTR_ERR(cgrp); } - ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, - attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, + attr->attach_flags); if (ret) bpf_prog_put(prog); cgroup_put(cgrp); @@ -1225,6 +1228,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { + enum bpf_prog_type ptype; + struct bpf_prog *prog; struct cgroup *cgrp; int ret; @@ -1237,23 +1242,33 @@ static int bpf_prog_detach(const union bpf_attr *attr) switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: + ptype = BPF_PROG_TYPE_CGROUP_SKB; + break; case BPF_CGROUP_INET_SOCK_CREATE: + ptype = BPF_PROG_TYPE_CGROUP_SOCK; + break; case BPF_CGROUP_SOCK_OPS: - cgrp = cgroup_get_from_fd(attr->target_fd); - if (IS_ERR(cgrp)) - return PTR_ERR(cgrp); - - ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); - cgroup_put(cgrp); + ptype = BPF_PROG_TYPE_SOCK_OPS; break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: - ret = sockmap_get_from_fd(attr, false); - break; + return sockmap_get_from_fd(attr, false); default: return -EINVAL; } + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + prog = NULL; + + ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); + if (prog) + bpf_prog_put(prog); + cgroup_put(cgrp); return ret; 
} diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d6551cd45238..57eb866ae78d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1896,6 +1896,9 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) if (ret) goto destroy_root; + ret = cgroup_bpf_inherit(root_cgrp); + WARN_ON_ONCE(ret); + trace_cgroup_setup_root(root); /* @@ -4713,6 +4716,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgrp->self.parent = &parent->self; cgrp->root = root; cgrp->level = level; + ret = cgroup_bpf_inherit(cgrp); + if (ret) + goto out_idr_free; for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; @@ -4747,13 +4753,12 @@ static struct cgroup *cgroup_create(struct cgroup *parent) if (!cgroup_on_dfl(cgrp)) cgrp->subtree_control = cgroup_control(cgrp); - if (parent) - cgroup_bpf_inherit(cgrp, parent); - cgroup_propagate_control(cgrp); return cgrp; +out_idr_free: + cgroup_idr_remove(&root->cgroup_idr, cgrp->id); out_cancel_ref: percpu_ref_exit(&cgrp->self.refcnt); out_free_cgrp: @@ -5736,14 +5741,23 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #endif /* CONFIG_SOCK_CGROUP_DATA */ #ifdef CONFIG_CGROUP_BPF -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable) +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) { - struct cgroup *parent = cgroup_parent(cgrp); int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); + ret = __cgroup_bpf_attach(cgrp, prog, type, flags); + mutex_unlock(&cgroup_mutex); + return ret; +} +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_detach(cgrp, prog, type, flags); mutex_unlock(&cgroup_mutex); return ret; } From 468e2f64d220fe2dc11caa2bcb9b3a1e50fc7321 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:22 -0700 Subject: [PATCH 0535/2177] bpf: introduce BPF_PROG_QUERY command introduce BPF_PROG_QUERY command to retrieve a set of either attached programs to given cgroup or a set of effective programs that will execute for events within a cgroup Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. 
Miller --- include/linux/bpf-cgroup.h | 4 ++++ include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 13 +++++++++++ kernel/bpf/cgroup.c | 46 ++++++++++++++++++++++++++++++++++++++ kernel/bpf/core.c | 38 +++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 34 ++++++++++++++++++++++++++++ kernel/cgroup/cgroup.c | 10 +++++++++ 7 files changed, 148 insertions(+) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 102e56fbb6de..359b6f5d3d90 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -44,12 +44,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr); /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); +int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a6964b75f070..a67daea731ab 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -260,6 +260,9 @@ struct bpf_prog_array { struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); +int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); +int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, + __u32 __user *prog_ids, u32 cnt); #define BPF_PROG_RUN_ARRAY(array, ctx, func) \ ({ \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 762f74bc6c47..cb2b9f95160a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -92,6 +92,7 @@ enum bpf_cmd { BPF_PROG_GET_FD_BY_ID, BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, }; enum bpf_map_type { @@ -211,6 +212,9 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + #define BPF_OBJ_NAME_LEN 16U union bpf_attr { @@ -289,6 +293,15 @@ union bpf_attr { __u32 info_len; __aligned_u64 info; } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; } __attribute__((aligned(8))); /* BPF helper function descriptions: diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6b7500bbdb53..e88abc0865d5 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -384,6 +384,52 @@ cleanup: return err; } +/* Must be called with cgroup_mutex held to avoid races. 
*/ +int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + enum bpf_attach_type type = attr->query.attach_type; + struct list_head *progs = &cgrp->bpf.progs[type]; + u32 flags = cgrp->bpf.flags[type]; + int cnt, ret = 0, i; + + if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) + cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); + else + cnt = prog_list_length(progs); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) + return -EFAULT; + if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) + return -EFAULT; + if (attr->query.prog_cnt == 0 || !prog_ids || !cnt) + /* return early if user requested only program count + flags */ + return 0; + if (attr->query.prog_cnt < cnt) { + cnt = attr->query.prog_cnt; + ret = -ENOSPC; + } + + if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { + return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], + prog_ids, cnt); + } else { + struct bpf_prog_list *pl; + u32 id; + + i = 0; + list_for_each_entry(pl, progs, node) { + id = pl->prog->aux->id; + if (copy_to_user(prog_ids + i, &id, sizeof(id))) + return -EFAULT; + if (++i == cnt) + break; + } + } + return ret; +} + /** * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering * @sk: The socket sending or receiving traffic diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6b49e1991ae7..eba966c09053 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1412,6 +1412,44 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) kfree_rcu(progs, rcu); } +int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) +{ + struct bpf_prog **prog; + u32 cnt = 0; + + rcu_read_lock(); + prog = rcu_dereference(progs)->progs; + for (; *prog; prog++) + cnt++; + rcu_read_unlock(); + return cnt; +} + +int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, + __u32 __user *prog_ids, u32 cnt) +{ + struct bpf_prog **prog; + u32 i = 0, id; + + rcu_read_lock(); + prog = rcu_dereference(progs)->progs; + for (; *prog; prog++) { + id = (*prog)->aux->id; + if (copy_to_user(prog_ids + i, &id, sizeof(id))) { + rcu_read_unlock(); + return -EFAULT; + } + if (++i == cnt) { + prog++; + break; + } + } + rcu_read_unlock(); + if (*prog) + return -ENOSPC; + return 0; +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 51bee695d32c..0048cb24ba7b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1272,6 +1272,37 @@ static int bpf_prog_detach(const union bpf_attr *attr) return ret; } +#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt + +static int bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct cgroup *cgrp; + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (CHECK_ATTR(BPF_PROG_QUERY)) + return -EINVAL; + if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) + return -EINVAL; + + switch (attr->query.attach_type) { + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_SOCK_OPS: + break; + default: + return -EINVAL; + } + cgrp = cgroup_get_from_fd(attr->query.target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + ret = cgroup_bpf_query(cgrp, attr, uattr); + cgroup_put(cgrp); + return ret; +} #endif /* CONFIG_CGROUP_BPF */ #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration @@ -1568,6 +1599,9 @@ 
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_PROG_DETACH: err = bpf_prog_detach(&attr); break; + case BPF_PROG_QUERY: + err = bpf_prog_query(&attr, uattr); + break; #endif case BPF_PROG_TEST_RUN: err = bpf_prog_test_run(&attr, uattr); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 57eb866ae78d..269512b94a94 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5761,4 +5761,14 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, mutex_unlock(&cgroup_mutex); return ret; } +int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_query(cgrp, attr, uattr); + mutex_unlock(&cgroup_mutex); + return ret; +} #endif /* CONFIG_CGROUP_BPF */ From 390ee7e29fc8e6e90d3065b00afb047c4ee552f9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:23 -0700 Subject: [PATCH 0536/2177] bpf: enforce return code for cgroup-bpf programs with addition of tnum logic the verifier got smart enough and we can enforce return codes at program load time. For now do so for cgroup-bpf program types. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 40 ++++++++++++ tools/testing/selftests/bpf/test_verifier.c | 72 +++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4cf9b72c59a0..52b022310f6a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3073,6 +3073,43 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } +static int check_return_code(struct bpf_verifier_env *env) +{ + struct bpf_reg_state *reg; + struct tnum range = tnum_range(0, 1); + + switch (env->prog->type) { + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_SOCK_OPS: + break; + default: + return 0; + } + + reg = &env->cur_state.regs[BPF_REG_0]; + if (reg->type != SCALAR_VALUE) { + verbose("At program exit the register R0 is not a known value (%s)\n", + reg_type_str[reg->type]); + return -EINVAL; + } + + if (!tnum_in(range, reg->var_off)) { + verbose("At program exit the register R0 "); + if (!tnum_is_unknown(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose("has value %s", tn_buf); + } else { + verbose("has unknown scalar value"); + } + verbose(" should have been 0 or 1\n"); + return -EINVAL; + } + return 0; +} + /* non-recursive DFS pseudo code * 1 procedure DFS-iterative(G,v): * 2 label v as discovered @@ -3863,6 +3900,9 @@ static int do_check(struct bpf_verifier_env *env) return -EACCES; } + err = check_return_code(env); + if (err) + return err; process_bpf_exit: insn_idx = pop_stack(env, &prev_insn_idx); if (insn_idx < 0) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 290d5056c165..cc91d0159f43 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6892,6 +6892,78 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, + { + "bpf_exit with invalid return code. test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0xffffffff)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. 
test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0x3)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x2; 0x0)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test6", + .insns = { + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R0 is not a known value (ctx)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4), + BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has unknown scalar value", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, }; static int probe_filter_length(const struct bpf_insn *fp) From 244d20efdb68c5c1a26c667baeb232ea163e2f69 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:24 -0700 Subject: [PATCH 0537/2177] libbpf: introduce bpf_prog_detach2() introduce bpf_prog_detach2() that takes one more argument prog_fd vs bpf_prog_detach() that takes only attach_fd and type. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/lib/bpf/bpf.c | 12 ++++++++++++ tools/lib/bpf/bpf.h | 1 + 2 files changed, 13 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index daf624e4c720..d4b6ba8292ee 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -291,6 +291,18 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } +int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 118d00535a0d..afd64727c9cf 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -66,6 +66,7 @@ int bpf_obj_get(const char *pathname); int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); +int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); From 39323e788cb672adba8709ca407bd6763aae577d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:25 -0700 Subject: [PATCH 0538/2177] samples/bpf: add multi-prog cgroup test case create 5 cgroups, attach 6 progs and check that progs are executed as: cgrp1 (MULTI progs A, B) -> cgrp2 (OVERRIDE prog C) -> cgrp3 (MULTI prog D) -> cgrp4 (OVERRIDE prog E) -> cgrp5 (NONE prog F) the event in cgrp5 triggers execution of F,D,A,B in that order. if prog F is detached, the execution is E,D,A,B if prog F and D are detached, the execution is E,A,B if prog F, E and D are detached, the execution is C,A,B Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/cgroup_helpers.c | 4 +- samples/bpf/test_cgrp2_attach2.c | 188 ++++++++++++++++++++++++++++++- 2 files changed, 185 insertions(+), 7 deletions(-) diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c index 9d1be9426401..88bdcf4b1670 100644 --- a/samples/bpf/cgroup_helpers.c +++ b/samples/bpf/cgroup_helpers.c @@ -56,7 +56,7 @@ int setup_cgroup_environment(void) return 1; } - if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) { + if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { log_err("mount cgroup2"); return 1; } @@ -163,7 +163,7 @@ int create_and_get_cgroup(char *path) format_cgroup_path(cgroup_path, path); if (mkdir(cgroup_path, 0777) && errno != EEXIST) { - log_err("mkdiring cgroup"); + log_err("mkdiring cgroup %s .. 
%s", path, cgroup_path); return 0; } diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 3049b1f26267..9a9f6836e5e9 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -30,7 +30,7 @@ #define FOO "/foo" #define BAR "/foo/bar/" -#define PING_CMD "ping -c1 -w1 127.0.0.1" +#define PING_CMD "ping -c1 -w1 127.0.0.1 > /dev/null" char bpf_log_buf[BPF_LOG_BUF_SIZE]; @@ -55,8 +55,7 @@ static int prog_load(int verdict) return ret; } - -int main(int argc, char **argv) +static int test_foo_bar(void) { int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0; @@ -189,8 +188,187 @@ out: close(bar); cleanup_cgroup_environment(); if (!rc) - printf("PASS\n"); + printf("### override:PASS\n"); else - printf("FAIL\n"); + printf("### override:FAIL\n"); return rc; } + +static int map_fd = -1; + +static int prog_load_cnt(int verdict, int val) +{ + if (map_fd < 0) + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + struct bpf_insn prog[] = { + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + int ret; + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + + if (ret < 0) { + log_err("Loading program"); + printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); + return 0; + } + return ret; +} + + +static int test_multiprog(void) +{ + int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; + int drop_prog, allow_prog[6] = {}, rc = 0; + unsigned long long value; + int i = 0; + + for (i = 0; i < 6; i++) { + allow_prog[i] = prog_load_cnt(1, 1 << i); + if (!allow_prog[i]) + goto err; + } + drop_prog = prog_load_cnt(0, 1); + if (!drop_prog) + goto err; + + if (setup_cgroup_environment()) + goto err; + + cg1 = create_and_get_cgroup("/cg1"); + if (!cg1) + goto err; + cg2 = create_and_get_cgroup("/cg1/cg2"); + if (!cg2) + goto err; + cg3 = create_and_get_cgroup("/cg1/cg2/cg3"); + if (!cg3) + goto err; + cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4"); + if (!cg4) + goto err; + cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5"); + if (!cg5) + goto err; + + if (join_cgroup("/cg1/cg2/cg3/cg4/cg5")) + goto err; + + if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + log_err("Attaching prog to cg1"); + goto err; + } + if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + log_err("Unexpected success attaching the same prog to cg1"); + goto err; + } + if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + log_err("Attaching prog2 to cg1"); + goto err; + } + if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, 1)) { + log_err("Attaching prog to cg2"); + goto err; + } + if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, 2)) { + log_err("Attaching prog to cg3"); + goto err; + } + if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, 
1)) { + log_err("Attaching prog to cg4"); + goto err; + } + if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching prog to cg5"); + goto err; + } + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 8 + 32); + + /* detach bottom program and ping again */ + if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching prog from cg5"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 8 + 16); + + /* detach 3rd from bottom program and ping again */ + errno = 0; + if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) { + log_err("Unexpected success on detach from cg3"); + goto err; + } + if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching from cg3"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 16); + + /* detach 2nd from bottom program and ping again */ + if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching prog from cg4"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 4); + goto out; +err: + rc = 1; + +out: + for (i = 0; i < 6; i++) + if (allow_prog[i] > 0) + close(allow_prog[i]); + close(cg1); + close(cg2); + close(cg3); + close(cg4); + close(cg5); + cleanup_cgroup_environment(); + if (!rc) + printf("### multi:PASS\n"); + else + printf("### multi:FAIL\n"); + return rc; +} + +int main(int argc, char **argv) +{ + int rc = 0; + + rc = test_foo_bar(); + if (rc) + return rc; + + return test_multiprog(); +} From defd9c476fa6b01b4eb5450452bfd202138decb7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:26 -0700 Subject: [PATCH 0539/2177] libbpf: sync bpf.h tools/include/uapi/linux/bpf.h got out of sync with actual kernel header. Update it. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 55 ++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6d2137b4cf38..cb2b9f95160a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -92,6 +92,7 @@ enum bpf_cmd { BPF_PROG_GET_FD_BY_ID, BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, }; enum bpf_map_type { @@ -143,11 +144,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. 
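To make the attach-flag semantics described in that comment concrete, here is a minimal user-space sketch, assuming a kernel and libbpf with this series applied; cg_fd, prog_fd1 and prog_fd2 are illustrative file descriptors obtained elsewhere, and the wrappers used are the bpf_prog_attach()/bpf_prog_query() helpers shown in this series:

/* Sketch only: attach two egress programs with BPF_F_ALLOW_MULTI and
 * then list the effective program array for the cgroup.
 */
#include <errno.h>
#include <stdio.h>
#include <linux/bpf.h>
#include "bpf.h"        /* tools/lib/bpf/bpf.h wrappers */

static int show_effective_progs(int cg_fd, int prog_fd1, int prog_fd2)
{
        __u32 prog_ids[16], prog_cnt = 16, attach_flags = 0;
        int i;

        /* Both programs stay attached because of BPF_F_ALLOW_MULTI. */
        if (bpf_prog_attach(prog_fd1, cg_fd, BPF_CGROUP_INET_EGRESS,
                            BPF_F_ALLOW_MULTI) ||
            bpf_prog_attach(prog_fd2, cg_fd, BPF_CGROUP_INET_EGRESS,
                            BPF_F_ALLOW_MULTI))
                return -1;

        /* Effective array: this cgroup's programs plus the inherited
         * ones from ancestor cgroups, in execution order.
         */
        if (bpf_prog_query(cg_fd, BPF_CGROUP_INET_EGRESS,
                           BPF_F_QUERY_EFFECTIVE, &attach_flags,
                           prog_ids, &prog_cnt)) {
                /* ENOSPC: prog_ids was too small; prog_cnt still holds
                 * the total count and the first entries were filled in.
                 */
                if (errno != ENOSPC)
                        return -1;
        }

        for (i = 0; i < (int)prog_cnt && i < 16; i++)
                printf("prog id %u\n", prog_ids[i]);
        return 0;
}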
+ * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -175,6 +212,9 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + #define BPF_OBJ_NAME_LEN 16U union bpf_attr { @@ -253,6 +293,15 @@ union bpf_attr { __u32 info_len; __aligned_u64 info; } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; } __attribute__((aligned(8))); /* BPF helper function descriptions: From 5d0cbf9b6c11964bf2f4041a5b0b6f81cb169736 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:27 -0700 Subject: [PATCH 0540/2177] libbpf: add support for BPF_PROG_QUERY add support for BPF_PROG_QUERY command to libbpf Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/lib/bpf/bpf.c | 20 ++++++++++++++++++++ tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index d4b6ba8292ee..5128677e4117 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -303,6 +303,26 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) +{ + union bpf_attr attr; + int ret; + + bzero(&attr, sizeof(attr)); + attr.query.target_fd = target_fd; + attr.query.attach_type = type; + attr.query.query_flags = query_flags; + attr.query.prog_cnt = *prog_cnt; + attr.query.prog_ids = ptr_to_u64(prog_ids); + + ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); + if (attach_flags) + *attach_flags = attr.query.attach_flags; + *prog_cnt = attr.query.prog_cnt; + return ret; +} + int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index afd64727c9cf..6534889e2b2f 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -75,5 +75,6 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); int bpf_prog_get_fd_by_id(__u32 id); int bpf_map_get_fd_by_id(__u32 id); int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); - +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); #endif From dfc069998ebb010f910dfec379dab4f44d331980 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:28 -0700 Subject: [PATCH 0541/2177] samples/bpf: use bpf_prog_query() interface use BPF_PROG_QUERY command to strengthen test coverage Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- samples/bpf/test_cgrp2_attach2.c | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 9a9f6836e5e9..3e8232cc04a8 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -236,6 +236,7 @@ static int prog_load_cnt(int verdict, int val) static int test_multiprog(void) { + __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; int drop_prog, allow_prog[6] = {}, rc = 0; unsigned long long value; @@ -304,6 +305,32 @@ static int test_multiprog(void) assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); assert(value == 1 + 2 + 8 + 32); + /* query the number of effective progs in cg5 */ + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + NULL, NULL, &prog_cnt) == 0); + assert(prog_cnt == 4); + /* retrieve prog_ids of effective progs in cg5 */ + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 4); + assert(attach_flags == 0); + saved_prog_id = prog_ids[0]; + /* check enospc handling */ + prog_ids[0] = 0; + prog_cnt = 2; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == -1 && + errno == ENOSPC); + assert(prog_cnt == 4); + /* check that prog_ids are returned even when buffer is too small */ + assert(prog_ids[0] == saved_prog_id); + /* retrieve prog_id of single attached prog in cg5 */ + prog_ids[0] = 0; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, + NULL, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 1); + assert(prog_ids[0] == saved_prog_id); + /* detach bottom program and ping again */ if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) { log_err("Detaching prog from cg5"); @@ -341,6 +368,15 @@ static int test_multiprog(void) assert(system(PING_CMD) == 0); assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); assert(value == 1 + 2 + 4); + + prog_cnt = 4; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 3); + assert(attach_flags == 0); + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, + NULL, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 0); goto out; err: rc = 1; From 6621dd29eb9b5e6774ec7a9a75161352fdea47fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 3 Oct 2017 13:53:23 +0200 Subject: [PATCH 0542/2177] dev: advertise the new nsid when the netns iface changes x-netns interfaces are bound to two netns: the link netns and the upper netns. Usually, this kind of interfaces is created in the link netns and then moved to the upper netns. At the end, the interface is visible only in the upper netns. The link nsid is advertised via netlink in the upper netns, thus the user always knows where is the link part. There is no such mechanism in the link netns. When the interface is moved to another netns, the user cannot "follow" it. This patch adds a new netlink attribute which helps to follow an interface which moves to another netns. When the interface is unregistered, the new nsid is advertised. If the interface is a x-netns interface (ie rtnl_link_ops->get_link_net is defined), the nsid is allocated if needed. CC: Jason A. Donenfeld Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 4 +++- include/uapi/linux/if_link.h | 1 + net/core/dev.c | 11 ++++++++--- net/core/rtnetlink.c | 31 ++++++++++++++++++++++--------- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index dea59c8eec54..1251638e60d3 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -17,9 +17,11 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); +void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, + gfp_t flags, int *new_nsid); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, - gfp_t flags); + gfp_t flags, int *new_nsid); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ea87bd708ee9..cd580fc0e58f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -158,6 +158,7 @@ enum { IFLA_PAD, IFLA_XDP, IFLA_EVENT, + IFLA_NEW_NETNSID, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 454f05441546..bffc75429184 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -145,6 +145,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -7204,7 +7205,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL); + GFP_KERNEL, NULL); /* * Flush the unicast and multicast chains @@ -8291,7 +8292,7 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - int err; + int err, new_nsid; ASSERT_RTNL(); @@ -8347,7 +8348,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) + new_nsid = peernet2id_alloc(dev_net(dev), net); + else + new_nsid = peernet2id(dev_net(dev), net); + rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid); /* * Flush the unicast and multicast chains diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3fb1ca33cba4..1ee98b1369d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -915,6 +915,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(4) /* IFLA_EVENT */ + + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1384,7 +1385,7 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb, static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, - u32 event) + u32 event, int *new_nsid) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1472,6 +1473,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_fill_link_netnsid(skb, dev)) goto nla_put_failure; + if (new_nsid && + nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) + goto nla_put_failure; + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto 
nla_put_failure; @@ -1701,7 +1706,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0); + ext_filter_mask, 0, NULL); if (err < 0) { if (likely(skb->len)) @@ -2808,7 +2813,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); + nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -2893,7 +2898,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, - u32 event, gfp_t flags) + u32 event, gfp_t flags, int *new_nsid) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2904,7 +2909,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event, + new_nsid); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2927,14 +2933,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) static void rtmsg_ifinfo_event(int type, struct net_device *dev, unsigned int change, u32 event, - gfp_t flags) + gfp_t flags, int *new_nsid) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); + skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } @@ -2942,10 +2948,17 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags) { - rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags); + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL); } EXPORT_SYMBOL(rtmsg_ifinfo); +void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, + gfp_t flags, int *new_nsid) +{ + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, + new_nsid); +} + static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u16 vid, u32 pid, u32 seq, @@ -4321,7 +4334,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), - GFP_KERNEL); + GFP_KERNEL, NULL); break; default: break; From 51d0c04795a4b5d9a188336884887a9d394a94b0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:45 -0700 Subject: [PATCH 0543/2177] net: Add extack to netdev_notifier_info Add netlink_ext_ack to netdev_notifier_info to allow notifier handlers to return errors to userspace. Clean up the initialization in dev.c such that extack is easily added in subsequent patches where relevant. Specifically, remove the init call in call_netdevice_notifiers_info and have callers initalize on stack when info is declared. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 10 ++++- net/core/dev.c | 77 +++++++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 32 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d04424cfffba..05fcaba4b0d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2309,7 +2309,8 @@ int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); struct netdev_notifier_info { - struct net_device *dev; + struct net_device *dev; + struct netlink_ext_ack *extack; }; struct netdev_notifier_change_info { @@ -2334,6 +2335,7 @@ static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { info->dev = dev; + info->extack = NULL; } static inline struct net_device * @@ -2342,6 +2344,12 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info) return info->dev; } +static inline struct netlink_ext_ack * +netdev_notifier_info_to_extack(const struct netdev_notifier_info *info) +{ + return info->extack; +} + int call_netdevice_notifiers(unsigned long val, struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index bffc75429184..e27a6bc0ac4d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -163,7 +163,6 @@ static struct list_head offload_base __read_mostly; static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, struct netdev_notifier_info *info); static struct napi_struct *napi_by_id(unsigned int napi_id); @@ -1339,10 +1338,11 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - struct netdev_notifier_change_info change_info; + struct netdev_notifier_change_info change_info = { + .info.dev = dev, + }; - change_info.flags_changed = 0; - call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } @@ -1563,9 +1563,10 @@ EXPORT_SYMBOL(dev_disable_lro); static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { - struct netdev_notifier_info info; + struct netdev_notifier_info info = { + .dev = dev, + }; - netdev_notifier_info_init(&info, dev); return nb->notifier_call(nb, val, &info); } @@ -1690,11 +1691,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); */ static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, struct netdev_notifier_info *info) { ASSERT_RTNL(); - netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } @@ -1709,9 +1708,11 @@ static int call_netdevice_notifiers_info(unsigned long val, int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - struct netdev_notifier_info info; + struct netdev_notifier_info info = { + .dev = dev, + }; - return call_netdevice_notifiers_info(val, dev, &info); + return call_netdevice_notifiers_info(val, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -6278,7 +6279,15 @@ static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info) { - struct netdev_notifier_changeupper_info changeupper_info; + struct netdev_notifier_changeupper_info changeupper_info = { + .info = { + .dev = dev, + }, + .upper_dev = upper_dev, + .master = master, + .linking = true, + .upper_info = upper_info, + }; int ret = 0; 
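A minimal sketch of how a driver-side consumer might use the extack now carried in netdev_notifier_info (the foo_* names are purely illustrative and not part of this patch): a NETDEV_PRECHANGEUPPER handler can veto a link and hand the reason back to the requesting netlink socket instead of returning a bare errno:

/* Hypothetical netdevice notifier; foo_is_foo_port() and
 * foo_upper_supported() stand in for driver-specific checks.
 */
static int foo_netdevice_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct netdev_notifier_changeupper_info *info = ptr;
        struct netlink_ext_ack *extack;

        if (event != NETDEV_PRECHANGEUPPER || !foo_is_foo_port(dev))
                return NOTIFY_DONE;

        extack = netdev_notifier_info_to_extack(&info->info);

        if (info->linking && !foo_upper_supported(info->upper_dev)) {
                /* NL_SET_ERR_MSG tolerates a NULL extack, so callers
                 * that did not supply one are still safe.
                 */
                NL_SET_ERR_MSG(extack, "Unsupported upper device");
                return notifier_from_errno(-EOPNOTSUPP);
        }

        return NOTIFY_DONE;
}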
ASSERT_RTNL(); @@ -6296,12 +6305,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - changeupper_info.upper_dev = upper_dev; - changeupper_info.master = master; - changeupper_info.linking = true; - changeupper_info.upper_info = upper_info; - - ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) @@ -6312,7 +6316,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) @@ -6376,20 +6380,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { - struct netdev_notifier_changeupper_info changeupper_info; + struct netdev_notifier_changeupper_info changeupper_info = { + .info = { + .dev = dev, + }, + .upper_dev = upper_dev, + .linking = false, + }; ASSERT_RTNL(); - changeupper_info.upper_dev = upper_dev; changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; - changeupper_info.linking = false; - call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -6405,11 +6413,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink); void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info) { - struct netdev_notifier_bonding_info info; + struct netdev_notifier_bonding_info info = { + .info.dev = dev, + }; memcpy(&info.bonding_info, bonding_info, sizeof(struct netdev_bonding_info)); - call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, &info.info); } EXPORT_SYMBOL(netdev_bonding_info_change); @@ -6535,11 +6545,13 @@ EXPORT_SYMBOL(dev_get_nest_level); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info) { - struct netdev_notifier_changelowerstate_info changelowerstate_info; + struct netdev_notifier_changelowerstate_info changelowerstate_info = { + .info.dev = lower_dev, + }; ASSERT_RTNL(); changelowerstate_info.lower_state_info = lower_state_info; - call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, &changelowerstate_info.info); } EXPORT_SYMBOL(netdev_lower_state_changed); @@ -6830,11 +6842,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, if (dev->flags & IFF_UP && (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { - struct netdev_notifier_change_info change_info; + struct netdev_notifier_change_info change_info = { + .info = { + .dev = dev, + }, + .flags_changed = changes, + }; - change_info.flags_changed = changes; - call_netdevice_notifiers_info(NETDEV_CHANGE, dev, - &change_info.info); + call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); } } From 33eaf2a6eb48ebf00374aaaf4b1b43f9950dcbe4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:46 -0700 Subject: [PATCH 0544/2177] net: Add extack to ndo_add_slave Pass extack 
to do_set_master and down to ndo_add_slave Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +++-- drivers/net/bonding/bond_options.c | 2 +- drivers/net/team/team.c | 3 ++- drivers/net/vrf.c | 3 ++- include/linux/netdevice.h | 3 ++- include/net/bonding.h | 3 ++- net/batman-adv/soft-interface.c | 3 ++- net/bridge/br_device.c | 3 ++- net/core/rtnetlink.c | 10 ++++++---- 9 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b19dc033fb36..78feb94a36db 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1328,7 +1328,8 @@ void bond_lower_state_changed(struct slave *slave) } /* enslave device to bond device */ -int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) +int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); const struct net_device_ops *slave_ops = slave_dev->netdev_ops; @@ -3492,7 +3493,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd switch (cmd) { case BOND_ENSLAVE_OLD: case SIOCBONDENSLAVE: - res = bond_enslave(bond_dev, slave_dev); + res = bond_enslave(bond_dev, slave_dev, NULL); break; case BOND_RELEASE_OLD: case SIOCBONDRELEASE: diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 5931aa2fe997..8a9b085c2a98 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1383,7 +1383,7 @@ static int bond_option_slaves_set(struct bonding *bond, switch (command[0]) { case '+': netdev_dbg(bond->dev, "Adding slave %s\n", dev->name); - ret = bond_enslave(bond->dev, dev); + ret = bond_enslave(bond->dev, dev, NULL); break; case '-': diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ae53e899259f..4359d45aa131 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1914,7 +1914,8 @@ static int team_netpoll_setup(struct net_device *dev, } #endif -static int team_add_slave(struct net_device *dev, struct net_device *port_dev) +static int team_add_slave(struct net_device *dev, struct net_device *port_dev, + struct netlink_ext_ack *extack) { struct team *team = netdev_priv(dev); int err; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index cc18b7b11612..4a082ef53533 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -788,7 +788,8 @@ err: return ret; } -static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) +static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, + struct netlink_ext_ack *extack) { if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) return -EINVAL; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 05fcaba4b0d9..368a5064a487 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1246,7 +1246,8 @@ struct net_device_ops { u32 flow_id); #endif int (*ndo_add_slave)(struct net_device *dev, - struct net_device *slave_dev); + struct net_device *slave_dev, + struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); netdev_features_t (*ndo_fix_features)(struct net_device *dev, diff --git a/include/net/bonding.h b/include/net/bonding.h index b2e68657a216..2860cc66c2bb 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -596,7 +596,8 @@ void bond_destroy_sysfs(struct bond_net *net); void 
bond_prepare_sysfs_group(struct bonding *bond); int bond_sysfs_slave_add(struct slave *slave); void bond_sysfs_slave_del(struct slave *slave); -int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); +int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb); int bond_set_carrier(struct bonding *bond); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index c2c986746d0b..e7d5fbb6ad53 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -867,7 +867,8 @@ free_bat_counters: * Return: 0 if successful or error otherwise. */ static int batadv_softif_slave_add(struct net_device *dev, - struct net_device *slave_dev) + struct net_device *slave_dev, + struct netlink_ext_ack *extack) { struct batadv_hard_iface *hard_iface; struct net *net = dev_net(dev); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f6b6a92f1c48..cb0131d70ab1 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -320,7 +320,8 @@ void br_netpoll_disable(struct net_bridge_port *p) #endif -static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) +static int br_add_slave(struct net_device *dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1ee98b1369d5..c5ee429bcce9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1957,7 +1957,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } -static int do_set_master(struct net_device *dev, int ifindex) +static int do_set_master(struct net_device *dev, int ifindex, + struct netlink_ext_ack *extack) { struct net_device *upper_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops; @@ -1982,7 +1983,7 @@ static int do_set_master(struct net_device *dev, int ifindex) return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { - err = ops->ndo_add_slave(upper_dev, dev); + err = ops->ndo_add_slave(upper_dev, dev, extack); if (err) return err; } else { @@ -2115,7 +2116,7 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -2753,7 +2754,8 @@ replay: goto out_unregister; } if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), + extack); if (err) goto out_unregister; } From 42ab19ee90292993370a30ad242599d75a3b749e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:47 -0700 Subject: [PATCH 0545/2177] net: Add extack to upper device linking Add extack arg to netdev_upper_dev_link and netdev_master_upper_dev_link Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 7 ++++--- .../net/ethernet/qualcomm/rmnet/rmnet_config.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 2 +- drivers/net/ipvlan/ipvlan_main.c | 2 +- drivers/net/macsec.c | 2 +- drivers/net/macvlan.c | 7 ++++--- drivers/net/macvtap.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/usb/qmi_wwan.c | 2 +- drivers/net/vrf.c | 7 ++++--- include/linux/if_macvlan.h | 3 ++- include/linux/netdevice.h | 6 ++++-- net/8021q/vlan.c | 6 +++--- net/8021q/vlan.h | 2 +- net/8021q/vlan_netlink.c | 2 +- net/batman-adv/hard-interface.c | 2 +- net/bridge/br_if.c | 2 +- net/core/dev.c | 15 ++++++++++----- net/openvswitch/vport-netdev.c | 3 ++- 19 files changed, 44 insertions(+), 32 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 78feb94a36db..bc92307c2082 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1217,14 +1217,15 @@ static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond) } } -static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave) +static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave, + struct netlink_ext_ack *extack) { struct netdev_lag_upper_info lag_upper_info; int err; lag_upper_info.tx_type = bond_lag_tx_type(bond); err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave, - &lag_upper_info); + &lag_upper_info, extack); if (err) return err; rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL); @@ -1710,7 +1711,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_detach; } - res = bond_master_upper_dev_link(bond, new_slave); + res = bond_master_upper_dev_link(bond, new_slave, extack); if (res) { netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res); goto err_unregister; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 98f22551eb45..1af326a60cbb 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -178,7 +178,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (err) goto err1; - err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL); + err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack); if (err) goto err2; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f300ae61c6c6..dfb986421ec6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1748,7 +1748,7 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto rx_handler_failed; } - ret = netdev_upper_dev_link(vf_netdev, ndev); + ret = netdev_upper_dev_link(vf_netdev, ndev, NULL); if (ret != 0) { netdev_err(vf_netdev, "can not set master device %s (err = %d)\n", diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index c74893c1e620..57c3856bab05 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -584,7 +584,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, if (err < 0) goto remove_ida; - err = netdev_upper_dev_link(phy_dev, dev); + err = netdev_upper_dev_link(phy_dev, dev, extack); if (err) { goto unregister_netdev; } diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 98e4deaa3a6a..ccbe4eaffe4d 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3244,7 +3244,7 @@ static int macsec_newlink(struct net *net, struct 
net_device *dev, &macsec_netdev_addr_lock_key, macsec_get_nest_level(dev)); - err = netdev_upper_dev_link(real_dev, dev); + err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) goto unregister; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 1ffe77e95d46..858bd66511a2 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1344,7 +1344,8 @@ static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode, } int macvlan_common_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port; @@ -1433,7 +1434,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, goto destroy_macvlan_port; dev->priv_flags |= IFF_MACVLAN; - err = netdev_upper_dev_link(lowerdev, dev); + err = netdev_upper_dev_link(lowerdev, dev, extack); if (err) goto unregister_netdev; @@ -1456,7 +1457,7 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - return macvlan_common_newlink(src_net, dev, tb, data); + return macvlan_common_newlink(src_net, dev, tb, data, extack); } void macvlan_dellink(struct net_device *dev, struct list_head *head) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index c2d0ea2fb019..f62aea2fcfa9 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -105,7 +105,7 @@ static int macvtap_newlink(struct net *src_net, struct net_device *dev, /* Don't put anything that may fail after macvlan_common_newlink * because we can't undo what it does. */ - err = macvlan_common_newlink(src_net, dev, tb, data); + err = macvlan_common_newlink(src_net, dev, tb, data, extack); if (err) { netdev_rx_handler_unregister(dev); return err; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 4359d45aa131..a468439969df 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1112,7 +1112,7 @@ static int team_upper_dev_link(struct team *team, struct team_port *port) lag_upper_info.tx_type = team->mode->lag_tx_type; err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, - &lag_upper_info); + &lag_upper_info, NULL); if (err) return err; port->dev->priv_flags |= IFF_TEAM_PORT; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8c3733608271..db7279d5b250 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -221,7 +221,7 @@ static int qmimux_register_device(struct net_device *real_dev, u8 mux_id) /* Account for reference in struct qmimux_priv_priv */ dev_hold(real_dev); - err = netdev_upper_dev_link(real_dev, new_dev); + err = netdev_upper_dev_link(real_dev, new_dev, NULL); if (err) goto out_unregister_netdev; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 4a082ef53533..77d0655a0250 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -764,7 +764,8 @@ static void cycle_netdev(struct net_device *dev) } } -static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) +static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, + struct netlink_ext_ack *extack) { int ret; @@ -775,7 +776,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) return -EOPNOTSUPP; port_dev->priv_flags |= IFF_L3MDEV_SLAVE; - ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); + ret = 
netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack); if (ret < 0) goto err; @@ -794,7 +795,7 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) return -EINVAL; - return do_vrf_add_slave(dev, port_dev); + return do_vrf_add_slave(dev, port_dev, extack); } /* inverse of do_vrf_add_slave */ diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index c9ec1343d187..10e319f41fb1 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -72,7 +72,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, extern void macvlan_common_setup(struct net_device *dev); extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]); + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack); extern void macvlan_count_rx(const struct macvlan_dev *vlan, unsigned int len, bool success, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 368a5064a487..31bb3010c69b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3919,10 +3919,12 @@ void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); -int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); +int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, + struct netlink_ext_ack *extack); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv, void *upper_info); + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9649579b5b9f..71c3e045505b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev, return 0; } -int register_vlan_dev(struct net_device *dev) +int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; @@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; - err = netdev_upper_dev_link(real_dev, dev); + err = netdev_upper_dev_link(real_dev, dev, extack); if (err) goto out_unregister_netdev; @@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) vlan->flags = VLAN_FLAG_REORDER_HDR; new_dev->rtnl_link_ops = &vlan_link_ops; - err = register_vlan_dev(new_dev); + err = register_vlan_dev(new_dev, NULL); if (err < 0) goto out_free_newdev; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index df8bd65dd370..94f8eed9f9b3 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -107,7 +107,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); int vlan_check_real_dev(struct net_device *real_dev, __be16 protocol, u16 vlan_id); void vlan_setup(struct net_device *dev); -int register_vlan_dev(struct net_device *dev); +int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack); void unregister_vlan_dev(struct net_device *dev, struct 
list_head *head); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 5e831de3103e..6e7c5a6a7930 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -160,7 +160,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; - return register_vlan_dev(dev); + return register_vlan_dev(dev, extack); } static inline size_t vlan_qos_map_size(unsigned int n) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e348f76ea8c1..f7b413b9297e 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv = netdev_priv(hard_iface->soft_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL, NULL); + soft_iface, NULL, NULL, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f3aef22931ab..0a3fd727048d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -540,7 +540,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index e27a6bc0ac4d..fcddccb6be41 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6277,11 +6277,13 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *upper_priv, void *upper_info) + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { .dev = dev, + .extack = extack, }, .upper_dev = upper_dev, .master = master, @@ -6341,9 +6343,11 @@ rollback: * returns zero. */ int netdev_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + struct netlink_ext_ack *extack) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, + NULL, NULL, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -6362,10 +6366,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link); */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv, void *upper_info) + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack) { return __netdev_upper_dev_link(dev, upper_dev, true, - upper_priv, upper_info); + upper_priv, upper_info, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 0389398fa4ab..2e5e7a41d8ef 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp), NULL, NULL); + get_dpdev(vport->dp), + NULL, NULL, NULL); if (err) goto error_unlock; From de3baa3ed72f09f8d2ba797645d052cd5f5de27d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:48 -0700 Subject: [PATCH 0546/2177] net: vrf: Add extack messages for enslave errors Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 77d0655a0250..0b54f553228e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -772,8 +772,11 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, /* do not allow loopback device to be enslaved to a VRF. * The vrf device acts as the loopback for the vrf. */ - if (port_dev == dev_net(dev)->loopback_dev) + if (port_dev == dev_net(dev)->loopback_dev) { + NL_SET_ERR_MSG(extack, + "Can not enslave loopback device to a VRF"); return -EOPNOTSUPP; + } port_dev->priv_flags |= IFF_L3MDEV_SLAVE; ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack); @@ -792,7 +795,13 @@ err: static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, struct netlink_ext_ack *extack) { - if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) + if (netif_is_l3_master(port_dev)) { + NL_SET_ERR_MSG(extack, + "Can not enslave an L3 master device to a VRF"); + return -EINVAL; + } + + if (netif_is_l3_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev, extack); From 759088bda21f4887c645579418e8b478eb570d78 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:49 -0700 Subject: [PATCH 0547/2177] net: bonding: Add extack messages for some enslave failures A number of bond_enslave errors are logged using the netdev_err API. Return those messages to userspace via the extack facility. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index bc92307c2082..172eeeb68152 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1348,12 +1348,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, /* already in-use? */ if (netdev_is_rx_handler_busy(slave_dev)) { + NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved"); netdev_err(bond_dev, "Error: Device is in use and cannot be enslaved\n"); return -EBUSY; } if (bond_dev == slave_dev) { + NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself."); netdev_err(bond_dev, "cannot enslave bond to itself.\n"); return -EPERM; } @@ -1364,6 +1366,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); if (vlan_uses_dev(bond_dev)) { + NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond"); netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n", slave_dev->name, bond_dev->name); return -EPERM; @@ -1383,6 +1386,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, * enslaving it; the old ifenslave will not. 
*/ if (slave_dev->flags & IFF_UP) { + NL_SET_ERR_MSG(extack, "Device can not be enslaved while up"); netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n", slave_dev->name); return -EPERM; @@ -1423,6 +1427,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond_dev); } } else if (bond_dev->type != slave_dev->type) { + NL_SET_ERR_MSG(extack, "Device type is different from other slaves"); netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n", slave_dev->name, slave_dev->type, bond_dev->type); return -EINVAL; @@ -1430,6 +1435,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (slave_dev->type == ARPHRD_INFINIBAND && BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { + NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves"); netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n", slave_dev->type); res = -EOPNOTSUPP; @@ -1445,6 +1451,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond->params.fail_over_mac = BOND_FOM_ACTIVE; netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n"); } else { + NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active"); netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n"); res = -EOPNOTSUPP; goto err_undo_flags; From ca752be006013ac6f19721280f44c91ef07ad3d1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:50 -0700 Subject: [PATCH 0548/2177] net: bridge: Pass extack down to netdev_master_upper_dev_link Pass extack arg to br_add_if. Add messages for a couple of failures and pass arg to netdev_master_upper_dev_link. Signed-off-by: David Ahern Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_device.c | 2 +- net/bridge/br_if.c | 15 +++++++++++---- net/bridge/br_ioctl.c | 2 +- net/bridge/br_private.h | 3 ++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index cb0131d70ab1..7acb77c9bd65 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -326,7 +326,7 @@ static int br_add_slave(struct net_device *dev, struct net_device *slave_dev, { struct net_bridge *br = netdev_priv(dev); - return br_add_if(br, slave_dev); + return br_add_if(br, slave_dev, extack); } static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 0a3fd727048d..59a74a414e20 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -480,7 +480,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br, } /* called with RTNL */ -int br_add_if(struct net_bridge *br, struct net_device *dev) +int br_add_if(struct net_bridge *br, struct net_device *dev, + struct netlink_ext_ack *extack) { struct net_bridge_port *p; int err = 0; @@ -500,16 +501,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return -EINVAL; /* No bridging of bridges */ - if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) { + NL_SET_ERR_MSG(extack, + "Can not enslave a bridge to a bridge"); return -ELOOP; + } /* Device is already being bridged */ if (br_port_exists(dev)) return -EBUSY; /* No bridging devices that dislike that (e.g.
wireless) */ - if (dev->priv_flags & IFF_DONT_BRIDGE) + if (dev->priv_flags & IFF_DONT_BRIDGE) { + NL_SET_ERR_MSG(extack, + "Device does not allow enslaving to a bridge"); return -EOPNOTSUPP; + } p = new_nbp(br, dev); if (IS_ERR(p)) @@ -540,7 +547,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack); if (err) goto err5; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 66cd98772051..8f29103935a3 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -98,7 +98,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) return -EINVAL; if (isadd) - ret = br_add_if(br, dev); + ret = br_add_if(br, dev, NULL); else ret = br_del_if(br, dev); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 020c709a017f..ab4df24f7bba 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -566,7 +566,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, void br_port_carrier_check(struct net_bridge_port *p); int br_add_bridge(struct net *net, const char *name); int br_del_bridge(struct net *net, const char *name); -int br_add_if(struct net_bridge *br, struct net_device *dev); +int br_add_if(struct net_bridge *br, struct net_device *dev, + struct netlink_ext_ack *extack); int br_del_if(struct net_bridge *br, struct net_device *dev); int br_min_mtu(const struct net_bridge *br); netdev_features_t br_features_recompute(struct net_bridge *br, From e58376e1df2aaffbf12753959142a50f824c46ea Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:51 -0700 Subject: [PATCH 0549/2177] mlxsw: spectrum: Add extack messages for enslave failures mlxsw fails device enslavement for a number of reasons. Use the extack facility to return an error message to the user stating why the enslave is failing. Messages are prefixed with "spectrum" so users know it is a constraint imposed by the hardware driver. For example: $ ip li add br0.11 link br0 type vlan id 11 $ ip li set swp11 master br0 Error: spectrum: Enslaving a port to a device that already has an upper device is not supported. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3adf237c951a..5cd4df08ce97 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4019,14 +4019,21 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, - struct netdev_lag_upper_info *lag_upper_info) + struct netdev_lag_upper_info *lag_upper_info, + struct netlink_ext_ack *extack) { u16 lag_id; - if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) { + NL_SET_ERR_MSG(extack, + "spectrum: Exceeded number of supported LAG devices"); return false; - if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + } + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG(extack, + "spectrum: LAG device using unsupported Tx type"); return false; + } return true; } @@ -4231,6 +4238,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, { struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; + struct netlink_ext_ack *extack; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err = 0; @@ -4238,6 +4246,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; + extack = netdev_notifier_info_to_extack(&info->info); switch (event) { case NETDEV_PRECHANGEUPPER: @@ -4245,25 +4254,43 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, if (!is_vlan_dev(upper_dev) && !netif_is_lag_master(upper_dev) && !netif_is_bridge_master(upper_dev) && - !netif_is_ovs_master(upper_dev)) + !netif_is_ovs_master(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Unknown upper device type"); return -EINVAL; + } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; + } if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, - info->upper_info)) + info->upper_info, extack)) return -EINVAL; - if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) + if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Master device is a LAG master and this device has a VLAN"); return -EINVAL; + } if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && - !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) + !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) { + NL_SET_ERR_MSG(extack, + "spectrum: Can not put a VLAN on a LAG port"); return -EINVAL; - if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) + } + if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Master device is an OVS master and this device has a VLAN"); return -EINVAL; - if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) + } + if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Can not put a VLAN on an OVS port"); return -EINVAL; + } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; From 
a92bb546cff0b31d96d5919ebfeda9c678756b42 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Oct 2017 20:10:03 -0700 Subject: [PATCH 0550/2177] tools: rename tools/net directory to tools/bpf We currently only have BPF tools in the tools/net directory. We are about to add more BPF tools there, not necessarily networking related, rename the directory and related Makefile targets to bpf. Suggested-by: Daniel Borkmann Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- MAINTAINERS | 3 +-- tools/Makefile | 14 +++++++------- tools/{net => bpf}/Makefile | 0 tools/{net => bpf}/bpf_asm.c | 0 tools/{net => bpf}/bpf_dbg.c | 0 tools/{net => bpf}/bpf_exp.l | 0 tools/{net => bpf}/bpf_exp.y | 0 tools/{net => bpf}/bpf_jit_disasm.c | 0 8 files changed, 8 insertions(+), 9 deletions(-) rename tools/{net => bpf}/Makefile (100%) rename tools/{net => bpf}/bpf_asm.c (100%) rename tools/{net => bpf}/bpf_dbg.c (100%) rename tools/{net => bpf}/bpf_exp.l (100%) rename tools/{net => bpf}/bpf_exp.y (100%) rename tools/{net => bpf}/bpf_jit_disasm.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 5231392cf4bd..004816a585b8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2725,7 +2725,7 @@ F: net/core/filter.c F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ -F: tools/net/bpf* +F: tools/bpf/ F: tools/testing/selftests/bpf/ BROADCOM B44 10/100 ETHERNET DRIVER @@ -9416,7 +9416,6 @@ F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h F: include/uapi/linux/net_namespace.h -F: tools/net/ F: tools/testing/selftests/net/ F: lib/random32.c diff --git a/tools/Makefile b/tools/Makefile index 9dfede37c8ff..df6fcb293fbc 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -19,7 +19,7 @@ help: @echo ' kvm_stat - top-like utility for displaying kvm statistics' @echo ' leds - LEDs tools' @echo ' liblockdep - user-space wrapper for kernel locking-validator' - @echo ' net - misc networking tools' + @echo ' bpf - misc BPF tools' @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' @echo ' spi - spi tools' @@ -57,7 +57,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm net iio gpio objtool leds: FORCE +cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds: FORCE $(call descend,$@) liblockdep: FORCE @@ -91,7 +91,7 @@ kvm_stat: FORCE all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests spi turbostat usb \ - virtio vm net x86_energy_perf_policy \ + virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat acpi_install: @@ -100,7 +100,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install net_install objtool_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -124,7 +124,7 @@ kvm_stat_install: install: acpi_install cgroup_install cpupower_install gpio_install \ hv_install firewire_install iio_install liblockdep_install \ perf_install selftests_install turbostat_install usb_install \ - virtio_install vm_install net_install x86_energy_perf_policy_install \ + virtio_install vm_install bpf_install 
x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install acpi_clean: @@ -133,7 +133,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean objtool_clean leds_clean: +cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -169,7 +169,7 @@ build_clean: clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ - vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ + vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean diff --git a/tools/net/Makefile b/tools/bpf/Makefile similarity index 100% rename from tools/net/Makefile rename to tools/bpf/Makefile diff --git a/tools/net/bpf_asm.c b/tools/bpf/bpf_asm.c similarity index 100% rename from tools/net/bpf_asm.c rename to tools/bpf/bpf_asm.c diff --git a/tools/net/bpf_dbg.c b/tools/bpf/bpf_dbg.c similarity index 100% rename from tools/net/bpf_dbg.c rename to tools/bpf/bpf_dbg.c diff --git a/tools/net/bpf_exp.l b/tools/bpf/bpf_exp.l similarity index 100% rename from tools/net/bpf_exp.l rename to tools/bpf/bpf_exp.l diff --git a/tools/net/bpf_exp.y b/tools/bpf/bpf_exp.y similarity index 100% rename from tools/net/bpf_exp.y rename to tools/bpf/bpf_exp.y diff --git a/tools/net/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c similarity index 100% rename from tools/net/bpf_jit_disasm.c rename to tools/bpf/bpf_jit_disasm.c From 71bb428fe2c19512ac671d5ee16ef3e73e1b49a8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Oct 2017 20:10:04 -0700 Subject: [PATCH 0551/2177] tools: bpf: add bpftool Add a simple tool for querying and updating BPF objects on the system. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- tools/bpf/Makefile | 18 +- tools/bpf/bpftool/Makefile | 80 ++++ tools/bpf/bpftool/common.c | 216 ++++++++++ tools/bpf/bpftool/jit_disasm.c | 87 ++++ tools/bpf/bpftool/main.c | 212 ++++++++++ tools/bpf/bpftool/main.h | 99 +++++ tools/bpf/bpftool/map.c | 744 +++++++++++++++++++++++++++++++++ tools/bpf/bpftool/prog.c | 456 ++++++++++++++++++++ 8 files changed, 1909 insertions(+), 3 deletions(-) create mode 100644 tools/bpf/bpftool/Makefile create mode 100644 tools/bpf/bpftool/common.c create mode 100644 tools/bpf/bpftool/jit_disasm.c create mode 100644 tools/bpf/bpftool/main.c create mode 100644 tools/bpf/bpftool/main.h create mode 100644 tools/bpf/bpftool/map.c create mode 100644 tools/bpf/bpftool/prog.c diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index ddf888010652..325a35e1c28e 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -3,6 +3,7 @@ prefix = /usr CC = gcc LEX = flex YACC = bison +MAKE = make CFLAGS += -Wall -O2 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include @@ -13,7 +14,7 @@ CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include %.lex.c: %.l $(LEX) -o $@ $< -all : bpf_jit_disasm bpf_dbg bpf_asm +all: bpf_jit_disasm bpf_dbg bpf_asm bpftool bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm' bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl @@ -26,10 +27,21 @@ bpf_asm : LDLIBS = bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o bpf_exp.lex.o : bpf_exp.yacc.c -clean : +clean: bpftool_clean rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.* -install : +install: bpftool_install install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm install bpf_dbg $(prefix)/bin/bpf_dbg install bpf_asm $(prefix)/bin/bpf_asm + +bpftool: + $(MAKE) -C bpftool + +bpftool_install: + $(MAKE) -C bpftool install + +bpftool_clean: + $(MAKE) -C bpftool clean + +.PHONY: bpftool FORCE diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile new file mode 100644 index 000000000000..a7151f47fb40 --- /dev/null +++ b/tools/bpf/bpftool/Makefile @@ -0,0 +1,80 @@ +include ../../scripts/Makefile.include + +include ../../scripts/utilities.mak + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +ifneq ($(objtree),) +#$(info Determined 'objtree' to be $(objtree)) +endif + +ifneq ($(OUTPUT),) +#$(info Determined 'OUTPUT' to be $(OUTPUT)) +# Adding $(OUTPUT) as a directory to look for source files, +# because use generated output files as sources dependency +# for flex/bison parsers. 
+VPATH += $(OUTPUT) +export VPATH +endif + +ifeq ($(V),1) + Q = +else + Q = @ +endif + +BPF_DIR = $(srctree)/tools/lib/bpf/ + +ifneq ($(OUTPUT),) + BPF_PATH=$(OUTPUT) +else + BPF_PATH=$(BPF_DIR) +endif + +LIBBPF = $(BPF_PATH)libbpf.a + +$(LIBBPF): FORCE + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) + +$(LIBBPF)-clean: + $(call QUIET_CLEAN, libbpf) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null + +prefix = /usr + +CC = gcc + +CFLAGS += -O2 +CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow +CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf +LIBS = -lelf -lbfd -lopcodes $(LIBBPF) + +include $(wildcard *.d) + +all: $(OUTPUT)bpftool + +SRCS=$(wildcard *.c) +OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) + +$(OUTPUT)bpftool: $(OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +$(OUTPUT)%.o: %.c + $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< + +clean: $(LIBBPF)-clean + $(call QUIET_CLEAN, bpftool) + $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + +install: + install $(OUTPUT)bpftool $(prefix)/sbin/bpftool + +FORCE: + +.PHONY: all clean FORCE +.DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c new file mode 100644 index 000000000000..df8396a0c400 --- /dev/null +++ b/tools/bpf/bpftool/common.c @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "main.h" + +static bool is_bpffs(char *path) +{ + struct statfs st_fs; + + if (statfs(path, &st_fs) < 0) + return false; + + return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; +} + +int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) +{ + enum bpf_obj_type type; + int fd; + + fd = bpf_obj_get(path); + if (fd < 0) { + err("bpf obj get (%s): %s\n", path, + errno == EACCES && !is_bpffs(dirname(path)) ? 
+ "directory not in bpf file system (bpffs)" : + strerror(errno)); + return -1; + } + + type = get_fd_type(fd); + if (type < 0) { + close(fd); + return type; + } + if (type != exp_type) { + err("incorrect object type: %s\n", get_fd_type_name(type)); + close(fd); + return -1; + } + + return fd; +} + +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +{ + unsigned int id; + char *endptr; + int err; + int fd; + + if (!is_prefix(*argv, "id")) { + err("expected 'id' got %s\n", *argv); + return -1; + } + NEXT_ARG(); + + id = strtoul(*argv, &endptr, 0); + if (*endptr) { + err("can't parse %s as ID\n", *argv); + return -1; + } + NEXT_ARG(); + + if (argc != 1) + usage(); + + fd = get_fd_by_id(id); + if (fd < 0) { + err("can't get prog by id (%u): %s\n", id, strerror(errno)); + return -1; + } + + err = bpf_obj_pin(fd, *argv); + close(fd); + if (err) { + err("can't pin the object (%s): %s\n", *argv, + errno == EACCES && !is_bpffs(dirname(*argv)) ? + "directory not in bpf file system (bpffs)" : + strerror(errno)); + return -1; + } + + return 0; +} + +const char *get_fd_type_name(enum bpf_obj_type type) +{ + static const char * const names[] = { + [BPF_OBJ_UNKNOWN] = "unknown", + [BPF_OBJ_PROG] = "prog", + [BPF_OBJ_MAP] = "map", + }; + + if (type < 0 || type >= ARRAY_SIZE(names) || !names[type]) + return names[BPF_OBJ_UNKNOWN]; + + return names[type]; +} + +int get_fd_type(int fd) +{ + char path[PATH_MAX]; + char buf[512]; + ssize_t n; + + snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd); + + n = readlink(path, buf, sizeof(buf)); + if (n < 0) { + err("can't read link type: %s\n", strerror(errno)); + return -1; + } + if (n == sizeof(path)) { + err("can't read link type: path too long!\n"); + return -1; + } + + if (strstr(buf, "bpf-map")) + return BPF_OBJ_MAP; + else if (strstr(buf, "bpf-prog")) + return BPF_OBJ_PROG; + + return BPF_OBJ_UNKNOWN; +} + +char *get_fdinfo(int fd, const char *key) +{ + char path[PATH_MAX]; + char *line = NULL; + size_t line_n = 0; + ssize_t n; + FILE *fdi; + + snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", getpid(), fd); + + fdi = fopen(path, "r"); + if (!fdi) { + err("can't open fdinfo: %s\n", strerror(errno)); + return NULL; + } + + while ((n = getline(&line, &line_n, fdi))) { + char *value; + int len; + + if (!strstr(line, key)) + continue; + + fclose(fdi); + + value = strchr(line, '\t'); + if (!value || !value[1]) { + err("malformed fdinfo!?\n"); + free(line); + return NULL; + } + value++; + + len = strlen(value); + memmove(line, value, len); + line[len - 1] = '\0'; + + return line; + } + + err("key '%s' not found in fdinfo\n", key); + free(line); + fclose(fdi); + return NULL; +} diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c new file mode 100644 index 000000000000..70e480b59e9d --- /dev/null +++ b/tools/bpf/bpftool/jit_disasm.c @@ -0,0 +1,87 @@ +/* + * Based on: + * + * Minimal BPF JIT image disassembler + * + * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for + * debugging or verification purposes. 
+ * + * Copyright 2013 Daniel Borkmann + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void get_exec_path(char *tpath, size_t size) +{ + ssize_t len; + char *path; + + snprintf(tpath, size, "/proc/%d/exe", (int) getpid()); + tpath[size - 1] = 0; + + path = strdup(tpath); + assert(path); + + len = readlink(path, tpath, size - 1); + assert(len > 0); + tpath[len] = 0; + + free(path); +} + +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) +{ + disassembler_ftype disassemble; + struct disassemble_info info; + int count, i, pc = 0; + char tpath[256]; + bfd *bfdf; + + if (!len) + return; + + memset(tpath, 0, sizeof(tpath)); + get_exec_path(tpath, sizeof(tpath)); + + bfdf = bfd_openr(tpath, NULL); + assert(bfdf); + assert(bfd_check_format(bfdf, bfd_object)); + + init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + info.arch = bfd_get_arch(bfdf); + info.mach = bfd_get_mach(bfdf); + info.buffer = image; + info.buffer_length = len; + + disassemble_init_for_target(&info); + + disassemble = disassembler(bfdf); + assert(disassemble); + + do { + printf("%4x:\t", pc); + + count = disassemble(pc, &info); + + if (opcodes) { + printf("\n\t"); + for (i = 0; i < count; ++i) + printf("%02x ", (uint8_t) image[pc + i]); + } + printf("\n"); + + pc += count; + } while (count > 0 && pc < len); + + bfd_close(bfdf); +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c new file mode 100644 index 000000000000..e02d00d6e00b --- /dev/null +++ b/tools/bpf/bpftool/main.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* Author: Jakub Kicinski */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "main.h" + +const char *bin_name; +static int last_argc; +static char **last_argv; +static int (*last_do_help)(int argc, char **argv); + +void usage(void) +{ + last_do_help(last_argc - 1, last_argv + 1); + + exit(-1); +} + +static int do_help(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s OBJECT { COMMAND | help }\n" + " %s batch file FILE\n" + "\n" + " OBJECT := { prog | map }\n", + bin_name, bin_name); + + return 0; +} + +int cmd_select(const struct cmd *cmds, int argc, char **argv, + int (*help)(int argc, char **argv)) +{ + unsigned int i; + + last_argc = argc; + last_argv = argv; + last_do_help = help; + + if (argc < 1 && cmds[0].func) + return cmds[0].func(argc, argv); + + for (i = 0; cmds[i].func; i++) + if (is_prefix(*argv, cmds[i].cmd)) + return cmds[i].func(argc - 1, argv + 1); + + help(argc - 1, argv + 1); + + return -1; +} + +bool is_prefix(const char *pfx, const char *str) +{ + if (!pfx) + return false; + if (strlen(str) < strlen(pfx)) + return false; + + return !memcmp(str, pfx, strlen(pfx)); +} + +void print_hex(void *arg, unsigned int n, const char *sep) +{ + unsigned char *data = arg; + unsigned int i; + + for (i = 0; i < n; i++) { + const char *pfx = ""; + + if (!i) + /* nothing */; + else if (!(i % 16)) + printf("\n"); + else if (!(i % 8)) + printf(" "); + else + pfx = sep; + + printf("%s%02hhx", i ? pfx : "", data[i]); + } +} + +static int do_batch(int argc, char **argv); + +static const struct cmd cmds[] = { + { "help", do_help }, + { "batch", do_batch }, + { "prog", do_prog }, + { "map", do_map }, + { 0 } +}; + +static int do_batch(int argc, char **argv) +{ + unsigned int lines = 0; + char *n_argv[4096]; + char buf[65536]; + int n_argc; + FILE *fp; + int err; + + if (argc < 2) { + err("too few parameters for batch\n"); + return -1; + } else if (!is_prefix(*argv, "file")) { + err("expected 'file', got: %s\n", *argv); + return -1; + } else if (argc > 2) { + err("too many parameters for batch\n"); + return -1; + } + NEXT_ARG(); + + fp = fopen(*argv, "r"); + if (!fp) { + err("Can't open file (%s): %s\n", *argv, strerror(errno)); + return -1; + } + + while (fgets(buf, sizeof(buf), fp)) { + if (strlen(buf) == sizeof(buf) - 1) { + errno = E2BIG; + break; + } + + n_argc = 0; + n_argv[n_argc] = strtok(buf, " \t\n"); + + while (n_argv[n_argc]) { + n_argc++; + if (n_argc == ARRAY_SIZE(n_argv)) { + err("line %d has too many arguments, skip\n", + lines); + n_argc = 0; + break; + } + n_argv[n_argc] = strtok(NULL, " \t\n"); + } + + if (!n_argc) + continue; + + err = cmd_select(cmds, n_argc, n_argv, do_help); + if (err) + goto err_close; + + lines++; + } + + if (errno && errno != ENOENT) { + perror("reading batch file failed"); + err = -1; + } else { + info("processed %d lines\n", lines); + err = 0; + } +err_close: + fclose(fp); + + return err; +} + +int main(int argc, char **argv) +{ + bin_name = argv[0]; + NEXT_ARG(); + + bfd_init(); + + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h new file mode 100644 index 000000000000..85d2d7870a58 --- /dev/null +++ b/tools/bpf/bpftool/main.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. 
You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski */ + +#ifndef __BPF_TOOL_H +#define __BPF_TOOL_H + +#include +#include +#include + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +#define err(msg...) fprintf(stderr, "Error: " msg) +#define warn(msg...) fprintf(stderr, "Warning: " msg) +#define info(msg...) fprintf(stderr, msg) + +#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) + +#define min(a, b) \ + ({ typeof(a) _a = (a); typeof(b) _b = (b); _a > _b ? _b : _a; }) +#define max(a, b) \ + ({ typeof(a) _a = (a); typeof(b) _b = (b); _a < _b ? _b : _a; }) + +#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) +#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) +#define BAD_ARG() ({ err("what is '%s'?\n", *argv); -1; }) + +#define BPF_TAG_FMT "%02hhx:%02hhx:%02hhx:%02hhx:" \ + "%02hhx:%02hhx:%02hhx:%02hhx" + +#define HELP_SPEC_PROGRAM \ + "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" + +enum bpf_obj_type { + BPF_OBJ_UNKNOWN, + BPF_OBJ_PROG, + BPF_OBJ_MAP, +}; + +extern const char *bin_name; + +bool is_prefix(const char *pfx, const char *str); +void print_hex(void *arg, unsigned int n, const char *sep); +void usage(void) __attribute__((noreturn)); + +struct cmd { + const char *cmd; + int (*func)(int argc, char **argv); +}; + +int cmd_select(const struct cmd *cmds, int argc, char **argv, + int (*help)(int argc, char **argv)); + +int get_fd_type(int fd); +const char *get_fd_type_name(enum bpf_obj_type type); +char *get_fdinfo(int fd, const char *key); +int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); + +int do_prog(int argc, char **arg); +int do_map(int argc, char **arg); + +int prog_parse_fd(int *argc, char ***argv); + +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); + +#endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c new file mode 100644 index 000000000000..0528a5379e6c --- /dev/null +++ b/tools/bpf/bpftool/map.c @@ -0,0 +1,744 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. 
+ * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "main.h" + +static const char * const map_type_name[] = { + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", +}; + +static unsigned int get_possible_cpus(void) +{ + static unsigned int result; + char buf[128]; + long int n; + char *ptr; + int fd; + + if (result) + return result; + + fd = open("/sys/devices/system/cpu/possible", O_RDONLY); + if (fd < 0) { + err("can't open sysfs possible cpus\n"); + exit(-1); + } + + n = read(fd, buf, sizeof(buf)); + if (n < 2) { + err("can't read sysfs possible cpus\n"); + exit(-1); + } + close(fd); + + if (n == sizeof(buf)) { + err("read sysfs possible cpus overflow\n"); + exit(-1); + } + + ptr = buf; + n = 0; + while (*ptr && *ptr != '\n') { + unsigned int a, b; + + if (sscanf(ptr, "%u-%u", &a, &b) == 2) { + n += b - a + 1; + + ptr = strchr(ptr, '-') + 1; + } else if (sscanf(ptr, "%u", &a) == 1) { + n++; + } else { + assert(0); + } + + while (isdigit(*ptr)) + ptr++; + if (*ptr == ',') + ptr++; + } + + result = n; + + return result; +} + +static bool map_is_per_cpu(__u32 type) +{ + return type == BPF_MAP_TYPE_PERCPU_HASH || + type == BPF_MAP_TYPE_PERCPU_ARRAY || + type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + +static bool map_is_map_of_maps(__u32 type) +{ + return type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + type == BPF_MAP_TYPE_HASH_OF_MAPS; +} + +static bool map_is_map_of_progs(__u32 type) +{ + return type == BPF_MAP_TYPE_PROG_ARRAY; +} + +static void *alloc_value(struct bpf_map_info *info) +{ + if (map_is_per_cpu(info->type)) + return malloc(info->value_size * get_possible_cpus()); + else + return malloc(info->value_size); +} + +static int map_parse_fd(int *argc, char ***argv) +{ + int fd; + + if (is_prefix(**argv, 
"id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + err("can't parse %s as ID\n", **argv); + return -1; + } + NEXT_ARGP(); + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) + err("get map by id (%u): %s\n", id, strerror(errno)); + return fd; + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + return open_obj_pinned_any(path, BPF_OBJ_MAP); + } + + err("expected 'id' or 'pinned', got: '%s'?\n", **argv); + return -1; +} + +static int +map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) +{ + int err; + int fd; + + fd = map_parse_fd(argc, argv); + if (fd < 0) + return -1; + + err = bpf_obj_get_info_by_fd(fd, info, info_len); + if (err) { + err("can't get map info: %s\n", strerror(errno)); + close(fd); + return err; + } + + return fd; +} + +static void print_entry(struct bpf_map_info *info, unsigned char *key, + unsigned char *value) +{ + if (!map_is_per_cpu(info->type)) { + bool single_line, break_names; + + break_names = info->key_size > 16 || info->value_size > 16; + single_line = info->key_size + info->value_size <= 24 && + !break_names; + + printf("key:%c", break_names ? '\n' : ' '); + print_hex(key, info->key_size, " "); + + printf(single_line ? " " : "\n"); + + printf("value:%c", break_names ? '\n' : ' '); + print_hex(value, info->value_size, " "); + + printf("\n"); + } else { + unsigned int i, n; + + n = get_possible_cpus(); + + printf("key:\n"); + print_hex(key, info->key_size, " "); + printf("\n"); + for (i = 0; i < n; i++) { + printf("value (CPU %02d):%c", + i, info->value_size > 16 ? '\n' : ' '); + print_hex(value + i * info->value_size, + info->value_size, " "); + printf("\n"); + } + } +} + +static char **parse_bytes(char **argv, const char *name, unsigned char *val, + unsigned int n) +{ + unsigned int i = 0; + char *endptr; + + while (i < n && argv[i]) { + val[i] = strtoul(argv[i], &endptr, 0); + if (*endptr) { + err("error parsing byte: %s\n", argv[i]); + break; + } + i++; + } + + if (i != n) { + err("%s expected %d bytes got %d\n", name, n, i); + return NULL; + } + + return argv + i; +} + +static int parse_elem(char **argv, struct bpf_map_info *info, + void *key, void *value, __u32 key_size, __u32 value_size, + __u32 *flags, __u32 **value_fd) +{ + if (!*argv) { + if (!key && !value) + return 0; + err("did not find %s\n", key ? 
"key" : "value"); + return -1; + } + + if (is_prefix(*argv, "key")) { + if (!key) { + if (key_size) + err("duplicate key\n"); + else + err("unnecessary key\n"); + return -1; + } + + argv = parse_bytes(argv + 1, "key", key, key_size); + if (!argv) + return -1; + + return parse_elem(argv, info, NULL, value, key_size, value_size, + flags, value_fd); + } else if (is_prefix(*argv, "value")) { + int fd; + + if (!value) { + if (value_size) + err("duplicate value\n"); + else + err("unnecessary value\n"); + return -1; + } + + argv++; + + if (map_is_map_of_maps(info->type)) { + int argc = 2; + + if (value_size != 4) { + err("value smaller than 4B for map in map?\n"); + return -1; + } + if (!argv[0] || !argv[1]) { + err("not enough value arguments for map in map\n"); + return -1; + } + + fd = map_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + *value_fd = value; + **value_fd = fd; + } else if (map_is_map_of_progs(info->type)) { + int argc = 2; + + if (value_size != 4) { + err("value smaller than 4B for map of progs?\n"); + return -1; + } + if (!argv[0] || !argv[1]) { + err("not enough value arguments for map of progs\n"); + return -1; + } + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + *value_fd = value; + **value_fd = fd; + } else { + argv = parse_bytes(argv, "value", value, value_size); + if (!argv) + return -1; + } + + return parse_elem(argv, info, key, NULL, key_size, value_size, + flags, NULL); + } else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") || + is_prefix(*argv, "exist")) { + if (!flags) { + err("flags specified multiple times: %s\n", *argv); + return -1; + } + + if (is_prefix(*argv, "any")) + *flags = BPF_ANY; + else if (is_prefix(*argv, "noexist")) + *flags = BPF_NOEXIST; + else if (is_prefix(*argv, "exist")) + *flags = BPF_EXIST; + + return parse_elem(argv + 1, info, key, value, key_size, + value_size, NULL, value_fd); + } + + err("expected key or value, got: %s\n", *argv); + return -1; +} + +static int show_map_close(int fd, struct bpf_map_info *info) +{ + char *memlock; + + memlock = get_fdinfo(fd, "memlock"); + close(fd); + + printf("%u: ", info->id); + if (info->type < ARRAY_SIZE(map_type_name)) + printf("%s ", map_type_name[info->type]); + else + printf("type %u ", info->type); + + if (*info->name) + printf("name %s ", info->name); + + printf("flags 0x%x\n", info->map_flags); + printf("\tkey %uB value %uB max_entries %u", + info->key_size, info->value_size, info->max_entries); + + if (memlock) + printf(" memlock %sB", memlock); + free(memlock); + + printf("\n"); + + return 0; +} + +static int do_show(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + __u32 id = 0; + int err; + int fd; + + if (argc == 2) { + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + return show_map_close(fd, &info); + } + + if (argc) + return BAD_ARG(); + + while (true) { + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + err("can't get next map: %s\n", strerror(errno)); + if (errno == EINVAL) + err("kernel too old?\n"); + return -1; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + err("can't get map by id (%u): %s\n", + id, strerror(errno)); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get map info: %s\n", strerror(errno)); + close(fd); + return -1; + } + + show_map_close(fd, &info); + } + + return errno == ENOENT ? 
0 : -1; +} + +static int do_dump(int argc, char **argv) +{ + void *key, *value, *prev_key; + unsigned int num_elems = 0; + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int err; + int fd; + + if (argc != 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) { + err("Dumping maps of maps and program maps not supported\n"); + close(fd); + return -1; + } + + key = malloc(info.key_size); + value = alloc_value(&info); + if (!key || !value) { + err("mem alloc failed\n"); + err = -1; + goto exit_free; + } + + prev_key = NULL; + while (true) { + err = bpf_map_get_next_key(fd, prev_key, key); + if (err) { + if (errno == ENOENT) + err = 0; + break; + } + + if (!bpf_map_lookup_elem(fd, key, value)) { + print_entry(&info, key, value); + } else { + info("can't lookup element with key: "); + print_hex(key, info.key_size, " "); + printf("\n"); + } + + prev_key = key; + num_elems++; + } + + printf("Found %u element%s\n", num_elems, num_elems != 1 ? "s" : ""); + +exit_free: + free(key); + free(value); + close(fd); + + return err; +} + +static int do_update(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + __u32 *value_fd = NULL; + __u32 flags = BPF_ANY; + void *key, *value; + int fd, err; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + key = malloc(info.key_size); + value = alloc_value(&info); + if (!key || !value) { + err("mem alloc failed"); + err = -1; + goto exit_free; + } + + err = parse_elem(argv, &info, key, value, info.key_size, + info.value_size, &flags, &value_fd); + if (err) + goto exit_free; + + err = bpf_map_update_elem(fd, key, value, flags); + if (err) { + err("update failed: %s\n", strerror(errno)); + goto exit_free; + } + +exit_free: + if (value_fd) + close(*value_fd); + free(key); + free(value); + close(fd); + + return err; +} + +static int do_lookup(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key, *value; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + key = malloc(info.key_size); + value = alloc_value(&info); + if (!key || !value) { + err("mem alloc failed"); + err = -1; + goto exit_free; + } + + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + if (err) + goto exit_free; + + err = bpf_map_lookup_elem(fd, key, value); + if (!err) { + print_entry(&info, key, value); + } else if (errno == ENOENT) { + printf("key:\n"); + print_hex(key, info.key_size, " "); + printf("\n\nNot found\n"); + } else { + err("lookup failed: %s\n", strerror(errno)); + } + +exit_free: + free(key); + free(value); + close(fd); + + return err; +} + +static int do_getnext(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key, *nextkey; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + key = malloc(info.key_size); + nextkey = malloc(info.key_size); + if (!key || !nextkey) { + err("mem alloc failed"); + err = -1; + goto exit_free; + } + + if (argc) { + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, + NULL, NULL); + if (err) + goto exit_free; + } else { + free(key); + key = NULL; + } + + err = bpf_map_get_next_key(fd, key, nextkey); + if (err) { + err("can't get next key: 
%s\n", strerror(errno)); + goto exit_free; + } + + if (key) { + printf("key:\n"); + print_hex(key, info.key_size, " "); + printf("\n"); + } else { + printf("key: None\n"); + } + + printf("next key:\n"); + print_hex(nextkey, info.key_size, " "); + printf("\n"); + +exit_free: + free(nextkey); + free(key); + close(fd); + + return err; +} + +static int do_delete(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + key = malloc(info.key_size); + if (!key) { + err("mem alloc failed"); + err = -1; + goto exit_free; + } + + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + if (err) + goto exit_free; + + err = bpf_map_delete_elem(fd, key); + if (err) + err("delete failed: %s\n", strerror(errno)); + +exit_free: + free(key); + close(fd); + + return err; +} + +static int do_pin(int argc, char **argv) +{ + return do_pin_any(argc, argv, bpf_map_get_fd_by_id); +} + +static int do_help(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s %s show [MAP]\n" + " %s %s dump MAP\n" + " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" + " %s %s lookup MAP key BYTES\n" + " %s %s getnext MAP [key BYTES]\n" + " %s %s delete MAP key BYTES\n" + " %s %s pin MAP FILE\n" + " %s %s help\n" + "\n" + " MAP := { id MAP_ID | pinned FILE }\n" + " " HELP_SPEC_PROGRAM "\n" + " VALUE := { BYTES | MAP | PROG }\n" + " UPDATE_FLAGS := { any | exist | noexist }\n" + "", + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "help", do_help }, + { "dump", do_dump }, + { "update", do_update }, + { "lookup", do_lookup }, + { "getnext", do_getnext }, + { "delete", do_delete }, + { "pin", do_pin }, + { 0 } +}; + +int do_map(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c new file mode 100644 index 000000000000..421ba89ce86a --- /dev/null +++ b/tools/bpf/bpftool/prog.c @@ -0,0 +1,456 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Author: Jakub Kicinski */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "main.h" + +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", +}; + +static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) +{ + struct timespec real_time_ts, boot_time_ts; + time_t wallclock_secs; + struct tm load_tm; + + buf[--size] = '\0'; + + if (clock_gettime(CLOCK_REALTIME, &real_time_ts) || + clock_gettime(CLOCK_BOOTTIME, &boot_time_ts)) { + perror("Can't read clocks"); + snprintf(buf, size, "%llu", nsecs / 1000000000); + return; + } + + wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + + nsecs / 1000000000; + + if (!localtime_r(&wallclock_secs, &load_tm)) { + snprintf(buf, size, "%llu", nsecs / 1000000000); + return; + } + + strftime(buf, size, "%b %d/%H:%M", &load_tm); +} + +static int prog_fd_by_tag(unsigned char *tag) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + unsigned int id = 0; + int err; + int fd; + + while (true) { + err = bpf_prog_get_next_id(id, &id); + if (err) { + err("%s\n", strerror(errno)); + return -1; + } + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + err("can't get prog by id (%u): %s\n", + id, strerror(errno)); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get prog info (%u): %s\n", + id, strerror(errno)); + close(fd); + return -1; + } + + if (!memcmp(tag, info.tag, BPF_TAG_SIZE)) + return fd; + + close(fd); + } +} + +int prog_parse_fd(int *argc, char ***argv) +{ + int fd; + + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + err("can't parse %s as ID\n", **argv); + return -1; + } + NEXT_ARGP(); + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) + err("get by id (%u): %s\n", id, strerror(errno)); + return fd; + } else if (is_prefix(**argv, "tag")) { + unsigned char tag[BPF_TAG_SIZE]; + + NEXT_ARGP(); + + if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, + tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) + != BPF_TAG_SIZE) { + err("can't parse tag\n"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_tag(tag); + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + return open_obj_pinned_any(path, BPF_OBJ_PROG); + } + + err("expected 'id', 'tag' or 'pinned', got: '%s'?\n", **argv); + return -1; +} + +static void show_prog_maps(int fd, u32 num_maps) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + __u32 map_ids[num_maps]; + unsigned int i; + int err; + + 
info.nr_map_ids = num_maps; + info.map_ids = ptr_to_u64(map_ids); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err || !info.nr_map_ids) + return; + + printf(" map_ids "); + for (i = 0; i < info.nr_map_ids; i++) + printf("%u%s", map_ids[i], + i == info.nr_map_ids - 1 ? "" : ","); +} + +static int show_prog(int fd) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + char *memlock; + int err; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get prog info: %s\n", strerror(errno)); + return -1; + } + + printf("%u: ", info.id); + if (info.type < ARRAY_SIZE(prog_type_name)) + printf("%s ", prog_type_name[info.type]); + else + printf("type %u ", info.type); + + if (*info.name) + printf("name %s ", info.name); + + printf("tag "); + print_hex(info.tag, BPF_TAG_SIZE, ":"); + printf("\n"); + + if (info.load_time) { + char buf[32]; + + print_boot_time(info.load_time, buf, sizeof(buf)); + + /* Piggy back on load_time, since 0 uid is a valid one */ + printf("\tloaded_at %s uid %u\n", buf, info.created_by_uid); + } + + printf("\txlated %uB", info.xlated_prog_len); + + if (info.jited_prog_len) + printf(" jited %uB", info.jited_prog_len); + else + printf(" not jited"); + + memlock = get_fdinfo(fd, "memlock"); + if (memlock) + printf(" memlock %sB", memlock); + free(memlock); + + if (info.nr_map_ids) + show_prog_maps(fd, info.nr_map_ids); + + printf("\n"); + + return 0; +} + +static int do_show(int argc, char **argv) +{ __u32 id = 0; + int err; + int fd; + + if (argc == 2) { + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + return show_prog(fd); + } + + if (argc) + return BAD_ARG(); + + while (true) { + err = bpf_prog_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + err("can't get next program: %s\n", strerror(errno)); + if (errno == EINVAL) + err("kernel too old?\n"); + return -1; + } + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + err("can't get prog by id (%u): %s\n", + id, strerror(errno)); + return -1; + } + + err = show_prog(fd); + close(fd); + if (err) + return err; + } + + return 0; +} + +static int do_dump(int argc, char **argv) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + bool can_disasm = false; + unsigned int buf_size; + char *filepath = NULL; + bool opcodes = false; + unsigned char *buf; + __u32 *member_len; + __u64 *member_ptr; + ssize_t n; + int err; + int fd; + + if (is_prefix(*argv, "jited")) { + member_len = &info.jited_prog_len; + member_ptr = &info.jited_prog_insns; + can_disasm = true; + } else if (is_prefix(*argv, "xlated")) { + member_len = &info.xlated_prog_len; + member_ptr = &info.xlated_prog_insns; + } else { + err("expected 'xlated' or 'jited', got: %s\n", *argv); + return -1; + } + NEXT_ARG(); + + if (argc < 2) + usage(); + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + if (is_prefix(*argv, "file")) { + NEXT_ARG(); + if (!argc) { + err("expected file path\n"); + return -1; + } + + filepath = *argv; + NEXT_ARG(); + } else if (is_prefix(*argv, "opcodes")) { + opcodes = true; + NEXT_ARG(); + } + + if (!filepath && !can_disasm) { + err("expected 'file' got %s\n", *argv); + return -1; + } + if (argc) { + usage(); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get prog info: %s\n", strerror(errno)); + return -1; + } + + if (!*member_len) { + info("no instructions returned\n"); + close(fd); + return 0; + } + + buf_size = *member_len; + + buf = malloc(buf_size); + if (!buf) { + err("mem alloc failed\n"); 
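	/* no buffer to dump into; release the prog fd and give up */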
+ close(fd); + return -1; + } + + memset(&info, 0, sizeof(info)); + + *member_ptr = ptr_to_u64(buf); + *member_len = buf_size; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + close(fd); + if (err) { + err("can't get prog info: %s\n", strerror(errno)); + goto err_free; + } + + if (*member_len > buf_size) { + info("too many instructions returned\n"); + goto err_free; + } + + if (filepath) { + fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd < 0) { + err("can't open file %s: %s\n", filepath, + strerror(errno)); + goto err_free; + } + + n = write(fd, buf, *member_len); + close(fd); + if (n != *member_len) { + err("error writing output file: %s\n", + n < 0 ? strerror(errno) : "short write"); + goto err_free; + } + } else { + disasm_print_insn(buf, *member_len, opcodes); + } + + free(buf); + + return 0; + +err_free: + free(buf); + return -1; +} + +static int do_pin(int argc, char **argv) +{ + return do_pin_any(argc, argv, bpf_prog_get_fd_by_id); +} + +static int do_help(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s %s show [PROG]\n" + " %s %s dump xlated PROG file FILE\n" + " %s %s dump jited PROG [file FILE] [opcodes]\n" + " %s %s pin PROG FILE\n" + " %s %s help\n" + "\n" + " " HELP_SPEC_PROGRAM "\n" + "", + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "dump", do_dump }, + { "pin", do_pin }, + { 0 } +}; + +int do_prog(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} From ff69c21a85a4ed3d8dcf79dd750a0a9eda4fca83 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Oct 2017 20:10:05 -0700 Subject: [PATCH 0552/2177] tools: bpftool: add documentation Add documentation for bpftool. Separate files for each subcommand. Use rst format. Documentation is compiled into man pages using rst2man. Signed-off-by: David Beckett Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/Documentation/Makefile | 34 ++++++ .../bpf/bpftool/Documentation/bpftool-map.rst | 110 ++++++++++++++++++ .../bpftool/Documentation/bpftool-prog.rst | 79 +++++++++++++ tools/bpf/bpftool/Documentation/bpftool.rst | 34 ++++++ tools/bpf/bpftool/Makefile | 6 + 5 files changed, 263 insertions(+) create mode 100644 tools/bpf/bpftool/Documentation/Makefile create mode 100644 tools/bpf/bpftool/Documentation/bpftool-map.rst create mode 100644 tools/bpf/bpftool/Documentation/bpftool-prog.rst create mode 100644 tools/bpf/bpftool/Documentation/bpftool.rst diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile new file mode 100644 index 000000000000..bde77d7c4390 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -0,0 +1,34 @@ +include ../../../scripts/Makefile.include +include ../../../scripts/utilities.mak + +INSTALL ?= install +RM ?= rm -f + +# Make the path relative to DESTDIR, not prefix +ifndef DESTDIR +prefix?=$(HOME) +endif +mandir ?= $(prefix)/share/man +man8dir = $(mandir)/man8 + +MAN8_RST = $(wildcard *.rst) + +_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST)) +DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8)) + +man: man8 +man8: $(DOC_MAN8) + +$(OUTPUT)%.8: %.rst + rst2man $< > $@ + +clean: + $(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8) + +install: man + $(call QUIET_INSTALL, Documentation-man) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \ + $(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir); + +.PHONY: man man8 clean install +.DEFAULT_GOAL := man diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst new file mode 100644 index 000000000000..ff63e89e4b6c --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -0,0 +1,110 @@ +================ +bpftool-map +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF maps +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** **map** *COMMAND* + + *COMMANDS* := + { show | dump | update | lookup | getnext | delete | pin | help } + +MAP COMMANDS +============= + +| **bpftool** map show [*MAP*] +| **bpftool** map dump *MAP* +| **bpftool** map update *MAP* key *BYTES* value *VALUE* [*UPDATE_FLAGS*] +| **bpftool** map lookup *MAP* key *BYTES* +| **bpftool** map getnext *MAP* [key *BYTES*] +| **bpftool** map delete *MAP* key *BYTES* +| **bpftool** map pin *MAP* *FILE* +| **bpftool** map help +| +| *MAP* := { id MAP_ID | pinned FILE } +| *VALUE* := { BYTES | MAP | PROGRAM } +| *UPDATE_FLAGS* := { any | exist | noexist } + +DESCRIPTION +=========== + **bpftool map show** [*MAP*] + Show information about loaded maps. If *MAP* is specified + show information only about given map, otherwise list all + maps currently loaded on the system. + + Output will start with map ID followed by map type and + zero or more named attributes (depending on kernel version). + + **bpftool map dump** *MAP* + Dump all entries in a given *MAP*. + + **bpftool map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] + Update map entry for a given *KEY*. + + *UPDATE_FLAGS* can be one of: **any** update existing entry + or add if doesn't exit; **exist** update only if entry already + exists; **noexist** update only if entry doesn't exist. + + **bpftool map lookup** *MAP* **key** *BYTES* + Lookup **key** in the map. 
+ + **bpftool map getnext** *MAP* [**key** *BYTES*] + Get next key. If *key* is not specified, get first key. + + **bpftool map delete** *MAP* **key** *BYTES* + Remove entry from the map. + + **bpftool map pin** *MAP* *FILE* + Pin map *MAP* as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. + + **bpftool map help** + Print short help message. + +EXAMPLES +======== +**# bpftool map show** +:: + + 10: hash name some_map flags 0x0 + key 4B value 8B max_entries 2048 memlock 167936B + +**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04** + +**# bpftool map lookup id 10 key 0 1 2 3** + +:: + + key: 00 01 02 03 value: 00 01 02 03 04 05 06 07 + + +**# bpftool map dump id 10** +:: + + key: 00 01 02 03 value: 00 01 02 03 04 05 06 07 + key: 0d 00 07 00 value: 02 00 00 00 01 02 03 04 + Found 2 elements + +**# bpftool map getnext id 10 key 0 1 2 3** +:: + + key: + 00 01 02 03 + next key: + 0d 00 07 00 + +| +| **# mount -t bpf none /sys/fs/bpf/** +| **# bpftool map pin id 10 /sys/fs/bpf/map** +| **# bpftool map del pinned /sys/fs/bpf/map key 13 00 07 00** + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst new file mode 100644 index 000000000000..57fc4b9924ea --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -0,0 +1,79 @@ +================ +bpftool-prog +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF progs +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + +| **bpftool** prog show [*PROG*] +| **bpftool** prog dump xlated *PROG* file *FILE* +| **bpftool** prog dump jited *PROG* [file *FILE*] [opcodes] +| **bpftool** prog pin *PROG* *FILE* +| **bpftool** prog help +| +| *PROG* := { id *PROG_ID* | pinned *FILE* | tag *PROG_TAG* } + +DESCRIPTION +=========== + **bpftool prog show** [*PROG*] + Show information about loaded programs. If *PROG* is + specified show information only about given program, otherwise + list all programs currently loaded on the system. + + Output will start with program ID followed by program type and + zero or more named attributes (depending on kernel version). + + **bpftool prog dump xlated** *PROG* **file** *FILE* + Dump eBPF instructions of the program from the kernel to a + file. + + **bpftool prog dump jited** *PROG* [**file** *FILE*] [**opcodes**] + Dump jited image (host machine code) of the program. + If *FILE* is specified image will be written to a file, + otherwise it will be disassembled and printed to stdout. + + **opcodes** controls if raw opcodes will be printed. + + **bpftool prog pin** *PROG* *FILE* + Pin program *PROG* as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. + + **bpftool prog help** + Print short help message. 
+ +EXAMPLES +======== +**# bpftool prog show** +:: + + 10: xdp name some_prog tag 00:5a:3d:21:23:62:0c:8b + loaded_at Sep 29/20:11 uid 0 + xlated 528B jited 370B memlock 4096B map_ids 10 + +| +| **# bpftool prog dump xlated id 10 file /tmp/t** +| **# ls -l /tmp/t** +| -rw------- 1 root root 560 Jul 22 01:42 /tmp/t + +| +| **# bpftool prog dum jited pinned /sys/fs/bpf/prog** + +:: + + push %rbp + mov %rsp,%rbp + sub $0x228,%rsp + sub $0x28,%rbp + mov %rbx,0x0(%rbp) + + + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst new file mode 100644 index 000000000000..f1df1893fb54 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -0,0 +1,34 @@ +================ +BPFTOOL +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF programs and maps +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** *OBJECT* { *COMMAND* | help } + + **bpftool** batch file *FILE* + + *OBJECT* := { **map** | **program** } + + *MAP-COMMANDS* := + { show | dump | update | lookup | getnext | delete | pin | help } + + *PROG-COMMANDS* := { show | dump jited | dump xlated | pin | help } + +DESCRIPTION +=========== + *bpftool* allows for inspection and simple modification of BPF objects + on the system. + + Note that format of the output of all tools is not guaranteed to be + stable and should not be depended upon. + +SEE ALSO +======== + **bpftool-map**\ (8), **bpftool-prog**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index a7151f47fb40..8705ee44664d 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -74,6 +74,12 @@ clean: $(LIBBPF)-clean install: install $(OUTPUT)bpftool $(prefix)/sbin/bpftool +doc: + $(Q)$(MAKE) -C Documentation/ + +doc-install: + $(Q)$(MAKE) -C Documentation/ install + FORCE: .PHONY: all clean FORCE From 44f209807ee87a5eddf6c0432f3fb63cec27bad8 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Oct 2017 16:46:50 -0400 Subject: [PATCH 0553/2177] VSOCK: export socket tables for sock_diag interface The socket table symbols need to be exported from vsock.ko so that the vsock_diag.ko module will be able to traverse sockets. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/net/af_vsock.h | 5 +++++ net/vmw_vsock/af_vsock.c | 10 ++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f9fb566e75cf..30cba806e344 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -27,6 +27,11 @@ #define LAST_RESERVED_PORT 1023 +#define VSOCK_HASH_SIZE 251 +extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +extern spinlock_t vsock_table_lock; + #define vsock_sk(__sk) ((struct vsock_sock *)__sk) #define sk_vsock(__vsk) (&(__vsk)->sk) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index dfc8c51e4d74..9afe4da8c67d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -153,7 +153,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function * mods with VSOCK_HASH_SIZE to ensure this. 
*/ -#define VSOCK_HASH_SIZE 251 #define MAX_PORT_RETRIES 24 #define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) @@ -168,9 +167,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); #define vsock_connected_sockets_vsk(vsk) \ vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) -static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; -static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; -static DEFINE_SPINLOCK(vsock_table_lock); +struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +EXPORT_SYMBOL_GPL(vsock_bind_table); +struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +EXPORT_SYMBOL_GPL(vsock_connected_table); +DEFINE_SPINLOCK(vsock_table_lock); +EXPORT_SYMBOL_GPL(vsock_table_lock); /* Autobind this socket to the local address if necessary. */ static int vsock_auto_bind(struct vsock_sock *vsk) From bf359b8127719535f88494adb3c2b73c06667dcd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Oct 2017 16:46:51 -0400 Subject: [PATCH 0554/2177] VSOCK: move __vsock_in_bound/connected_table() to af_vsock.h The vsock_diag.ko module will need to check socket table membership. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/net/af_vsock.h | 12 ++++++++++++ net/vmw_vsock/af_vsock.c | 10 ---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 30cba806e344..3dd217718a2f 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -180,6 +180,18 @@ const struct vsock_transport *vsock_core_get_transport(void); /**** UTILS ****/ +/* vsock_table_lock must be held */ +static inline bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +/* vsock_table_lock must be held */ +static inline bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + void vsock_release_pending(struct sock *pending); void vsock_add_pending(struct sock *listener, struct sock *pending); void vsock_remove_pending(struct sock *listener, struct sock *pending); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9afe4da8c67d..9b179a0081b3 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -250,16 +250,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, return NULL; } -static bool __vsock_in_bound_table(struct vsock_sock *vsk) -{ - return !list_empty(&vsk->bound_table); -} - -static bool __vsock_in_connected_table(struct vsock_sock *vsk) -{ - return !list_empty(&vsk->connected_table); -} - static void vsock_insert_unbound(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); From 3b4477d2dcf2709d0be89e2a8dced3d0f4a017f2 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Oct 2017 16:46:52 -0400 Subject: [PATCH 0555/2177] VSOCK: use TCP state constants for sk_state There are two state fields: socket->state and sock->sk_state. The socket->state field uses SS_UNCONNECTED, SS_CONNECTED, etc while the sock->sk_state typically uses values that match TCP state constants (TCP_CLOSE, TCP_ESTABLISHED). AF_VSOCK does not follow this convention and instead uses SS_* constants for both fields. The sk_state field will be exposed to userspace through the vsock_diag interface for ss(8), netstat(8), and other programs. This patch switches sk_state to TCP state constants so that the meaning of this field is consistent with other address families. Not just AF_INET and AF_INET6 use the TCP constants, AF_UNIX and others do too. 
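For illustration only (not part of this patch): once sk_state carries TCP_* values, a sock_diag consumer can ask the kernel for just the listening vsock sockets with the same constants ss(8) already uses for other families. This is a minimal sketch that assumes the vsock_diag ABI added later in this series (struct vsock_diag_req from linux/vm_sockets_diag.h) and an AF_VSOCK definition in the libc headers; error handling and reply parsing are omitted.

#include <netinet/tcp.h>             /* TCP_LISTEN */
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/vm_sockets_diag.h>   /* struct vsock_diag_req, added later in this series */
#include <sys/socket.h>
#include <unistd.h>

/* Ask the kernel to dump only listening AF_VSOCK sockets; returns the
 * netlink fd the caller recv()s the NLM_F_MULTI dump from, or -1 on error.
 */
static int request_listening_vsocks(void)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct vsock_diag_req req;
	} msg = {
		.nlh = {
			.nlmsg_len   = sizeof(msg),
			.nlmsg_type  = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family = AF_VSOCK,
			.vdiag_states = 1 << TCP_LISTEN, /* listeners only */
		},
	};
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

	if (fd < 0)
		return -1;
	if (sendto(fd, &msg, sizeof(msg), 0,
		   (struct sockaddr *)&nladdr, sizeof(nladdr)) != (ssize_t)sizeof(msg)) {
		close(fd);
		return -1;
	}
	return fd;
}
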
The following mapping was used to convert the code: SS_FREE -> TCP_CLOSE SS_UNCONNECTED -> TCP_CLOSE SS_CONNECTING -> TCP_SYN_SENT SS_CONNECTED -> TCP_ESTABLISHED SS_DISCONNECTING -> TCP_CLOSING VSOCK_SS_LISTEN -> TCP_LISTEN In __vsock_create() the sk_state initialization was dropped because sock_init_data() already initializes sk_state to TCP_CLOSE. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/net/af_vsock.h | 3 -- net/vmw_vsock/af_vsock.c | 46 ++++++++++++-------- net/vmw_vsock/hyperv_transport.c | 12 ++--- net/vmw_vsock/virtio_transport.c | 2 +- net/vmw_vsock/virtio_transport_common.c | 22 +++++----- net/vmw_vsock/vmci_transport.c | 34 +++++++-------- net/vmw_vsock/vmci_transport_notify.c | 2 +- net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +- 8 files changed, 64 insertions(+), 59 deletions(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 3dd217718a2f..9324ac2d9ff2 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -22,9 +22,6 @@ #include "vsock_addr.h" -/* vsock-specific sock->sk_state constants */ -#define VSOCK_SS_LISTEN 255 - #define LAST_RESERVED_PORT 1023 #define VSOCK_HASH_SIZE 251 diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9b179a0081b3..98359c19522f 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -36,7 +36,7 @@ * not support simultaneous connects (two "client" sockets connecting). * * - "Server" sockets are referred to as listener sockets throughout this - * implementation because they are in the VSOCK_SS_LISTEN state. When a + * implementation because they are in the TCP_LISTEN state. When a * connection request is received (the second kind of socket mentioned above), * we create a new socket and refer to it as a pending socket. These pending * sockets are placed on the pending connection list of the listener socket. @@ -82,6 +82,15 @@ * argument, we must ensure the reference count is increased to ensure the * socket isn't freed before the function is run; the deferred function will * then drop the reference. + * + * - sk->sk_state uses the TCP state constants because they are widely used by + * other address families and exposed to userspace tools like ss(8): + * + * TCP_CLOSE - unconnected + * TCP_SYN_SENT - connecting + * TCP_ESTABLISHED - connected + * TCP_CLOSING - disconnecting + * TCP_LISTEN - listening */ #include @@ -477,7 +486,7 @@ void vsock_pending_work(struct work_struct *work) if (vsock_in_connected_table(vsk)) vsock_remove_connected(vsk); - sk->sk_state = SS_FREE; + sk->sk_state = TCP_CLOSE; out: release_sock(sk); @@ -617,7 +626,6 @@ struct sock *__vsock_create(struct net *net, sk->sk_destruct = vsock_sk_destruct; sk->sk_backlog_rcv = vsock_queue_rcv_skb; - sk->sk_state = 0; sock_reset_flag(sk, SOCK_DONE); INIT_LIST_HEAD(&vsk->bound_table); @@ -891,7 +899,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, /* Listening sockets that have connections in their accept * queue can be read. */ - if (sk->sk_state == VSOCK_SS_LISTEN + if (sk->sk_state == TCP_LISTEN && !vsock_is_accept_queue_empty(sk)) mask |= POLLIN | POLLRDNORM; @@ -920,7 +928,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, } /* Connected sockets that can produce data can be written. 
*/ - if (sk->sk_state == SS_CONNECTED) { + if (sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( @@ -942,7 +950,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, * POLLOUT|POLLWRNORM when peer is closed and nothing to read, * but local send is not shutdown. */ - if (sk->sk_state == SS_UNCONNECTED) { + if (sk->sk_state == TCP_CLOSE) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) mask |= POLLOUT | POLLWRNORM; @@ -1112,9 +1120,9 @@ static void vsock_connect_timeout(struct work_struct *work) sk = sk_vsock(vsk); lock_sock(sk); - if (sk->sk_state == SS_CONNECTING && + if (sk->sk_state == TCP_SYN_SENT && (sk->sk_shutdown != SHUTDOWN_MASK)) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); cancel = 1; @@ -1160,7 +1168,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = -EALREADY; break; default: - if ((sk->sk_state == VSOCK_SS_LISTEN) || + if ((sk->sk_state == TCP_LISTEN) || vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { err = -EINVAL; goto out; @@ -1183,7 +1191,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; - sk->sk_state = SS_CONNECTING; + sk->sk_state = TCP_SYN_SENT; err = transport->connect(vsk); if (err < 0) @@ -1203,7 +1211,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, timeout = vsk->connect_timeout; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) { if (flags & O_NONBLOCK) { /* If we're not going to block, we schedule a timeout * function to generate a timeout on the connection @@ -1226,13 +1234,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (signal_pending(current)) { err = sock_intr_errno(timeout); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); goto out_wait; @@ -1243,7 +1251,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (sk->sk_err) { err = -sk->sk_err; - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; } else { err = 0; @@ -1276,7 +1284,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, goto out; } - if (listener->sk_state != VSOCK_SS_LISTEN) { + if (listener->sk_state != TCP_LISTEN) { err = -EINVAL; goto out; } @@ -1366,7 +1374,7 @@ static int vsock_listen(struct socket *sock, int backlog) } sk->sk_max_ack_backlog = backlog; - sk->sk_state = VSOCK_SS_LISTEN; + sk->sk_state = TCP_LISTEN; err = 0; @@ -1546,7 +1554,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Callers should not provide a destination with stream sockets. */ if (msg->msg_namelen) { - err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; goto out; } @@ -1557,7 +1565,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if (sk->sk_state != SS_CONNECTED || + if (sk->sk_state != TCP_ESTABLISHED || !vsock_addr_bound(&vsk->local_addr)) { err = -ENOTCONN; goto out; @@ -1681,7 +1689,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, lock_sock(sk); - if (sk->sk_state != SS_CONNECTED) { + if (sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. Differentiate between that case and when a * peer has not connected or a local shutdown occured with the diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 14ed5a344cdf..bbac023e70d1 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -310,7 +310,7 @@ static void hvs_close_connection(struct vmbus_channel *chan) struct sock *sk = get_per_channel_state(chan); struct vsock_sock *vsk = vsock_sk(sk); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; @@ -344,8 +344,8 @@ static void hvs_open_connection(struct vmbus_channel *chan) if (!sk) return; - if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) || - (!conn_from_host && sk->sk_state != SS_CONNECTING)) + if ((conn_from_host && sk->sk_state != TCP_LISTEN) || + (!conn_from_host && sk->sk_state != TCP_SYN_SENT)) goto out; if (conn_from_host) { @@ -357,7 +357,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) if (!new) goto out; - new->sk_state = SS_CONNECTING; + new->sk_state = TCP_SYN_SENT; vnew = vsock_sk(new); hvs_new = vnew->trans; hvs_new->chan = chan; @@ -384,7 +384,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) vmbus_set_chn_rescind_callback(chan, hvs_close_connection); if (conn_from_host) { - new->sk_state = SS_CONNECTED; + new->sk_state = TCP_ESTABLISHED; sk->sk_ack_backlog++; hvs_addr_init(&vnew->local_addr, if_type); @@ -399,7 +399,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) vsock_enqueue_accept(sk, new); release_sock(sk); } else { - sk->sk_state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; sk->sk_socket->state = SS_CONNECTED; vsock_insert_connected(vsock_sk(sk)); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 403d86e80162..8e03bd3f3668 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock) static void virtio_vsock_reset_sock(struct sock *sk) { lock_sock(sk); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); release_sock(sk); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index edba7ab97563..3ae3a33da70b 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown = SHUTDOWN_MASK; if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); if (vsk->close_work_scheduled && @@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk) { struct sock *sk = &vsk->sk; - if (!(sk->sk_state == SS_CONNECTED || - sk->sk_state == SS_DISCONNECTING)) + if (!(sk->sk_state == 
TCP_ESTABLISHED || + sk->sk_state == TCP_CLOSING)) return true; /* Already received SHUTDOWN from peer, reply with RST */ @@ -801,7 +801,7 @@ virtio_transport_recv_connecting(struct sock *sk, switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RESPONSE: - sk->sk_state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; sk->sk_socket->state = SS_CONNECTED; vsock_insert_connected(vsk); sk->sk_state_change(sk); @@ -821,7 +821,7 @@ virtio_transport_recv_connecting(struct sock *sk, destroy: virtio_transport_reset(vsk, pkt); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = skerr; sk->sk_error_report(sk); return err; @@ -857,7 +857,7 @@ virtio_transport_recv_connected(struct sock *sk, vsk->peer_shutdown |= SEND_SHUTDOWN; if (vsk->peer_shutdown == SHUTDOWN_MASK && vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; if (le32_to_cpu(pkt->hdr.flags)) sk->sk_state_change(sk); break; @@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) lock_sock_nested(child, SINGLE_DEPTH_NESTING); - child->sk_state = SS_CONNECTED; + child->sk_state = TCP_ESTABLISHED; vchild = vsock_sk(child); vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), @@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) sk->sk_write_space(sk); switch (sk->sk_state) { - case VSOCK_SS_LISTEN: + case TCP_LISTEN: virtio_transport_recv_listen(sk, pkt); virtio_transport_free_pkt(pkt); break; - case SS_CONNECTING: + case TCP_SYN_SENT: virtio_transport_recv_connecting(sk, pkt); virtio_transport_free_pkt(pkt); break; - case SS_CONNECTED: + case TCP_ESTABLISHED: virtio_transport_recv_connected(sk, pkt); break; - case SS_DISCONNECTING: + case TCP_CLOSING: virtio_transport_recv_disconnecting(sk, pkt); virtio_transport_free_pkt(pkt); break; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 0206155bff53..391775e3575c 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -742,7 +742,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) /* The local context ID may be out of date, update it. */ vsk->local_addr.svm_cid = dst.svm_cid; - if (sk->sk_state == SS_CONNECTED) + if (sk->sk_state == TCP_ESTABLISHED) vmci_trans(vsk)->notify_ops->handle_notify_pkt( sk, pkt, true, &dst, &src, &bh_process_pkt); @@ -800,7 +800,9 @@ static void vmci_transport_handle_detach(struct sock *sk) * left in our consume queue. */ if (vsock_stream_has_data(vsk) <= 0) { - if (sk->sk_state == SS_CONNECTING) { + sk->sk_state = TCP_CLOSE; + + if (sk->sk_state == TCP_SYN_SENT) { /* The peer may detach from a queue pair while * we are still in the connecting state, i.e., * if the peer VM is killed after attaching to @@ -809,12 +811,10 @@ static void vmci_transport_handle_detach(struct sock *sk) * event like a reset. */ - sk->sk_state = SS_UNCONNECTED; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); return; } - sk->sk_state = SS_UNCONNECTED; } sk->sk_state_change(sk); } @@ -882,17 +882,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; switch (sk->sk_state) { - case VSOCK_SS_LISTEN: + case TCP_LISTEN: vmci_transport_recv_listen(sk, pkt); break; - case SS_CONNECTING: + case TCP_SYN_SENT: /* Processing of pending connections for servers goes through * the listening socket, so see vmci_transport_recv_listen() * for that path. 
*/ vmci_transport_recv_connecting_client(sk, pkt); break; - case SS_CONNECTED: + case TCP_ESTABLISHED: vmci_transport_recv_connected(sk, pkt); break; default: @@ -941,7 +941,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; switch (pending->sk_state) { - case SS_CONNECTING: + case TCP_SYN_SENT: err = vmci_transport_recv_connecting_server(sk, pending, pkt); @@ -1071,7 +1071,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vsock_add_pending(sk, pending); sk->sk_ack_backlog++; - pending->sk_state = SS_CONNECTING; + pending->sk_state = TCP_SYN_SENT; vmci_trans(vpending)->produce_size = vmci_trans(vpending)->consume_size = qp_size; vmci_trans(vpending)->queue_pair_size = qp_size; @@ -1196,11 +1196,11 @@ vmci_transport_recv_connecting_server(struct sock *listener, * the socket will be valid until it is removed from the queue. * * If we fail sending the attach below, we remove the socket from the - * connected list and move the socket to SS_UNCONNECTED before + * connected list and move the socket to TCP_CLOSE before * releasing the lock, so a pending slow path processing of an incoming * packet will not see the socket in the connected state in that case. */ - pending->sk_state = SS_CONNECTED; + pending->sk_state = TCP_ESTABLISHED; vsock_insert_connected(vpending); @@ -1231,7 +1231,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, destroy: pending->sk_err = skerr; - pending->sk_state = SS_UNCONNECTED; + pending->sk_state = TCP_CLOSE; /* As long as we drop our reference, all necessary cleanup will handle * when the cleanup function drops its reference and our destruct * implementation is called. Note that since the listen handler will @@ -1269,7 +1269,7 @@ vmci_transport_recv_connecting_client(struct sock *sk, * accounting (it can already be found since it's in the bound * table). */ - sk->sk_state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; sk->sk_socket->state = SS_CONNECTED; vsock_insert_connected(vsk); sk->sk_state_change(sk); @@ -1337,7 +1337,7 @@ vmci_transport_recv_connecting_client(struct sock *sk, destroy: vmci_transport_send_reset(sk, pkt); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = skerr; sk->sk_error_report(sk); return err; @@ -1525,7 +1525,7 @@ static int vmci_transport_recv_connected(struct sock *sk, sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown = SHUTDOWN_MASK; if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); break; @@ -1789,7 +1789,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk) err = vmci_transport_send_conn_request( sk, vmci_trans(vsk)->queue_pair_size); if (err < 0) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; return err; } } else { @@ -1799,7 +1799,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk) sk, vmci_trans(vsk)->queue_pair_size, supported_proto_versions); if (err < 0) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; return err; } diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 1406db4d97d1..41fb427f150a 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, * queue. Ask for notifications when there is something to * read. 
*/ - if (sk->sk_state == SS_CONNECTED) { + if (sk->sk_state == TCP_ESTABLISHED) { if (!send_waiting_read(sk, 1)) return -1; diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index f3a0afc46208..0cc84f2bb05e 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, * queue. Ask for notifications when there is something to * read. */ - if (sk->sk_state == SS_CONNECTED) + if (sk->sk_state == TCP_ESTABLISHED) vsock_block_update_write_window(sk); *data_ready_now = false; } From 413a4317aca7d6367d57a5971b0c461f03851207 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Oct 2017 16:46:53 -0400 Subject: [PATCH 0556/2177] VSOCK: add sock_diag interface This patch adds the sock_diag interface for querying sockets from userspace. Tools like ss(8) and netstat(8) can use this interface to list open sockets. The userspace ABI is defined in and includes netlink request and response structs. The request can query sockets based on their sk_state (e.g. listening sockets only) and the response contains socket information fields including the local/remote addresses, inode number, etc. This patch does not dump VMCI pending sockets because I have only tested the virtio transport, which does not use pending sockets. Support can be added later by extending vsock_diag_dump() if needed by VMCI users. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- MAINTAINERS | 2 + include/uapi/linux/vm_sockets_diag.h | 33 +++++ net/vmw_vsock/Kconfig | 10 ++ net/vmw_vsock/Makefile | 3 + net/vmw_vsock/diag.c | 186 +++++++++++++++++++++++++++ 5 files changed, 234 insertions(+) create mode 100644 include/uapi/linux/vm_sockets_diag.h create mode 100644 net/vmw_vsock/diag.c diff --git a/MAINTAINERS b/MAINTAINERS index d0cbb3d7a0ca..0fd9121953bb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14286,6 +14286,8 @@ S: Maintained F: include/linux/virtio_vsock.h F: include/uapi/linux/virtio_vsock.h F: include/uapi/linux/vsockmon.h +F: include/uapi/linux/vm_sockets_diag.h +F: net/vmw_vsock/diag.c F: net/vmw_vsock/af_vsock_tap.c F: net/vmw_vsock/virtio_transport_common.c F: net/vmw_vsock/virtio_transport.c diff --git a/include/uapi/linux/vm_sockets_diag.h b/include/uapi/linux/vm_sockets_diag.h new file mode 100644 index 000000000000..14cd7dc5a187 --- /dev/null +++ b/include/uapi/linux/vm_sockets_diag.h @@ -0,0 +1,33 @@ +/* AF_VSOCK sock_diag(7) interface for querying open sockets */ + +#ifndef _UAPI__VM_SOCKETS_DIAG_H__ +#define _UAPI__VM_SOCKETS_DIAG_H__ + +#include + +/* Request */ +struct vsock_diag_req { + __u8 sdiag_family; /* must be AF_VSOCK */ + __u8 sdiag_protocol; /* must be 0 */ + __u16 pad; /* must be 0 */ + __u32 vdiag_states; /* query bitmap (e.g. 1 << TCP_LISTEN) */ + __u32 vdiag_ino; /* must be 0 (reserved) */ + __u32 vdiag_show; /* must be 0 (reserved) */ + __u32 vdiag_cookie[2]; +}; + +/* Response */ +struct vsock_diag_msg { + __u8 vdiag_family; /* AF_VSOCK */ + __u8 vdiag_type; /* SOCK_STREAM or SOCK_DGRAM */ + __u8 vdiag_state; /* sk_state (e.g. 
TCP_LISTEN) */ + __u8 vdiag_shutdown; /* local RCV_SHUTDOWN | SEND_SHUTDOWN */ + __u32 vdiag_src_cid; + __u32 vdiag_src_port; + __u32 vdiag_dst_cid; + __u32 vdiag_dst_port; + __u32 vdiag_ino; + __u32 vdiag_cookie[2]; +}; + +#endif /* _UAPI__VM_SOCKETS_DIAG_H__ */ diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index a24369d175fd..970f96489fe7 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -15,6 +15,16 @@ config VSOCKETS To compile this driver as a module, choose M here: the module will be called vsock. If unsure, say N. +config VSOCKETS_DIAG + tristate "Virtual Sockets monitoring interface" + depends on VSOCKETS + default y + help + Support for PF_VSOCK sockets monitoring interface used by the ss tool. + If unsure, say Y. + + Enable this module so userspace applications can query open sockets. + config VMWARE_VMCI_VSOCKETS tristate "VMware VMCI transport for Virtual Sockets" depends on VSOCKETS && VMWARE_VMCI diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index e63d574234a9..64afc06805da 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o @@ -6,6 +7,8 @@ obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o +vsock_diag-y += diag.o + vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c new file mode 100644 index 000000000000..31b567652250 --- /dev/null +++ b/net/vmw_vsock/diag.c @@ -0,0 +1,186 @@ +/* + * vsock sock_diag(7) module + * + * Copyright (C) 2017 Red Hat, Inc. + * Author: Stefan Hajnoczi + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + u32 portid, u32 seq, u32 flags) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct vsock_diag_msg *rep; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); + if (!nlh) + return -EMSGSIZE; + + rep = nlmsg_data(nlh); + rep->vdiag_family = AF_VSOCK; + + /* Lock order dictates that sk_lock is acquired before + * vsock_table_lock, so we cannot lock here. Simply don't take + * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is + * held. 
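+	 * The unlocked reads below can race with concurrent state changes;
+	 * slightly stale values are acceptable for diagnostic output.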
+ */ + rep->vdiag_type = sk->sk_type; + rep->vdiag_state = sk->sk_state; + rep->vdiag_shutdown = sk->sk_shutdown; + rep->vdiag_src_cid = vsk->local_addr.svm_cid; + rep->vdiag_src_port = vsk->local_addr.svm_port; + rep->vdiag_dst_cid = vsk->remote_addr.svm_cid; + rep->vdiag_dst_port = vsk->remote_addr.svm_port; + rep->vdiag_ino = sock_i_ino(sk); + + sock_diag_save_cookie(sk, rep->vdiag_cookie); + + return 0; +} + +static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct vsock_diag_req *req; + struct vsock_sock *vsk; + unsigned int bucket; + unsigned int last_i; + unsigned int table; + struct net *net; + unsigned int i; + + req = nlmsg_data(cb->nlh); + net = sock_net(skb->sk); + + /* State saved between calls: */ + table = cb->args[0]; + bucket = cb->args[1]; + i = last_i = cb->args[2]; + + /* TODO VMCI pending sockets? */ + + spin_lock_bh(&vsock_table_lock); + + /* Bind table (locally created sockets) */ + if (table == 0) { + while (bucket < ARRAY_SIZE(vsock_bind_table)) { + struct list_head *head = &vsock_bind_table[bucket]; + + i = 0; + list_for_each_entry(vsk, head, bound_table) { + struct sock *sk = sk_vsock(vsk); + + if (!net_eq(sock_net(sk), net)) + continue; + if (i < last_i) + goto next_bind; + if (!(req->vdiag_states & (1 << sk->sk_state))) + goto next_bind; + if (sk_diag_fill(sk, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI) < 0) + goto done; +next_bind: + i++; + } + last_i = 0; + bucket++; + } + + table++; + bucket = 0; + } + + /* Connected table (accepted connections) */ + while (bucket < ARRAY_SIZE(vsock_connected_table)) { + struct list_head *head = &vsock_connected_table[bucket]; + + i = 0; + list_for_each_entry(vsk, head, connected_table) { + struct sock *sk = sk_vsock(vsk); + + /* Skip sockets we've already seen above */ + if (__vsock_in_bound_table(vsk)) + continue; + + if (!net_eq(sock_net(sk), net)) + continue; + if (i < last_i) + goto next_connected; + if (!(req->vdiag_states & (1 << sk->sk_state))) + goto next_connected; + if (sk_diag_fill(sk, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI) < 0) + goto done; +next_connected: + i++; + } + last_i = 0; + bucket++; + } + +done: + spin_unlock_bh(&vsock_table_lock); + + cb->args[0] = table; + cb->args[1] = bucket; + cb->args[2] = i; + + return skb->len; +} + +static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct vsock_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = vsock_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } + + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler vsock_diag_handler = { + .family = AF_VSOCK, + .dump = vsock_diag_handler_dump, +}; + +static int __init vsock_diag_init(void) +{ + return sock_diag_register(&vsock_diag_handler); +} + +static void __exit vsock_diag_exit(void) +{ + sock_diag_unregister(&vsock_diag_handler); +} + +module_init(vsock_diag_init); +module_exit(vsock_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, + 40 /* AF_VSOCK */); From 0b02503384301e4c2db13f1045a1ddbaca2ba0ce Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Oct 2017 16:46:54 -0400 Subject: [PATCH 0557/2177] VSOCK: add tools/testing/vsock/vsock_diag_test This patch adds tests for the vsock_diag.ko module. 
These tests are not self-tests because they require manual set up of a KVM or VMware guest. Please see tools/testing/vsock/README for instructions. The control.h and timeout.h infrastructure can be used for additional AF_VSOCK tests in the future. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- MAINTAINERS | 1 + tools/testing/vsock/.gitignore | 2 + tools/testing/vsock/Makefile | 9 + tools/testing/vsock/README | 36 ++ tools/testing/vsock/control.c | 219 +++++++++ tools/testing/vsock/control.h | 13 + tools/testing/vsock/timeout.c | 64 +++ tools/testing/vsock/timeout.h | 14 + tools/testing/vsock/vsock_diag_test.c | 681 ++++++++++++++++++++++++++ 9 files changed, 1039 insertions(+) create mode 100644 tools/testing/vsock/.gitignore create mode 100644 tools/testing/vsock/Makefile create mode 100644 tools/testing/vsock/README create mode 100644 tools/testing/vsock/control.c create mode 100644 tools/testing/vsock/control.h create mode 100644 tools/testing/vsock/timeout.c create mode 100644 tools/testing/vsock/timeout.h create mode 100644 tools/testing/vsock/vsock_diag_test.c diff --git a/MAINTAINERS b/MAINTAINERS index 0fd9121953bb..f0c37be4e04a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14294,6 +14294,7 @@ F: net/vmw_vsock/virtio_transport.c F: drivers/net/vsockmon.c F: drivers/vhost/vsock.c F: drivers/vhost/vsock.h +F: tools/testing/vsock/ VIRTIO CONSOLE DRIVER M: Amit Shah diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore new file mode 100644 index 000000000000..dc5f11faf530 --- /dev/null +++ b/tools/testing/vsock/.gitignore @@ -0,0 +1,2 @@ +*.d +vsock_diag_test diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile new file mode 100644 index 000000000000..66ba0924194d --- /dev/null +++ b/tools/testing/vsock/Makefile @@ -0,0 +1,9 @@ +all: test +test: vsock_diag_test +vsock_diag_test: vsock_diag_test.o timeout.o control.o + +CFLAGS += -g -O2 -Werror -Wall -I. -I../../include/uapi -I../../include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE +.PHONY: all test clean +clean: + ${RM} *.o *.d vsock_diag_test +-include *.d diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README new file mode 100644 index 000000000000..2cc6d7302db6 --- /dev/null +++ b/tools/testing/vsock/README @@ -0,0 +1,36 @@ +AF_VSOCK test suite +------------------- +These tests exercise net/vmw_vsock/ host<->guest sockets for VMware, KVM, and +Hyper-V. + +The following tests are available: + + * vsock_diag_test - vsock_diag.ko module for listing open sockets + +The following prerequisite steps are not automated and must be performed prior +to running tests: + +1. Build the kernel and these tests. +2. Install the kernel and tests on the host. +3. Install the kernel and tests inside the guest. +4. Boot the guest and ensure that the AF_VSOCK transport is enabled. 
+ +Invoke test binaries in both directions as follows: + + # host=server, guest=client + (host)# $TEST_BINARY --mode=server \ + --control-port=1234 \ + --peer-cid=3 + (guest)# $TEST_BINARY --mode=client \ + --control-host=$HOST_IP \ + --control-port=1234 \ + --peer-cid=2 + + # host=client, guest=server + (guest)# $TEST_BINARY --mode=server \ + --control-port=1234 \ + --peer-cid=2 + (host)# $TEST_BINARY --mode=client \ + --control-port=$GUEST_IP \ + --control-port=1234 \ + --peer-cid=3 diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c new file mode 100644 index 000000000000..90fd47f0e422 --- /dev/null +++ b/tools/testing/vsock/control.c @@ -0,0 +1,219 @@ +/* Control socket for client/server test execution + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Stefan Hajnoczi + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +/* The client and server may need to coordinate to avoid race conditions like + * the client attempting to connect to a socket that the server is not + * listening on yet. The control socket offers a communications channel for + * such coordination tasks. + * + * If the client calls control_expectln("LISTENING"), then it will block until + * the server calls control_writeln("LISTENING"). This provides a simple + * mechanism for coordinating between the client and the server. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "timeout.h" +#include "control.h" + +static int control_fd = -1; + +/* Open the control socket, either in server or client mode */ +void control_init(const char *control_host, + const char *control_port, + bool server) +{ + struct addrinfo hints = { + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *result = NULL; + struct addrinfo *ai; + int ret; + + ret = getaddrinfo(control_host, control_port, &hints, &result); + if (ret != 0) { + fprintf(stderr, "%s\n", gai_strerror(ret)); + exit(EXIT_FAILURE); + } + + for (ai = result; ai; ai = ai->ai_next) { + int fd; + int val = 1; + + fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + if (fd < 0) + continue; + + if (!server) { + if (connect(fd, ai->ai_addr, ai->ai_addrlen) < 0) + goto next; + control_fd = fd; + printf("Control socket connected to %s:%s.\n", + control_host, control_port); + break; + } + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, + &val, sizeof(val)) < 0) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } + + if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0) + goto next; + if (listen(fd, 1) < 0) + goto next; + + printf("Control socket listening on %s:%s\n", + control_host, control_port); + fflush(stdout); + + control_fd = accept(fd, NULL, 0); + close(fd); + + if (control_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + printf("Control socket connection accepted...\n"); + break; + +next: + close(fd); + } + + if (control_fd < 0) { + fprintf(stderr, "Control socket initialization failed. 
Invalid address %s:%s?\n", + control_host, control_port); + exit(EXIT_FAILURE); + } + + freeaddrinfo(result); +} + +/* Free resources */ +void control_cleanup(void) +{ + close(control_fd); + control_fd = -1; +} + +/* Write a line to the control socket */ +void control_writeln(const char *str) +{ + ssize_t len = strlen(str); + ssize_t ret; + + timeout_begin(TIMEOUT); + + do { + ret = send(control_fd, str, len, MSG_MORE); + timeout_check("send"); + } while (ret < 0 && errno == EINTR); + + if (ret != len) { + perror("send"); + exit(EXIT_FAILURE); + } + + do { + ret = send(control_fd, "\n", 1, 0); + timeout_check("send"); + } while (ret < 0 && errno == EINTR); + + if (ret != 1) { + perror("send"); + exit(EXIT_FAILURE); + } + + timeout_end(); +} + +/* Return the next line from the control socket (without the trailing newline). + * + * The program terminates if a timeout occurs. + * + * The caller must free() the returned string. + */ +char *control_readln(void) +{ + char *buf = NULL; + size_t idx = 0; + size_t buflen = 0; + + timeout_begin(TIMEOUT); + + for (;;) { + ssize_t ret; + + if (idx >= buflen) { + char *new_buf; + + new_buf = realloc(buf, buflen + 80); + if (!new_buf) { + perror("realloc"); + exit(EXIT_FAILURE); + } + + buf = new_buf; + buflen += 80; + } + + do { + ret = recv(control_fd, &buf[idx], 1, 0); + timeout_check("recv"); + } while (ret < 0 && errno == EINTR); + + if (ret == 0) { + fprintf(stderr, "unexpected EOF on control socket\n"); + exit(EXIT_FAILURE); + } + + if (ret != 1) { + perror("recv"); + exit(EXIT_FAILURE); + } + + if (buf[idx] == '\n') { + buf[idx] = '\0'; + break; + } + + idx++; + } + + timeout_end(); + + return buf; +} + +/* Wait until a given line is received or a timeout occurs */ +void control_expectln(const char *str) +{ + char *line; + + line = control_readln(); + if (strcmp(str, line) != 0) { + fprintf(stderr, "expected \"%s\" on control socket, got \"%s\"\n", + str, line); + exit(EXIT_FAILURE); + } + + free(line); +} diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h new file mode 100644 index 000000000000..54a07efd267c --- /dev/null +++ b/tools/testing/vsock/control.h @@ -0,0 +1,13 @@ +#ifndef CONTROL_H +#define CONTROL_H + +#include + +void control_init(const char *control_host, const char *control_port, + bool server); +void control_cleanup(void); +void control_writeln(const char *str); +char *control_readln(void); +void control_expectln(const char *str); + +#endif /* CONTROL_H */ diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c new file mode 100644 index 000000000000..c49b3003b2db --- /dev/null +++ b/tools/testing/vsock/timeout.c @@ -0,0 +1,64 @@ +/* Timeout API for single-threaded programs that use blocking + * syscalls (read/write/send/recv/connect/accept). + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Stefan Hajnoczi + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +/* Use the following pattern: + * + * timeout_begin(TIMEOUT); + * do { + * ret = accept(...); + * timeout_check("accept"); + * } while (ret < 0 && ret == EINTR); + * timeout_end(); + */ + +#include +#include +#include +#include +#include "timeout.h" + +static volatile bool timeout; + +/* SIGALRM handler function. Do not use sleep(2), alarm(2), or + * setitimer(2) while using this API - they may interfere with each + * other. 
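+ * Callers are expected to install this handler themselves (e.g. with
+ * signal(2)) before using timeout_begin().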
+ */ +void sigalrm(int signo) +{ + timeout = true; +} + +/* Start a timeout. Call timeout_check() to verify that the timeout hasn't + * expired. timeout_end() must be called to stop the timeout. Timeouts cannot + * be nested. + */ +void timeout_begin(unsigned int seconds) +{ + alarm(seconds); +} + +/* Exit with an error message if the timeout has expired */ +void timeout_check(const char *operation) +{ + if (timeout) { + fprintf(stderr, "%s timed out\n", operation); + exit(EXIT_FAILURE); + } +} + +/* Stop a timeout */ +void timeout_end(void) +{ + alarm(0); + timeout = false; +} diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h new file mode 100644 index 000000000000..77db9ce9860a --- /dev/null +++ b/tools/testing/vsock/timeout.h @@ -0,0 +1,14 @@ +#ifndef TIMEOUT_H +#define TIMEOUT_H + +enum { + /* Default timeout */ + TIMEOUT = 10 /* seconds */ +}; + +void sigalrm(int signo); +void timeout_begin(unsigned int seconds); +void timeout_check(const char *operation); +void timeout_end(void); + +#endif /* TIMEOUT_H */ diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c new file mode 100644 index 000000000000..e896a4af52f4 --- /dev/null +++ b/tools/testing/vsock/vsock_diag_test.c @@ -0,0 +1,681 @@ +/* + * vsock_diag_test - vsock_diag.ko test suite + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Stefan Hajnoczi + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../include/uapi/linux/vm_sockets.h" +#include "../../../include/uapi/linux/vm_sockets_diag.h" + +#include "timeout.h" +#include "control.h" + +enum test_mode { + TEST_MODE_UNSET, + TEST_MODE_CLIENT, + TEST_MODE_SERVER +}; + +/* Per-socket status */ +struct vsock_stat { + struct list_head list; + struct vsock_diag_msg msg; +}; + +static const char *sock_type_str(int type) +{ + switch (type) { + case SOCK_DGRAM: + return "DGRAM"; + case SOCK_STREAM: + return "STREAM"; + default: + return "INVALID TYPE"; + } +} + +static const char *sock_state_str(int state) +{ + switch (state) { + case TCP_CLOSE: + return "UNCONNECTED"; + case TCP_SYN_SENT: + return "CONNECTING"; + case TCP_ESTABLISHED: + return "CONNECTED"; + case TCP_CLOSING: + return "DISCONNECTING"; + case TCP_LISTEN: + return "LISTEN"; + default: + return "INVALID STATE"; + } +} + +static const char *sock_shutdown_str(int shutdown) +{ + switch (shutdown) { + case 1: + return "RCV_SHUTDOWN"; + case 2: + return "SEND_SHUTDOWN"; + case 3: + return "RCV_SHUTDOWN | SEND_SHUTDOWN"; + default: + return "0"; + } +} + +static void print_vsock_addr(FILE *fp, unsigned int cid, unsigned int port) +{ + if (cid == VMADDR_CID_ANY) + fprintf(fp, "*:"); + else + fprintf(fp, "%u:", cid); + + if (port == VMADDR_PORT_ANY) + fprintf(fp, "*"); + else + fprintf(fp, "%u", port); +} + +static void print_vsock_stat(FILE *fp, struct vsock_stat *st) +{ + print_vsock_addr(fp, st->msg.vdiag_src_cid, st->msg.vdiag_src_port); + fprintf(fp, " "); + print_vsock_addr(fp, st->msg.vdiag_dst_cid, st->msg.vdiag_dst_port); + fprintf(fp, " %s %s %s %u\n", + sock_type_str(st->msg.vdiag_type), + sock_state_str(st->msg.vdiag_state), + sock_shutdown_str(st->msg.vdiag_shutdown), + st->msg.vdiag_ino); +} + +static void 
print_vsock_stats(FILE *fp, struct list_head *head) +{ + struct vsock_stat *st; + + list_for_each_entry(st, head, list) + print_vsock_stat(fp, st); +} + +static struct vsock_stat *find_vsock_stat(struct list_head *head, int fd) +{ + struct vsock_stat *st; + struct stat stat; + + if (fstat(fd, &stat) < 0) { + perror("fstat"); + exit(EXIT_FAILURE); + } + + list_for_each_entry(st, head, list) + if (st->msg.vdiag_ino == stat.st_ino) + return st; + + fprintf(stderr, "cannot find fd %d\n", fd); + exit(EXIT_FAILURE); +} + +static void check_no_sockets(struct list_head *head) +{ + if (!list_empty(head)) { + fprintf(stderr, "expected no sockets\n"); + print_vsock_stats(stderr, head); + exit(1); + } +} + +static void check_num_sockets(struct list_head *head, int expected) +{ + struct list_head *node; + int n = 0; + + list_for_each(node, head) + n++; + + if (n != expected) { + fprintf(stderr, "expected %d sockets, found %d\n", + expected, n); + print_vsock_stats(stderr, head); + exit(EXIT_FAILURE); + } +} + +static void check_socket_state(struct vsock_stat *st, __u8 state) +{ + if (st->msg.vdiag_state != state) { + fprintf(stderr, "expected socket state %#x, got %#x\n", + state, st->msg.vdiag_state); + exit(EXIT_FAILURE); + } +} + +static void send_req(int fd) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK, + }; + struct { + struct nlmsghdr nlh; + struct vsock_diag_req vreq; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + }, + .vreq = { + .sdiag_family = AF_VSOCK, + .vdiag_states = ~(__u32)0, + }, + }; + struct iovec iov = { + .iov_base = &req, + .iov_len = sizeof(req), + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + for (;;) { + if (sendmsg(fd, &msg, 0) < 0) { + if (errno == EINTR) + continue; + + perror("sendmsg"); + exit(EXIT_FAILURE); + } + + return; + } +} + +static ssize_t recv_resp(int fd, void *buf, size_t len) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK, + }; + struct iovec iov = { + .iov_base = buf, + .iov_len = len, + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + ssize_t ret; + + do { + ret = recvmsg(fd, &msg, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + perror("recvmsg"); + exit(EXIT_FAILURE); + } + + return ret; +} + +static void add_vsock_stat(struct list_head *sockets, + const struct vsock_diag_msg *resp) +{ + struct vsock_stat *st; + + st = malloc(sizeof(*st)); + if (!st) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + st->msg = *resp; + list_add_tail(&st->list, sockets); +} + +/* + * Read vsock stats into a list. 
+ */ +static void read_vsock_stat(struct list_head *sockets) +{ + long buf[8192 / sizeof(long)]; + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + send_req(fd); + + for (;;) { + const struct nlmsghdr *h; + ssize_t ret; + + ret = recv_resp(fd, buf, sizeof(buf)); + if (ret == 0) + goto done; + if (ret < sizeof(*h)) { + fprintf(stderr, "short read of %zd bytes\n", ret); + exit(EXIT_FAILURE); + } + + h = (struct nlmsghdr *)buf; + + while (NLMSG_OK(h, ret)) { + if (h->nlmsg_type == NLMSG_DONE) + goto done; + + if (h->nlmsg_type == NLMSG_ERROR) { + const struct nlmsgerr *err = NLMSG_DATA(h); + + if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) + fprintf(stderr, "NLMSG_ERROR\n"); + else { + errno = -err->error; + perror("NLMSG_ERROR"); + } + + exit(EXIT_FAILURE); + } + + if (h->nlmsg_type != SOCK_DIAG_BY_FAMILY) { + fprintf(stderr, "unexpected nlmsg_type %#x\n", + h->nlmsg_type); + exit(EXIT_FAILURE); + } + if (h->nlmsg_len < + NLMSG_LENGTH(sizeof(struct vsock_diag_msg))) { + fprintf(stderr, "short vsock_diag_msg\n"); + exit(EXIT_FAILURE); + } + + add_vsock_stat(sockets, NLMSG_DATA(h)); + + h = NLMSG_NEXT(h, ret); + } + } + +done: + close(fd); +} + +static void free_sock_stat(struct list_head *sockets) +{ + struct vsock_stat *st; + struct vsock_stat *next; + + list_for_each_entry_safe(st, next, sockets, list) + free(st); +} + +static void test_no_sockets(unsigned int peer_cid) +{ + LIST_HEAD(sockets); + + read_vsock_stat(&sockets); + + check_no_sockets(&sockets); + + free_sock_stat(&sockets); +} + +static void test_listen_socket_server(unsigned int peer_cid) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = VMADDR_CID_ANY, + }, + }; + LIST_HEAD(sockets); + struct vsock_stat *st; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("bind"); + exit(EXIT_FAILURE); + } + + if (listen(fd, 1) < 0) { + perror("listen"); + exit(EXIT_FAILURE); + } + + read_vsock_stat(&sockets); + + check_num_sockets(&sockets, 1); + st = find_vsock_stat(&sockets, fd); + check_socket_state(st, TCP_LISTEN); + + close(fd); + free_sock_stat(&sockets); +} + +static void test_connect_client(unsigned int peer_cid) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = peer_cid, + }, + }; + int fd; + int ret; + LIST_HEAD(sockets); + struct vsock_stat *st; + + control_expectln("LISTENING"); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + timeout_begin(TIMEOUT); + do { + ret = connect(fd, &addr.sa, sizeof(addr.svm)); + timeout_check("connect"); + } while (ret < 0 && errno == EINTR); + timeout_end(); + + if (ret < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + read_vsock_stat(&sockets); + + check_num_sockets(&sockets, 1); + st = find_vsock_stat(&sockets, fd); + check_socket_state(st, TCP_ESTABLISHED); + + control_expectln("DONE"); + control_writeln("DONE"); + + close(fd); + free_sock_stat(&sockets); +} + +static void test_connect_server(unsigned int peer_cid) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = VMADDR_CID_ANY, + }, + }; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } clientaddr; + socklen_t clientaddr_len = sizeof(clientaddr.svm); + LIST_HEAD(sockets); + struct vsock_stat *st; + int fd; 
+ int client_fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("bind"); + exit(EXIT_FAILURE); + } + + if (listen(fd, 1) < 0) { + perror("listen"); + exit(EXIT_FAILURE); + } + + control_writeln("LISTENING"); + + timeout_begin(TIMEOUT); + do { + client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); + timeout_check("accept"); + } while (client_fd < 0 && errno == EINTR); + timeout_end(); + + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + if (clientaddr.sa.sa_family != AF_VSOCK) { + fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n", + clientaddr.sa.sa_family); + exit(EXIT_FAILURE); + } + if (clientaddr.svm.svm_cid != peer_cid) { + fprintf(stderr, "expected peer CID %u from accept(2), got %u\n", + peer_cid, clientaddr.svm.svm_cid); + exit(EXIT_FAILURE); + } + + read_vsock_stat(&sockets); + + check_num_sockets(&sockets, 2); + find_vsock_stat(&sockets, fd); + st = find_vsock_stat(&sockets, client_fd); + check_socket_state(st, TCP_ESTABLISHED); + + control_writeln("DONE"); + control_expectln("DONE"); + + close(client_fd); + close(fd); + free_sock_stat(&sockets); +} + +static struct { + const char *name; + void (*run_client)(unsigned int peer_cid); + void (*run_server)(unsigned int peer_cid); +} test_cases[] = { + { + .name = "No sockets", + .run_server = test_no_sockets, + }, + { + .name = "Listen socket", + .run_server = test_listen_socket_server, + }, + { + .name = "Connect", + .run_client = test_connect_client, + .run_server = test_connect_server, + }, + {}, +}; + +static void init_signals(void) +{ + struct sigaction act = { + .sa_handler = sigalrm, + }; + + sigaction(SIGALRM, &act, NULL); + signal(SIGPIPE, SIG_IGN); +} + +static unsigned int parse_cid(const char *str) +{ + char *endptr = NULL; + unsigned long int n; + + errno = 0; + n = strtoul(str, &endptr, 10); + if (errno || *endptr != '\0') { + fprintf(stderr, "malformed CID \"%s\"\n", str); + exit(EXIT_FAILURE); + } + return n; +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "control-host", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "control-port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "mode", + .has_arg = required_argument, + .val = 'm', + }, + { + .name = "peer-cid", + .has_arg = required_argument, + .val = 'p', + }, + { + .name = "help", + .has_arg = no_argument, + .val = '?', + }, + {}, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=] --control-port= --mode=client|server --peer-cid=\n" + "\n" + " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n" + " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" + "\n" + "Run vsock_diag.ko tests. Must be launched in both\n" + "guest and host. One side must use --mode=client and\n" + "the other side must use --mode=server.\n" + "\n" + "A TCP control socket connection is used to coordinate tests\n" + "between the client and the server. 
The server requires a\n" + "listen address and the client requires an address to\n" + "connect to.\n" + "\n" + "The CID of the other side must be given with --peer-cid=.\n"); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + const char *control_host = NULL; + const char *control_port = NULL; + int mode = TEST_MODE_UNSET; + unsigned int peer_cid = VMADDR_CID_ANY; + int i; + + init_signals(); + + for (;;) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'H': + control_host = optarg; + break; + case 'm': + if (strcmp(optarg, "client") == 0) + mode = TEST_MODE_CLIENT; + else if (strcmp(optarg, "server") == 0) + mode = TEST_MODE_SERVER; + else { + fprintf(stderr, "--mode must be \"client\" or \"server\"\n"); + return EXIT_FAILURE; + } + break; + case 'p': + peer_cid = parse_cid(optarg); + break; + case 'P': + control_port = optarg; + break; + case '?': + default: + usage(); + } + } + + if (!control_port) + usage(); + if (mode == TEST_MODE_UNSET) + usage(); + if (peer_cid == VMADDR_CID_ANY) + usage(); + + if (!control_host) { + if (mode != TEST_MODE_SERVER) + usage(); + control_host = "0.0.0.0"; + } + + control_init(control_host, control_port, mode == TEST_MODE_SERVER); + + for (i = 0; test_cases[i].name; i++) { + void (*run)(unsigned int peer_cid); + + printf("%s...", test_cases[i].name); + fflush(stdout); + + if (mode == TEST_MODE_CLIENT) + run = test_cases[i].run_client; + else + run = test_cases[i].run_server; + + if (run) + run(peer_cid); + + printf("ok\n"); + } + + control_cleanup(); + return EXIT_SUCCESS; +} From 27204aaa9dc67b833b77179fdac556288ec3a4bf Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 4 Oct 2017 10:03:44 -0700 Subject: [PATCH 0558/2177] tcp: uniform the set up of sockets after successful connection Currently in the TCP code, the initialization sequence for cached metrics, congestion control, BPF, etc, after successful connection is very inconsistent. This introduces inconsistent bevhavior and is prone to bugs. The current call sequence is as follows: (1) for active case (tcp_finish_connect() case): tcp_mtup_init(sk); icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); tcp_init_buffer_space(sk); (2) for passive case (tcp_rcv_state_process() TCP_SYN_RECV case): icsk->icsk_af_ops->rebuild_header(sk); tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); tcp_mtup_init(sk); tcp_init_buffer_space(sk); tcp_init_metrics(sk); (3) for TFO passive case (tcp_fastopen_create_child()): inet_csk(child)->icsk_af_ops->rebuild_header(child); tcp_init_congestion_control(child); tcp_mtup_init(child); tcp_init_metrics(child); tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tcp_init_buffer_space(child); This commit uniforms the above functions to have the following sequence: tcp_mtup_init(sk); icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE/PASSIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); tcp_init_buffer_space(sk); This sequence is the same as the (1) active case. We pick this sequence because this order correctly allows BPF to override the settings including congestion control module and initial cwnd, etc from the route, and then allows the CC module to see those settings. 
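As a minimal illustrative sketch (the wrapper function is invented for this note and is not part of the patch; only tcp_init_transfer() and the BPF callback constants are taken from the diff that follows), every path that completes a connection now funnels through one helper, so the ordering above can no longer drift between callers:

/* Illustration only: after this patch the active, passive and TFO-passive
 * paths all collapse into a single call.
 */
static void example_connection_established(struct sock *sk, bool passive)
{
	int bpf_op = passive ? BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB
			     : BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;

	/* Fixed order inside the helper: MTU probing, header/route rebuild,
	 * cached metrics, BPF callback, congestion control, buffer space.
	 */
	tcp_init_transfer(sk, bpf_op);
}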
Suggested-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: Wei Wang Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp.c | 12 ++++++++++++ net/ipv4/tcp_fastopen.c | 7 +------ net/ipv4/tcp_input.c | 21 +++------------------ 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 7a3a8af56fd6..426c2e986016 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -416,6 +416,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); +void tcp_init_transfer(struct sock *sk, int bpf_op); unsigned int tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 23225c98d287..c115e37ca608 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -456,6 +456,18 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); +void tcp_init_transfer(struct sock *sk, int bpf_op) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_mtup_init(sk); + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_metrics(sk); + tcp_call_bpf(sk, bpf_op); + tcp_init_congestion_control(sk); + tcp_init_buffer_space(sk); +} + static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) { if (tsflags && skb) { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index de470e7e586f..29fff14d5a53 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -236,12 +236,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, refcount_set(&req->rsk_refcnt, 2); /* Now finish processing the fastopen child socket. */ - inet_csk(child)->icsk_af_ops->rebuild_header(child); - tcp_init_congestion_control(child); - tcp_mtup_init(child); - tcp_init_metrics(child); - tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); - tcp_init_buffer_space(child); + tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index db9bb46b5776..bd3a35f5dbf2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5513,20 +5513,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) security_inet_conn_established(sk, skb); } - /* Make sure socket is routed, for correct metrics. */ - icsk->icsk_af_ops->rebuild_header(sk); - - tcp_init_metrics(sk); - tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); - tcp_init_congestion_control(sk); + tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); /* Prevent spurious tcp_cwnd_restart() on first data * packet. */ tp->lsndtime = tcp_jiffies32; - tcp_init_buffer_space(sk); - if (sock_flag(sk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); @@ -5693,7 +5686,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tcp_is_sack(tp) && sysctl_tcp_fack) tcp_enable_fack(tp); - tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); @@ -5920,14 +5912,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) inet_csk(sk)->icsk_retransmits = 0; reqsk_fastopen_remove(sk, req, false); } else { - /* Make sure socket is routed, for correct metrics. 
*/ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); - tcp_init_congestion_control(sk); - - tcp_mtup_init(sk); + tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tp->copied_seq = tp->rcv_nxt; - tcp_init_buffer_space(sk); } smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); @@ -5957,8 +5943,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * are sent out. */ tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); + } if (!inet_csk(sk)->icsk_ca_ops->cong_control) tcp_update_pacing_rate(sk); From 6d05081e55a5a55b32ae6609f1902eca8277541d Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 4 Oct 2017 10:04:04 -0700 Subject: [PATCH 0559/2177] tcp: clean up TFO server's initial tcp_rearm_rto() call This commit does a cleanup and moves tcp_rearm_rto() call in the TFO server case into a previous spot in tcp_rcv_state_process() to make it more compact. This is only a cosmetic change. Suggested-by: Yuchung Cheng Signed-off-by: Wei Wang Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bd3a35f5dbf2..c5b8d61846c2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5911,6 +5911,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (req) { inet_csk(sk)->icsk_retransmits = 0; reqsk_fastopen_remove(sk, req, false); + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. + */ + tcp_rearm_rto(sk); } else { tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tp->copied_seq = tp->rcv_nxt; @@ -5933,18 +5942,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - if (req) { - /* Re-arm the timer because data may have been sent out. - * This is similar to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more aggressive and - * retransmitting any data sooner based on when they - * are sent out. - */ - tcp_rearm_rto(sk); - } - if (!inet_csk(sk)->icsk_ca_ops->cong_control) tcp_update_pacing_rate(sk); From 1bcdca3ffbbbed0d09b46cf4263ed2593a418375 Mon Sep 17 00:00:00 2001 From: Tim Hansen Date: Wed, 4 Oct 2017 15:59:49 -0400 Subject: [PATCH 0560/2177] net/ipv4: Remove unused variable in route.c int rc is unmodified after initalization in net/ipv4/route.c, this patch simply cleans up that variable and returns 0. This was found with coccicheck M=net/ipv4/ on linus' tree. Signed-off-by: Tim Hansen Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ac6fde5d45f1..1c7ed77968c9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3038,7 +3038,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; int __init ip_rt_init(void) { - int rc = 0; int cpu; ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); @@ -3095,7 +3094,7 @@ int __init ip_rt_init(void) #endif register_pernet_subsys(&rt_genid_ops); register_pernet_subsys(&ipv4_inetpeer_ops); - return rc; + return 0; } #ifdef CONFIG_SYSCTL From 9dff9936f0ccfffba5106ee4217c71c7bcf95143 Mon Sep 17 00:00:00 2001 From: Avinash Repaka Date: Wed, 4 Oct 2017 12:10:43 -0700 Subject: [PATCH 0561/2177] RDS: IB: Limit the scope of has_fr/has_fmr variables This patch fixes the scope of has_fr and has_fmr variables as they are needed only in rds_ib_add_one(). Signed-off-by: Avinash Repaka Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib.c | 11 ++++++----- net/rds/ib.h | 2 -- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index a0954ace3774..36dd2099048a 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -126,6 +126,7 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) static void rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; + bool has_fr, has_fmr; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) @@ -143,11 +144,11 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); - rds_ibdev->has_fr = (device->attrs.device_cap_flags & - IB_DEVICE_MEM_MGT_EXTENSIONS); - rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && - device->map_phys_fmr && device->unmap_fmr); - rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr); + has_fr = (device->attrs.device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS); + has_fmr = (device->alloc_fmr && device->dealloc_fmr && + device->map_phys_fmr && device->unmap_fmr); + rds_ibdev->use_fastreg = (has_fr && !has_fmr); rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; rds_ibdev->max_1m_mrs = device->attrs.max_mr ? diff --git a/net/rds/ib.h b/net/rds/ib.h index bf4822407567..6ea6a27891b0 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -215,8 +215,6 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - bool has_fmr; - bool has_fr; bool use_fastreg; unsigned int max_mrs; From b1fb67fa501c4787035317f84db6caf013385581 Mon Sep 17 00:00:00 2001 From: Avinash Repaka Date: Wed, 4 Oct 2017 12:11:29 -0700 Subject: [PATCH 0562/2177] RDS: IB: Initialize max_items based on underlying device attributes Use max_1m_mrs/max_8k_mrs while setting max_items, as the former variables are set based on the underlying device attributes. Signed-off-by: Avinash Repaka Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/ib_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 9a3c54e659e9..e678699268a2 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -601,11 +601,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, if (pool_type == RDS_IB_MR_1M_POOL) { /* +1 allows for unaligned MRs */ pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1; - pool->max_items = RDS_MR_1M_POOL_SIZE; + pool->max_items = rds_ibdev->max_1m_mrs; } else { /* pool_type == RDS_IB_MR_8K_POOL */ pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1; - pool->max_items = RDS_MR_8K_POOL_SIZE; + pool->max_items = rds_ibdev->max_8k_mrs; } pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4; From e2080072ed2d98a55ae69d95dea60ff7a17cddd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Oct 2017 12:59:58 -0700 Subject: [PATCH 0563/2177] tcp: new list for sent but unacked skbs for RACK recovery This patch adds a new queue (list) that tracks the sent but not yet acked or SACKed skbs for a TCP connection. The list is chronologically ordered by skb->skb_mstamp (the head is the oldest sent skb). This list will be used to optimize TCP Rack recovery, which checks an skb's timestamp to judge if it has been lost and needs to be retransmitted. Since TCP write queue is ordered by sequence instead of sent time, RACK has to scan over the write queue to catch all eligible packets to detect lost retransmission, and iterates through SACKed skbs repeatedly. Special cares for rare events: 1. TCP repair fakes skb transmission so the send queue needs adjusted 2. SACK reneging would require re-inserting SACKed skbs into the send queue. For now I believe it's not worth the complexity to make RACK work perfectly on SACK reneging, so we do nothing here. 3. Fast Open: currently for non-TFO, send-queue correctly queues the pure SYN packet. For TFO which queues a pure SYN and then a data packet, send-queue only queues the data packet but not the pure SYN due to the structure of TFO code. This is okay because the SYN receiver would never respond with a SACK on a missing SYN (i.e. SYN is never fast-retransmitted by SACK/RACK). In order to not grow sk_buff, we use an union for the new list and _skb_refdst/destructor fields. This is a bit complicated because we need to make sure _skb_refdst and destructor are properly zeroed before skb is cloned/copied at transmit, and before being freed. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 11 +++++++++-- include/linux/tcp.h | 1 + include/net/tcp.h | 24 ++++++++++++++++++++++- net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_input.c | 9 +++++++-- net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 42 +++++++++++++++++++++++++++++----------- 7 files changed, 74 insertions(+), 16 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ada821466e88..01a985937867 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -617,6 +617,7 @@ typedef unsigned char *sk_buff_data_t; * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function + * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on @@ -686,8 +687,14 @@ struct sk_buff { */ char cb[48] __aligned(8); - unsigned long _skb_refdst; - void (*destructor)(struct sk_buff *skb); + union { + struct { + unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); + }; + struct list_head tcp_tsorted_anchor; + }; + #ifdef CONFIG_XFRM struct sec_path *sp; #endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4aa40ef02d32..1d2c44e09e31 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -191,6 +191,7 @@ struct tcp_sock { u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ + struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ u32 snd_wl1; /* Sequence for window update */ u32 snd_wnd; /* The window we expect to receive */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 426c2e986016..3b16f353b539 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1589,14 +1589,34 @@ enum tcp_chrono { void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); +/* This helper is needed, because skb->tcp_tsorted_anchor uses + * the same memory storage than skb->destructor/_skb_refdst + */ +static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb) +{ + skb->destructor = NULL; + skb->_skb_refdst = 0UL; +} + +#define tcp_skb_tsorted_save(skb) { \ + unsigned long _save = skb->_skb_refdst; \ + skb->_skb_refdst = 0UL; + +#define tcp_skb_tsorted_restore(skb) \ + skb->_skb_refdst = _save; \ +} + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { struct sk_buff *skb; tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) + while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + tcp_skb_tsorted_anchor_cleanup(skb); sk_wmem_free_skb(sk, skb); + } + INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); } @@ -1711,6 +1731,8 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new, static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) { + list_del(&skb->tcp_tsorted_anchor); + tcp_skb_tsorted_anchor_cleanup(skb); __skb_unlink(skb, &sk->sk_write_queue); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c115e37ca608..8cf742fd4f99 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -415,6 +415,7 @@ void tcp_init_sock(struct sock *sk) tp->out_of_order_queue = RB_ROOT; tcp_init_xmit_timers(sk); INIT_LIST_HEAD(&tp->tsq_node); + INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = 
TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); @@ -881,6 +882,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * available to the caller, no more, no less. */ skb->reserved_tailroom = skb->end - skb->tail - size; + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } __kfree_skb(skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5b8d61846c2..fb0d7ed84b94 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1593,6 +1593,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, tcp_skb_pcount(skb), skb->skb_mstamp); tcp_rate_skb_delivered(sk, skb, state->rate); + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + list_del_init(&skb->tcp_tsorted_anchor); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -3054,8 +3056,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, shinfo = skb_shinfo(skb); if (!before(shinfo->tskey, prior_snd_una) && - before(shinfo->tskey, tcp_sk(sk)->snd_una)) - __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + before(shinfo->tskey, tcp_sk(sk)->snd_una)) { + tcp_skb_tsorted_save(skb) { + __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + } tcp_skb_tsorted_restore(skb); + } } /* Remove acknowledged frames from the retransmission queue. If our packet diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 188a6f31356d..2341b9f857b6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -446,6 +446,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; INIT_LIST_HEAD(&newtp->tsq_node); + INIT_LIST_HEAD(&newtp->tsorted_sent_queue); tcp_init_wl(newtp, treq->rcv_isn); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bc9e46a5369..8162e2880178 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -971,6 +971,12 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) HRTIMER_MODE_ABS_PINNED); } +static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) +{ + skb->skb_mstamp = tp->tcp_mstamp; + list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); +} + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -1003,10 +1009,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; oskb = skb; - if (unlikely(skb_cloned(skb))) - skb = pskb_copy(skb, gfp_mask); - else - skb = skb_clone(skb, gfp_mask); + + tcp_skb_tsorted_save(oskb) { + if (unlikely(skb_cloned(oskb))) + skb = pskb_copy(oskb, gfp_mask); + else + skb = skb_clone(oskb, gfp_mask); + } tcp_skb_tsorted_restore(oskb); + if (unlikely(!skb)) return -ENOBUFS; } @@ -1127,7 +1137,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, err = net_xmit_eval(err); } if (!err && oskb) { - oskb->skb_mstamp = tp->tcp_mstamp; + tcp_update_skb_after_send(tp, oskb); tcp_rate_skb_sent(sk, oskb); } return err; @@ -1328,6 +1338,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, /* Link BUFF into the send queue. 
*/ __skb_header_release(buff); tcp_insert_write_queue_after(skb, buff, sk); + list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor); return 0; } @@ -2260,7 +2271,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp" is used as a start point for the retransmit timer */ - skb->skb_mstamp = tp->tcp_mstamp; + tcp_update_skb_after_send(tp, skb); goto repair; /* Skip network transmission */ } @@ -2838,11 +2849,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb; - nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); - err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : - -ENOBUFS; + tcp_skb_tsorted_save(skb) { + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); + err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : + -ENOBUFS; + } tcp_skb_tsorted_restore(skb); + if (!err) - skb->skb_mstamp = tp->tcp_mstamp; + tcp_update_skb_after_send(tp, skb); } else { err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } @@ -3023,6 +3037,7 @@ coalesce: goto coalesce; return; } + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); skb_reserve(skb, MAX_TCP_HEADER); sk_forced_mem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ @@ -3078,9 +3093,14 @@ int tcp_send_synack(struct sock *sk) } if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { if (skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + struct sk_buff *nskb; + + tcp_skb_tsorted_save(skb) { + nskb = skb_copy(skb, GFP_ATOMIC); + } tcp_skb_tsorted_restore(skb); if (!nskb) return -ENOMEM; + INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor); tcp_unlink_write_queue(skb, sk); __skb_header_release(nskb); __tcp_add_write_queue_head(sk, nskb); From 043b87d7599ed8e86a33f4cbc3f062d57e263711 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 4 Oct 2017 12:59:59 -0700 Subject: [PATCH 0564/2177] tcp: more efficient RACK loss detection Use the new time-ordered list to speed up RACK. The detection logic is identical. But since the list is chronologically ordered by skb_mstamp and contains only skbs not yet acked or sacked, RACK can abort the loop upon hitting skbs that were sent more recently. On YouTube servers this patch reduces the iterations on write queue by 40x. The improvement is even bigger with large BDP networks. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_recovery.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 449cd914d58e..8aa56caefde8 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -45,7 +45,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; + struct sk_buff *skb, *n; u32 reo_wnd; *reo_timeout = 0; @@ -58,17 +58,10 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U) reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); - tcp_for_write_queue(skb, sk) { + list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, + tcp_tsorted_anchor) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - if (skb == tcp_send_head(sk)) - break; - - /* Skip ones already (s)acked */ - if (!after(scb->end_seq, tp->snd_una) || - scb->sacked & TCPCB_SACKED_ACKED) - continue; - if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, tp->rack.end_seq, scb->end_seq)) { /* Step 3 in draft-cheng-tcpm-rack-00.txt: @@ -81,6 +74,7 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) if (remaining < 0) { tcp_rack_mark_skb_lost(sk, skb); + list_del_init(&skb->tcp_tsorted_anchor); continue; } @@ -91,11 +85,7 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) /* Record maximum wait time (+1 to avoid 0) */ *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); - - } else if (!(scb->sacked & TCPCB_RETRANS)) { - /* Original data are sent sequentially so stop early - * b/c the rest are all sent after rack_sent - */ + } else { break; } } From bef06223083b81d2064824afe2bc85be416ab73a Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 4 Oct 2017 13:00:00 -0700 Subject: [PATCH 0565/2177] tcp: a small refactor of RACK loss detection Refactor the RACK loop to improve readability and speed up the checks. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 8aa56caefde8..cda6074a429a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -61,32 +61,28 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, tcp_tsorted_anchor) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + s32 remaining; - if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, - tp->rack.end_seq, scb->end_seq)) { - /* Step 3 in draft-cheng-tcpm-rack-00.txt: - * A packet is lost if its elapsed time is beyond - * the recent RTT plus the reordering window. 
- */ - u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp, - skb->skb_mstamp); - s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed; + /* Skip ones marked lost but not yet retransmitted */ + if ((scb->sacked & TCPCB_LOST) && + !(scb->sacked & TCPCB_SACKED_RETRANS)) + continue; - if (remaining < 0) { - tcp_rack_mark_skb_lost(sk, skb); - list_del_init(&skb->tcp_tsorted_anchor); - continue; - } - - /* Skip ones marked lost but not yet retransmitted */ - if ((scb->sacked & TCPCB_LOST) && - !(scb->sacked & TCPCB_SACKED_RETRANS)) - continue; + if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, + tp->rack.end_seq, scb->end_seq)) + break; + /* A packet is lost if it has not been s/acked beyond + * the recent RTT plus the reordering window. + */ + remaining = tp->rack.rtt_us + reo_wnd - + tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp); + if (remaining < 0) { + tcp_rack_mark_skb_lost(sk, skb); + list_del_init(&skb->tcp_tsorted_anchor); + } else { /* Record maximum wait time (+1 to avoid 0) */ *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); - } else { - break; } } } From d009313c99ba575b65a944fe2c683c6346ea1721 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 Oct 2017 10:10:23 +0100 Subject: [PATCH 0566/2177] net: qcom/emac: make function emac_isr static The function emac_isr is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warnings: symbol 'emac_isr' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 759543512117..f477ba29c569 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -130,7 +130,7 @@ static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev) return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb); } -irqreturn_t emac_isr(int _irq, void *data) +static irqreturn_t emac_isr(int _irq, void *data) { struct emac_irq *irq = data; struct emac_adapter *adpt = From b13c5c14dbfd5923d773de9661404ed9600c53ef Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 5 Oct 2017 10:41:57 -0400 Subject: [PATCH 0567/2177] libbpf: parse maps sections of varying size This library previously assumed a fixed-size map options structure. Any new options were ignored. In order to allow the options structure to grow and to support parsing older programs, this patch updates the maps section parsing to handle varying sizes. Object files with maps sections smaller than expected will have the new fields initialized to zero. Object files which have larger than expected maps sections will be rejected unless all of the unrecognized data is zero. This change still assumes that each map definition in the maps section is the same size. Signed-off-by: Craig Gallek Acked-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/lib/bpf/libbpf.c | 70 +++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4f402dcdf372..23152890ec60 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -579,31 +579,6 @@ bpf_object__init_kversion(struct bpf_object *obj, return 0; } -static int -bpf_object__validate_maps(struct bpf_object *obj) -{ - int i; - - /* - * If there's only 1 map, the only error case should have been - * catched in bpf_object__init_maps(). - */ - if (!obj->maps || !obj->nr_maps || (obj->nr_maps == 1)) - return 0; - - for (i = 1; i < obj->nr_maps; i++) { - const struct bpf_map *a = &obj->maps[i - 1]; - const struct bpf_map *b = &obj->maps[i]; - - if (b->offset - a->offset < sizeof(struct bpf_map_def)) { - pr_warning("corrupted map section in %s: map \"%s\" too small\n", - obj->path, a->name); - return -EINVAL; - } - } - return 0; -} - static int compare_bpf_map(const void *_a, const void *_b) { const struct bpf_map *a = _a; @@ -615,7 +590,7 @@ static int compare_bpf_map(const void *_a, const void *_b) static int bpf_object__init_maps(struct bpf_object *obj) { - int i, map_idx, nr_maps = 0; + int i, map_idx, map_def_sz, nr_maps = 0; Elf_Scn *scn; Elf_Data *data; Elf_Data *symbols = obj->efile.symbols; @@ -658,6 +633,15 @@ bpf_object__init_maps(struct bpf_object *obj) if (!nr_maps) return 0; + /* Assume equally sized map definitions */ + map_def_sz = data->d_size / nr_maps; + if (!data->d_size || (data->d_size % nr_maps) != 0) { + pr_warning("unable to determine map definition size " + "section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); + return -EINVAL; + } + obj->maps = calloc(nr_maps, sizeof(obj->maps[0])); if (!obj->maps) { pr_warning("alloc maps for object failed\n"); @@ -690,7 +674,7 @@ bpf_object__init_maps(struct bpf_object *obj) obj->efile.strtabidx, sym.st_name); obj->maps[map_idx].offset = sym.st_value; - if (sym.st_value + sizeof(struct bpf_map_def) > data->d_size) { + if (sym.st_value + map_def_sz > data->d_size) { pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", obj->path, map_name); return -EINVAL; @@ -704,12 +688,40 @@ bpf_object__init_maps(struct bpf_object *obj) pr_debug("map %d is \"%s\"\n", map_idx, obj->maps[map_idx].name); def = (struct bpf_map_def *)(data->d_buf + sym.st_value); - obj->maps[map_idx].def = *def; + /* + * If the definition of the map in the object file fits in + * bpf_map_def, copy it. Any extra fields in our version + * of bpf_map_def will default to zero as a result of the + * calloc above. + */ + if (map_def_sz <= sizeof(struct bpf_map_def)) { + memcpy(&obj->maps[map_idx].def, def, map_def_sz); + } else { + /* + * Here the map structure being read is bigger than what + * we expect, truncate if the excess bits are all zero. + * If they are not zero, reject this map as + * incompatible. 
+ */ + char *b; + for (b = ((char *)def) + sizeof(struct bpf_map_def); + b < ((char *)def) + map_def_sz; b++) { + if (*b != 0) { + pr_warning("maps section in %s: \"%s\" " + "has unrecognized, non-zero " + "options\n", + obj->path, map_name); + return -EINVAL; + } + } + memcpy(&obj->maps[map_idx].def, def, + sizeof(struct bpf_map_def)); + } map_idx++; } qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map); - return bpf_object__validate_maps(obj); + return 0; } static int bpf_object__elf_collect(struct bpf_object *obj) From fe9b5f774b28143dcc2957eadffefdca6171a26a Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 5 Oct 2017 10:41:58 -0400 Subject: [PATCH 0568/2177] libbpf: use map_flags when creating maps This is required to use BPF_MAP_TYPE_LPM_TRIE or any other map type which requires flags. Signed-off-by: Craig Gallek Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 23152890ec60..5aa45f89da93 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -942,7 +942,7 @@ bpf_object__create_maps(struct bpf_object *obj) def->key_size, def->value_size, def->max_entries, - 0); + def->map_flags); if (*pfd < 0) { size_t j; int err = *pfd; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 7959086eb9c9..6e20003109e0 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -207,6 +207,7 @@ struct bpf_map_def { unsigned int key_size; unsigned int value_size; unsigned int max_entries; + unsigned int map_flags; }; /* From 380537b4f7f2e706e499695b38eabc90a5f72fa8 Mon Sep 17 00:00:00 2001 From: Lin Zhang Date: Fri, 6 Oct 2017 02:07:08 +0800 Subject: [PATCH 0569/2177] net: ipv6: remove unused code in ipv6_find_hdr() Storing the left length of skb into 'len' actually has no effect so we can remove it. Signed-off-by: Lin Zhang Signed-off-by: David S. Miller --- net/ipv6/exthdrs_core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 115d60919f72..11025f8d124b 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; - unsigned int len; bool found; if (fragoff) @@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, start = *offset + sizeof(struct ipv6hdr); nexthdr = ip6->nexthdr; } - len = skb->len - start; do { struct ipv6_opt_hdr _hdr, *hp; @@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, if (!found) { nexthdr = hp->nexthdr; - len -= hdrlen; start += hdrlen; } } while (!found); From cc71b7b071192ac1c288e272fdc3f3877eb96663 Mon Sep 17 00:00:00 2001 From: Tim Hansen Date: Thu, 5 Oct 2017 15:45:32 -0400 Subject: [PATCH 0570/2177] net/ipv6: remove unused err variable on icmpv6_push_pending_frames int err is unused by icmpv6_push_pending_frames(), this patch returns removes the variable and returns the function with 0. git bisect shows this variable has been around since linux has been in git in commit 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2. This was found by running make coccicheck M=net/ipv6/ on linus' tree on commit 77ede3a014a32746002f7889211f0cecf4803163 (current HEAD as of this patch). Signed-off-by: Tim Hansen Signed-off-by: David S. 
Miller --- net/ipv6/icmp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5acb54405b10..aeb49b4d8c7d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -255,7 +255,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, { struct sk_buff *skb; struct icmp6hdr *icmp6h; - int err = 0; skb = skb_peek(&sk->sk_write_queue); if (!skb) @@ -288,7 +287,7 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } ip6_push_pending_frames(sk); out: - return err; + return 0; } struct icmpv6_msg { From 4c02d62fa37a05254a87575c7d430819f77bd6c9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 Oct 2017 10:39:10 -0700 Subject: [PATCH 0571/2177] net/mac80211/mesh_plink: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. This requires adding a pointer back to the sta_info since container_of() can't resolve the sta_info. Cc: Johannes Berg Cc: "David S. Miller" Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Cc: Thomas Gleixner Signed-off-by: Kees Cook Signed-off-by: Johannes Berg --- net/mac80211/mesh.h | 1 + net/mac80211/mesh_plink.c | 10 ++++------ net/mac80211/sta_info.c | 4 +++- net/mac80211/sta_info.h | 2 ++ 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7e5f271e3c30..465b7853edc0 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -275,6 +275,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *ie); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); +void mesh_plink_timer(struct timer_list *t); void mesh_plink_broken(struct sta_info *sta); u32 mesh_plink_deactivate(struct sta_info *sta); u32 mesh_plink_open(struct sta_info *sta); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index dc8e10f87207..e2d00cce3c17 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -603,8 +603,9 @@ out: ieee80211_mbss_info_change_notify(sdata, changed); } -static void mesh_plink_timer(unsigned long data) +void mesh_plink_timer(struct timer_list *t) { + struct mesh_sta *mesh = from_timer(mesh, t, plink_timer); struct sta_info *sta; u16 reason = 0; struct ieee80211_sub_if_data *sdata; @@ -616,7 +617,7 @@ static void mesh_plink_timer(unsigned long data) * del_timer_sync() this timer after having made sure * it cannot be readded (by deleting the plink.) 
*/ - sta = (struct sta_info *) data; + sta = mesh->plink_sta; if (sta->sdata->local->quiescing) return; @@ -696,11 +697,8 @@ static void mesh_plink_timer(unsigned long data) static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout) { - sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout); - sta->mesh->plink_timer.data = (unsigned long) sta; - sta->mesh->plink_timer.function = mesh_plink_timer; sta->mesh->plink_timeout = timeout; - add_timer(&sta->mesh->plink_timer); + mod_timer(&sta->mesh->plink_timer, jiffies + msecs_to_jiffies(timeout)); } static bool llid_in_use(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ffcd25c4908c..9673e157bf8f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -329,10 +329,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); if (!sta->mesh) goto free; + sta->mesh->plink_sta = sta; spin_lock_init(&sta->mesh->plink_lock); if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.user_mpm) - init_timer(&sta->mesh->plink_timer); + timer_setup(&sta->mesh->plink_timer, mesh_plink_timer, + 0); sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; } #endif diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index a35c964f6217..5c54acd10562 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -344,6 +344,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8) * @plink_state: peer link state * @plink_timeout: timeout of peer link * @plink_timer: peer link watch timer + * @plink_sta: peer link watch timer's sta_info * @t_offset: timing offset relative to this host * @t_offset_setpoint: reference timing offset of this sta to be used when * calculating clockdrift @@ -356,6 +357,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8) */ struct mesh_sta { struct timer_list plink_timer; + struct sta_info *plink_sta; s64 t_offset; s64 t_offset_setpoint; From 3669cd31927b8229096622e046bb0f6020328b2b Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Wed, 16 Aug 2017 14:15:08 +0300 Subject: [PATCH 0572/2177] iwlwifi: mvm: add dbgfs entry for fw info Add a dbgfs entry for an easy way during runtime to check what FW file was loaded, and get some general FW-related data. 
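For reference, the diff that follows registers the new entry read-only as "fw_ver" under the mvm debugfs directory. As an illustration only (the bracketed values are placeholders, not output captured from hardware), reading that file is expected to return four lines built from the scnprintf() format strings in iwl_dbgfs_fw_ver_read():

FW prefix: <mvm->trans->cfg->fw_name_pre>
FW: <mvm->fwrt.fw->human_readable>
Device: <mvm->fwrt.trans->cfg->name>
Bus: <mvm->fwrt.dev->bus->name>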
Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/debugfs.c | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index e97904c2c4d4..2ff594f11259 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -660,6 +660,36 @@ out: return ret ?: count; } +static ssize_t iwl_dbgfs_fw_ver_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_mvm *mvm = file->private_data; + char *buff, *pos, *endpos; + static const size_t bufsz = 1024; + int ret; + + buff = kmalloc(bufsz, GFP_KERNEL); + if (!buff) + return -ENOMEM; + + pos = buff; + endpos = pos + bufsz; + + pos += scnprintf(pos, endpos - pos, "FW prefix: %s\n", + mvm->trans->cfg->fw_name_pre); + pos += scnprintf(pos, endpos - pos, "FW: %s\n", + mvm->fwrt.fw->human_readable); + pos += scnprintf(pos, endpos - pos, "Device: %s\n", + mvm->fwrt.trans->cfg->name); + pos += scnprintf(pos, endpos - pos, "Bus: %s\n", + mvm->fwrt.dev->bus->name); + + ret = simple_read_from_buffer(user_buf, count, ppos, buff, pos - buff); + kfree(buff); + + return ret; +} + #define PRINT_STATS_LE32(_struct, _memb) \ pos += scnprintf(buf + pos, bufsz - pos, \ fmt_table, #_memb, \ @@ -1662,6 +1692,7 @@ MVM_DEBUGFS_READ_FILE_OPS(bt_cmd); MVM_DEBUGFS_READ_WRITE_FILE_OPS(disable_power_off, 64); MVM_DEBUGFS_READ_FILE_OPS(fw_rx_stats); MVM_DEBUGFS_READ_FILE_OPS(drv_rx_stats); +MVM_DEBUGFS_READ_FILE_OPS(fw_ver); MVM_DEBUGFS_WRITE_FILE_OPS(fw_restart, 10); MVM_DEBUGFS_WRITE_FILE_OPS(fw_nmi, 10); MVM_DEBUGFS_WRITE_FILE_OPS(bt_tx_prio, 10); @@ -1843,6 +1874,7 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) MVM_DEBUGFS_ADD_FILE(bt_cmd, dbgfs_dir, S_IRUSR); MVM_DEBUGFS_ADD_FILE(disable_power_off, mvm->debugfs_dir, S_IRUSR | S_IWUSR); + MVM_DEBUGFS_ADD_FILE(fw_ver, mvm->debugfs_dir, S_IRUSR); MVM_DEBUGFS_ADD_FILE(fw_rx_stats, mvm->debugfs_dir, S_IRUSR); MVM_DEBUGFS_ADD_FILE(drv_rx_stats, mvm->debugfs_dir, S_IRUSR); MVM_DEBUGFS_ADD_FILE(fw_restart, mvm->debugfs_dir, S_IWUSR); From d621d3c73391fc49c5bbbf823745f0165981119d Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 30 Mar 2016 22:09:34 +0300 Subject: [PATCH 0573/2177] iwlwifi: trans: move ref/unref code to the common part of the transport De-inline iwl_trans_ref/unref and move it to common transport code in preparation for more common code to come to these functions. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 16 ++++++++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 14 ++------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 784bdd0ed233..7e9c924e1220 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -6,6 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,6 +32,7 @@ * BSD LICENSE * * Copyright(c) 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -205,3 +207,17 @@ int iwl_cmd_groups_verify_sorted(const struct iwl_trans_config *trans) return 0; } IWL_EXPORT_SYMBOL(iwl_cmd_groups_verify_sorted); + +void iwl_trans_ref(struct iwl_trans *trans) +{ + if (trans->ops->ref) + trans->ops->ref(trans); +} +IWL_EXPORT_SYMBOL(iwl_trans_ref); + +void iwl_trans_unref(struct iwl_trans *trans) +{ + if (trans->ops->unref) + trans->ops->unref(trans); +} +IWL_EXPORT_SYMBOL(iwl_trans_unref); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index e90abbfba718..91ec077900f6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -875,18 +875,6 @@ static inline int iwl_trans_d3_resume(struct iwl_trans *trans, return trans->ops->d3_resume(trans, status, test, reset); } -static inline void iwl_trans_ref(struct iwl_trans *trans) -{ - if (trans->ops->ref) - trans->ops->ref(trans); -} - -static inline void iwl_trans_unref(struct iwl_trans *trans) -{ - if (trans->ops->unref) - trans->ops->unref(trans); -} - static inline int iwl_trans_suspend(struct iwl_trans *trans) { if (!trans->ops->suspend) @@ -1191,6 +1179,8 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, const struct iwl_cfg *cfg, const struct iwl_trans_ops *ops); void iwl_trans_free(struct iwl_trans *trans); +void iwl_trans_ref(struct iwl_trans *trans); +void iwl_trans_unref(struct iwl_trans *trans); /***************************************************** * driver (transport) register/unregister functions From f7f5873bbd45a67d3097dfb55237ade2ad520184 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Mon, 28 Aug 2017 10:33:38 +0300 Subject: [PATCH 0574/2177] iwlwifi: fix wrong struct for a000 device The PCI ID (0x2720, 0x0070) was set with the config struct iwla000_2ax_cfg_hr instead of iwla000_2ac_cfg_hr_cdb. Fixes: 175b87c69253 ("iwlwifi: add the new a000_2ax series") Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 858765fed8f8..3416c6155996 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -576,7 +576,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x0000, iwla000_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x34F0, 0x0070, iwla000_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0078, iwla000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x2720, 0x0070, iwla000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x0070, iwla000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x1080, iwla000_2ax_cfg_hr)}, #endif /* CONFIG_IWLMVM */ From d048b36b9654c4e0cf0d3576be2d1ed2a3084c6f Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 31 Aug 2017 13:15:09 +0300 Subject: [PATCH 0575/2177] iwlwifi: add a new a000 device Add a new a000 device with PCI ID (0x2720, 0x0030). 
Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 3416c6155996..e0966f656376 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -577,6 +577,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x34F0, 0x0070, iwla000_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0078, iwla000_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0070, iwla000_2ac_cfg_hr_cdb)}, + {IWL_PCI_DEVICE(0x2720, 0x0030, iwla000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x1080, iwla000_2ax_cfg_hr)}, #endif /* CONFIG_IWLMVM */ From 72cbb73e8ad34b2d4409156546678763e91c71b6 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Wed, 30 Aug 2017 16:23:14 +0300 Subject: [PATCH 0576/2177] iwlwifi: mvm: Add new quota command API New quota command adds a field indicating low latency direction per quota. A TLV API bit was added to indicate the new API. Signed-off-by: David Spinadel Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/fw/api/binding.h | 43 ++++++++++++-- drivers/net/wireless/intel/iwlwifi/fw/file.h | 3 + drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 16 +++-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 27 +++++++++ .../net/wireless/intel/iwlwifi/mvm/quota.c | 59 +++++++++++-------- 5 files changed, 114 insertions(+), 34 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h b/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h index d2717fafdf5b..570f19026c91 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h @@ -116,14 +116,14 @@ struct iwl_binding_cmd { #define IWL_MVM_MAX_QUOTA 128 /** - * struct iwl_time_quota_data - configuration of time quota per binding + * struct iwl_time_quota_data_v1 - configuration of time quota per binding * @id_and_color: ID and color of the relevant Binding, * &enum iwl_ctxt_id_and_color * @quota: absolute time quota in TU. The scheduler will try to divide the * remainig quota (after Time Events) according to this quota. * @max_duration: max uninterrupted context duration in TU */ -struct iwl_time_quota_data { +struct iwl_time_quota_data_v1 { __le32 id_and_color; __le32 quota; __le32 max_duration; @@ -137,8 +137,43 @@ struct iwl_time_quota_data { * essentially zero. * On CDB the fourth one is a regular binding. */ -struct iwl_time_quota_cmd { - struct iwl_time_quota_data quotas[MAX_BINDINGS]; +struct iwl_time_quota_cmd_v1 { + struct iwl_time_quota_data_v1 quotas[MAX_BINDINGS]; } __packed; /* TIME_QUOTA_ALLOCATION_CMD_API_S_VER_1 */ +enum iwl_quota_low_latency { + IWL_QUOTA_LOW_LATENCY_NONE = 0, + IWL_QUOTA_LOW_LATENCY_TX = BIT(0), + IWL_QUOTA_LOW_LATENCY_RX = BIT(1), + IWL_QUOTA_LOW_LATENCY_TX_RX = + IWL_QUOTA_LOW_LATENCY_TX | IWL_QUOTA_LOW_LATENCY_RX, +}; + +/** + * struct iwl_time_quota_data - configuration of time quota per binding + * @id_and_color: ID and color of the relevant Binding. + * @quota: absolute time quota in TU. The scheduler will try to divide the + * remainig quota (after Time Events) according to this quota. 
+ * @max_duration: max uninterrupted context duration in TU + * @low_latency: low latency status, &enum iwl_quota_low_latency + */ +struct iwl_time_quota_data { + __le32 id_and_color; + __le32 quota; + __le32 max_duration; + __le32 low_latency; +} __packed; /* TIME_QUOTA_DATA_API_S_VER_2 */ + +/** + * struct iwl_time_quota_cmd - configuration of time quota between bindings + * ( TIME_QUOTA_CMD = 0x2c ) + * Note: on non-CDB the fourth one is the auxilary mac and is essentially zero. + * On CDB the fourth one is a regular binding. + * + * @quotas: allocations per binding + */ +struct iwl_time_quota_cmd { + struct iwl_time_quota_data quotas[MAX_BINDINGS]; +} __packed; /* TIME_QUOTA_ALLOCATION_CMD_API_S_VER_2 */ + #endif /* __iwl_fw_api_binding_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 279248cd9cfb..efd7fb65de8b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -248,6 +248,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t; * @IWL_UCODE_TLV_API_NEW_RX_STATS: should new RX STATISTICS API be used * @IWL_UCODE_TLV_API_ATS_COEX_EXTERNAL: the coex notification is enlared to * include information about ACL time sharing. + * @IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY: Quota command includes a field + * indicating low latency direction. * * @NUM_IWL_UCODE_TLV_API: number of bits used */ @@ -265,6 +267,7 @@ enum iwl_ucode_tlv_api { IWL_UCODE_TLV_API_NEW_BEACON_TEMPLATE = (__force iwl_ucode_tlv_api_t)34, IWL_UCODE_TLV_API_NEW_RX_STATS = (__force iwl_ucode_tlv_api_t)35, IWL_UCODE_TLV_API_COEX_ATS_EXTERNAL = (__force iwl_ucode_tlv_api_t)37, + IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY = (__force iwl_ucode_tlv_api_t)38, NUM_IWL_UCODE_TLV_API #ifdef __CHECKER__ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 5de19ea10575..c5ea3fad8002 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -664,6 +664,7 @@ static int iwl_mvm_d3_reprogram(struct iwl_mvm *mvm, struct ieee80211_vif *vif, int ret, i; struct iwl_binding_cmd binding_cmd = {}; struct iwl_time_quota_cmd quota_cmd = {}; + struct iwl_time_quota_data *quota; u32 status; int size; @@ -745,17 +746,20 @@ static int iwl_mvm_d3_reprogram(struct iwl_mvm *mvm, struct ieee80211_vif *vif, return ret; /* and some quota */ - quota_cmd.quotas[0].id_and_color = + quota = iwl_mvm_quota_cmd_get_quota(mvm, "a_cmd, 0); + quota->id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->phy_ctxt->id, mvmvif->phy_ctxt->color)); - quota_cmd.quotas[0].quota = cpu_to_le32(IWL_MVM_MAX_QUOTA); - quota_cmd.quotas[0].max_duration = cpu_to_le32(IWL_MVM_MAX_QUOTA); + quota->quota = cpu_to_le32(IWL_MVM_MAX_QUOTA); + quota->max_duration = cpu_to_le32(IWL_MVM_MAX_QUOTA); - for (i = 1; i < MAX_BINDINGS; i++) - quota_cmd.quotas[i].id_and_color = cpu_to_le32(FW_CTXT_INVALID); + for (i = 1; i < MAX_BINDINGS; i++) { + quota = iwl_mvm_quota_cmd_get_quota(mvm, "a_cmd, i); + quota->id_and_color = cpu_to_le32(FW_CTXT_INVALID); + } ret = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, 0, - sizeof(quota_cmd), "a_cmd); + iwl_mvm_quota_cmd_size(mvm), "a_cmd); if (ret) IWL_ERR(mvm, "Failed to send quota: %d\n", ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 83303bac0e4b..48cb08eea700 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ 
-1248,6 +1248,12 @@ static inline bool iwl_mvm_has_new_ats_coex_api(struct iwl_mvm *mvm) IWL_UCODE_TLV_API_COEX_ATS_EXTERNAL); } +static inline bool iwl_mvm_has_quota_low_latency(struct iwl_mvm *mvm) +{ + return fw_has_api(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY); +} + static inline struct agg_tx_status * iwl_mvm_get_agg_status(struct iwl_mvm *mvm, void *tx_resp) { @@ -1486,6 +1492,27 @@ int iwl_mvm_binding_add_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_binding_remove_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif); /* Quota management */ +static inline size_t iwl_mvm_quota_cmd_size(struct iwl_mvm *mvm) +{ + return iwl_mvm_has_quota_low_latency(mvm) ? + sizeof(struct iwl_time_quota_cmd) : + sizeof(struct iwl_time_quota_cmd_v1); +} + +static inline struct iwl_time_quota_data +*iwl_mvm_quota_cmd_get_quota(struct iwl_mvm *mvm, + struct iwl_time_quota_cmd *cmd, + int i) +{ + struct iwl_time_quota_data_v1 *quotas; + + if (iwl_mvm_has_quota_low_latency(mvm)) + return &cmd->quotas[i]; + + quotas = (struct iwl_time_quota_data_v1 *)cmd->quotas; + return (struct iwl_time_quota_data *)"as[i]; +} + int iwl_mvm_update_quotas(struct iwl_mvm *mvm, bool force_upload, struct ieee80211_vif *disabled_vif); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/quota.c b/drivers/net/wireless/intel/iwlwifi/mvm/quota.c index 2141db5bff82..b4a0264329b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/quota.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/quota.c @@ -7,7 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH - * Copyright(c) 2016 Intel Deutschland GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -34,7 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH - * Copyright(c) 2016 Intel Deutschland GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -164,9 +164,12 @@ static void iwl_mvm_adjust_quota_for_noa(struct iwl_mvm *mvm, beacon_int = mvm->noa_vif->bss_conf.beacon_int; for (i = 0; i < MAX_BINDINGS; i++) { - u32 id_n_c = le32_to_cpu(cmd->quotas[i].id_and_color); + struct iwl_time_quota_data *data = + iwl_mvm_quota_cmd_get_quota(mvm, cmd, + i); + u32 id_n_c = le32_to_cpu(data->id_and_color); u32 id = (id_n_c & FW_CTXT_ID_MSK) >> FW_CTXT_ID_POS; - u32 quota = le32_to_cpu(cmd->quotas[i].quota); + u32 quota = le32_to_cpu(data->quota); if (id != phy_id) continue; @@ -175,9 +178,9 @@ static void iwl_mvm_adjust_quota_for_noa(struct iwl_mvm *mvm, quota /= beacon_int; IWL_DEBUG_QUOTA(mvm, "quota: adjust for NoA from %d to %d\n", - le32_to_cpu(cmd->quotas[i].quota), quota); + le32_to_cpu(data->quota), quota); - cmd->quotas[i].quota = cpu_to_le32(quota); + data->quota = cpu_to_le32(quota); } #endif } @@ -194,6 +197,7 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm, .disabled_vif = disabled_vif, }; struct iwl_time_quota_cmd *last = &mvm->last_quota_cmd; + struct iwl_time_quota_data *qdata, *last_data; bool send = false; lockdep_assert_held(&mvm->mutex); @@ -216,7 +220,8 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm, */ num_active_macs = 0; for (i = 0; i < MAX_BINDINGS; i++) { - cmd.quotas[i].id_and_color = cpu_to_le32(FW_CTXT_INVALID); + qdata = iwl_mvm_quota_cmd_get_quota(mvm, &cmd, i); + qdata->id_and_color = cpu_to_le32(FW_CTXT_INVALID); num_active_macs += data.n_interfaces[i]; } @@ -265,14 +270,16 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm, if (data.colors[i] < 0) continue; - cmd.quotas[idx].id_and_color = + qdata = iwl_mvm_quota_cmd_get_quota(mvm, &cmd, idx); + + qdata->id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(i, data.colors[i])); if (data.n_interfaces[i] <= 0) - cmd.quotas[idx].quota = cpu_to_le32(0); + qdata->quota = cpu_to_le32(0); #ifdef CONFIG_IWLWIFI_DEBUGFS else if (data.dbgfs_min[i]) - cmd.quotas[idx].quota = + qdata->quota = cpu_to_le32(data.dbgfs_min[i] * QUOTA_100 / 100); #endif else if (data.n_low_latency_bindings == 1 && n_non_lowlat && @@ -283,24 +290,25 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm, * the minimal required quota for the low latency * binding. 
*/ - cmd.quotas[idx].quota = cpu_to_le32(QUOTA_LOWLAT_MIN); + qdata->quota = cpu_to_le32(QUOTA_LOWLAT_MIN); else - cmd.quotas[idx].quota = + qdata->quota = cpu_to_le32(quota * data.n_interfaces[i]); - WARN_ONCE(le32_to_cpu(cmd.quotas[idx].quota) > QUOTA_100, + WARN_ONCE(le32_to_cpu(qdata->quota) > QUOTA_100, "Binding=%d, quota=%u > max=%u\n", - idx, le32_to_cpu(cmd.quotas[idx].quota), QUOTA_100); + idx, le32_to_cpu(qdata->quota), QUOTA_100); - cmd.quotas[idx].max_duration = cpu_to_le32(0); + qdata->max_duration = cpu_to_le32(0); idx++; } /* Give the remainder of the session to the first data binding */ for (i = 0; i < MAX_BINDINGS; i++) { - if (le32_to_cpu(cmd.quotas[i].quota) != 0) { - le32_add_cpu(&cmd.quotas[i].quota, quota_rem); + qdata = iwl_mvm_quota_cmd_get_quota(mvm, &cmd, i); + if (le32_to_cpu(qdata->quota) != 0) { + le32_add_cpu(&qdata->quota, quota_rem); IWL_DEBUG_QUOTA(mvm, "quota: giving remainder of %d to binding %d\n", quota_rem, i); @@ -312,17 +320,19 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm, /* check that we have non-zero quota for all valid bindings */ for (i = 0; i < MAX_BINDINGS; i++) { - if (cmd.quotas[i].id_and_color != last->quotas[i].id_and_color) + qdata = iwl_mvm_quota_cmd_get_quota(mvm, &cmd, i); + last_data = iwl_mvm_quota_cmd_get_quota(mvm, last, i); + if (qdata->id_and_color != last_data->id_and_color) send = true; - if (cmd.quotas[i].max_duration != last->quotas[i].max_duration) + if (qdata->max_duration != last_data->max_duration) send = true; - if (abs((int)le32_to_cpu(cmd.quotas[i].quota) - - (int)le32_to_cpu(last->quotas[i].quota)) + if (abs((int)le32_to_cpu(qdata->quota) - + (int)le32_to_cpu(last_data->quota)) > IWL_MVM_QUOTA_THRESHOLD) send = true; - if (cmd.quotas[i].id_and_color == cpu_to_le32(FW_CTXT_INVALID)) + if (qdata->id_and_color == cpu_to_le32(FW_CTXT_INVALID)) continue; - WARN_ONCE(cmd.quotas[i].quota == 0, + WARN_ONCE(qdata->quota == 0, "zero quota on binding %d\n", i); } @@ -334,7 +344,8 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm, return 0; } - err = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, 0, sizeof(cmd), &cmd); + err = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, 0, + iwl_mvm_quota_cmd_size(mvm), &cmd); if (err) IWL_ERR(mvm, "Failed to send quota: %d\n", err); From d7b9bb69c9b3accbd199b51b28d95d08f37049cd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 31 Aug 2017 12:57:33 +0300 Subject: [PATCH 0577/2177] iwlwifi: mvm: remove support for Link Quality Measurements This was never used by any product. Remove it. 
Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/fw/api/mac-cfg.h | 67 ---------------- .../wireless/intel/iwlwifi/mvm/debugfs-vif.c | 76 ------------------- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 10 --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 12 --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 - .../net/wireless/intel/iwlwifi/mvm/utils.c | 71 ----------------- 6 files changed, 238 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h index 39c89e85fd2f..ec42c84e5df2 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h @@ -67,79 +67,12 @@ * enum iwl_mac_conf_subcmd_ids - mac configuration command IDs */ enum iwl_mac_conf_subcmd_ids { - /** - * @LINK_QUALITY_MEASUREMENT_CMD: &struct iwl_link_qual_msrmnt_cmd - */ - LINK_QUALITY_MEASUREMENT_CMD = 0x1, - - /** - * @LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF: - * &struct iwl_link_qual_msrmnt_notif - */ - LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF = 0xFE, - /** * @CHANNEL_SWITCH_NOA_NOTIF: &struct iwl_channel_switch_noa_notif */ CHANNEL_SWITCH_NOA_NOTIF = 0xFF, }; -#define LQM_NUMBER_OF_STATIONS_IN_REPORT 16 - -enum iwl_lqm_cmd_operatrions { - LQM_CMD_OPERATION_START_MEASUREMENT = 0x01, - LQM_CMD_OPERATION_STOP_MEASUREMENT = 0x02, -}; - -enum iwl_lqm_status { - LQM_STATUS_SUCCESS = 0, - LQM_STATUS_TIMEOUT = 1, - LQM_STATUS_ABORT = 2, -}; - -/** - * struct iwl_link_qual_msrmnt_cmd - Link Quality Measurement command - * @cmd_operation: command operation to be performed (start or stop) - * as defined above. - * @mac_id: MAC ID the measurement applies to. - * @measurement_time: time of the total measurement to be performed, in uSec. - * @timeout: maximum time allowed until a response is sent, in uSec. - */ -struct iwl_link_qual_msrmnt_cmd { - __le32 cmd_operation; - __le32 mac_id; - __le32 measurement_time; - __le32 timeout; -} __packed /* LQM_CMD_API_S_VER_1 */; - -/** - * struct iwl_link_qual_msrmnt_notif - Link Quality Measurement notification - * - * @frequent_stations_air_time: an array containing the total air time - * (in uSec) used by the most frequently transmitting stations. - * @number_of_stations: the number of uniqe stations included in the array - * (a number between 0 to 16) - * @total_air_time_other_stations: the total air time (uSec) used by all the - * stations which are not included in the above report. - * @time_in_measurement_window: the total time in uSec in which a measurement - * took place. - * @tx_frame_dropped: the number of TX frames dropped due to retry limit during - * measurement - * @mac_id: MAC ID the measurement applies to. - * @status: return status. may be one of the LQM_STATUS_* defined above. - * @reserved: reserved. 
- */ -struct iwl_link_qual_msrmnt_notif { - __le32 frequent_stations_air_time[LQM_NUMBER_OF_STATIONS_IN_REPORT]; - __le32 number_of_stations; - __le32 total_air_time_other_stations; - __le32 time_in_measurement_window; - __le32 tx_frame_dropped; - __le32 mac_id; - __le32 status; - u8 reserved[12]; -} __packed; /* LQM_MEASUREMENT_COMPLETE_NTF_API_S_VER1 */ - /** * struct iwl_channel_switch_noa_notif - Channel switch NOA notification * diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 71a01df96f8b..4228fac77f41 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -1455,80 +1455,6 @@ static const char * const chanwidths[] = { [NL80211_CHAN_WIDTH_160] = "vht160", }; -static bool iwl_mvm_lqm_notif_wait(struct iwl_notif_wait_data *notif_wait, - struct iwl_rx_packet *pkt, void *data) -{ - struct ieee80211_vif *vif = data; - struct iwl_mvm *mvm = - container_of(notif_wait, struct iwl_mvm, notif_wait); - struct iwl_link_qual_msrmnt_notif *report = (void *)pkt->data; - u32 num_of_stations = le32_to_cpu(report->number_of_stations); - int i; - - IWL_INFO(mvm, "LQM report:\n"); - IWL_INFO(mvm, "\tstatus: %d\n", report->status); - IWL_INFO(mvm, "\tmacID: %d\n", le32_to_cpu(report->mac_id)); - IWL_INFO(mvm, "\ttx_frame_dropped: %d\n", - le32_to_cpu(report->tx_frame_dropped)); - IWL_INFO(mvm, "\ttime_in_measurement_window: %d us\n", - le32_to_cpu(report->time_in_measurement_window)); - IWL_INFO(mvm, "\ttotal_air_time_other_stations: %d\n", - le32_to_cpu(report->total_air_time_other_stations)); - IWL_INFO(mvm, "\tchannel_freq: %d\n", - vif->bss_conf.chandef.center_freq1); - IWL_INFO(mvm, "\tchannel_width: %s\n", - chanwidths[vif->bss_conf.chandef.width]); - IWL_INFO(mvm, "\tnumber_of_stations: %d\n", num_of_stations); - for (i = 0; i < num_of_stations; i++) - IWL_INFO(mvm, "\t\tsta[%d]: %d\n", i, - report->frequent_stations_air_time[i]); - - return true; -} - -static ssize_t iwl_dbgfs_lqm_send_cmd_write(struct ieee80211_vif *vif, - char *buf, size_t count, - loff_t *ppos) -{ - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - struct iwl_notification_wait wait_lqm_notif; - static u16 lqm_notif[] = { - WIDE_ID(MAC_CONF_GROUP, - LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF) - }; - int err; - u32 duration; - u32 timeout; - - if (sscanf(buf, "%d,%d", &duration, &timeout) != 2) - return -EINVAL; - - iwl_init_notification_wait(&mvm->notif_wait, &wait_lqm_notif, - lqm_notif, ARRAY_SIZE(lqm_notif), - iwl_mvm_lqm_notif_wait, vif); - mutex_lock(&mvm->mutex); - err = iwl_mvm_send_lqm_cmd(vif, LQM_CMD_OPERATION_START_MEASUREMENT, - duration, timeout); - mutex_unlock(&mvm->mutex); - - if (err) { - IWL_ERR(mvm, "Failed to send lqm cmdf(err=%d)\n", err); - iwl_remove_notification(&mvm->notif_wait, &wait_lqm_notif); - return err; - } - - /* wait for 2 * timeout (safety guard) and convert to jiffies*/ - timeout = msecs_to_jiffies((timeout * 2) / 1000); - - err = iwl_wait_notification(&mvm->notif_wait, &wait_lqm_notif, - timeout); - if (err) - IWL_ERR(mvm, "Getting lqm notif timed out\n"); - - return count; -} - #define MVM_DEBUGFS_WRITE_FILE_OPS(name, bufsz) \ _MVM_DEBUGFS_WRITE_FILE_OPS(name, bufsz, struct ieee80211_vif) #define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \ @@ -1553,7 +1479,6 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_range_abort, 32); MVM_DEBUGFS_READ_FILE_OPS(tof_range_response); 
MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32); MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32); -MVM_DEBUGFS_WRITE_FILE_OPS(lqm_send_cmd, 64); MVM_DEBUGFS_READ_FILE_OPS(os_device_timediff); @@ -1594,7 +1519,6 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) S_IRUSR | S_IWUSR); MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir, S_IRUSR | S_IWUSR); - MVM_DEBUGFS_ADD_FILE_VIF(lqm_send_cmd, mvmvif->dbgfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff, mvmvif->dbgfs_dir, S_IRUSR); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 15f2d826bb4b..8b4584541272 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1878,11 +1878,6 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm, if (changes & BSS_CHANGED_ASSOC && bss_conf->assoc) iwl_mvm_mac_ctxt_recalc_tsf_id(mvm, vif); - if (changes & BSS_CHANGED_ASSOC && !bss_conf->assoc && - mvmvif->lqm_active) - iwl_mvm_send_lqm_cmd(vif, LQM_CMD_OPERATION_STOP_MEASUREMENT, - 0, 0); - /* * If we're not associated yet, take the (new) BSSID before associating * so the firmware knows. If we're already associated, then use the old @@ -3879,11 +3874,6 @@ static int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw, break; case NL80211_IFTYPE_STATION: - if (mvmvif->lqm_active) - iwl_mvm_send_lqm_cmd(vif, - LQM_CMD_OPERATION_STOP_MEASUREMENT, - 0, 0); - /* Schedule the time event to a bit before beacon 1, * to make sure we're in the new channel when the * GO/AP arrives. In case count <= 1 immediately schedule the diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 48cb08eea700..ec2cf248990b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -436,12 +436,6 @@ struct iwl_mvm_vif { /* TCP Checksum Offload */ netdev_features_t features; - - /* - * link quality measurement - used to check whether this interface - * is in the middle of a link quality measurement - */ - bool lqm_active; }; static inline struct iwl_mvm_vif * @@ -1846,12 +1840,6 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm, void iwl_mvm_connection_loss(struct iwl_mvm *mvm, struct ieee80211_vif *vif, const char *errmsg); -/* Link Quality Measurement */ -int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif, - enum iwl_lqm_cmd_operatrions operation, - u32 duration, u32 timeout); -bool iwl_mvm_lqm_active(struct iwl_mvm *mvm); - int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b); int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 231878969332..d855920f5456 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -423,8 +423,6 @@ static const struct iwl_hcmd_names iwl_mvm_system_names[] = { * Access is done through binary search */ static const struct iwl_hcmd_names iwl_mvm_mac_conf_names[] = { - HCMD_NAME(LINK_QUALITY_MEASUREMENT_CMD), - HCMD_NAME(LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF), HCMD_NAME(CHANNEL_SWITCH_NOA_NOTIF), }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 2ea74abad73d..328035640669 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c 
@@ -1389,74 +1389,3 @@ void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime) iwl_mvm_power_update_device(mvm); } } - -int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif, - enum iwl_lqm_cmd_operatrions operation, - u32 duration, u32 timeout) -{ - struct iwl_mvm_vif *mvm_vif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_link_qual_msrmnt_cmd cmd = { - .cmd_operation = cpu_to_le32(operation), - .mac_id = cpu_to_le32(mvm_vif->id), - .measurement_time = cpu_to_le32(duration), - .timeout = cpu_to_le32(timeout), - }; - u32 cmdid = - iwl_cmd_id(LINK_QUALITY_MEASUREMENT_CMD, MAC_CONF_GROUP, 0); - int ret; - - if (!fw_has_capa(&mvm_vif->mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_LQM_SUPPORT)) - return -EOPNOTSUPP; - - if (vif->type != NL80211_IFTYPE_STATION || vif->p2p) - return -EINVAL; - - switch (operation) { - case LQM_CMD_OPERATION_START_MEASUREMENT: - if (iwl_mvm_lqm_active(mvm_vif->mvm)) - return -EBUSY; - if (!vif->bss_conf.assoc) - return -EINVAL; - mvm_vif->lqm_active = true; - break; - case LQM_CMD_OPERATION_STOP_MEASUREMENT: - if (!iwl_mvm_lqm_active(mvm_vif->mvm)) - return -EINVAL; - break; - default: - return -EINVAL; - } - - ret = iwl_mvm_send_cmd_pdu(mvm_vif->mvm, cmdid, 0, sizeof(cmd), - &cmd); - - /* command failed - roll back lqm_active state */ - if (ret) { - mvm_vif->lqm_active = - operation == LQM_CMD_OPERATION_STOP_MEASUREMENT; - } - - return ret; -} - -static void iwl_mvm_lqm_active_iterator(void *_data, u8 *mac, - struct ieee80211_vif *vif) -{ - struct iwl_mvm_vif *mvm_vif = iwl_mvm_vif_from_mac80211(vif); - bool *lqm_active = _data; - - *lqm_active = *lqm_active || mvm_vif->lqm_active; -} - -bool iwl_mvm_lqm_active(struct iwl_mvm *mvm) -{ - bool ret = false; - - lockdep_assert_held(&mvm->mutex); - ieee80211_iterate_active_interfaces_atomic( - mvm->hw, IEEE80211_IFACE_ITER_NORMAL, - iwl_mvm_lqm_active_iterator, &ret); - - return ret; -} From 528a542aa6654a9ec03830cc75142cc94276739e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 31 Aug 2017 11:52:30 +0300 Subject: [PATCH 0578/2177] iwlwifi: mvm: support firmware debug trigger on frame reorder timeout The trigger that collects data when a frame is released because of the timer of the reordering buffer was not implemented for 9000 devices. Fix this. 
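Structurally, the change moves the trigger logic out of mac80211.c into a shared helper (iwl_mvm_event_frame_timeout_callback() in utils.c) so that the multi-queue RX reorder-timer path can fire the same firmware debug trigger as the mac80211 BA_FRAME_TIMEOUT event. Below is a minimal stand-alone sketch of that shape, with stand-in types and printf in place of the real trigger collection; all names in it are illustrative, not the driver's.

#include <stdio.h>

/* Minimal stand-ins for the driver objects involved. */
struct sta { const char *addr; };
struct vif { int id; };

/*
 * Shared helper, analogous to iwl_mvm_event_frame_timeout_callback():
 * both callers funnel into one place that checks whether the BA debug
 * trigger is enabled and, if so, collects firmware data.
 */
static void frame_timeout_trigger(const struct vif *vif,
				  const struct sta *sta, unsigned int tid)
{
	printf("collect fw debug data: vif %d, sta %s, tid %u\n",
	       vif->id, sta->addr, tid);
}

/* Caller 1: the pre-existing mac80211 BA_FRAME_TIMEOUT event path. */
static void mac_event_callback(const struct vif *vif,
			       const struct sta *sta, unsigned int tid)
{
	frame_timeout_trigger(vif, sta, tid);
}

/* Caller 2: the reorder-buffer timer expiry path covered by this patch. */
static void reorder_timer_expired(const struct vif *vif,
				  const struct sta *sta, unsigned int tid)
{
	/* ... release the expired frames, then fire the same trigger */
	frame_timeout_trigger(vif, sta, tid);
}

int main(void)
{
	struct vif v = { .id = 0 };
	struct sta s = { .addr = "aa:bb:cc:dd:ee:ff" };

	mac_event_callback(&v, &s, 3);
	reorder_timer_expired(&v, &s, 3);
	return 0;
}

The new u8 tid field added to iwl_mvm_reorder_buffer exists purely so the timer path knows which TID to report to the trigger.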
Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 28 ++----------------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 6 ++++ drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 5 ++++ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 1 + .../net/wireless/intel/iwlwifi/mvm/utils.c | 25 +++++++++++++++++ 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 8b4584541272..d7530474c1d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4186,31 +4186,6 @@ static void iwl_mvm_event_bar_rx_callback(struct iwl_mvm *mvm, event->u.ba.ssn); } -static void -iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - const struct ieee80211_event *event) -{ - struct iwl_fw_dbg_trigger_tlv *trig; - struct iwl_fw_dbg_trigger_ba *ba_trig; - - if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_BA)) - return; - - trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA); - ba_trig = (void *)trig->data; - if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt, - ieee80211_vif_to_wdev(vif), trig)) - return; - - if (!(le16_to_cpu(ba_trig->frame_timeout) & BIT(event->u.ba.tid))) - return; - - iwl_fw_dbg_collect_trig(&mvm->fwrt, trig, - "Frame from %pM timed out, tid %d", - event->u.ba.sta->addr, event->u.ba.tid); -} - static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const struct ieee80211_event *event) @@ -4225,7 +4200,8 @@ static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw, iwl_mvm_event_bar_rx_callback(mvm, vif, event); break; case BA_FRAME_TIMEOUT: - iwl_mvm_event_frame_timeout_callback(mvm, vif, event); + iwl_mvm_event_frame_timeout_callback(mvm, vif, event->u.ba.sta, + event->u.ba.tid); break; default: break; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index ec2cf248990b..e8be5104b909 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -586,6 +586,7 @@ enum iwl_mvm_tdls_cs_state { * @queue: queue of this reorder buffer * @last_amsdu: track last ASMDU SN for duplication detection * @last_sub_index: track ASMDU sub frame index for duplication detection + * @tid: the tid * @entries: list of skbs stored * @reorder_time: time the packet was stored in the reorder buffer * @reorder_timer: timer for frames are in the reorder buffer. 
For AMSDU @@ -603,6 +604,7 @@ struct iwl_mvm_reorder_buffer { int queue; u16 last_amsdu; u8 last_sub_index; + u8 tid; struct sk_buff_head entries[IEEE80211_MAX_AMPDU_BUF]; unsigned long reorder_time[IEEE80211_MAX_AMPDU_BUF]; struct timer_list reorder_timer; @@ -1839,6 +1841,10 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm, bool tdls, bool cmd_q); void iwl_mvm_connection_loss(struct iwl_mvm *mvm, struct ieee80211_vif *vif, const char *errmsg); +void iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + const struct ieee80211_sta *sta, + u16 tid); int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b); int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 67ffd9774712..a0b406e68d55 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -492,13 +492,18 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) if (expired) { struct ieee80211_sta *sta; + struct iwl_mvm_sta *mvmsta; rcu_read_lock(); sta = rcu_dereference(buf->mvm->fw_id_to_mac_id[buf->sta_id]); + mvmsta = iwl_mvm_sta_from_mac80211(sta); + /* SN is set to the last expired frame + 1 */ IWL_DEBUG_HT(buf->mvm, "Releasing expired frames for sta %u, sn %d\n", buf->sta_id, sn); + iwl_mvm_event_frame_timeout_callback(buf->mvm, mvmsta->vif, + sta, buf->tid); iwl_mvm_release_frames(buf->mvm, sta, NULL, buf, sn); rcu_read_unlock(); } else { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 411a2055dc45..3711f226220c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2158,6 +2158,7 @@ static void iwl_mvm_init_reorder_buffer(struct iwl_mvm *mvm, reorder_buf->mvm = mvm; reorder_buf->queue = i; reorder_buf->sta_id = sta_id; + reorder_buf->tid = data->tid; reorder_buf->valid = false; for (j = 0; j < reorder_buf->buf_size; j++) __skb_queue_head_init(&reorder_buf->entries[j]); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 328035640669..2da1b088ac01 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1368,6 +1368,31 @@ void iwl_mvm_inactivity_check(struct iwl_mvm *mvm) rcu_read_unlock(); } +void iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + const struct ieee80211_sta *sta, + u16 tid) +{ + struct iwl_fw_dbg_trigger_tlv *trig; + struct iwl_fw_dbg_trigger_ba *ba_trig; + + if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_BA)) + return; + + trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA); + ba_trig = (void *)trig->data; + if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt, + ieee80211_vif_to_wdev(vif), trig)) + return; + + if (!(le16_to_cpu(ba_trig->frame_timeout) & BIT(tid))) + return; + + iwl_fw_dbg_collect_trig(&mvm->fwrt, trig, + "Frame from %pM timed out, tid %d", + sta->addr, tid); +} + void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime) { bool ps_disabled; From b88beaf95a6253aa8a5a6424db26ecf8a1e0ea0a Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 4 Sep 2017 14:39:22 +0300 Subject: [PATCH 0579/2177] iwlwifi: Add few debug prints to the WRT dump flow This would enable to better catch timing issues with cases that WRT dump takes too much time. 
Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 6afc7a799892..f7eab03937f2 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -93,6 +93,8 @@ static void iwl_read_radio_regs(struct iwl_fw_runtime *fwrt, unsigned long flags; int i; + IWL_DEBUG_INFO(fwrt, "WRT radio registers dump\n"); + if (!iwl_trans_grab_nic_access(fwrt->trans, &flags)) return; @@ -233,6 +235,8 @@ static void iwl_fw_dump_fifos(struct iwl_fw_runtime *fwrt, unsigned long flags; int i, j; + IWL_DEBUG_INFO(fwrt, "WRT FIFO dump\n"); + if (!iwl_trans_grab_nic_access(fwrt->trans, &flags)) return; @@ -476,6 +480,8 @@ static void iwl_dump_prph(struct iwl_trans *trans, unsigned long flags; u32 i; + IWL_DEBUG_INFO(trans, "WRT PRPH dump\n"); + if (!iwl_trans_grab_nic_access(trans, &flags)) return; @@ -559,6 +565,8 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt) bool monitor_dump_only = false; int i; + IWL_DEBUG_INFO(fwrt, "WRT dump start\n"); + /* there's no point in fw dump if the bus is dead */ if (test_bit(STATUS_TRANS_DEAD, &fwrt->trans->status)) { IWL_ERR(fwrt, "Skip fw error dump since bus is dead\n"); @@ -816,6 +824,9 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt) dump_mem->type = fw_dbg_mem[i].data_type; dump_mem->offset = cpu_to_le32(ofs); + IWL_DEBUG_INFO(fwrt, "WRT memory dump. Type=%u\n", + dump_mem->type); + switch (dump_mem->type & cpu_to_le32(FW_DBG_MEM_TYPE_MASK)) { case cpu_to_le32(FW_DBG_MEM_TYPE_REGULAR): iwl_trans_read_mem_bytes(fwrt->trans, ofs, @@ -841,6 +852,7 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt) } if (smem_len) { + IWL_DEBUG_INFO(fwrt, "WRT SMEM dump\n"); dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM); dump_data->len = cpu_to_le32(smem_len + sizeof(*dump_mem)); dump_mem = (void *)dump_data->data; @@ -853,6 +865,7 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt) } if (sram2_len) { + IWL_DEBUG_INFO(fwrt, "WRT SRAM dump\n"); dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM); dump_data->len = cpu_to_le32(sram2_len + sizeof(*dump_mem)); dump_mem = (void *)dump_data->data; @@ -868,6 +881,7 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt) if (!fwrt->trans->cfg->gen2 && fwrt->fw->img[fwrt->cur_fw_img].paging_mem_size && fwrt->fw_paging_db[0].fw_paging_block) { + IWL_DEBUG_INFO(fwrt, "WRT paging dump\n"); for (i = 1; i < fwrt->num_of_paging_blk + 1; i++) { struct iwl_fw_error_dump_paging *paging; struct page *pages = @@ -930,6 +944,7 @@ out: iwl_fw_free_dump_desc(fwrt); fwrt->dump.trig = NULL; clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status); + IWL_DEBUG_INFO(fwrt, "WRT dump done\n"); } IWL_EXPORT_SYMBOL(iwl_fw_error_dump); From dd05f9aab4426ff178b12d601e50d19d336eba30 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 30 Jul 2017 17:33:48 +0300 Subject: [PATCH 0580/2177] iwlwifi: pcie: dynamic Tx command queue size Devices in the A000 family can use a different size for the command queue. To allow this, make the command queue size configurable and set the size for A000 devices to 32. 
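A stand-alone sketch of the size-selection logic this introduces, mirroring iwl_pcie_set_tx_cmd_queue_size() from the diff; the real function also sanity-checks the value through TFD_QUEUE_CB_SIZE(), which is omitted here, and DEFAULT_CMD_SLOTS is only an illustrative stand-in for the driver's TFD_CMD_SLOTS.

#include <stdbool.h>
#include <stdio.h>

#define DEFAULT_CMD_SLOTS 32	/* stand-in for the driver's TFD_CMD_SLOTS */

static bool power_of_2(unsigned int n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

/*
 * Use the per-device configured command queue size when one is given
 * and sane, otherwise fall back to the historical default.
 */
static unsigned int pick_cmd_queue_size(unsigned int cfg_size)
{
	if (cfg_size && power_of_2(cfg_size))
		return cfg_size;
	return DEFAULT_CMD_SLOTS;
}

int main(void)
{
	printf("a000-style cfg (32) -> %u slots\n", pick_cmd_queue_size(32));
	printf("unset cfg (0)       -> %u slots\n", pick_cmd_queue_size(0));
	printf("bogus cfg (48)      -> %u slots\n", pick_cmd_queue_size(48));
	return 0;
}

Because tx_cmd_queue_size stays zero in all existing configs, every non-a000 device keeps using the default command queue size unchanged.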
Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/a000.c | 3 ++- .../net/wireless/intel/iwlwifi/iwl-config.h | 3 +++ .../wireless/intel/iwlwifi/pcie/ctxt-info.c | 2 +- .../wireless/intel/iwlwifi/pcie/internal.h | 3 +++ .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 8 +++++-- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 23 +++++++++++++++++-- 6 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c index 76ba1f8bc72f..ed8bccd228f8 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c @@ -134,7 +134,8 @@ static const struct iwl_ht_params iwl_a000_ht_params = { .rf_id = true, \ .gen2 = true, \ .ext_nvm = true, \ - .dbgc_supported = true + .dbgc_supported = true, \ + .tx_cmd_queue_size = 32 const struct iwl_cfg iwla000_2ac_cfg_hr = { .name = "Intel(R) Dual Band Wireless AC a000", diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 3e057b539d5b..b9f3b350fe34 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -321,6 +321,8 @@ struct iwl_pwr_tx_backoff { * @gen2: a000 and on transport operation * @cdb: CDB support * @ext_nvm: extended NVM format + * @tx_cmd_queue_size: size of the cmd queue. If zero, use the same value as + * the regular queues * * We enable the driver to be backward compatible wrt. hardware features. * API differences in uCode shouldn't be handled here but through TLVs @@ -371,6 +373,7 @@ struct iwl_cfg { cdb:1, ext_nvm:1, dbgc_supported:1; + u16 tx_cmd_queue_size; u8 valid_tx_ant; u8 valid_rx_ant; u8 non_shared_ant; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c index 3fc4343581ee..5ef216f3a60b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c @@ -244,7 +244,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans, ctxt_info->hcmd_cfg.cmd_queue_addr = cpu_to_le64(trans_pcie->txq[trans_pcie->cmd_queue]->dma_addr); ctxt_info->hcmd_cfg.cmd_queue_size = - TFD_QUEUE_CB_SIZE(TFD_CMD_SLOTS); + TFD_QUEUE_CB_SIZE(trans_pcie->tx_cmd_queue_size); /* allocate ucode sections in dram and set addresses */ ret = iwl_pcie_ctxt_info_init_fw_sec(trans, fw, ctxt_info); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 4fb7647995c3..9caff1ec29e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -383,6 +383,7 @@ struct iwl_self_init_dram { * @hw_init_mask: initial unmasked hw causes * @fh_mask: current unmasked fh causes * @hw_mask: current unmasked hw causes + * @tx_cmd_queue_size: the size of the tx command queue */ struct iwl_trans_pcie { struct iwl_rxq *rxq; @@ -463,6 +464,7 @@ struct iwl_trans_pcie { u32 fh_mask; u32 hw_mask; cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES]; + u16 tx_cmd_queue_size; }; static inline struct iwl_trans_pcie * @@ -534,6 +536,7 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans, void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct sk_buff_head *skbs); void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); +void iwl_pcie_set_tx_cmd_queue_size(struct iwl_trans *trans); static inline u16 
iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *_tfd, u8 idx) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index d74613fcb756..79e4c73a9709 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -1160,6 +1160,8 @@ int iwl_pcie_gen2_tx_init(struct iwl_trans *trans) struct iwl_txq *cmd_queue; int txq_id = trans_pcie->cmd_queue, ret; + iwl_pcie_set_tx_cmd_queue_size(trans); + /* alloc and init the command queue */ if (!trans_pcie->txq[txq_id]) { cmd_queue = kzalloc(sizeof(*cmd_queue), GFP_KERNEL); @@ -1168,7 +1170,8 @@ int iwl_pcie_gen2_tx_init(struct iwl_trans *trans) return -ENOMEM; } trans_pcie->txq[txq_id] = cmd_queue; - ret = iwl_pcie_txq_alloc(trans, cmd_queue, TFD_CMD_SLOTS, true); + ret = iwl_pcie_txq_alloc(trans, cmd_queue, + trans_pcie->tx_cmd_queue_size, true); if (ret) { IWL_ERR(trans, "Tx %d queue init failed\n", txq_id); goto error; @@ -1177,7 +1180,8 @@ int iwl_pcie_gen2_tx_init(struct iwl_trans *trans) cmd_queue = trans_pcie->txq[txq_id]; } - ret = iwl_pcie_txq_init(trans, cmd_queue, TFD_CMD_SLOTS, true); + ret = iwl_pcie_txq_init(trans, cmd_queue, + trans_pcie->tx_cmd_queue_size, true); if (ret) { IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id); goto error; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index c645d10d3707..e93c471ef9bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -951,7 +951,8 @@ static int iwl_pcie_tx_alloc(struct iwl_trans *trans) txq_id++) { bool cmd_queue = (txq_id == trans_pcie->cmd_queue); - slots_num = cmd_queue ? TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS; + slots_num = cmd_queue ? trans_pcie->tx_cmd_queue_size : + TFD_TX_CMD_SLOTS; trans_pcie->txq[txq_id] = &trans_pcie->txq_memory[txq_id]; ret = iwl_pcie_txq_alloc(trans, trans_pcie->txq[txq_id], slots_num, cmd_queue); @@ -970,6 +971,21 @@ error: return ret; } +void iwl_pcie_set_tx_cmd_queue_size(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int queue_size = TFD_CMD_SLOTS; + + if (trans->cfg->tx_cmd_queue_size) + queue_size = trans->cfg->tx_cmd_queue_size; + + if (WARN_ON(!(is_power_of_2(queue_size) && + TFD_QUEUE_CB_SIZE(queue_size) > 0))) + trans_pcie->tx_cmd_queue_size = TFD_CMD_SLOTS; + else + trans_pcie->tx_cmd_queue_size = queue_size; +} + int iwl_pcie_tx_init(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -977,6 +993,8 @@ int iwl_pcie_tx_init(struct iwl_trans *trans) int txq_id, slots_num; bool alloc = false; + iwl_pcie_set_tx_cmd_queue_size(trans); + if (!trans_pcie->txq_memory) { ret = iwl_pcie_tx_alloc(trans); if (ret) @@ -1000,7 +1018,8 @@ int iwl_pcie_tx_init(struct iwl_trans *trans) txq_id++) { bool cmd_queue = (txq_id == trans_pcie->cmd_queue); - slots_num = cmd_queue ? TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS; + slots_num = cmd_queue ? trans_pcie->tx_cmd_queue_size : + TFD_TX_CMD_SLOTS; ret = iwl_pcie_txq_init(trans, trans_pcie->txq[txq_id], slots_num, cmd_queue); if (ret) { From a2ff48af65ad661a257987fa05973351c595002e Mon Sep 17 00:00:00 2001 From: Chaya Rachel Ivgi Date: Wed, 13 Sep 2017 15:12:16 +0300 Subject: [PATCH 0581/2177] iwlwifi: remove redundant reading from NVM file The driver reads xtal_calib from NVM file, but actually never uses it. This is only used in dvm driver. 
Signed-off-by: Chaya Rachel Ivgi Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 3014beef4873..4574a126c1ae 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -89,10 +89,6 @@ enum wkp_nvm_offsets { SKU = 2, N_HW_ADDRS = 3, NVM_CHANNELS = 0x1E0 - NVM_SW_SECTION, - - /* NVM calibration section offset (in words) definitions */ - NVM_CALIB_SECTION = 0x2B8, - XTAL_CALIB = 0x316 - NVM_CALIB_SECTION }; enum ext_nvm_offsets { @@ -748,9 +744,6 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, kfree(data); return NULL; } - /* in family 8000 Xtal calibration values moved to OTP */ - data->xtal_calib[0] = *(nvm_calib + XTAL_CALIB); - data->xtal_calib[1] = *(nvm_calib + XTAL_CALIB + 1); lar_enabled = true; ch_section = &nvm_sw[NVM_CHANNELS]; } else { From 7a20bcceeddd7162801bb37d992c5a2bfd5ca3ce Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 14 Sep 2017 15:45:44 +0300 Subject: [PATCH 0582/2177] iwlwifi: mvm: don't send identical PHY_CTXT_CMD When we have an AP which supports HT and a single HT station is connected, we change the min_width from NL80211_CHAN_WIDTH_20_NOHT to NL80211_CHAN_WIDTH_20. This of course has no implication on the channel width but still sends a command to the firmware. Remember the last width that was sent and refrain from sending unnecessary commands to the firmware. Sending a PHY_CTXT_CMD to the firmware has a cost since it recalculates the presence on the medium and because of that it closes the transmit queues for a short while. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 14 ++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 ++ drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c | 1 + 3 files changed, 17 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index d7530474c1d3..d6180463e92f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3406,10 +3406,24 @@ static void iwl_mvm_change_chanctx(struct ieee80211_hw *hw, return; mutex_lock(&mvm->mutex); + + /* we are only changing the min_width, may be a noop */ + if (changed == IEEE80211_CHANCTX_CHANGE_MIN_WIDTH) { + if (phy_ctxt->width == ctx->min_def.width) + goto out_unlock; + + /* we are just toggling between 20_NOHT and 20 */ + if (phy_ctxt->width <= NL80211_CHAN_WIDTH_20 && + ctx->min_def.width <= NL80211_CHAN_WIDTH_20) + goto out_unlock; + } + iwl_mvm_bt_coex_vif_change(mvm); iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, &ctx->min_def, ctx->rx_chains_static, ctx->rx_chains_dynamic); + +out_unlock: mutex_unlock(&mvm->mutex); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index e8be5104b909..2867683f1aa0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -147,6 +147,8 @@ struct iwl_mvm_phy_ctxt { u16 color; u32 ref; + enum nl80211_chan_width width; + /* * TODO: This should probably be removed. 
Currently here only for rate * scaling algorithm diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c index 7ee8e9077baf..305cd56bf746 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c @@ -272,6 +272,7 @@ int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt, } ctxt->channel = chandef->chan; + ctxt->width = chandef->width; return iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, chains_static, chains_dynamic, action, 0); From aed52a88c09d486bb405373e6ad35b5f249093fb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Sep 2017 12:59:03 +0200 Subject: [PATCH 0583/2177] iwlwifi: fw: api: remove excess enum value documentation These enum values don't exist, so remove their documentation as well. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/sta.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h index af369eba3795..dc40cbd52f92 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h @@ -68,9 +68,6 @@ * @STA_FLG_REDUCED_TX_PWR_DATA: reduced TX power (data frames) * @STA_FLG_DISABLE_TX: set if TX should be disabled * @STA_FLG_PS: set if STA is in Power Save - * @STA_FLG_INVALID: set if STA is invalid - * @STA_FLG_DLP_EN: Direct Link Protocol is enabled - * @STA_FLG_SET_ALL_KEYS: the current key applies to all key IDs * @STA_FLG_DRAIN_FLOW: drain flow * @STA_FLG_PAN: STA is for PAN interface * @STA_FLG_CLASS_AUTH: station is authenticated @@ -100,7 +97,6 @@ * @STA_FLG_MIMO_EN_SISO: no support for MIMO * @STA_FLG_MIMO_EN_MIMO2: 2 streams supported * @STA_FLG_MIMO_EN_MIMO3: 3 streams supported - * @STA_FLG_MFP_EN: Management Frame Protection * @STA_FLG_AGG_MPDU_DENS_MSK: A-MPDU density (mask) * @STA_FLG_AGG_MPDU_DENS_SHIFT: A-MPDU density (bit shift) * @STA_FLG_AGG_MPDU_DENS_2US: A-MPDU density (2 usec gap) From 5281f5181a3ba23ff9ee0fd8e9cbd7479d58baef Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Tue, 19 Sep 2017 15:52:13 +0000 Subject: [PATCH 0584/2177] iwlwifi: mvm: add marker cmd response struct. This helps for documentation and clarifies the code by defining the exact response struct for the marker command. 
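For context, here is a hedged sketch of how a caller could retrieve the returned gp2 value using the driver's usual synchronous host-command pattern (CMD_WANT_SKB plus iwl_free_resp()); the marker payload initialization is deliberately elided and the whole function is illustrative, not the driver's actual caller.

/* Assumes mvm.h and fw/api/debug.h are in scope. */
static int example_send_marker(struct iwl_mvm *mvm, u32 *gp2)
{
	struct iwl_mvm_marker marker = {};	/* payload fields elided */
	struct iwl_host_cmd cmd = {
		.id = MARKER_CMD,
		.flags = CMD_WANT_SKB,		/* keep the response packet */
		.data = { &marker },
		.len = { sizeof(marker) },
	};
	struct iwl_mvm_marker_rsp *rsp;
	int ret;

	ret = iwl_mvm_send_cmd(mvm, &cmd);
	if (ret)
		return ret;

	rsp = (void *)cmd.resp_pkt->data;
	*gp2 = le32_to_cpu(rsp->gp2);		/* FW GP2 clock at marker time */

	iwl_free_resp(&cmd);
	return 0;
}

Documenting the response as struct iwl_mvm_marker_rsp makes the cast above self-describing instead of relying on a bare __le32.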
Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/commands.h | 1 + drivers/net/wireless/intel/iwlwifi/fw/api/debug.h | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h index 074868394427..7ebbf097488b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h @@ -504,6 +504,7 @@ enum iwl_legacy_cmds { /** * @MARKER_CMD: trace marker command, uses &struct iwl_mvm_marker + * with &struct iwl_mvm_marker_rsp */ MARKER_CMD = 0xcb, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h index 9f88b61536bc..0a81fb1b6ed4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h @@ -278,6 +278,15 @@ struct iwl_mvm_marker { __le32 metadata[0]; } __packed; /* MARKER_API_S_VER_1 */ +/** + * struct iwl_mvm_marker_rsp - Response to marker cmd + * + * @gp2: The gp2 clock value in the FW + */ +struct iwl_mvm_marker_rsp { + __le32 gp2; +} __packed; + /* Operation types for the debug mem access */ enum { DEBUG_MEM_OP_READ = 0, From 1e37f79945b0b837a2d8c56560e18c7be61ea665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Sat, 23 Sep 2017 12:31:12 +0200 Subject: [PATCH 0585/2177] iwlwifi: fix minor code style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes three trivial issues as reported by checkpatch.pl, namely two switch/case indentation issues and one alignment issue in a multiline comment. Signed-off-by: Christoph Böhmwalder Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 99676d6c4713..ccdb247d68c5 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -832,7 +832,7 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, capa->standard_phy_calibration_size = le32_to_cpup((__le32 *)tlv_data); break; - case IWL_UCODE_TLV_SEC_RT: + case IWL_UCODE_TLV_SEC_RT: iwl_store_ucode_sec(pieces, tlv_data, IWL_UCODE_REGULAR, tlv_len); drv->fw.type = IWL_FW_MVM; @@ -864,7 +864,7 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, FW_PHY_CFG_RX_CHAIN) >> FW_PHY_CFG_RX_CHAIN_POS; break; - case IWL_UCODE_TLV_SECURE_SEC_RT: + case IWL_UCODE_TLV_SECURE_SEC_RT: iwl_store_ucode_sec(pieces, tlv_data, IWL_UCODE_REGULAR, tlv_len); drv->fw.type = IWL_FW_MVM; @@ -1335,7 +1335,8 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) /* Runtime instructions and 2 copies of data: * 1) unmodified from disk - * 2) backup cache for save/restore during power-downs */ + * 2) backup cache for save/restore during power-downs + */ for (i = 0; i < IWL_UCODE_TYPE_MAX; i++) if (iwl_alloc_ucode(drv, pieces, i)) goto out_free_fw; From a6d24fad00d98e28841b1f39965fda530df637df Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Thu, 17 Aug 2017 12:05:12 -0700 Subject: [PATCH 0586/2177] iwlwifi: pcie: dump registers when HW becomes inaccessible We conclude the HW became inaccessible when we timeout waiting for a bit to be set in a memory mapped register (CSR_GP_CNTRL). 
This conclusion may not be true because the bit may not get set due to: - a firmware issue - a driver issue - a PCI bus issue - a platform issue There are a lot of such reports with really no good debug information beyond this message to help us. Add some debug information and attempt to dump the different register spaces at such a failure: * Dump some configuration space of device - this will tell us if something very basic is broken in the PCIe bus (so that configuration accesses are failing). If this works, the PCIe bus seems OK. If this does not work, it is definitely an PCIe issue. * Dump some memory mapped registers - if we're reading some sane'ish values, this will tell us that the PCIe bus is OK, but may be a firmware / driver issue. If this does not work, it may be a PCI configuration issue or a driver/firmware issue. * Dump parent and device's AER registers, will give us some straws to chew on. This is the sample output: [ 13.082651] ------------[ cut here ]------------ [ 13.086791] iwlwifi 0000:01:00.0: iwlwifi transaction failed, dumping registers [ 13.086793] iwlwifi 0000:01:00.0: iwlwifi device config registers: [ 13.086893] iwlwifi 0000:01:00.0: 00000000: 095a8086 00100406 02800059 00000000 00000004 00000000 00000000 00000000 [ 13.086895] iwlwifi 0000:01:00.0: 00000020: 00000000 00000000 00000000 50108086 00000000 000000c8 00000000 00000100 [ 13.086901] iwlwifi 0000:01:00.0: iwlwifi device memory mapped registers: [ 13.086989] iwlwifi 0000:01:00.0: 00000000: ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff [ 13.086991] iwlwifi 0000:01:00.0: 00000020: ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff [ 13.086999] iwlwifi 0000:01:00.0: iwlwifi device AER capability structure: [ 13.087033] iwlwifi 0000:01:00.0: 00000000: 14010001 00100000 00000000 00462031 00002000 00002000 00000014 40000001 [ 13.087034] iwlwifi 0000:01:00.0: 00000020: 0000000f d140000c 00000000 [ 13.087036] iwlwifi 0000:01:00.0: iwlwifi parent port (0000:00:1c.0) config registers: [ 13.087074] iwlwifi 0000:00:1c.0: 00000000: 9d108086 00100506 060400f1 00810010 00000000 00000000 00010100 200000f0 [ 13.087075] iwlwifi 0000:00:1c.0: 00000020: d140d140 0001fff1 00000000 00000000 00000000 00000040 00000000 0006010b [ 13.087087] ------------[ cut here ]------------ [ 13.087095] WARNING: CPU: 0 PID: 1759 at drivers/net/wireless/iwl7000/iwlwifi/pcie/trans.c:2082 iwl_trans_pcie_reclaim+0x1ee4/0x2b9a [iwlwifi]() [ 13.087096] Timeout waiting for hardware access (CSR_GP_CNTRL 0xffffffff) Signed-off-by: Rajat Jain Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 1 + .../net/wireless/intel/iwlwifi/pcie/trans.c | 89 +++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 9caff1ec29e1..d749abeca3ae 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -443,6 +443,7 @@ struct iwl_trans_pcie { bool bc_table_dword; bool scd_set_active; bool sw_csum_tx; + bool pcie_dbg_dumped_once; u32 rx_page_order; /*protect hw register */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 2e3e013ec95a..0008ea323be3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -88,6 +88,93 @@ #define IWL_FW_MEM_EXTENDED_START 0x40000 #define IWL_FW_MEM_EXTENDED_END 
0x57FFF +static void iwl_trans_pcie_err_dump(struct iwl_trans *trans) +{ +#define PCI_DUMP_SIZE 64 +#define PREFIX_LEN 32 + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct pci_dev *pdev = trans_pcie->pci_dev; + u32 i, pos, alloc_size, *ptr, *buf; + char *prefix; + + if (trans_pcie->pcie_dbg_dumped_once) + return; + + /* Should be a multiple of 4 */ + BUILD_BUG_ON(PCI_DUMP_SIZE > 4096 || PCI_DUMP_SIZE & 0x3); + /* Alloc a max size buffer */ + if (PCI_ERR_ROOT_ERR_SRC + 4 > PCI_DUMP_SIZE) + alloc_size = PCI_ERR_ROOT_ERR_SRC + 4 + PREFIX_LEN; + else + alloc_size = PCI_DUMP_SIZE + PREFIX_LEN; + buf = kmalloc(alloc_size, GFP_ATOMIC); + if (!buf) + return; + prefix = (char *)buf + alloc_size - PREFIX_LEN; + + IWL_ERR(trans, "iwlwifi transaction failed, dumping registers\n"); + + /* Print wifi device registers */ + sprintf(prefix, "iwlwifi %s: ", pci_name(pdev)); + IWL_ERR(trans, "iwlwifi device config registers:\n"); + for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++) + if (pci_read_config_dword(pdev, i, ptr)) + goto err_read; + print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0); + + IWL_ERR(trans, "iwlwifi device memory mapped registers:\n"); + for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++) + *ptr = iwl_read32(trans, i); + print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0); + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + if (pos) { + IWL_ERR(trans, "iwlwifi device AER capability structure:\n"); + for (i = 0, ptr = buf; i < PCI_ERR_ROOT_COMMAND; i += 4, ptr++) + if (pci_read_config_dword(pdev, pos + i, ptr)) + goto err_read; + print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, + 32, 4, buf, i, 0); + } + + /* Print parent device registers next */ + if (!pdev->bus->self) + goto out; + + pdev = pdev->bus->self; + sprintf(prefix, "iwlwifi %s: ", pci_name(pdev)); + + IWL_ERR(trans, "iwlwifi parent port (%s) config registers:\n", + pci_name(pdev)); + for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++) + if (pci_read_config_dword(pdev, i, ptr)) + goto err_read; + print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0); + + /* Print root port AER registers */ + pos = 0; + pdev = pcie_find_root_port(pdev); + if (pdev) + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + if (pos) { + IWL_ERR(trans, "iwlwifi root port (%s) AER cap structure:\n", + pci_name(pdev)); + sprintf(prefix, "iwlwifi %s: ", pci_name(pdev)); + for (i = 0, ptr = buf; i <= PCI_ERR_ROOT_ERR_SRC; i += 4, ptr++) + if (pci_read_config_dword(pdev, pos + i, ptr)) + goto err_read; + print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, + 4, buf, i, 0); + } + +err_read: + print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0); + IWL_ERR(trans, "Read failed at 0x%X\n", i); +out: + trans_pcie->pcie_dbg_dumped_once = 1; + kfree(buf); +} + static void iwl_pcie_free_fw_monitor(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -649,6 +736,7 @@ static int iwl_pcie_load_firmware_chunk(struct iwl_trans *trans, trans_pcie->ucode_write_complete, 5 * HZ); if (!ret) { IWL_ERR(trans, "Failed to load firmware chunk!\n"); + iwl_trans_pcie_err_dump(trans); return -ETIMEDOUT; } @@ -1868,6 +1956,7 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY | CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000); if (unlikely(ret < 0)) { + iwl_trans_pcie_err_dump(trans); iwl_write32(trans, CSR_RESET, 
CSR_RESET_REG_FLAG_FORCE_NMI); WARN_ONCE(1, "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n", From 417795a3f4d634bb50dbffaf2ab9d7f46750b7e8 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 28 Sep 2017 11:11:51 +0300 Subject: [PATCH 0587/2177] iwlwifi: mvm: change warning to warn_once() In case there is a FW bug where the BAID value in the metadata is not properly initialized we hit the warning for every RX packet. Change it to warn once and add elaborate message. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index a0b406e68d55..836c6cf4b369 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -624,7 +624,8 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, return false; /* no sta yet */ - if (WARN_ON(IS_ERR_OR_NULL(sta))) + if (WARN_ONCE(IS_ERR_OR_NULL(sta), + "Got valid BAID without a valid station assigned\n")) return false; mvm_sta = iwl_mvm_sta_from_mac80211(sta); From 813df5cef3bb119940998f2e70cb9016e4b434f7 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 19 Sep 2017 12:35:18 +0300 Subject: [PATCH 0588/2177] iwlwifi: acpi: add common code to read from ACPI There are many places where the same process of invoking a method from ACPI is used, causing a lot of duplicate code. To improve this, introduce a new function to get an ACPI object by invoking an ACPI method that can be reused. Additionally, since this function needs to be called when we only have the trans, the opmode or the device, introduce a new debug macro that gets the device as a parameter so it can be used in the new function. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/Makefile | 1 + drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 97 ++++++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 78 +++++++++++++ .../net/wireless/intel/iwlwifi/iwl-debug.h | 1 + .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 37 ++---- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 105 ++++-------------- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 33 ++---- 7 files changed, 211 insertions(+), 141 deletions(-) create mode 100644 drivers/net/wireless/intel/iwlwifi/fw/acpi.c create mode 100644 drivers/net/wireless/intel/iwlwifi/fw/acpi.h diff --git a/drivers/net/wireless/intel/iwlwifi/Makefile b/drivers/net/wireless/intel/iwlwifi/Makefile index 35a32a3ec882..91b90e77d6d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/Makefile +++ b/drivers/net/wireless/intel/iwlwifi/Makefile @@ -13,6 +13,7 @@ iwlwifi-objs += iwl-trans.o iwlwifi-objs += fw/notif-wait.o iwlwifi-$(CONFIG_IWLMVM) += fw/paging.o fw/smem.o fw/init.o fw/dbg.o iwlwifi-$(CONFIG_IWLMVM) += fw/common_rx.o fw/nvm.o +iwlwifi-$(CONFIG_ACPI) += fw/acpi.o iwlwifi-objs += $(iwlwifi-m) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c new file mode 100644 index 000000000000..a7b2a48618cd --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. 
+ * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2017 Intel Deutschland GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + * + * Contact Information: + * Intel Linux Wireless + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + * BSD LICENSE + * + * Copyright(c) 2017 Intel Deutschland GmbH + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + *****************************************************************************/ + +#include "iwl-drv.h" +#include "iwl-debug.h" +#include "acpi.h" + +void *iwl_acpi_get_object(struct device *dev, acpi_string method) +{ + acpi_handle root_handle; + acpi_handle handle; + struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL}; + acpi_status status; + + root_handle = ACPI_HANDLE(dev); + if (!root_handle) { + IWL_DEBUG_DEV_RADIO(dev, + "Could not retrieve root port ACPI handle\n"); + return ERR_PTR(-ENOENT); + } + + /* Get the method's handle */ + status = acpi_get_handle(root_handle, method, &handle); + if (ACPI_FAILURE(status)) { + IWL_DEBUG_DEV_RADIO(dev, "%s method not found\n", method); + return ERR_PTR(-ENOENT); + } + + /* Call the method with no arguments */ + status = acpi_evaluate_object(handle, NULL, NULL, &buf); + if (ACPI_FAILURE(status)) { + IWL_DEBUG_DEV_RADIO(dev, "%s invocation failed (0x%x)\n", + method, status); + return ERR_PTR(-ENOENT); + } + + return buf.pointer; +} +IWL_EXPORT_SYMBOL(iwl_acpi_get_object); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h new file mode 100644 index 000000000000..e7612a1a8e1a --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -0,0 +1,78 @@ +/****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2017 Intel Deutschland GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + * + * Contact Information: + * Intel Linux Wireless + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + * BSD LICENSE + * + * Copyright(c) 2017 Intel Deutschland GmbH + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *****************************************************************************/ +#ifndef __iwl_fw_acpi__ +#define __iwl_fw_acpi__ + +#include + +#ifdef CONFIG_ACPI + +void *iwl_acpi_get_object(struct device *dev, acpi_string method); + +#else /* CONFIG_ACPI */ + +static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method) +{ + return ERR_PTR(-ENOENT); +} + +#endif /* CONFIG_ACPI */ +#endif /* __iwl_fw_acpi__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-debug.h b/drivers/net/wireless/intel/iwlwifi/iwl-debug.h index cd77c6971753..c023fcf5d452 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-debug.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-debug.h @@ -216,6 +216,7 @@ do { \ #define IWL_DEBUG_TX_REPLY(p, f, a...) IWL_DEBUG(p, IWL_DL_TX_REPLY, f, ## a) #define IWL_DEBUG_TX_QUEUES(p, f, a...) IWL_DEBUG(p, IWL_DL_TX_QUEUES, f, ## a) #define IWL_DEBUG_RADIO(p, f, a...) IWL_DEBUG(p, IWL_DL_RADIO, f, ## a) +#define IWL_DEBUG_DEV_RADIO(p, f, a...) IWL_DEBUG_DEV(p, IWL_DL_RADIO, f, ## a) #define IWL_DEBUG_POWER(p, f, a...) IWL_DEBUG(p, IWL_DL_POWER, f, ## a) #define IWL_DEBUG_11H(p, f, a...) IWL_DEBUG(p, IWL_DL_11H, f, ## a) #define IWL_DEBUG_RPM(p, f, a...) IWL_DEBUG(p, IWL_DL_RPM, f, ## a) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 4574a126c1ae..5165af25f010 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -68,13 +68,14 @@ #include #include #include -#include + #include "iwl-drv.h" #include "iwl-modparams.h" #include "iwl-nvm-parse.h" #include "iwl-prph.h" #include "iwl-io.h" #include "iwl-csr.h" +#include "fw/acpi.h" /* NVM offsets (in words) definitions */ enum wkp_nvm_offsets { @@ -990,37 +991,15 @@ static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd) int iwl_get_bios_mcc(struct device *dev, char *mcc) { - acpi_handle root_handle; - acpi_handle handle; - struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; + union acpi_object *data; u32 mcc_val; - root_handle = ACPI_HANDLE(dev); - if (!root_handle) { - IWL_DEBUG_EEPROM(dev, - "Could not retrieve root port ACPI handle\n"); - return -ENOENT; - } + data = iwl_acpi_get_object(dev, WRDD_METHOD); + if (IS_ERR(data)) + return PTR_ERR(data); - /* Get the method's handle */ - status = acpi_get_handle(root_handle, (acpi_string)WRDD_METHOD, - &handle); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_EEPROM(dev, "WRD method not found\n"); - return -ENOENT; - } - - /* Call WRDD with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_EEPROM(dev, "WRDC invocation failed (0x%x)\n", - status); - return -ENOENT; - } - - mcc_val = iwl_wrdd_get_mcc(dev, wrdd.pointer); - kfree(wrdd.pointer); + mcc_val = iwl_wrdd_get_mcc(dev, data); + kfree(data); if (!mcc_val) return -ENOENT; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c 
b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 83485493a79a..b2a9e7de15b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -66,7 +66,6 @@ *****************************************************************************/ #include #include -#include #include "iwl-trans.h" #include "iwl-op-mode.h" @@ -76,6 +75,7 @@ #include "iwl-io.h" /* for iwl_mvm_rx_card_state_notif */ #include "iwl-prph.h" #include "iwl-eeprom-parse.h" +#include "fw/acpi.h" #include "mvm.h" #include "fw/dbg.h" @@ -660,37 +660,15 @@ static union acpi_object *iwl_mvm_sar_find_wifi_pkg(struct iwl_mvm *mvm, static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm) { - union acpi_object *wifi_pkg, *table; - acpi_handle root_handle; - acpi_handle handle; - struct acpi_buffer wrds = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; + union acpi_object *wifi_pkg, *table, *data; bool enabled; int ret; - root_handle = ACPI_HANDLE(mvm->dev); - if (!root_handle) { - IWL_DEBUG_RADIO(mvm, - "Could not retrieve root port ACPI handle\n"); - return -ENOENT; - } + data = iwl_acpi_get_object(mvm->dev, ACPI_WRDS_METHOD); + if (IS_ERR(data)) + return PTR_ERR(data); - /* Get the method's handle */ - status = acpi_get_handle(root_handle, (acpi_string)ACPI_WRDS_METHOD, - &handle); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_RADIO(mvm, "WRDS method not found\n"); - return -ENOENT; - } - - /* Call WRDS with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &wrds); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_RADIO(mvm, "WRDS invocation failed (0x%x)\n", status); - return -ENOENT; - } - - wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, wrds.pointer, + wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, data, ACPI_WRDS_WIFI_DATA_SIZE); if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); @@ -712,45 +690,22 @@ static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm) */ ret = iwl_mvm_sar_set_profile(mvm, table, &mvm->sar_profiles[0], enabled); - out_free: - kfree(wrds.pointer); + kfree(data); return ret; } static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm) { - union acpi_object *wifi_pkg; - acpi_handle root_handle; - acpi_handle handle; - struct acpi_buffer ewrd = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; + union acpi_object *wifi_pkg, *data; bool enabled; int i, n_profiles, ret; - root_handle = ACPI_HANDLE(mvm->dev); - if (!root_handle) { - IWL_DEBUG_RADIO(mvm, - "Could not retrieve root port ACPI handle\n"); - return -ENOENT; - } + data = iwl_acpi_get_object(mvm->dev, ACPI_EWRD_METHOD); + if (IS_ERR(data)) + return PTR_ERR(data); - /* Get the method's handle */ - status = acpi_get_handle(root_handle, (acpi_string)ACPI_EWRD_METHOD, - &handle); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_RADIO(mvm, "EWRD method not found\n"); - return -ENOENT; - } - - /* Call EWRD with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &ewrd); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_RADIO(mvm, "EWRD invocation failed (0x%x)\n", status); - return -ENOENT; - } - - wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, ewrd.pointer, + wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, data, ACPI_EWRD_WIFI_DATA_SIZE); if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); @@ -792,43 +747,21 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm) } out_free: - kfree(ewrd.pointer); + kfree(data); return ret; } static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) { - union acpi_object *wifi_pkg; - acpi_handle root_handle; - acpi_handle handle; - struct acpi_buffer wgds = {ACPI_ALLOCATE_BUFFER, 
NULL}; - acpi_status status; + union acpi_object *wifi_pkg, *data; int i, j, ret; int idx = 1; - root_handle = ACPI_HANDLE(mvm->dev); - if (!root_handle) { - IWL_DEBUG_RADIO(mvm, - "Could not retrieve root port ACPI handle\n"); - return -ENOENT; - } + data = iwl_acpi_get_object(mvm->dev, ACPI_WGDS_METHOD); + if (IS_ERR(data)) + return PTR_ERR(data); - /* Get the method's handle */ - status = acpi_get_handle(root_handle, (acpi_string)ACPI_WGDS_METHOD, - &handle); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_RADIO(mvm, "WGDS method not found\n"); - return -ENOENT; - } - - /* Call WGDS with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &wgds); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_RADIO(mvm, "WGDS invocation failed (0x%x)\n", status); - return -ENOENT; - } - - wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, wgds.pointer, + wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, data, ACPI_WGDS_WIFI_DATA_SIZE); if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); @@ -851,7 +784,7 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) } ret = 0; out_free: - kfree(wgds.pointer); + kfree(data); return ret; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index e0966f656376..09fb27e9adf5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -73,6 +73,8 @@ #include #include +#include "fw/acpi.h" + #include "iwl-trans.h" #include "iwl-drv.h" #include "internal.h" @@ -641,37 +643,16 @@ static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc) static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) { - acpi_handle pxsx_handle; - acpi_handle handle; - struct acpi_buffer splc = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; + union acpi_object *data; - pxsx_handle = ACPI_HANDLE(&pdev->dev); - if (!pxsx_handle) { - IWL_DEBUG_INFO(trans, - "Could not retrieve root port ACPI handle\n"); + data = iwl_acpi_get_object(trans->dev, ACPI_SPLC_METHOD); + if (IS_ERR(data)) return; - } - /* Get the method's handle */ - status = acpi_get_handle(pxsx_handle, (acpi_string)ACPI_SPLC_METHOD, - &handle); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_INFO(trans, "SPLC method not found\n"); - return; - } - - /* Call SPLC with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &splc); - if (ACPI_FAILURE(status)) { - IWL_ERR(trans, "SPLC invocation failed (0x%x)\n", status); - return; - } - - trans->dflt_pwr_limit = splc_get_pwr_limit(trans, splc.pointer); + trans->dflt_pwr_limit = splc_get_pwr_limit(trans, data); IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n", trans->dflt_pwr_limit); - kfree(splc.pointer); + kfree(data); } #else /* CONFIG_ACPI */ From 1c73acf58bd63a5dcc38132cd3060efbeae4a9ef Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 21 Sep 2017 11:02:45 +0300 Subject: [PATCH 0589/2177] iwlwifi: acpi: move ACPI method definitions to acpi.h Instead of defining each method where they are used and re-defining WIFI_DOMAIN in each one of them, move all the definitions to a central place and define the domain only a single time. 
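For illustration, every caller can now pull the method name and the domain value from fw/acpi.h and go through iwl_acpi_get_object(); a minimal sketch of the resulting call pattern (condensed, not a verbatim copy of any one call site, and example_read_wrds() is only an illustrative name) is:

#include "fw/acpi.h"

/* sketch: evaluate WRDS through the shared helper and shared method name */
static int example_read_wrds(struct device *dev)
{
        union acpi_object *data;

        data = iwl_acpi_get_object(dev, ACPI_WRDS_METHOD);
        if (IS_ERR(data))
                return PTR_ERR(data);

        /* ... walk data->package.elements[] looking for ACPI_WIFI_DOMAIN ... */

        kfree(data);
        return 0;
}

The real call sites are the SAR/geo table readers converted in the previous patch; they follow exactly this shape.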
Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 15 +++++++++++++++ .../net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 8 ++------ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 11 ----------- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 5 +---- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index e7612a1a8e1a..a6d2907f75f5 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -65,6 +65,21 @@ #ifdef CONFIG_ACPI +#define ACPI_WRDS_METHOD "WRDS" +#define ACPI_EWRD_METHOD "EWRD" +#define ACPI_WGDS_METHOD "WGDS" +#define ACPI_WRDD_METHOD "WRDD" +#define ACPI_SPLC_METHOD "SPLC" + +#define ACPI_WIFI_DOMAIN (0x07) + +#define ACPI_WRDS_WIFI_DATA_SIZE (IWL_MVM_SAR_TABLE_SIZE + 2) +#define ACPI_EWRD_WIFI_DATA_SIZE ((IWL_MVM_SAR_PROFILE_NUM - 1) * \ + IWL_MVM_SAR_TABLE_SIZE + 3) +#define ACPI_WGDS_WIFI_DATA_SIZE 18 +#define ACPI_WGDS_NUM_BANDS 2 +#define ACPI_WGDS_TABLE_SIZE 3 + void *iwl_acpi_get_object(struct device *dev, acpi_string method); #else /* CONFIG_ACPI */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 5165af25f010..2ece3c531b88 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -946,10 +946,6 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info); #ifdef CONFIG_ACPI -#define WRDD_METHOD "WRDD" -#define WRDD_WIFI (0x07) -#define WRDD_WIGIG (0x10) - static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd) { union acpi_object *mcc_pkg, *domain_type, *mcc_value; @@ -975,7 +971,7 @@ static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd) } domain_type = &mcc_pkg->package.elements[0]; - if (domain_type->integer.value == WRDD_WIFI) + if (domain_type->integer.value == ACPI_WIFI_DOMAIN) break; mcc_pkg = NULL; @@ -994,7 +990,7 @@ int iwl_get_bios_mcc(struct device *dev, char *mcc) union acpi_object *data; u32 mcc_val; - data = iwl_acpi_get_object(dev, WRDD_METHOD); + data = iwl_acpi_get_object(dev, ACPI_WRDD_METHOD); if (IS_ERR(data)) return PTR_ERR(data); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index b2a9e7de15b7..6c8bcf03afc7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -579,17 +579,6 @@ static int iwl_mvm_config_ltr(struct iwl_mvm *mvm) } #ifdef CONFIG_ACPI -#define ACPI_WRDS_METHOD "WRDS" -#define ACPI_EWRD_METHOD "EWRD" -#define ACPI_WGDS_METHOD "WGDS" -#define ACPI_WIFI_DOMAIN (0x07) -#define ACPI_WRDS_WIFI_DATA_SIZE (IWL_MVM_SAR_TABLE_SIZE + 2) -#define ACPI_EWRD_WIFI_DATA_SIZE ((IWL_MVM_SAR_PROFILE_NUM - 1) * \ - IWL_MVM_SAR_TABLE_SIZE + 3) -#define ACPI_WGDS_WIFI_DATA_SIZE 18 -#define ACPI_WGDS_NUM_BANDS 2 -#define ACPI_WGDS_TABLE_SIZE 3 - static int iwl_mvm_sar_set_profile(struct iwl_mvm *mvm, union acpi_object *table, struct iwl_mvm_sar_profile *profile, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 09fb27e9adf5..a6c4f096cb19 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -588,9 +588,6 @@ static const struct pci_device_id iwl_hw_card_ids[] = { MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); #ifdef 
CONFIG_ACPI -#define ACPI_SPLC_METHOD "SPLC" -#define ACPI_SPLC_DOMAIN_WIFI (0x07) - static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc) { union acpi_object *data_pkg, *dflt_pwr_limit; @@ -625,7 +622,7 @@ static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc) continue; domain = &data_pkg->package.elements[0]; - if (domain->integer.value == ACPI_SPLC_DOMAIN_WIFI) + if (domain->integer.value == ACPI_WIFI_DOMAIN) break; data_pkg = NULL; From e7a3b8d87910bbd653908091242263c4e48d588f Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 21 Sep 2017 13:22:59 +0300 Subject: [PATCH 0590/2177] iwlwifi: acpi: move ACPI-related definitions to acpi.h The ACPI table size definitions were spread around the different files that used them. Move them all to a common place. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 20 ++++++++--- .../net/wireless/intel/iwlwifi/fw/api/power.h | 3 +- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 36 ++++++++++--------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 13 +++---- 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index a6d2907f75f5..be3f49d5713a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -63,8 +63,6 @@ #include -#ifdef CONFIG_ACPI - #define ACPI_WRDS_METHOD "WRDS" #define ACPI_EWRD_METHOD "EWRD" #define ACPI_WGDS_METHOD "WGDS" @@ -73,13 +71,25 @@ #define ACPI_WIFI_DOMAIN (0x07) -#define ACPI_WRDS_WIFI_DATA_SIZE (IWL_MVM_SAR_TABLE_SIZE + 2) -#define ACPI_EWRD_WIFI_DATA_SIZE ((IWL_MVM_SAR_PROFILE_NUM - 1) * \ - IWL_MVM_SAR_TABLE_SIZE + 3) +#define ACPI_SAR_TABLE_SIZE 10 +#define ACPI_SAR_PROFILE_NUM 4 + +#define ACPI_GEO_TABLE_SIZE 6 +#define ACPI_NUM_GEO_PROFILES 3 +#define ACPI_GEO_PER_CHAIN_SIZE 3 + +#define ACPI_SAR_NUM_CHAIN_LIMITS 2 +#define ACPI_SAR_NUM_SUB_BANDS 5 + +#define ACPI_WRDS_WIFI_DATA_SIZE (ACPI_SAR_TABLE_SIZE + 2) +#define ACPI_EWRD_WIFI_DATA_SIZE ((ACPI_SAR_PROFILE_NUM - 1) * \ + ACPI_SAR_TABLE_SIZE + 3) #define ACPI_WGDS_WIFI_DATA_SIZE 18 #define ACPI_WGDS_NUM_BANDS 2 #define ACPI_WGDS_TABLE_SIZE 3 +#ifdef CONFIG_ACPI + void *iwl_acpi_get_object(struct device *dev, acpi_string method); #else /* CONFIG_ACPI */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h index a06afb5605d2..a3c77e01863b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h @@ -357,8 +357,7 @@ struct iwl_dev_tx_power_cmd { u8 reserved[3]; } __packed; /* TX_REDUCED_POWER_API_S_VER_4 */ -#define IWL_NUM_GEO_PROFILES 3 -#define IWL_GEO_PER_CHAIN_SIZE 3 +#define IWL_NUM_GEO_PROFILES 3 /** * enum iwl_geo_per_chain_offset_operation - type of operation diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 6c8bcf03afc7..789aa7b74110 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -588,7 +588,7 @@ static int iwl_mvm_sar_set_profile(struct iwl_mvm *mvm, profile->enabled = enabled; - for (i = 0; i < IWL_MVM_SAR_TABLE_SIZE; i++) { + for (i = 0; i < ACPI_SAR_TABLE_SIZE; i++) { if ((table[i].type != ACPI_TYPE_INTEGER) || (table[i].integer.value > U8_MAX)) return -EINVAL; @@ -732,7 +732,7 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm) break; /* go to the next table */ - pos 
+= IWL_MVM_SAR_TABLE_SIZE; + pos += ACPI_SAR_TABLE_SIZE; } out_free: @@ -757,8 +757,8 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) goto out_free; } - for (i = 0; i < IWL_NUM_GEO_PROFILES; i++) { - for (j = 0; j < IWL_MVM_GEO_TABLE_SIZE; j++) { + for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) { + for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) { union acpi_object *entry; entry = &wifi_pkg->package.elements[idx++]; @@ -783,25 +783,25 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b) .v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS), }; int i, j, idx; - int profs[IWL_NUM_CHAIN_LIMITS] = { prof_a, prof_b }; + int profs[ACPI_SAR_NUM_CHAIN_LIMITS] = { prof_a, prof_b }; int len = sizeof(cmd); - BUILD_BUG_ON(IWL_NUM_CHAIN_LIMITS < 2); - BUILD_BUG_ON(IWL_NUM_CHAIN_LIMITS * IWL_NUM_SUB_BANDS != - IWL_MVM_SAR_TABLE_SIZE); + BUILD_BUG_ON(ACPI_SAR_NUM_CHAIN_LIMITS < 2); + BUILD_BUG_ON(ACPI_SAR_NUM_CHAIN_LIMITS * ACPI_SAR_NUM_SUB_BANDS != + ACPI_SAR_TABLE_SIZE); if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK)) len = sizeof(cmd.v3); - for (i = 0; i < IWL_NUM_CHAIN_LIMITS; i++) { + for (i = 0; i < ACPI_SAR_NUM_CHAIN_LIMITS; i++) { struct iwl_mvm_sar_profile *prof; /* don't allow SAR to be disabled (profile 0 means disable) */ if (profs[i] == 0) return -EPERM; - /* we are off by one, so allow up to IWL_MVM_SAR_PROFILE_NUM */ - if (profs[i] > IWL_MVM_SAR_PROFILE_NUM) + /* we are off by one, so allow up to ACPI_SAR_PROFILE_NUM */ + if (profs[i] > ACPI_SAR_PROFILE_NUM) return -EINVAL; /* profiles go from 1 to 4, so decrement to access the array */ @@ -816,8 +816,8 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b) } IWL_DEBUG_RADIO(mvm, " Chain[%d]:\n", i); - for (j = 0; j < IWL_NUM_SUB_BANDS; j++) { - idx = (i * IWL_NUM_SUB_BANDS) + j; + for (j = 0; j < ACPI_SAR_NUM_SUB_BANDS; j++) { + idx = (i * ACPI_SAR_NUM_SUB_BANDS) + j; cmd.v3.per_chain_restriction[i][j] = cpu_to_le16(prof->table[idx]); IWL_DEBUG_RADIO(mvm, " Band[%d] = %d * .125dBm\n", @@ -853,7 +853,7 @@ int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm) resp = (void *)cmd.resp_pkt->data; ret = le32_to_cpu(resp->profile_idx); - if (WARN_ON(ret > IWL_NUM_GEO_PROFILES)) { + if (WARN_ON(ret > ACPI_NUM_GEO_PROFILES)) { ret = -EIO; IWL_WARN(mvm, "Invalid geographic profile idx (%d)\n", ret); } @@ -881,10 +881,12 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) IWL_DEBUG_RADIO(mvm, "Sending GEO_TX_POWER_LIMIT\n"); - BUILD_BUG_ON(IWL_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS * + BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS * ACPI_WGDS_TABLE_SIZE != ACPI_WGDS_WIFI_DATA_SIZE); - for (i = 0; i < IWL_NUM_GEO_PROFILES; i++) { + BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES > IWL_NUM_GEO_PROFILES); + + for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) { struct iwl_per_chain_offset *chain = (struct iwl_per_chain_offset *)&cmd.table[i]; @@ -892,7 +894,7 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) u8 *value; value = &mvm->geo_profiles[i].values[j * - IWL_GEO_PER_CHAIN_SIZE]; + ACPI_GEO_PER_CHAIN_SIZE]; chain[j].max_tx_power = cpu_to_le16(value[0]); chain[j].chain_a = value[1]; chain[j].chain_b = value[2]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 2867683f1aa0..d6c9e8a8df5b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -89,6 +89,7 @@ #include "tof.h" #include "fw/runtime.h" #include "fw/dbg.h" +#include "fw/acpi.h" #define 
IWL_MVM_MAX_ADDRESSES 5 /* RSSI offset for WkP */ @@ -684,17 +685,13 @@ enum iwl_mvm_queue_status { #define IWL_MVM_NUM_CIPHERS 10 #ifdef CONFIG_ACPI -#define IWL_MVM_SAR_TABLE_SIZE 10 -#define IWL_MVM_SAR_PROFILE_NUM 4 -#define IWL_MVM_GEO_TABLE_SIZE 6 - struct iwl_mvm_sar_profile { bool enabled; - u8 table[IWL_MVM_SAR_TABLE_SIZE]; + u8 table[ACPI_SAR_TABLE_SIZE]; }; struct iwl_mvm_geo_profile { - u8 values[IWL_MVM_GEO_TABLE_SIZE]; + u8 values[ACPI_GEO_TABLE_SIZE]; }; #endif @@ -1014,8 +1011,8 @@ struct iwl_mvm { struct delayed_work cs_tx_unblock_dwork; #ifdef CONFIG_ACPI - struct iwl_mvm_sar_profile sar_profiles[IWL_MVM_SAR_PROFILE_NUM]; - struct iwl_mvm_geo_profile geo_profiles[IWL_NUM_GEO_PROFILES]; + struct iwl_mvm_sar_profile sar_profiles[ACPI_SAR_PROFILE_NUM]; + struct iwl_mvm_geo_profile geo_profiles[ACPI_NUM_GEO_PROFILES]; #endif }; From 2fa388cfeb1af9fa94acc7fd1b250fb863fb5d91 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 21 Sep 2017 14:30:53 +0300 Subject: [PATCH 0591/2177] iwlwifi: acpi: generalize iwl_mvm_sar_find_wifi_pkg() Move this function to acpi.c, renaming it to iwl_acpi_get_wifi_pkg(), because it can also be used with other methods (i.e. SPLC and WRDD). Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 52 +++++++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 10 ++++ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 60 ++------------------ 3 files changed, 68 insertions(+), 54 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index a7b2a48618cd..5a3b75e45f5c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -95,3 +95,55 @@ void *iwl_acpi_get_object(struct device *dev, acpi_string method) return buf.pointer; } IWL_EXPORT_SYMBOL(iwl_acpi_get_object); + +union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev, + union acpi_object *data, + int data_size) +{ + int i; + union acpi_object *wifi_pkg; + + /* + * We need at least one entry in the wifi package that + * describes the domain, and one more entry, otherwise there's + * no point in reading it. + */ + if (WARN_ON_ONCE(data_size < 2)) + return ERR_PTR(-EINVAL); + + /* + * We need at least two packages, one for the revision and one + * for the data itself. Also check that the revision is valid + * (i.e. it is an integer set to 0). 
+ */ + if (data->type != ACPI_TYPE_PACKAGE || + data->package.count < 2 || + data->package.elements[0].type != ACPI_TYPE_INTEGER || + data->package.elements[0].integer.value != 0) { + IWL_DEBUG_DEV_RADIO(dev, "Unsupported packages structure\n"); + return ERR_PTR(-EINVAL); + } + + /* loop through all the packages to find the one for WiFi */ + for (i = 1; i < data->package.count; i++) { + union acpi_object *domain; + + wifi_pkg = &data->package.elements[i]; + + /* skip entries that are not a package with the right size */ + if (wifi_pkg->type != ACPI_TYPE_PACKAGE || + wifi_pkg->package.count != data_size) + continue; + + domain = &wifi_pkg->package.elements[0]; + if (domain->type == ACPI_TYPE_INTEGER && + domain->integer.value == ACPI_WIFI_DOMAIN) + goto found; + } + + return ERR_PTR(-ENOENT); + +found: + return wifi_pkg; +} +IWL_EXPORT_SYMBOL(iwl_acpi_get_wifi_pkg); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index be3f49d5713a..f1db619c3ea0 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -91,6 +91,9 @@ #ifdef CONFIG_ACPI void *iwl_acpi_get_object(struct device *dev, acpi_string method); +union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev, + union acpi_object *data, + int data_size); #else /* CONFIG_ACPI */ @@ -99,5 +102,12 @@ static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method) return ERR_PTR(-ENOENT); } +static inline union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev, + union acpi_object *data, + int data_size) +{ + return ERR_PTR(-ENOENT); +} + #endif /* CONFIG_ACPI */ #endif /* __iwl_fw_acpi__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 789aa7b74110..7a7b72bf1621 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -599,54 +599,6 @@ static int iwl_mvm_sar_set_profile(struct iwl_mvm *mvm, return 0; } -static union acpi_object *iwl_mvm_sar_find_wifi_pkg(struct iwl_mvm *mvm, - union acpi_object *data, - int data_size) -{ - union acpi_object *wifi_pkg = NULL; - int i; - - /* - * We need at least two packages, one for the revision and one - * for the data itself. Also check that the revision is valid - * (i.e. it is an integer set to 0). - */ - if (data->type != ACPI_TYPE_PACKAGE || - data->package.count < 2 || - data->package.elements[0].type != ACPI_TYPE_INTEGER || - data->package.elements[0].integer.value != 0) { - IWL_DEBUG_RADIO(mvm, "Unsupported packages structure\n"); - return ERR_PTR(-EINVAL); - } - - /* loop through all the packages to find the one for WiFi */ - for (i = 1; i < data->package.count; i++) { - union acpi_object *domain; - - wifi_pkg = &data->package.elements[i]; - - /* Skip anything that is not a package with the right - * amount of elements (i.e. domain_type, - * enabled/disabled plus the actual data size. 
- */ - if (wifi_pkg->type != ACPI_TYPE_PACKAGE || - wifi_pkg->package.count != data_size) - continue; - - domain = &wifi_pkg->package.elements[0]; - if (domain->type == ACPI_TYPE_INTEGER && - domain->integer.value == ACPI_WIFI_DOMAIN) - break; - - wifi_pkg = NULL; - } - - if (!wifi_pkg) - return ERR_PTR(-ENOENT); - - return wifi_pkg; -} - static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm) { union acpi_object *wifi_pkg, *table, *data; @@ -657,8 +609,8 @@ static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm) if (IS_ERR(data)) return PTR_ERR(data); - wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, data, - ACPI_WRDS_WIFI_DATA_SIZE); + wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, + ACPI_WRDS_WIFI_DATA_SIZE); if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; @@ -694,8 +646,8 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm) if (IS_ERR(data)) return PTR_ERR(data); - wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, data, - ACPI_EWRD_WIFI_DATA_SIZE); + wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, + ACPI_EWRD_WIFI_DATA_SIZE); if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; @@ -750,8 +702,8 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) if (IS_ERR(data)) return PTR_ERR(data); - wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, data, - ACPI_WGDS_WIFI_DATA_SIZE); + wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, + ACPI_WGDS_WIFI_DATA_SIZE); if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; From 45a5c6f68b2649ccb71721b95be22720f04b2bc3 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 21 Sep 2017 14:45:27 +0300 Subject: [PATCH 0592/2177] iwlwifi: acpi: use iwl_acpi_get_wifi_pkg when reading reading SPLC Instead of finding the wifi package with its own code, we can reuse the new iwl_acpi_get_wifi_pkg() function when reading the default power limit from SPLC. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 2 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 61 +++---------------- 2 files changed, 12 insertions(+), 51 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index f1db619c3ea0..f5c8c856311a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -85,6 +85,8 @@ #define ACPI_EWRD_WIFI_DATA_SIZE ((ACPI_SAR_PROFILE_NUM - 1) * \ ACPI_SAR_TABLE_SIZE + 3) #define ACPI_WGDS_WIFI_DATA_SIZE 18 +#define ACPI_SPLC_WIFI_DATA_SIZE 2 + #define ACPI_WGDS_NUM_BANDS 2 #define ACPI_WGDS_TABLE_SIZE 3 diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index a6c4f096cb19..b5fce721b0b0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -588,67 +588,26 @@ static const struct pci_device_id iwl_hw_card_ids[] = { MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); #ifdef CONFIG_ACPI -static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc) -{ - union acpi_object *data_pkg, *dflt_pwr_limit; - int i; - - /* We need at least two elements, one for the revision and one - * for the data itself. Also check that the revision is - * supported (currently only revision 0). - */ - if (splc->type != ACPI_TYPE_PACKAGE || - splc->package.count < 2 || - splc->package.elements[0].type != ACPI_TYPE_INTEGER || - splc->package.elements[0].integer.value != 0) { - IWL_DEBUG_INFO(trans, - "Unsupported structure returned by the SPLC method. 
Ignoring.\n"); - return 0; - } - - /* loop through all the packages to find the one for WiFi */ - for (i = 1; i < splc->package.count; i++) { - union acpi_object *domain; - - data_pkg = &splc->package.elements[i]; - - /* Skip anything that is not a package with the right - * amount of elements (i.e. at least 2 integers). - */ - if (data_pkg->type != ACPI_TYPE_PACKAGE || - data_pkg->package.count < 2 || - data_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || - data_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) - continue; - - domain = &data_pkg->package.elements[0]; - if (domain->integer.value == ACPI_WIFI_DOMAIN) - break; - - data_pkg = NULL; - } - - if (!data_pkg) { - IWL_DEBUG_INFO(trans, - "No element for the WiFi domain returned by the SPLC method.\n"); - return 0; - } - - dflt_pwr_limit = &data_pkg->package.elements[1]; - return dflt_pwr_limit->integer.value; -} static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) { - union acpi_object *data; + union acpi_object *data, *wifi_pkg; data = iwl_acpi_get_object(trans->dev, ACPI_SPLC_METHOD); if (IS_ERR(data)) return; - trans->dflt_pwr_limit = splc_get_pwr_limit(trans, data); + wifi_pkg = iwl_acpi_get_wifi_pkg(trans->dev, data, + ACPI_SPLC_WIFI_DATA_SIZE); + if (IS_ERR(wifi_pkg) || + wifi_pkg->package.elements[1].integer.value != ACPI_TYPE_INTEGER) + goto out; + + trans->dflt_pwr_limit = wifi_pkg->package.elements[1].integer.value; + IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n", trans->dflt_pwr_limit); +out: kfree(data); } From ed1a962db760bfb5e30b8ab284873767790119b9 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 26 Sep 2017 15:33:56 +0300 Subject: [PATCH 0593/2177] iwlwifi: acpi: make iwl_get_bios_mcc() use the common acpi functions The way iwl_get_bios_mcc() gets the WiFi package and checks for its integrity is almost identical to the new iwl_acpi_get_wifi_pkg() function. Instead of having duplicate code, convert it to use the common code. 
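For reference, the package layout that iwl_acpi_get_wifi_pkg() checks for (and that WRDD shares with the other methods converted in this series) can be sketched as follows; the layout comment is illustrative, paraphrased from the validation code rather than quoted from a spec:

/*
 * result of evaluating the method (ACPI_TYPE_PACKAGE):
 *   elements[0]           revision, ACPI_TYPE_INTEGER, must be 0
 *   elements[1..n]        one sub-package per domain, each with exactly
 *                         data_size elements, where:
 *     sub->elements[0]    domain id; 0x07 (ACPI_WIFI_DOMAIN) selects WiFi
 *     sub->elements[1..]  the per-domain payload, e.g. the MCC integer
 *                         in the WRDD case
 */
wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE);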
Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 1 + .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 66 ++++++------------- .../wireless/intel/iwlwifi/iwl-nvm-parse.h | 7 -- 3 files changed, 20 insertions(+), 54 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index f5c8c856311a..a7deb62f3f86 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -85,6 +85,7 @@ #define ACPI_EWRD_WIFI_DATA_SIZE ((ACPI_SAR_PROFILE_NUM - 1) * \ ACPI_SAR_TABLE_SIZE + 3) #define ACPI_WGDS_WIFI_DATA_SIZE 18 +#define ACPI_WRDD_WIFI_DATA_SIZE 2 #define ACPI_SPLC_WIFI_DATA_SIZE 2 #define ACPI_WGDS_NUM_BANDS 2 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 2ece3c531b88..d98318f93b9e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -945,64 +945,36 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, } IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info); -#ifdef CONFIG_ACPI -static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd) -{ - union acpi_object *mcc_pkg, *domain_type, *mcc_value; - u32 i; - - if (wrdd->type != ACPI_TYPE_PACKAGE || - wrdd->package.count < 2 || - wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || - wrdd->package.elements[0].integer.value != 0) { - IWL_DEBUG_EEPROM(dev, "Unsupported wrdd structure\n"); - return 0; - } - - for (i = 1 ; i < wrdd->package.count ; ++i) { - mcc_pkg = &wrdd->package.elements[i]; - - if (mcc_pkg->type != ACPI_TYPE_PACKAGE || - mcc_pkg->package.count < 2 || - mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || - mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { - mcc_pkg = NULL; - continue; - } - - domain_type = &mcc_pkg->package.elements[0]; - if (domain_type->integer.value == ACPI_WIFI_DOMAIN) - break; - - mcc_pkg = NULL; - } - - if (mcc_pkg) { - mcc_value = &mcc_pkg->package.elements[1]; - return mcc_value->integer.value; - } - - return 0; -} - int iwl_get_bios_mcc(struct device *dev, char *mcc) { - union acpi_object *data; + union acpi_object *wifi_pkg, *data; u32 mcc_val; + int ret; data = iwl_acpi_get_object(dev, ACPI_WRDD_METHOD); if (IS_ERR(data)) return PTR_ERR(data); - mcc_val = iwl_wrdd_get_mcc(dev, data); - kfree(data); - if (!mcc_val) - return -ENOENT; + wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE); + if (IS_ERR(wifi_pkg)) { + ret = PTR_ERR(wifi_pkg); + goto out_free; + } + + if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + ret = -EINVAL; + goto out_free; + } + + mcc_val = wifi_pkg->package.elements[1].integer.value; mcc[0] = (mcc_val >> 8) & 0xff; mcc[1] = mcc_val & 0xff; mcc[2] = '\0'; - return 0; + + ret = 0; +out_free: + kfree(data); + return ret; } IWL_EXPORT_SYMBOL(iwl_get_bios_mcc); -#endif diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index 2d1a24dd8410..a39bd5c17c26 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -109,7 +109,6 @@ struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc); -#ifdef CONFIG_ACPI /** * iwl_get_bios_mcc - read MCC from BIOS, if available * @@ -119,11 +118,5 @@ iwl_parse_nvm_mcc_info(struct device *dev, const 
struct iwl_cfg *cfg, * This function tries to read the current MCC from ACPI if available. */ int iwl_get_bios_mcc(struct device *dev, char *mcc); -#else -static inline int iwl_get_bios_mcc(struct device *dev, char *mcc) -{ - return -ENOENT; -} -#endif #endif /* __iwl_nvm_parse_h__ */ From d953cdb8ee1ba94f3e64751fe4740c82d09cffa8 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 26 Sep 2017 16:13:23 +0300 Subject: [PATCH 0594/2177] iwlwifi: acpi: remove a couple of unnecessary ifdefs Some of the #ifdef CONFIG_ACPI are not needed anymore, so they can be removed. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 -- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 ------ 2 files changed, 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index d6c9e8a8df5b..7fa7849367ef 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -684,7 +684,6 @@ enum iwl_mvm_queue_status { #define IWL_MVM_NUM_CIPHERS 10 -#ifdef CONFIG_ACPI struct iwl_mvm_sar_profile { bool enabled; u8 table[ACPI_SAR_TABLE_SIZE]; @@ -693,7 +692,6 @@ struct iwl_mvm_sar_profile { struct iwl_mvm_geo_profile { u8 values[ACPI_GEO_TABLE_SIZE]; }; -#endif struct iwl_mvm { /* for logger access */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index b5fce721b0b0..08f71b4297be 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -587,8 +587,6 @@ static const struct pci_device_id iwl_hw_card_ids[] = { }; MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); -#ifdef CONFIG_ACPI - static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) { union acpi_object *data, *wifi_pkg; @@ -611,10 +609,6 @@ out: kfree(data); } -#else /* CONFIG_ACPI */ -static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) {} -#endif - /* PCI registers */ #define PCI_CFG_RETRY_TIMEOUT 0x041 From 45f65569e0d968c5fd1e8c3c6d8983b402caaa27 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 26 Sep 2017 16:31:10 +0300 Subject: [PATCH 0595/2177] iwlwifi: acpi: move function to get mcc into acpi code The iwl_get_bios_mcc() function was in the iwl-nvm-parse.c file, but it has nothing to do with the NVM. Move it to fw/acpi.c and rename it to iwl_acpi_get_mcc(). 
Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 34 +++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 15 ++++++++ .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 34 ------------------- .../wireless/intel/iwlwifi/iwl-nvm-parse.h | 10 ------ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 1 - drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 3 +- 6 files changed, 51 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 5a3b75e45f5c..adce36112b7f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -147,3 +147,37 @@ found: return wifi_pkg; } IWL_EXPORT_SYMBOL(iwl_acpi_get_wifi_pkg); + +int iwl_acpi_get_mcc(struct device *dev, char *mcc) +{ + union acpi_object *wifi_pkg, *data; + u32 mcc_val; + int ret; + + data = iwl_acpi_get_object(dev, ACPI_WRDD_METHOD); + if (IS_ERR(data)) + return PTR_ERR(data); + + wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE); + if (IS_ERR(wifi_pkg)) { + ret = PTR_ERR(wifi_pkg); + goto out_free; + } + + if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + ret = -EINVAL; + goto out_free; + } + + mcc_val = wifi_pkg->package.elements[1].integer.value; + + mcc[0] = (mcc_val >> 8) & 0xff; + mcc[1] = mcc_val & 0xff; + mcc[2] = '\0'; + + ret = 0; +out_free: + kfree(data); + return ret; +} +IWL_EXPORT_SYMBOL(iwl_acpi_get_mcc); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index a7deb62f3f86..fe9c1066c3b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -98,6 +98,16 @@ union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev, union acpi_object *data, int data_size); +/** + * iwl_acpi_get_mcc - read MCC from ACPI, if available + * + * @dev: the struct device + * @mcc: output buffer (3 bytes) that will get the MCC + * + * This function tries to read the current MCC from ACPI if available. 
+ */ +int iwl_acpi_get_mcc(struct device *dev, char *mcc); + #else /* CONFIG_ACPI */ static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method) @@ -112,5 +122,10 @@ static inline union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev, return ERR_PTR(-ENOENT); } +static inline int iwl_acpi_get_mcc(struct device *dev, char *mcc) +{ + return -ENOENT; +} + #endif /* CONFIG_ACPI */ #endif /* __iwl_fw_acpi__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index d98318f93b9e..d9a2ea9dd93f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -944,37 +944,3 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, return regd; } IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info); - -int iwl_get_bios_mcc(struct device *dev, char *mcc) -{ - union acpi_object *wifi_pkg, *data; - u32 mcc_val; - int ret; - - data = iwl_acpi_get_object(dev, ACPI_WRDD_METHOD); - if (IS_ERR(data)) - return PTR_ERR(data); - - wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE); - if (IS_ERR(wifi_pkg)) { - ret = PTR_ERR(wifi_pkg); - goto out_free; - } - - if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { - ret = -EINVAL; - goto out_free; - } - - mcc_val = wifi_pkg->package.elements[1].integer.value; - - mcc[0] = (mcc_val >> 8) & 0xff; - mcc[1] = mcc_val & 0xff; - mcc[2] = '\0'; - - ret = 0; -out_free: - kfree(data); - return ret; -} -IWL_EXPORT_SYMBOL(iwl_get_bios_mcc); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index a39bd5c17c26..306736c7a042 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -109,14 +109,4 @@ struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc); -/** - * iwl_get_bios_mcc - read MCC from BIOS, if available - * - * @dev: the struct device - * @mcc: output buffer (3 bytes) that will get the MCC - * - * This function tries to read the current MCC from ACPI if available. 
- */ -int iwl_get_bios_mcc(struct device *dev, char *mcc); - #endif /* __iwl_nvm_parse_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 7a7b72bf1621..f476882291ae 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -74,7 +74,6 @@ #include "iwl-csr.h" /* for iwl_mvm_rx_card_state_notif */ #include "iwl-io.h" /* for iwl_mvm_rx_card_state_notif */ #include "iwl-prph.h" -#include "iwl-eeprom-parse.h" #include "fw/acpi.h" #include "mvm.h" diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 422aa6be9932..f4a5fcf4cc1f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -73,6 +73,7 @@ #include "iwl-eeprom-read.h" #include "iwl-nvm-parse.h" #include "iwl-prph.h" +#include "fw/acpi.h" /* Default NVM size to read */ #define IWL_NVM_DEFAULT_CHUNK_SIZE (2*1024) @@ -775,7 +776,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm) return -EIO; if (iwl_mvm_is_wifi_mcc_supported(mvm) && - !iwl_get_bios_mcc(mvm->dev, mcc)) { + !iwl_acpi_get_mcc(mvm->dev, mcc)) { kfree(regd); regd = iwl_mvm_get_regdomain(mvm->hw->wiphy, mcc, MCC_SOURCE_BIOS, NULL); From e59a00f4884815a2887837b9d34e0ae59c41d3d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Sep 2017 12:59:00 +0200 Subject: [PATCH 0596/2177] iwlwifi: fix indentation in a000 family configuration Fix the double indentation in the configuration structs for a000 family devices. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/a000.c | 100 +++++++++--------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c index ed8bccd228f8..076f8d6d1124 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c @@ -138,74 +138,74 @@ static const struct iwl_ht_params iwl_a000_ht_params = { .tx_cmd_queue_size = 32 const struct iwl_cfg iwla000_2ac_cfg_hr = { - .name = "Intel(R) Dual Band Wireless AC a000", - .fw_name_pre = IWL_A000_HR_FW_PRE, - IWL_DEVICE_A000, - .ht_params = &iwl_a000_ht_params, - .nvm_ver = IWL_A000_NVM_VERSION, - .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AC a000", + .fw_name_pre = IWL_A000_HR_FW_PRE, + IWL_DEVICE_A000, + .ht_params = &iwl_a000_ht_params, + .nvm_ver = IWL_A000_NVM_VERSION, + .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; const struct iwl_cfg iwla000_2ac_cfg_hr_cdb = { - .name = "Intel(R) Dual Band Wireless AC a000", - .fw_name_pre = IWL_A000_HR_CDB_FW_PRE, - IWL_DEVICE_A000, - .ht_params = &iwl_a000_ht_params, - .nvm_ver = IWL_A000_NVM_VERSION, - .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, - .cdb = true, + .name = "Intel(R) Dual Band Wireless AC a000", + .fw_name_pre = IWL_A000_HR_CDB_FW_PRE, + IWL_DEVICE_A000, + .ht_params = &iwl_a000_ht_params, + .nvm_ver = IWL_A000_NVM_VERSION, + .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .cdb = true, }; const struct iwl_cfg iwla000_2ac_cfg_jf = { - .name = "Intel(R) Dual Band Wireless AC a000", - .fw_name_pre = IWL_A000_JF_FW_PRE, - IWL_DEVICE_A000, - .ht_params = &iwl_a000_ht_params, - .nvm_ver = 
IWL_A000_NVM_VERSION, - .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AC a000", + .fw_name_pre = IWL_A000_JF_FW_PRE, + IWL_DEVICE_A000, + .ht_params = &iwl_a000_ht_params, + .nvm_ver = IWL_A000_NVM_VERSION, + .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; const struct iwl_cfg iwla000_2ax_cfg_hr = { - .name = "Intel(R) Dual Band Wireless AX a000", - .fw_name_pre = IWL_A000_HR_FW_PRE, - IWL_DEVICE_A000, - .ht_params = &iwl_a000_ht_params, - .nvm_ver = IWL_A000_NVM_VERSION, - .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AX a000", + .fw_name_pre = IWL_A000_HR_FW_PRE, + IWL_DEVICE_A000, + .ht_params = &iwl_a000_ht_params, + .nvm_ver = IWL_A000_NVM_VERSION, + .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; const struct iwl_cfg iwla000_2ax_cfg_qnj_hr_f0 = { - .name = "Intel(R) Dual Band Wireless AX a000", - .fw_name_pre = IWL_A000_HR_F0_FW_PRE, - IWL_DEVICE_A000, - .ht_params = &iwl_a000_ht_params, - .nvm_ver = IWL_A000_NVM_VERSION, - .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AX a000", + .fw_name_pre = IWL_A000_HR_F0_FW_PRE, + IWL_DEVICE_A000, + .ht_params = &iwl_a000_ht_params, + .nvm_ver = IWL_A000_NVM_VERSION, + .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; const struct iwl_cfg iwla000_2ax_cfg_qnj_jf_b0 = { - .name = "Intel(R) Dual Band Wireless AX a000", - .fw_name_pre = IWL_A000_JF_B0_FW_PRE, - IWL_DEVICE_A000, - .ht_params = &iwl_a000_ht_params, - .nvm_ver = IWL_A000_NVM_VERSION, - .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AX a000", + .fw_name_pre = IWL_A000_JF_B0_FW_PRE, + IWL_DEVICE_A000, + .ht_params = &iwl_a000_ht_params, + .nvm_ver = IWL_A000_NVM_VERSION, + .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; const struct iwl_cfg iwla000_2ax_cfg_qnj_hr_a0 = { - .name = "Intel(R) Dual Band Wireless AX a000", - .fw_name_pre = IWL_A000_HR_A0_FW_PRE, - IWL_DEVICE_A000, - .ht_params = &iwl_a000_ht_params, - .nvm_ver = IWL_A000_NVM_VERSION, - .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AX a000", + .fw_name_pre = IWL_A000_HR_A0_FW_PRE, + IWL_DEVICE_A000, + .ht_params = &iwl_a000_ht_params, + .nvm_ver = IWL_A000_NVM_VERSION, + .nvm_calib_ver = IWL_A000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; MODULE_FIRMWARE(IWL_A000_HR_MODULE_FIRMWARE(IWL_A000_UCODE_API_MAX)); From 1184611ee88f309fe4a6ca3d50bde762434e386e Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 28 Sep 2017 15:18:33 +0300 Subject: [PATCH 0597/2177] iwlwifi: acpi: move code that reads SPLC to acpi Move most of the set_dflt_pwr_limit() function to acpi.c and make it return the pwr_limit value instead of setting directly. Also rename it to iwl_acpi_get_pwr_limit(). 
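For illustration, with the helper returning the limit (and its CONFIG_ACPI=n stub returning 0), the PCIe probe path shrinks to a single assignment, as in the drv.c hunk below:

/* 0 means no SPLC limit: ACPI disabled, or the method absent/malformed */
iwl_trans->dflt_pwr_limit = iwl_acpi_get_pwr_limit(iwl_trans->dev);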
Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 27 +++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 7 +++++ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 25 ++--------------- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index adce36112b7f..75cae54ea7de 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -181,3 +181,30 @@ out_free: return ret; } IWL_EXPORT_SYMBOL(iwl_acpi_get_mcc); + +u64 iwl_acpi_get_pwr_limit(struct device *dev) +{ + union acpi_object *data, *wifi_pkg; + u64 dflt_pwr_limit; + + data = iwl_acpi_get_object(dev, ACPI_SPLC_METHOD); + if (IS_ERR(data)) { + dflt_pwr_limit = 0; + goto out; + } + + wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, + ACPI_SPLC_WIFI_DATA_SIZE); + if (IS_ERR(wifi_pkg) || + wifi_pkg->package.elements[1].integer.value != ACPI_TYPE_INTEGER) { + dflt_pwr_limit = 0; + goto out_free; + } + + dflt_pwr_limit = wifi_pkg->package.elements[1].integer.value; +out_free: + kfree(data); +out: + return dflt_pwr_limit; +} +IWL_EXPORT_SYMBOL(iwl_acpi_get_pwr_limit); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index fe9c1066c3b6..cb5f32c1d705 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -108,6 +108,8 @@ union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev, */ int iwl_acpi_get_mcc(struct device *dev, char *mcc); +u64 iwl_acpi_get_pwr_limit(struct device *dev); + #else /* CONFIG_ACPI */ static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method) @@ -127,5 +129,10 @@ static inline int iwl_acpi_get_mcc(struct device *dev, char *mcc) return -ENOENT; } +static inline u64 iwl_acpi_get_pwr_limit(struct device *dev) +{ + return 0; +} + #endif /* CONFIG_ACPI */ #endif /* __iwl_fw_acpi__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 08f71b4297be..d81f2414fd8e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -78,6 +78,7 @@ #include "iwl-trans.h" #include "iwl-drv.h" #include "internal.h" +#include "fw/acpi.h" #define IWL_PCI_DEVICE(dev, subdev, cfg) \ .vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \ @@ -587,28 +588,6 @@ static const struct pci_device_id iwl_hw_card_ids[] = { }; MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); -static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) -{ - union acpi_object *data, *wifi_pkg; - - data = iwl_acpi_get_object(trans->dev, ACPI_SPLC_METHOD); - if (IS_ERR(data)) - return; - - wifi_pkg = iwl_acpi_get_wifi_pkg(trans->dev, data, - ACPI_SPLC_WIFI_DATA_SIZE); - if (IS_ERR(wifi_pkg) || - wifi_pkg->package.elements[1].integer.value != ACPI_TYPE_INTEGER) - goto out; - - trans->dflt_pwr_limit = wifi_pkg->package.elements[1].integer.value; - - IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n", - trans->dflt_pwr_limit); -out: - kfree(data); -} - /* PCI registers */ #define PCI_CFG_RETRY_TIMEOUT 0x041 @@ -672,7 +651,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_trans; } - set_dflt_pwr_limit(iwl_trans, pdev); + iwl_trans->dflt_pwr_limit = iwl_acpi_get_pwr_limit(iwl_trans->dev); /* register transport layer debugfs here */ ret = iwl_trans_pcie_dbgfs_register(iwl_trans); From 
42fa5e0e323b2709079d81d90e9a142aa98cd3b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Sep 2017 16:37:12 +0200 Subject: [PATCH 0598/2177] iwlwifi: mvm: warn on invalid statistics size Getting the wrong statistics size is a problem, having a warning will help us catch it quicker during firmware/driver development. In released firmware/driver versions, we obviously make sure this won't happen. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 184c749766f2..185a1d8414b9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -661,11 +661,10 @@ void iwl_mvm_handle_rx_statistics(struct iwl_mvm *mvm, expected_size = sizeof(struct iwl_notif_statistics_cdb); } - if (iwl_rx_packet_payload_len(pkt) != expected_size) { - IWL_ERR(mvm, "received invalid statistics size (%d)!\n", - iwl_rx_packet_payload_len(pkt)); + if (WARN_ONCE(iwl_rx_packet_payload_len(pkt) != expected_size, + "received invalid statistics size (%d)!\n", + iwl_rx_packet_payload_len(pkt))) return; - } if (!iwl_mvm_has_new_rx_stats_api(mvm)) { struct iwl_notif_statistics_v11 *stats = (void *)&pkt->data; From f2abcfa6c86e503b352846ace9d6ab9b02ade0ab Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 28 Sep 2017 15:29:27 +0300 Subject: [PATCH 0599/2177] iwlwifi: remove dflt_pwr_limit from the transport The default power limit read from the SPLC method in ACPI doesn't have anything to do with the transport and is only used in the opmode, so we can remove it from the trans. Additionally, this value is only user when the opmode is starting, so we don't need to store it anywhere. Remove the dflt_pwr_limit element from the trans and move call to iwl_acpi_get_pwr_limit() call to mvm. 
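Condensed, the opmode-side flow after this change reads roughly as the sketch below (same logic as the iwl_mvm_min_backoff() hunk that follows, with the interleaved -/+ lines flattened):

    static u32 iwl_mvm_min_backoff(struct iwl_mvm *mvm)
    {
        const struct iwl_pwr_tx_backoff *backoff = mvm->cfg->pwr_tx_backoffs;
        u64 dflt_pwr_limit;

        if (!backoff)
            return 0;

        /* fetched on demand at opmode start, no longer cached in the trans */
        dflt_pwr_limit = iwl_acpi_get_pwr_limit(mvm->dev);

        while (backoff->pwr) {
            if (dflt_pwr_limit >= backoff->pwr)
                return backoff->backoff;
            backoff++;
        }

        return 0;
    }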
Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-trans.h | 2 -- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 20 +++++++++++-------- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 3 --- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 91ec077900f6..bba4f54cbbbb 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -749,8 +749,6 @@ struct iwl_trans { struct lockdep_map sync_cmd_lockdep_map; #endif - u64 dflt_pwr_limit; - const struct iwl_fw_dbg_dest_tlv *dbg_dest_tlv; const struct iwl_fw_dbg_conf_tlv *dbg_conf_tlv[FW_DBG_CONF_MAX]; struct iwl_fw_dbg_trigger_tlv * const *dbg_trigger_tlv; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index d855920f5456..e82b4462722b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -86,6 +86,7 @@ #include "time-event.h" #include "fw-api.h" #include "fw/api/scan.h" +#include "fw/acpi.h" #define DRV_DESCRIPTION "The new Intel(R) wireless AGN driver for Linux" MODULE_DESCRIPTION(DRV_DESCRIPTION); @@ -488,18 +489,21 @@ static const struct iwl_hcmd_arr iwl_mvm_groups[] = { static void iwl_mvm_async_handlers_wk(struct work_struct *wk); static void iwl_mvm_d0i3_exit_work(struct work_struct *wk); -static u32 calc_min_backoff(struct iwl_trans *trans, const struct iwl_cfg *cfg) +static u32 iwl_mvm_min_backoff(struct iwl_mvm *mvm) { - const struct iwl_pwr_tx_backoff *pwr_tx_backoff = cfg->pwr_tx_backoffs; + const struct iwl_pwr_tx_backoff *backoff = mvm->cfg->pwr_tx_backoffs; + u64 dflt_pwr_limit; - if (!pwr_tx_backoff) + if (!backoff) return 0; - while (pwr_tx_backoff->pwr) { - if (trans->dflt_pwr_limit >= pwr_tx_backoff->pwr) - return pwr_tx_backoff->backoff; + dflt_pwr_limit = iwl_acpi_get_pwr_limit(mvm->dev); - pwr_tx_backoff++; + while (backoff->pwr) { + if (dflt_pwr_limit >= backoff->pwr) + return backoff->backoff; + + backoff++; } return 0; @@ -769,7 +773,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, goto out_free; mvm->hw_registered = true; - min_backoff = calc_min_backoff(trans, cfg); + min_backoff = iwl_mvm_min_backoff(mvm); iwl_mvm_thermal_initialize(mvm, min_backoff); err = iwl_mvm_dbgfs_register(mvm, dbgfs_dir); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index d81f2414fd8e..ce7254ec0514 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -78,7 +78,6 @@ #include "iwl-trans.h" #include "iwl-drv.h" #include "internal.h" -#include "fw/acpi.h" #define IWL_PCI_DEVICE(dev, subdev, cfg) \ .vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \ @@ -651,8 +650,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_trans; } - iwl_trans->dflt_pwr_limit = iwl_acpi_get_pwr_limit(iwl_trans->dev); - /* register transport layer debugfs here */ ret = iwl_trans_pcie_dbgfs_register(iwl_trans); if (ret) From ec2f25d203aaede93cef64198d93df698913101f Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Sat, 19 Aug 2017 00:20:31 +0300 Subject: [PATCH 0600/2177] i40e: fix a typo in i40e_pf documentation This patch fixes a typo in i40e_pf object documentation; num_req_vfs refers to the number of VFs requested for the PF. 
Signed-off-by: Rami Rosen Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 439c63cb2a0c..2bc4dd0dbbf1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -350,7 +350,7 @@ struct i40e_pf { u16 num_vmdq_vsis; /* num vmdq vsis this PF has set up */ u16 num_vmdq_qps; /* num queue pairs per vmdq pool */ u16 num_vmdq_msix; /* num queue vectors per vmdq pool */ - u16 num_req_vfs; /* num VFs requested for this VF */ + u16 num_req_vfs; /* num VFs requested for this PF */ u16 num_vf_qps; /* num queue pairs per VF */ u16 num_lan_qps; /* num lan queues this PF has set up */ u16 num_lan_msix; /* num queue vectors for the base PF vsi */ From 64615b5418bac1d3b3a50a83fb2e42091fe299fe Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Tue, 29 Aug 2017 05:32:30 -0400 Subject: [PATCH 0601/2177] i40e: add private flag to control source pruning By default, our devices do source pruning, that is, they drop receive packets that have the source MAC matching one of the receive filters. Unfortunately, this breaks ARP monitoring in channel bonding, as the bonding driver expects devices to receive ARPs containing their own source address. Add an ethtool private flag to control this feature. Also, remove the netif_running() check when we process our private flags. It's OK to reset when the device is closed and in most cases we need the reset the apply these changes. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + .../net/ethernet/intel/i40e/i40e_ethtool.c | 7 ++++-- drivers/net/ethernet/intel/i40e/i40e_main.c | 25 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2bc4dd0dbbf1..c78448daa7a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -452,6 +452,7 @@ struct i40e_pf { #define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(55) #define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(56) #define I40E_FLAG_LEGACY_RX BIT_ULL(58) +#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT_ULL(59) struct i40e_client_instance *cinst; bool stat_offsets_loaded; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 1136d02e2e95..6203d362438c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -227,6 +227,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0), I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0), I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0), + I40E_PRIV_FLAG("disable-source-pruning", + I40E_FLAG_SOURCE_PRUNING_DISABLED, 0), }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags) @@ -4189,8 +4191,9 @@ flags_complete: /* Issue reset to cause things to take effect, as additional bits * are added we will need to create a mask of bits requiring reset */ - if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) || - ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev))) + if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | + I40E_FLAG_LEGACY_RX | + I40E_FLAG_SOURCE_PRUNING_DISABLED)) i40e_do_reset(pf, 
BIT(__I40E_PF_RESET_REQUESTED), true); return 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3f9e89b054ec..b539469f576f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9903,6 +9903,31 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) enabled_tc = i40e_pf_get_tc_map(pf); + /* Source pruning is enabled by default, so the flag is + * negative logic - if it's set, we need to fiddle with + * the VSI to disable source pruning. + */ + if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) { + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.seid = pf->main_vsi_seid; + ctxt.pf_num = pf->hw.pf_id; + ctxt.vf_num = 0; + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id = + cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "update vsi failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + ret = -ENOENT; + goto err; + } + } + /* MFP mode setup queue map and update VSI */ if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(pf->hw.func_caps.iscsi)) { /* NIC type PF */ From be664cbefc50977aaefc868ba6a1109ec9b7449d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 29 Aug 2017 05:32:31 -0400 Subject: [PATCH 0602/2177] i40e/i40evf: spread CPU affinity hints across online CPUs only Currently, when setting up the IRQ for a q_vector, we set an affinity hint based on the v_idx of that q_vector. Meaning a loop iterates on v_idx, which is an incremental value, and the cpumask is created based on this value. This is a problem in systems with multiple logical CPUs per core (like in simultaneous multithreading (SMT) scenarios). If we disable some logical CPUs, by turning SMT off for example, we will end up with a sparse cpu_online_mask, i.e., only the first CPU in a core is online, and incremental filling in q_vector cpumask might lead to multiple offline CPUs being assigned to q_vectors. Example: if we have a system with 8 cores each one containing 8 logical CPUs (SMT == 8 in this case), we have 64 CPUs in total. But if SMT is disabled, only the 1st CPU in each core remains online, so the cpu_online_mask in this case would have only 8 bits set, in a sparse way. In general case, when SMT is off the cpu_online_mask has only C bits set: 0, 1*N, 2*N, ..., C*(N-1) where C == # of cores; N == # of logical CPUs per core. In our example, only bits 0, 8, 16, 24, 32, 40, 48, 56 would be set. Instead, we should only assign hints for CPUs which are online. Even better, the kernel already provides a function, cpumask_local_spread() which takes an index and returns a CPU, spreading the interrupts across local NUMA nodes first, and then remote ones if necessary. Since we generally have a 1:1 mapping between vectors and CPUs, there is no real advantage to spreading vectors to local CPUs first. In order to avoid mismatch of the default XPS hints, we'll pass -1 so that it spreads across all CPUs without regard to the node locality. Note that we don't need to change the q_vector->affinity_mask as this is initialized to cpu_possible_mask, until an actual affinity is set and then notified back to us. 
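Concretely, with the SMT-off example above (C = 8, N = 8, cpu_online_mask = {0, 8, 16, ..., 56}), the per-vector hint after this patch becomes the sketch below; since cpumask_local_spread() walks only online CPUs, q_vectors 0..7 are hinted to CPUs 0, 8, ..., 56 instead of partly to offline CPUs 1..7:

    /* -1 == NUMA_NO_NODE: spread over all online CPUs, ignoring locality */
    cpu = cpumask_local_spread(q_vector->v_idx, -1);
    irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));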
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++++++++++----- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 9 ++++++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b539469f576f..d2bb4f17c89e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2885,14 +2885,15 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi) static void i40e_config_xps_tx_ring(struct i40e_ring *ring) { struct i40e_vsi *vsi = ring->vsi; + int cpu; if (!ring->q_vector || !ring->netdev) return; if ((vsi->tc_config.numtc <= 1) && !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) { - netif_set_xps_queue(ring->netdev, - get_cpu_mask(ring->q_vector->v_idx), + cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); + netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), ring->queue_index); } @@ -3482,6 +3483,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) int tx_int_idx = 0; int vector, err; int irq_num; + int cpu; for (vector = 0; vector < q_vectors; vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; @@ -3517,10 +3519,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) q_vector->affinity_notify.notify = i40e_irq_affinity_notify; q_vector->affinity_notify.release = i40e_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); - /* get_cpu_mask returns a static constant mask with - * a permanent lifetime so it's ok to use here. + /* Spread affinity hints out across online CPUs. + * + * get_cpu_mask returns a static constant mask with + * a permanent lifetime so it's ok to pass to + * irq_set_affinity_hint without making a copy. */ - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx)); + cpu = cpumask_local_spread(q_vector->v_idx, -1); + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); } vsi->irqs_ready = true; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index f2f1e754c2ce..bc76378a71e2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -515,6 +515,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) unsigned int vector, q_vectors; unsigned int rx_int_idx = 0, tx_int_idx = 0; int irq_num, err; + int cpu; i40evf_irq_disable(adapter); /* Decrement for Other and TCP Timer vectors */ @@ -553,10 +554,12 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) q_vector->affinity_notify.release = i40evf_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); - /* get_cpu_mask returns a static constant mask with - * a permanent lifetime so it's ok to use here. + /* Spread the IRQ affinity hints across online CPUs. Note that + * get_cpu_mask returns a mask with a permanent lifetime so + * it's safe to use as a hint for irq_set_affinity_hint. 
*/ - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx)); + cpu = cpumask_local_spread(q_vector->v_idx, -1); + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); } return 0; From c3d26b75c22b0487c452bd610338aa015eae517b Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 29 Aug 2017 05:32:32 -0400 Subject: [PATCH 0603/2177] i40e: re-enable PTP L4 capabilities for XL710 if FW >6.0 Starting with XL710 FW 5.3 PTP L4 was disabled for XL710 due to a bug. The bug has since been resolved in XL710 FW >6.0 and PTP L4 can now be re-enabled on those devices with updated firmware. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d2bb4f17c89e..85132eee9f64 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9074,6 +9074,11 @@ static int i40e_sw_init(struct i40e_pf *pf) (pf->hw.aq.fw_maj_ver >= 5))) pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB; + /* Enable PTP L4 if FW > v6.0 */ + if (pf->hw.mac.type == I40E_MAC_XL710 && + pf->hw.aq.fw_maj_ver >= 6) + pf->hw_features |= I40E_HW_PTP_L4_CAPABLE; + if (pf->hw.func_caps.vmdq) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; pf->flags |= I40E_FLAG_VMDQ_ENABLED; From 60518a048919a2781e3192981d2946149240b837 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Tue, 29 Aug 2017 05:32:33 -0400 Subject: [PATCH 0604/2177] i40e: redfine I40E_PHY_TYPE_MAX Since I40E_PHY_TYPE_MAX is used as an iterator, usually combined with some sort of bit-shifting, it should only include actual PHY types and not error cases. Move it up in the enum declaration so that loops only iterate across valid PHY types. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 2 +- drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 4c85ea9cd89a..50c5a4c630b8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1771,9 +1771,9 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_MAX, I40E_PHY_TYPE_EMPTY = 0xFE, I40E_PHY_TYPE_DEFAULT = 0xFF, - I40E_PHY_TYPE_MAX }; #define I40E_LINK_SPEED_100MB_SHIFT 0x1 diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index ed5602f4bbcd..dc6fc8b1bc79 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1767,9 +1767,9 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_MAX, I40E_PHY_TYPE_EMPTY = 0xFE, I40E_PHY_TYPE_DEFAULT = 0xFF, - I40E_PHY_TYPE_MAX }; #define I40E_LINK_SPEED_100MB_SHIFT 0x1 From 0a0d9af5bc0b2130edaed9d9c57fc08bd338e3f5 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Tue, 29 Aug 2017 05:32:34 -0400 Subject: [PATCH 0605/2177] i40e: fix incorrect register definition This register was defined incorrectly. 
Fix the increment value to 8, and replace the iterator with _i to make the definition consistent with other statistics registers. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_register.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 86ca27f72f02..c234758dad15 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -2794,7 +2794,7 @@ #define I40E_GLV_RUPP_MAX_INDEX 383 #define I40E_GLV_RUPP_RUPP_SHIFT 0 #define I40E_GLV_RUPP_RUPP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_RUPP_RUPP_SHIFT) -#define I40E_GLV_TEPC(_VSI) (0x00344000 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: CORER */ +#define I40E_GLV_TEPC(_i) (0x00344000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */ #define I40E_GLV_TEPC_MAX_INDEX 383 #define I40E_GLV_TEPC_TEPC_SHIFT 0 #define I40E_GLV_TEPC_TEPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_TEPC_TEPC_SHIFT) From bd6cd4e6dd38a35215d3f28f12db51213c9aead6 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 29 Aug 2017 05:32:35 -0400 Subject: [PATCH 0606/2177] i40e/i40evf: use DECLARE_BITMAP for state When using set_bit and friends, we should be using actual bitmaps, and fix all the locations where we might access it. Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 8 ++++---- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 3 ++- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 3 ++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 8f326f87a815..6f2725fc50a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -278,8 +278,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->netdev, rx_ring->rx_bi); dev_info(&pf->pdev->dev, - " rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n", - i, rx_ring->state, + " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", + i, *rx_ring->state, rx_ring->queue_index, rx_ring->reg_idx); dev_info(&pf->pdev->dev, @@ -334,8 +334,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) tx_ring->netdev, tx_ring->tx_bi); dev_info(&pf->pdev->dev, - " tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n", - i, tx_ring->state, + " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", + i, *tx_ring->state, tx_ring->queue_index, tx_ring->reg_idx); dev_info(&pf->pdev->dev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 85132eee9f64..49401be7a2f4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2891,7 +2891,7 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) return; if ((vsi->tc_config.numtc <= 1) && - !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) { + !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state)) { cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), ring->queue_index); @@ -3010,7 +3010,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) struct i40e_hmc_obj_rxq rx_ctx; i40e_status err = 0; - ring->state = 
0; + bitmap_zero(ring->state, __I40E_RING_STATE_NBITS); /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(rx_ctx)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 2f848bc5e391..a4e3e665a1a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -342,6 +342,7 @@ struct i40e_rx_queue_stats { enum i40e_ring_state_t { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, + __I40E_RING_STATE_NBITS /* must be last */ }; /* some useful defines for virtchannel interface, which @@ -366,7 +367,7 @@ struct i40e_ring { struct i40e_tx_buffer *tx_bi; struct i40e_rx_buffer *rx_bi; }; - unsigned long state; + DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS); u16 queue_index; /* Queue number of ring */ u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 0d9f98bc07bd..d8ca802a71a9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -325,6 +325,7 @@ struct i40e_rx_queue_stats { enum i40e_ring_state_t { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, + __I40E_RING_STATE_NBITS /* must be last */ }; /* some useful defines for virtchannel interface, which @@ -348,7 +349,7 @@ struct i40e_ring { struct i40e_tx_buffer *tx_bi; struct i40e_rx_buffer *rx_bi; }; - unsigned long state; + DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS); u16 queue_index; /* Queue number of ring */ u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; From 7f6618226360b4aa0230a49525c81bf47d1d53c8 Mon Sep 17 00:00:00 2001 From: Alice Michael Date: Tue, 29 Aug 2017 05:32:36 -0400 Subject: [PATCH 0607/2177] i40e: fix merge error This patch removes some code that was accidentally added to the wrong function with a merge error. Fixes: c53934c6d1b1 ("i40e: fix: do not sleep in netdev_ops") Signed-off-by: Alice Michael Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 49401be7a2f4..628101bb08d4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1776,11 +1776,6 @@ static void i40e_set_rx_mode(struct net_device *netdev) vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; vsi->back->flags |= I40E_FLAG_FILTER_SYNC; } - - /* schedule our worker thread which will take care of - * applying the new filter changes - */ - i40e_service_event_schedule(vsi->back); } /** From 9a858178ef6dfee2b6e6ec65c42b65a129b4df02 Mon Sep 17 00:00:00 2001 From: Filip Sadowski Date: Tue, 29 Aug 2017 05:32:37 -0400 Subject: [PATCH 0608/2177] i40e: Display error message if module does not meet thermal requirements This patch causes error message to be displayed when NIC detects insertion of module that does not meet thermal requirements. 
Signed-off-by: Filip Sadowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 24 +++++++++++++++---- .../ethernet/intel/i40evf/i40e_adminq_cmd.h | 1 + 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index c78448daa7a1..4dc6d43f8812 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -451,6 +451,7 @@ struct i40e_pf { #define I40E_FLAG_CLIENT_RESET BIT_ULL(54) #define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(55) #define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(56) +#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT_ULL(57) #define I40E_FLAG_LEGACY_RX BIT_ULL(58) #define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT_ULL(59) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 50c5a4c630b8..a8f65aed5421 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1772,6 +1772,7 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, I40E_PHY_TYPE_MAX, + I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD, I40E_PHY_TYPE_EMPTY = 0xFE, I40E_PHY_TYPE_DEFAULT = 0xFF, }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 628101bb08d4..3d6d6a283327 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6558,12 +6558,26 @@ static void i40e_handle_link_event(struct i40e_pf *pf, */ i40e_link_event(pf); - /* check for unqualified module, if link is down */ - if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && - (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && - (!(status->link_info & I40E_AQ_LINK_UP))) + /* Check if module meets thermal requirements */ + if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) { dev_err(&pf->pdev->dev, - "The driver failed to link because an unqualified module was detected.\n"); + "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n"); + dev_err(&pf->pdev->dev, + "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + } else { + /* check for unqualified module, if link is down, suppress + * the message if link was forced to be down. 
+ */ + if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && + (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && + (!(status->link_info & I40E_AQ_LINK_UP)) && + (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) { + dev_err(&pf->pdev->dev, + "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n"); + dev_err(&pf->pdev->dev, + "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + } + } } /** diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index dc6fc8b1bc79..60c892f559b9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1768,6 +1768,7 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, I40E_PHY_TYPE_MAX, + I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD, I40E_PHY_TYPE_EMPTY = 0xFE, I40E_PHY_TYPE_DEFAULT = 0xFF, }; From 013df598d61161d356d5545f56422766ed3a3a38 Mon Sep 17 00:00:00 2001 From: Filip Sadowski Date: Tue, 29 Aug 2017 05:32:38 -0400 Subject: [PATCH 0609/2177] i40e: Properly maintain flow director filters list When there is no space for more flow director filters and user requested to add a new one it is rejected by firmware and automatically removed from the filter list maintained by driver. This behaviour is correct. Afterwards existing filter can be removed making free slot for the new one. This however causes the newly added filter to be accepted by firmware but removed from driver filter list resulting in not showing after issuing 'ethtool -n '. This happened due to not clearing the variable pf->fd_inv which stores filter number to be removed from the list when firmware refused to add the requested filter. It caused the filter with this specific ID to be constantly removed once it was added to the list although it has been accepted by firmware and effectively applied to the NIC. It was fixed by clearing pf->fd_inv variable after removal of the filter from the list when it was rejected by firmware. Signed-off-by: Filip Sadowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3d6d6a283327..9704cfef2f05 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6232,6 +6232,7 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) hlist_del(&filter->fdir_node); kfree(filter); pf->fdir_pf_active_filters--; + pf->fd_inv = 0; } } } From 19b7960b2da1db56ec3f8b478c70bab244feb644 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 29 Aug 2017 05:32:39 -0400 Subject: [PATCH 0610/2177] i40e: implement split PCI error reset handler This patch implements the PCI error handler reset_prepare and reset_done. This allows us to handle function level reset. Without this patch we are unable to perform and recover from an FLR correctly and this will cause VFs to be unable to recover from an FLR on the PF. 
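The split callbacks slot into the PCI core's FLR sequence roughly as follows: the core invokes .reset_prepare before issuing the function level reset so the driver can quiesce, and .reset_done afterwards so it can rebuild. A sketch of the resulting handler table (the concrete callbacks are in the hunk below):

    static const struct pci_error_handlers i40e_err_handler = {
        .error_detected = i40e_pci_error_detected,
        .slot_reset     = i40e_pci_error_slot_reset,
        .reset_prepare  = i40e_pci_error_reset_prepare, /* i40e_prep_for_reset() */
        .reset_done     = i40e_pci_error_reset_done,    /* i40e_reset_and_rebuild() */
        .resume         = i40e_pci_error_resume,
    };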
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9704cfef2f05..60b11fdeca2d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12045,6 +12045,28 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev) return result; } +/** + * i40e_pci_error_reset_prepare - prepare device driver for pci reset + * @pdev: PCI device information struct + */ +static void i40e_pci_error_reset_prepare(struct pci_dev *pdev) +{ + struct i40e_pf *pf = pci_get_drvdata(pdev); + + i40e_prep_for_reset(pf, false); +} + +/** + * i40e_pci_error_reset_done - pci reset done, device driver reset can begin + * @pdev: PCI device information struct + */ +static void i40e_pci_error_reset_done(struct pci_dev *pdev) +{ + struct i40e_pf *pf = pci_get_drvdata(pdev); + + i40e_reset_and_rebuild(pf, false, false); +} + /** * i40e_pci_error_resume - restart operations after PCI error recovery * @pdev: PCI device information struct @@ -12235,6 +12257,8 @@ static int i40e_resume(struct device *dev) static const struct pci_error_handlers i40e_err_handler = { .error_detected = i40e_pci_error_detected, .slot_reset = i40e_pci_error_slot_reset, + .reset_prepare = i40e_pci_error_reset_prepare, + .reset_done = i40e_pci_error_reset_done, .resume = i40e_pci_error_resume, }; From 052b93d0c2ecf693f13561b4b100d541ced41af0 Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Tue, 29 Aug 2017 05:32:40 -0400 Subject: [PATCH 0611/2177] i40e: do not enter PHY debug mode while setting LEDs behaviour Previous implementation of LED set/get functions required to enter PHY debug mode, in order to prevent access to it from FW and SW at the same time. Reset of all ports was a unwanted side effect. 
Signed-off-by: Mariusz Stachura Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 6203d362438c..de0dfe340494 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2010,7 +2010,9 @@ static int i40e_set_phys_id(struct net_device *netdev, if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) { pf->led_status = i40e_led_get(hw); } else { - i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL); + if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) + i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, + NULL); ret = i40e_led_get_phy(hw, &temp_status, &pf->phy_led_val); pf->led_status = temp_status; @@ -2035,7 +2037,8 @@ static int i40e_set_phys_id(struct net_device *netdev, ret = i40e_led_set_phy(hw, false, pf->led_status, (pf->phy_led_val | I40E_PHY_LED_MODE_ORIG)); - i40e_aq_set_phy_debug(hw, 0, NULL); + if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) + i40e_aq_set_phy_debug(hw, 0, NULL); } break; default: From 0a3b4f702fb1f76b03530d58af9efc5e10392185 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 29 Aug 2017 05:32:41 -0400 Subject: [PATCH 0612/2177] i40evf: enable support for VF VLAN tag stripping control A recent commit 809481484e5d ("i40e/i40evf: support for VF VLAN tag stripping control") added support for VFs to negotiate the control of VLAN tag stripping. This should have allowed VFs to disable the feature. Unfortunately, the flag was set only in netdev->feature flags and not in netdev->hw_features. This ultimately causes the stack to assume that it cannot change the flag, so it was unchangeable and marked as [fixed] in the ethtool -k output. Fix this by setting the feature in hw_features first, just as we do for the PF code. This enables ethtool -K to disable the feature correctly, and fully enables user control of the VLAN tag stripping feature. 
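The rule the fix relies on: the stack only lets user space toggle bits that are advertised in netdev->hw_features; a bit present only in netdev->features is reported as "[fixed]" by ethtool -k and cannot be cleared. A minimal sketch of the pattern used in the hunk below:

    if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
        hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;

    netdev->hw_features |= hw_features;  /* user-toggleable set */
    netdev->features |= hw_features;     /* currently enabled set */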
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40evf/i40evf_main.c | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index bc76378a71e2..1d2fc898b664 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -2423,10 +2423,6 @@ out_err: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } -#define I40EVF_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_TX |\ - NETIF_F_HW_VLAN_CTAG_RX |\ - NETIF_F_HW_VLAN_CTAG_FILTER) - /** * i40evf_fix_features - fix up the netdev feature bits * @netdev: our net device @@ -2439,9 +2435,11 @@ static netdev_features_t i40evf_fix_features(struct net_device *netdev, { struct i40evf_adapter *adapter = netdev_priv(netdev); - features &= ~I40EVF_VLAN_FEATURES; - if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) - features |= I40EVF_VLAN_FEATURES; + if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) + features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER); + return features; } @@ -2572,9 +2570,17 @@ int i40evf_process_config(struct i40evf_adapter *adapter) */ hw_features = hw_enc_features; + /* Enable VLAN features if supported */ + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) + hw_features |= (NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX); + netdev->hw_features |= hw_features; - netdev->features |= hw_features | I40EVF_VLAN_FEATURES; + netdev->features |= hw_features; + + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; adapter->vsi.id = adapter->vsi_res->vsi_id; From a5340d933e3cd7829a24bacc156dd1e475a1ae2c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 29 Aug 2017 05:32:42 -0400 Subject: [PATCH 0613/2177] i40e: ignore skb->xmit_more when deciding to set RS bit Since commit 6a7fded776a7 ("i40e: Fix RS bit update in Tx path and disable force WB workaround") we've tried to "optimize" setting the RS bit based around skb->xmit_more. This same logic was refactored in commit 1dc8b538795f ("i40e: Reorder logic for coalescing RS bits"), but ultimately was not functionally changed. Using skb->xmit_more in this way is incorrect, because in certain circumstances we may see a large number of skbs in sequence with xmit_more set. This leads to a performance loss as the hardware does not writeback anything for those packets, which delays the time it takes for us to respond to the stack transmit requests. This significantly impacts UDP performance, especially when layered with multiple devices, such as bonding, VLANs, and vnet setups. This was not noticed until now because it is difficult to create a setup which reproduces the issue. It was discovered in a UDP_STREAM test in a VM, connected using a vnet device to a bridge, which is connected to a bonded pair of X710 ports in active-backup mode with a VLAN. These layered devices seem to compound the number of skbs transmitted at once by the qdisc. Additionally, the problem can be masked by reducing the ITR value. Since the original commit does not provide strong justification for this RS bit "optimization", revert to the previous behavior of setting the RS bit every 4th packet. 
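After the revert, the writeback policy reduces to the sketch below (a flattened view of the + side of the hunk that follows): RS is requested purely from the descriptor count, and xmit_more is only consulted for the tail bump.

    desc_count |= ++tx_ring->packet_stride;
    if (desc_count >= WB_STRIDE) {          /* i.e. every 4th packet */
        td_cmd |= I40E_TX_DESC_CMD_RS;      /* request a descriptor writeback */
        tx_ring->packet_stride = 0;
    }

    /* later, when notifying hardware of the packet */
    if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more)
        writel(i, tx_ring->tail);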
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 34 +++------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index d9fdf69bbc6e..3bd176606c09 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3167,38 +3167,12 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, /* write last descriptor with EOP bit */ td_cmd |= I40E_TX_DESC_CMD_EOP; - /* We can OR these values together as they both are checked against - * 4 below and at this point desc_count will be used as a boolean value - * after this if/else block. + /* We OR these values together to check both against 4 (WB_STRIDE) + * below. This is safe since we don't re-use desc_count afterwards. */ desc_count |= ++tx_ring->packet_stride; - /* Algorithm to optimize tail and RS bit setting: - * if queue is stopped - * mark RS bit - * reset packet counter - * else if xmit_more is supported and is true - * advance packet counter to 4 - * reset desc_count to 0 - * - * if desc_count >= 4 - * mark RS bit - * reset packet counter - * if desc_count > 0 - * update tail - * - * Note: If there are less than 4 descriptors - * pending and interrupts were disabled the service task will - * trigger a force WB. - */ - if (netif_xmit_stopped(txring_txq(tx_ring))) { - goto do_rs; - } else if (skb->xmit_more) { - /* set stride to arm on next packet and reset desc_count */ - tx_ring->packet_stride = WB_STRIDE; - desc_count = 0; - } else if (desc_count >= WB_STRIDE) { -do_rs: + if (desc_count >= WB_STRIDE) { /* write last descriptor with RS bit set */ td_cmd |= I40E_TX_DESC_CMD_RS; tx_ring->packet_stride = 0; @@ -3219,7 +3193,7 @@ do_rs: first->next_to_watch = tx_desc; /* notify HW of packet */ - if (desc_count) { + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); /* we need this if more than one processor can write to our tail From b74f571f59a8a3dae998e3b95e0f88fac39bfef3 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 1 Sep 2017 13:54:07 -0700 Subject: [PATCH 0614/2177] i40e/i40evf: organize and re-number feature flags Now that we've reduced the number of flags, organize similar flags together and re-number them accordingly. Since we don't yet have more than 32 flags, we'll use a u32 for both the hw_features and flag field. Should we gain more flags in the future, we may need to convert to a u64 or separate flags out into two fields. One alternative approach considered, but not implemented here, was to use an enumeration for the flag variables, and create a macro I40E_FLAG() which used string concatenation to generate BIT_ULL values. This has the advantage of making the actual bit values compile-time dynamic so that we do not need to worry about matching the order to the bit value. However, this does produce a high level of code churn, and makes it more difficult to read a dumped flags value when debugging. 
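For reference, the rejected alternative would have looked something like the sketch below: an enum supplies the bit positions and a token-pasting I40E_FLAG() macro turns a flag name into its BIT_ULL value. The identifiers here are hypothetical; nothing of this shape is added by the patch:

    enum i40e_pf_flags_bits {
        I40E_FLAG_RX_CSUM_ENABLED_BIT,
        I40E_FLAG_MSI_ENABLED_BIT,
        /* ... one enumerator per flag, order defines the bit value ... */
    };

    #define I40E_FLAG(name) BIT_ULL(name##_BIT)

    /* usage: if (pf->flags & I40E_FLAG(I40E_FLAG_MSI_ENABLED)) ... */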
Change-ID: I8653fff69453cd547d6fe98d29dfa9d8710387d1 Signed-off-by: Jacob Keller Reviewed-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 98 +++++++++---------- .../net/ethernet/intel/i40e/i40e_ethtool.c | 6 +- drivers/net/ethernet/intel/i40evf/i40evf.h | 32 +++--- 3 files changed, 68 insertions(+), 68 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 4dc6d43f8812..18c453a3e728 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -403,57 +403,57 @@ struct i40e_pf { struct timer_list service_timer; struct work_struct service_task; - u64 hw_features; -#define I40E_HW_RSS_AQ_CAPABLE BIT_ULL(0) -#define I40E_HW_128_QP_RSS_CAPABLE BIT_ULL(1) -#define I40E_HW_ATR_EVICT_CAPABLE BIT_ULL(2) -#define I40E_HW_WB_ON_ITR_CAPABLE BIT_ULL(3) -#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(4) -#define I40E_HW_NO_PCI_LINK_CHECK BIT_ULL(5) -#define I40E_HW_100M_SGMII_CAPABLE BIT_ULL(6) -#define I40E_HW_NO_DCB_SUPPORT BIT_ULL(7) -#define I40E_HW_USE_SET_LLDP_MIB BIT_ULL(8) -#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT_ULL(9) -#define I40E_HW_PTP_L4_CAPABLE BIT_ULL(10) -#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT_ULL(11) -#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE BIT_ULL(12) -#define I40E_HW_HAVE_CRT_RETIMER BIT_ULL(13) -#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT_ULL(14) -#define I40E_HW_PHY_CONTROLS_LEDS BIT_ULL(15) -#define I40E_HW_STOP_FW_LLDP BIT_ULL(16) -#define I40E_HW_PORT_ID_VALID BIT_ULL(17) -#define I40E_HW_RESTART_AUTONEG BIT_ULL(18) + u32 hw_features; +#define I40E_HW_RSS_AQ_CAPABLE BIT(0) +#define I40E_HW_128_QP_RSS_CAPABLE BIT(1) +#define I40E_HW_ATR_EVICT_CAPABLE BIT(2) +#define I40E_HW_WB_ON_ITR_CAPABLE BIT(3) +#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT(4) +#define I40E_HW_NO_PCI_LINK_CHECK BIT(5) +#define I40E_HW_100M_SGMII_CAPABLE BIT(6) +#define I40E_HW_NO_DCB_SUPPORT BIT(7) +#define I40E_HW_USE_SET_LLDP_MIB BIT(8) +#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT(9) +#define I40E_HW_PTP_L4_CAPABLE BIT(10) +#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT(11) +#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE BIT(12) +#define I40E_HW_HAVE_CRT_RETIMER BIT(13) +#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT(14) +#define I40E_HW_PHY_CONTROLS_LEDS BIT(15) +#define I40E_HW_STOP_FW_LLDP BIT(16) +#define I40E_HW_PORT_ID_VALID BIT(17) +#define I40E_HW_RESTART_AUTONEG BIT(18) u64 flags; -#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(1) -#define I40E_FLAG_MSI_ENABLED BIT_ULL(2) -#define I40E_FLAG_MSIX_ENABLED BIT_ULL(3) -#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT_ULL(4) -#define I40E_FLAG_RSS_ENABLED BIT_ULL(6) -#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(7) -#define I40E_FLAG_IWARP_ENABLED BIT_ULL(10) -#define I40E_FLAG_FILTER_SYNC BIT_ULL(15) -#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16) -#define I40E_FLAG_SRIOV_ENABLED BIT_ULL(19) -#define I40E_FLAG_DCB_ENABLED BIT_ULL(20) -#define I40E_FLAG_FD_SB_ENABLED BIT_ULL(21) -#define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22) -#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT_ULL(23) -#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT_ULL(24) -#define I40E_FLAG_PTP BIT_ULL(25) -#define I40E_FLAG_MFP_ENABLED BIT_ULL(26) -#define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(27) -#define I40E_FLAG_DCB_CAPABLE BIT_ULL(29) -#define I40E_FLAG_VEB_STATS_ENABLED BIT_ULL(37) -#define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39) -#define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) -#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT_ULL(51) -#define 
I40E_FLAG_CLIENT_RESET BIT_ULL(54) -#define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(55) -#define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(56) -#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT_ULL(57) -#define I40E_FLAG_LEGACY_RX BIT_ULL(58) -#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT_ULL(59) +#define I40E_FLAG_RX_CSUM_ENABLED BIT(0) +#define I40E_FLAG_MSI_ENABLED BIT(1) +#define I40E_FLAG_MSIX_ENABLED BIT(2) +#define I40E_FLAG_RSS_ENABLED BIT(3) +#define I40E_FLAG_VMDQ_ENABLED BIT(4) +#define I40E_FLAG_FILTER_SYNC BIT(5) +#define I40E_FLAG_SRIOV_ENABLED BIT(6) +#define I40E_FLAG_DCB_CAPABLE BIT(7) +#define I40E_FLAG_DCB_ENABLED BIT(8) +#define I40E_FLAG_FD_SB_ENABLED BIT(9) +#define I40E_FLAG_FD_ATR_ENABLED BIT(10) +#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT(11) +#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT(12) +#define I40E_FLAG_MFP_ENABLED BIT(13) +#define I40E_FLAG_UDP_FILTER_SYNC BIT(14) +#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT(15) +#define I40E_FLAG_VEB_MODE_ENABLED BIT(16) +#define I40E_FLAG_VEB_STATS_ENABLED BIT(17) +#define I40E_FLAG_LINK_POLLING_ENABLED BIT(18) +#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT(19) +#define I40E_FLAG_TEMP_LINK_POLLING BIT(20) +#define I40E_FLAG_LEGACY_RX BIT(21) +#define I40E_FLAG_PTP BIT(22) +#define I40E_FLAG_IWARP_ENABLED BIT(23) +#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT(24) +#define I40E_FLAG_CLIENT_L2_CHANGE BIT(25) +#define I40E_FLAG_CLIENT_RESET BIT(26) +#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(27) +#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(28) struct i40e_client_instance *cinst; bool stat_offsets_loaded; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index de0dfe340494..afd3ca8d9851 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4095,7 +4095,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u64 orig_flags, new_flags, changed_flags; + u32 orig_flags, new_flags, changed_flags; u32 i, j; orig_flags = READ_ONCE(pf->flags); @@ -4147,12 +4147,12 @@ flags_complete: return -EOPNOTSUPP; /* Compare and exchange the new flags into place. If we failed, that - * is if cmpxchg64 returns anything but the old value, this means that + * is if cmpxchg returns anything but the old value, this means that * something else has modified the flags variable since we copied it * originally. We'll just punt with an error and log something in the * message buffer. 
*/ - if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) { + if (cmpxchg(&pf->flags, orig_flags, new_flags) != orig_flags) { dev_warn(&pf->pdev->dev, "Unable to update pf->flags as it was modified by another thread...\n"); return -EAGAIN; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 5982362c5643..de0af521d602 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -222,22 +222,22 @@ struct i40evf_adapter { u32 flags; #define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0) -#define I40EVF_FLAG_IMIR_ENABLED BIT(5) -#define I40EVF_FLAG_MQ_CAPABLE BIT(6) -#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8) -#define I40EVF_FLAG_RESET_PENDING BIT(9) -#define I40EVF_FLAG_RESET_NEEDED BIT(10) -#define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(11) -#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(12) -#define I40EVF_FLAG_ADDR_SET_BY_PF BIT(13) -#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(14) -#define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(15) -#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE BIT(16) -#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(17) -#define I40EVF_FLAG_PROMISC_ON BIT(18) -#define I40EVF_FLAG_ALLMULTI_ON BIT(19) -#define I40EVF_FLAG_LEGACY_RX BIT(20) -#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(21) +#define I40EVF_FLAG_IMIR_ENABLED BIT(1) +#define I40EVF_FLAG_MQ_CAPABLE BIT(2) +#define I40EVF_FLAG_PF_COMMS_FAILED BIT(3) +#define I40EVF_FLAG_RESET_PENDING BIT(4) +#define I40EVF_FLAG_RESET_NEEDED BIT(5) +#define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(6) +#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(7) +#define I40EVF_FLAG_ADDR_SET_BY_PF BIT(8) +#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(9) +#define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(10) +#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE BIT(11) +#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12) +#define I40EVF_FLAG_PROMISC_ON BIT(13) +#define I40EVF_FLAG_ALLMULTI_ON BIT(14) +#define I40EVF_FLAG_LEGACY_RX BIT(15) +#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16) /* duplicates for common code */ #define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED From 4e64b1ed15e25b8dcc2819c6d43dab72eb0bea26 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 5 Oct 2017 23:46:14 -0700 Subject: [PATCH 0615/2177] net/ipv6: Convert icmpv6_push_pending_frames to void commit cc71b7b07119 ("net/ipv6: remove unused err variable on icmpv6_push_pending_frames") exposed icmpv6_push_pending_frames return value not being used. Remove now unnecessary int err declarations and uses. Miscellanea: o Remove unnecessary goto and out: labels o Realign arguments Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 4 ++-- net/ipv6/icmp.c | 43 ++++++++++++++++++------------------------- net/ipv6/ping.c | 5 ++--- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6eac5cf8f1e6..3cda3b521c36 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -300,8 +300,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); -int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, - struct icmp6hdr *thdr, int len); +void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len); int ip6_ra_control(struct sock *sk, int sel); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index aeb49b4d8c7d..4e52d52a6752 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -250,15 +250,15 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, - struct icmp6hdr *thdr, int len) +void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; skb = skb_peek(&sk->sk_write_queue); if (!skb) - goto out; + return; icmp6h = icmp6_hdr(skb); memcpy(icmp6h, thdr, sizeof(struct icmp6hdr)); @@ -286,8 +286,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, tmp_csum); } ip6_push_pending_frames(sk); -out: - return 0; } struct icmpv6_msg { @@ -437,7 +435,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, int iif = 0; int addr_type = 0; int len; - int err = 0; u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || @@ -574,17 +571,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, rcu_read_lock(); idev = __in6_dev_get(skb->dev); - err = ip6_append_data(sk, icmpv6_getfrag, &msg, - len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), - &ipc6, &fl6, (struct rt6_info *)dst, - MSG_DONTWAIT, &sockc_unused); - if (err) { + if (ip6_append_data(sk, icmpv6_getfrag, &msg, + len + sizeof(struct icmp6hdr), + sizeof(struct icmp6hdr), + &ipc6, &fl6, (struct rt6_info *)dst, + MSG_DONTWAIT, &sockc_unused)) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, - len + sizeof(struct icmp6hdr)); + icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + len + sizeof(struct icmp6hdr)); } rcu_read_unlock(); out_dst_release: @@ -681,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct icmpv6_msg msg; struct dst_entry *dst; struct ipcm6_cookie ipc6; - int err = 0; u32 mark = IP6_REPLY_MARK(net, skb->mark); struct sockcm_cookie sockc_unused = {0}; @@ -718,8 +713,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - err = ip6_dst_lookup(net, sk, &dst, &fl6); - if (err) + if (ip6_dst_lookup(net, sk, &dst, &fl6)) goto out; dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) @@ -736,17 +730,16 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipc6.dontfrag = np->dontfrag; ipc6.opt = NULL; - err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), &ipc6, &fl6, - (struct rt6_info *)dst, MSG_DONTWAIT, - &sockc_unused); - - if (err) { + if (ip6_append_data(sk, icmpv6_getfrag, &msg, + skb->len + sizeof(struct icmp6hdr), + 
sizeof(struct icmp6hdr), &ipc6, &fl6, + (struct rt6_info *)dst, MSG_DONTWAIT, + &sockc_unused)) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, - skb->len + sizeof(struct icmp6hdr)); + icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + skb->len + sizeof(struct icmp6hdr)); } dst_release(dst); out: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ac826dd338ff..d12c55dad7d1 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -154,9 +154,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { - err = icmpv6_push_pending_frames(sk, &fl6, - (struct icmp6hdr *) &pfh.icmph, - len); + icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *)&pfh.icmph, len); } release_sock(sk); From bb055c198d9b2ba7baf292a440c2d24fe87db494 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 6 Oct 2017 10:21:20 +0200 Subject: [PATCH 0616/2177] nfp: add mpls match offloading support Previously MPLS match offloading was not supported. This patch enables MPLS match offloading support for label, bos and tc fields. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.h | 5 +++++ .../net/ethernet/netronome/nfp/flower/match.c | 17 +++++++++++++++-- .../net/ethernet/netronome/nfp/flower/offload.c | 6 +----- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 504ddaa21701..fe4751607b2b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -57,6 +57,11 @@ #define NFP_FLOWER_MASK_VLAN_CFI BIT(12) #define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) +#define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) +#define NFP_FLOWER_MASK_MPLS_TC GENMASK(11, 9) +#define NFP_FLOWER_MASK_MPLS_BOS BIT(8) +#define NFP_FLOWER_MASK_MPLS_Q BIT(0) + #define NFP_FL_SC_ACT_DROP 0x80000000 #define NFP_FL_SC_ACT_USER 0x7D000000 #define NFP_FL_SC_ACT_POPV 0x6A000000 diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 865a815ab92a..e35ade9cd3d5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -111,8 +111,21 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame, ether_addr_copy(frame->mac_src, &addr->src[0]); } - if (mask_version) - frame->mpls_lse = cpu_to_be32(~0); + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) { + struct flow_dissector_key_mpls *mpls; + u32 t_mpls; + + mpls = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_MPLS, + target); + + t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, mpls->mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, mpls->mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, mpls->mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + frame->mpls_lse = cpu_to_be32(t_mpls); + } } static void diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 3d9537ebdea4..a721a00a2bcc 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -57,6 +57,7 @@ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ 
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_MPLS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \ @@ -238,11 +239,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, case cpu_to_be16(ETH_P_ARP): return -EOPNOTSUPP; - /* Currently we do not offload MPLS. */ - case cpu_to_be16(ETH_P_MPLS_UC): - case cpu_to_be16(ETH_P_MPLS_MC): - return -EOPNOTSUPP; - /* Will be included in layer 2. */ case cpu_to_be16(ETH_P_8021Q): break; From a1e9203cc6e5247f6e7af897252ca92cdf5edb70 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 6 Oct 2017 10:21:21 +0200 Subject: [PATCH 0617/2177] nfp: add IPv4 ttl and tos match offloading support Previously matching on IPv4 ttl and tos fields were not offloaded. This patch enables offloading IPv4 ttl and tos as match fields. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/match.c | 11 ++++++++++- drivers/net/ethernet/netronome/nfp/flower/offload.c | 4 ---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index e35ade9cd3d5..d6096b4f1391 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -156,7 +156,6 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame, struct flow_dissector_key_ipv4_addrs *addr; struct flow_dissector_key_basic *basic; - /* Wildcard TOS/TTL for now. */ memset(frame, 0, sizeof(struct nfp_flower_ipv4)); if (dissector_uses_key(flow->dissector, @@ -174,6 +173,16 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame, target); frame->proto = basic->ip_proto; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *flow_ip; + + flow_ip = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_IP, + target); + frame->tos = flow_ip->tos; + frame->ttl = flow_ip->ttl; + } } static void diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index a721a00a2bcc..3651db5dfb8b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -216,10 +216,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, /* Ethernet type is present in the key. */ switch (key_basic->n_proto) { case cpu_to_be16(ETH_P_IP): - if (mask_ip && mask_ip->tos) - return -EOPNOTSUPP; - if (mask_ip && mask_ip->ttl) - return -EOPNOTSUPP; key_layer |= NFP_FLOWER_LAYER_IPV4; key_size += sizeof(struct nfp_flower_ipv4); break; From fc53b4a7014aab8c260c2b81ae6c24687dff3045 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 6 Oct 2017 10:21:22 +0200 Subject: [PATCH 0618/2177] nfp: add IPv6 ttl and tos match offloading support Previously matching on IPv6 ttl and tos fields were not offloaded. This patch enables offloading IPv6 ttl and tos as match fields. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/flower/match.c | 11 ++++++++++- drivers/net/ethernet/netronome/nfp/flower/offload.c | 10 ---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index d6096b4f1391..60614d4f0e22 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -194,7 +194,6 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, struct flow_dissector_key_ipv6_addrs *addr; struct flow_dissector_key_basic *basic; - /* Wildcard LABEL/TOS/TTL for now. */ memset(frame, 0, sizeof(struct nfp_flower_ipv6)); if (dissector_uses_key(flow->dissector, @@ -212,6 +211,16 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, target); frame->proto = basic->ip_proto; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *flow_ip; + + flow_ip = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_IP, + target); + frame->tos = flow_ip->tos; + frame->ttl = flow_ip->ttl; + } } static void diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 3651db5dfb8b..6f239c27964e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -135,7 +135,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, { struct flow_dissector_key_basic *mask_basic = NULL; struct flow_dissector_key_basic *key_basic = NULL; - struct flow_dissector_key_ip *mask_ip = NULL; u32 key_layer_two; u8 key_layer; int key_size; @@ -207,11 +206,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, flow->key); } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) - mask_ip = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_IP, - flow->mask); - if (mask_basic && mask_basic->n_proto) { /* Ethernet type is present in the key. */ switch (key_basic->n_proto) { @@ -221,10 +215,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, break; case cpu_to_be16(ETH_P_IPV6): - if (mask_ip && mask_ip->tos) - return -EOPNOTSUPP; - if (mask_ip && mask_ip->ttl) - return -EOPNOTSUPP; key_layer |= NFP_FLOWER_LAYER_IPV6; key_size += sizeof(struct nfp_flower_ipv6); break; From da83d8fe5889822691384d2b3edf1716fb6debdb Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 6 Oct 2017 10:21:23 +0200 Subject: [PATCH 0619/2177] nfp: add set ethernet header action flower offload Previously we did not have offloading support for set ethernet actions. This patch enables TC flower offload of set ethernet actions. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- .../ethernet/netronome/nfp/flower/action.c | 85 +++++++++++++++++++ .../net/ethernet/netronome/nfp/flower/cmsg.h | 8 ++ 2 files changed, 93 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 38f3835ae176..631ea4b7d08e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -223,6 +224,87 @@ nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan, return 0; } +static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) +{ + u32 oldvalue = get_unaligned((u32 *)p_exact); + u32 oldmask = get_unaligned((u32 *)p_mask); + + value &= mask; + value |= oldvalue & ~mask; + + put_unaligned(oldmask | mask, (u32 *)p_mask); + put_unaligned(value, (u32 *)p_exact); +} + +static int +nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off, + struct nfp_fl_set_eth *set_eth) +{ + u16 tmp_set_eth_op; + u32 exact, mask; + + if (off + 4 > ETH_ALEN * 2) + return -EOPNOTSUPP; + + mask = ~tcf_pedit_mask(action, idx); + exact = tcf_pedit_val(action, idx); + + if (exact & ~mask) + return -EOPNOTSUPP; + + nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off], + &set_eth->eth_addr_mask[off]); + + set_eth->reserved = cpu_to_be16(0); + tmp_set_eth_op = FIELD_PREP(NFP_FL_ACT_LEN_LW, + sizeof(*set_eth) >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, + NFP_FL_ACTION_OPCODE_SET_ETHERNET); + set_eth->a_op = cpu_to_be16(tmp_set_eth_op); + + return 0; +} + +static int +nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) +{ + struct nfp_fl_set_eth set_eth; + enum pedit_header_type htype; + int idx, nkeys, err; + size_t act_size; + u32 offset, cmd; + + memset(&set_eth, 0, sizeof(set_eth)); + nkeys = tcf_pedit_nkeys(action); + + for (idx = 0; idx < nkeys; idx++) { + cmd = tcf_pedit_cmd(action, idx); + htype = tcf_pedit_htype(action, idx); + offset = tcf_pedit_offset(action, idx); + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) + return -EOPNOTSUPP; + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + err = nfp_fl_set_eth(action, idx, offset, &set_eth); + break; + default: + return -EOPNOTSUPP; + } + if (err) + return err; + } + + if (set_eth.a_op) { + act_size = sizeof(set_eth); + memcpy(nfp_action, &set_eth, act_size); + *a_len += act_size; + } + + return 0; +} + static int nfp_flower_loop_action(const struct tc_action *a, struct nfp_fl_payload *nfp_fl, int *a_len, @@ -301,6 +383,9 @@ nfp_flower_loop_action(const struct tc_action *a, } else if (is_tcf_tunnel_release(a)) { /* Tunnel decap is handled by default so accept action. */ return 0; + } else if (is_tcf_pedit(a)) { + if (nfp_fl_pedit(a, &nfp_fl->action_data[*a_len], a_len)) + return -EOPNOTSUPP; } else { /* Currently we do not handle any other actions. 
*/ return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index fe4751607b2b..ffeaf85aa420 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -77,6 +77,7 @@ #define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 #define NFP_FL_ACTION_OPCODE_POP_VLAN 2 #define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 #define NFP_FL_ACTION_OPCODE_NUM 32 @@ -107,6 +108,13 @@ enum nfp_flower_tun_type { NFP_FL_TUNNEL_VXLAN = 2, }; +struct nfp_fl_set_eth { + __be16 a_op; + __be16 reserved; + u8 eth_addr_mask[ETH_ALEN * 2]; + u8 eth_addr_val[ETH_ALEN * 2]; +}; + struct nfp_fl_output { __be16 a_op; __be16 flags; From c0b1bd9a8b8ac81e4e0985aad4a8de869ab6a668 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 6 Oct 2017 10:21:24 +0200 Subject: [PATCH 0620/2177] nfp: add set ipv4 header action flower offload Previously we did not have offloading support for set IPv4 actions. This patch enables TC flower offload of set IPv4 src and dst address actions. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/action.c | 46 +++++++++++++++++++ .../net/ethernet/netronome/nfp/flower/cmsg.h | 10 ++++ 2 files changed, 56 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 631ea4b7d08e..2f886a529ee4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -265,15 +265,54 @@ nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off, return 0; } +static int +nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, + struct nfp_fl_set_ip4_addrs *set_ip_addr) +{ + u16 tmp_set_ipv4_op; + __be32 exact, mask; + + /* We are expecting tcf_pedit to return a big endian value */ + mask = (__force __be32)~tcf_pedit_mask(action, idx); + exact = (__force __be32)tcf_pedit_val(action, idx); + + if (exact & ~mask) + return -EOPNOTSUPP; + + switch (off) { + case offsetof(struct iphdr, daddr): + set_ip_addr->ipv4_dst_mask = mask; + set_ip_addr->ipv4_dst = exact; + break; + case offsetof(struct iphdr, saddr): + set_ip_addr->ipv4_src_mask = mask; + set_ip_addr->ipv4_src = exact; + break; + default: + return -EOPNOTSUPP; + } + + set_ip_addr->reserved = cpu_to_be16(0); + tmp_set_ipv4_op = FIELD_PREP(NFP_FL_ACT_LEN_LW, + sizeof(*set_ip_addr) >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, + NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS); + set_ip_addr->a_op = cpu_to_be16(tmp_set_ipv4_op); + + return 0; +} + static int nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) { + struct nfp_fl_set_ip4_addrs set_ip_addr; struct nfp_fl_set_eth set_eth; enum pedit_header_type htype; int idx, nkeys, err; size_t act_size; u32 offset, cmd; + memset(&set_ip_addr, 0, sizeof(set_ip_addr)); memset(&set_eth, 0, sizeof(set_eth)); nkeys = tcf_pedit_nkeys(action); @@ -289,6 +328,9 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: err = nfp_fl_set_eth(action, idx, offset, &set_eth); break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + err = nfp_fl_set_ip4(action, idx, offset, &set_ip_addr); + break; default: return -EOPNOTSUPP; } @@ -300,6 +342,10 @@ nfp_fl_pedit(const struct tc_action *action, 
char *nfp_action, int *a_len) act_size = sizeof(set_eth); memcpy(nfp_action, &set_eth, act_size); *a_len += act_size; + } else if (set_ip_addr.a_op) { + act_size = sizeof(set_ip_addr); + memcpy(nfp_action, &set_ip_addr, act_size); + *a_len += act_size; } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index ffeaf85aa420..7ace557fdf84 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -78,6 +78,7 @@ #define NFP_FL_ACTION_OPCODE_POP_VLAN 2 #define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 #define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 #define NFP_FL_ACTION_OPCODE_NUM 32 @@ -115,6 +116,15 @@ struct nfp_fl_set_eth { u8 eth_addr_val[ETH_ALEN * 2]; }; +struct nfp_fl_set_ip4_addrs { + __be16 a_op; + __be16 reserved; + __be32 ipv4_src_mask; + __be32 ipv4_src; + __be32 ipv4_dst_mask; + __be32 ipv4_dst; +}; + struct nfp_fl_output { __be16 a_op; __be16 flags; From 354b82bb320e04547e4755d2cc2ebab87a6d8abe Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 6 Oct 2017 10:21:25 +0200 Subject: [PATCH 0621/2177] nfp: add set ipv6 source and destination address Previously we did not have offloading support for set IPv6 actions. This patch enables TC flower offload of set IPv6 src and dst address actions. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/action.c | 72 +++++++++++++++++++ .../net/ethernet/netronome/nfp/flower/cmsg.h | 11 +++ 2 files changed, 83 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2f886a529ee4..4394e4f15fdb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -302,9 +302,55 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, return 0; } +static void +nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask, + struct nfp_fl_set_ipv6_addr *ip6) +{ + u16 tmp_set_op; + + ip6->ipv6[idx % 4].mask = mask; + ip6->ipv6[idx % 4].exact = exact; + + ip6->reserved = cpu_to_be16(0); + tmp_set_op = FIELD_PREP(NFP_FL_ACT_LEN_LW, sizeof(*ip6) >> + NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, opcode_tag); + ip6->a_op = cpu_to_be16(tmp_set_op); +} + +static int +nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, + struct nfp_fl_set_ipv6_addr *ip_dst, + struct nfp_fl_set_ipv6_addr *ip_src) +{ + __be32 exact, mask; + + /* We are expecting tcf_pedit to return a big endian value */ + mask = (__force __be32)~tcf_pedit_mask(action, idx); + exact = (__force __be32)tcf_pedit_val(action, idx); + + if (exact & ~mask) + return -EOPNOTSUPP; + + if (off < offsetof(struct ipv6hdr, saddr)) + return -EOPNOTSUPP; + else if (off < offsetof(struct ipv6hdr, daddr)) + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx, + exact, mask, ip_src); + else if (off < offsetof(struct ipv6hdr, daddr) + + sizeof(struct in6_addr)) + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx, + exact, mask, ip_dst); + else + return -EOPNOTSUPP; + + return 0; +} + static int nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) { + struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src; struct nfp_fl_set_ip4_addrs set_ip_addr; struct 
nfp_fl_set_eth set_eth; enum pedit_header_type htype; @@ -312,6 +358,8 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) size_t act_size; u32 offset, cmd; + memset(&set_ip6_dst, 0, sizeof(set_ip6_dst)); + memset(&set_ip6_src, 0, sizeof(set_ip6_src)); memset(&set_ip_addr, 0, sizeof(set_ip_addr)); memset(&set_eth, 0, sizeof(set_eth)); nkeys = tcf_pedit_nkeys(action); @@ -331,6 +379,10 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: err = nfp_fl_set_ip4(action, idx, offset, &set_ip_addr); break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + err = nfp_fl_set_ip6(action, idx, offset, &set_ip6_dst, + &set_ip6_src); + break; default: return -EOPNOTSUPP; } @@ -346,6 +398,26 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) act_size = sizeof(set_ip_addr); memcpy(nfp_action, &set_ip_addr, act_size); *a_len += act_size; + } else if (set_ip6_dst.a_op && set_ip6_src.a_op) { + /* TC compiles set src and dst IPv6 address as a single action, + * the hardware requires this to be 2 separate actions. + */ + act_size = sizeof(set_ip6_src); + memcpy(nfp_action, &set_ip6_src, act_size); + *a_len += act_size; + + act_size = sizeof(set_ip6_dst); + memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst, + act_size); + *a_len += act_size; + } else if (set_ip6_dst.a_op) { + act_size = sizeof(set_ip6_dst); + memcpy(nfp_action, &set_ip6_dst, act_size); + *a_len += act_size; + } else if (set_ip6_src.a_op) { + act_size = sizeof(set_ip6_src); + memcpy(nfp_action, &set_ip6_src, act_size); + *a_len += act_size; } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 7ace557fdf84..527914e294d7 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -79,6 +79,8 @@ #define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 #define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 #define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC 11 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 #define NFP_FL_ACTION_OPCODE_NUM 32 @@ -125,6 +127,15 @@ struct nfp_fl_set_ip4_addrs { __be32 ipv4_dst; }; +struct nfp_fl_set_ipv6_addr { + __be16 a_op; + __be16 reserved; + struct { + __be32 mask; + __be32 exact; + } ipv6[4]; +}; + struct nfp_fl_output { __be16 a_op; __be16 flags; From f8b7b0a6b113eea5b528e51a2086e6f93f4e4933 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 6 Oct 2017 10:21:26 +0200 Subject: [PATCH 0622/2177] nfp: add set tcp and udp header action flower offload Previously we did not have offloading support for set TCP/UDP actions. This patch enables TC flower offload of set TCP/UDP sport and dport actions. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- .../ethernet/netronome/nfp/flower/action.c | 42 +++++++++++++++++++ .../net/ethernet/netronome/nfp/flower/cmsg.h | 9 ++++ 2 files changed, 51 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 4394e4f15fdb..1194c47ef827 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -347,11 +347,40 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, return 0; } +static int +nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off, + struct nfp_fl_set_tport *set_tport, int opcode) +{ + u32 exact, mask; + u16 tmp_set_op; + + if (off) + return -EOPNOTSUPP; + + mask = ~tcf_pedit_mask(action, idx); + exact = tcf_pedit_val(action, idx); + + if (exact & ~mask) + return -EOPNOTSUPP; + + nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val, + set_tport->tp_port_mask); + + set_tport->reserved = cpu_to_be16(0); + tmp_set_op = FIELD_PREP(NFP_FL_ACT_LEN_LW, + sizeof(*set_tport) >> NFP_FL_LW_SIZ); + tmp_set_op |= FIELD_PREP(NFP_FL_ACT_JMP_ID, opcode); + set_tport->a_op = cpu_to_be16(tmp_set_op); + + return 0; +} + static int nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) { struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src; struct nfp_fl_set_ip4_addrs set_ip_addr; + struct nfp_fl_set_tport set_tport; struct nfp_fl_set_eth set_eth; enum pedit_header_type htype; int idx, nkeys, err; @@ -361,6 +390,7 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) memset(&set_ip6_dst, 0, sizeof(set_ip6_dst)); memset(&set_ip6_src, 0, sizeof(set_ip6_src)); memset(&set_ip_addr, 0, sizeof(set_ip_addr)); + memset(&set_tport, 0, sizeof(set_tport)); memset(&set_eth, 0, sizeof(set_eth)); nkeys = tcf_pedit_nkeys(action); @@ -383,6 +413,14 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) err = nfp_fl_set_ip6(action, idx, offset, &set_ip6_dst, &set_ip6_src); break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + err = nfp_fl_set_tport(action, idx, offset, &set_tport, + NFP_FL_ACTION_OPCODE_SET_TCP); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + err = nfp_fl_set_tport(action, idx, offset, &set_tport, + NFP_FL_ACTION_OPCODE_SET_UDP); + break; default: return -EOPNOTSUPP; } @@ -418,6 +456,10 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; + } else if (set_tport.a_op) { + act_size = sizeof(set_tport); + memcpy(nfp_action, &set_tport, act_size); + *a_len += act_size; } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 527914e294d7..f7b7242a22bc 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -81,6 +81,8 @@ #define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 #define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC 11 #define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12 +#define NFP_FL_ACTION_OPCODE_SET_UDP 14 +#define NFP_FL_ACTION_OPCODE_SET_TCP 15 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 #define NFP_FL_ACTION_OPCODE_NUM 32 @@ -136,6 +138,13 @@ struct nfp_fl_set_ipv6_addr { } ipv6[4]; }; +struct nfp_fl_set_tport { + __be16 a_op; + __be16 reserved; + u8 tp_port_mask[4]; + u8 tp_port_val[4]; +}; + struct nfp_fl_output { __be16 a_op; __be16 flags; From f4ce0a0116bc90803adac10865f14429313cb2b6 Mon Sep 17 00:00:00 2001 From: Jesper 
Dangaard Brouer Date: Fri, 6 Oct 2017 10:41:41 +0200 Subject: [PATCH 0623/2177] samples/bpf: xdp_monitor first 8 bytes are not accessible by bpf The first 8 bytes of the tracepoint context struct are not accessible by the bpf code. This is a choice that dates back to the original inclusion of this code. See explaination in: commit 98b5c2c65c29 ("perf, bpf: allow bpf programs attach to tracepoints") Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/xdp_monitor_kern.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index 74f3fd8ed729..cc7e19d2ad76 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -17,19 +17,15 @@ struct bpf_map_def SEC("maps") redirect_err_cnt = { * Code in: kernel/include/trace/events/xdp.h */ struct xdp_redirect_ctx { - unsigned short common_type; // offset:0; size:2; signed:0; - unsigned char common_flags; // offset:2; size:1; signed:0; - unsigned char common_preempt_count;// offset:3; size:1; signed:0; - int common_pid; // offset:4; size:4; signed:1; - - int prog_id; // offset:8; size:4; signed:1; - u32 act; // offset:12 size:4; signed:0; - int ifindex; // offset:16 size:4; signed:1; - int err; // offset:20 size:4; signed:1; - int to_ifindex; // offset:24 size:4; signed:1; - u32 map_id; // offset:28 size:4; signed:0; - int map_index; // offset:32 size:4; signed:1; -}; // offset:36 + u64 __pad; // First 8 bytes are not accessible by bpf code + int prog_id; // offset:8; size:4; signed:1; + u32 act; // offset:12 size:4; signed:0; + int ifindex; // offset:16 size:4; signed:1; + int err; // offset:20 size:4; signed:1; + int to_ifindex; // offset:24 size:4; signed:1; + u32 map_id; // offset:28 size:4; signed:0; + int map_index; // offset:32 size:4; signed:1; +}; // offset:36 enum { XDP_REDIRECT_SUCCESS = 0, From 280b058d4801cb431477dc101a776e4b24995f2f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 6 Oct 2017 10:41:46 +0200 Subject: [PATCH 0624/2177] samples/bpf: xdp_monitor also record xdp_exception tracepoint Also monitor the tracepoint xdp_exception. This tracepoint is usually invoked by the drivers. Programs themselves can activate this by returning XDP_ABORTED, which will drop the packet but also trigger the tracepoint. This is useful for distinguishing intentional (XDP_DROP) vs. ebpf-program error cases that cased a drop (XDP_ABORTED). Drivers also use this tracepoint for reporting on XDP actions that are unknown to the specific driver. This can help the user to detect if a driver e.g. doesn't implement XDP_REDIRECT yet. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. 
Miller --- samples/bpf/xdp_monitor_kern.c | 38 +++++++++++- samples/bpf/xdp_monitor_user.c | 110 +++++++++++++++++++++++++-------- 2 files changed, 122 insertions(+), 26 deletions(-) diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index cc7e19d2ad76..2fe2f761a0d0 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -13,6 +13,14 @@ struct bpf_map_def SEC("maps") redirect_err_cnt = { /* TODO: have entries for all possible errno's */ }; +#define XDP_UNKNOWN XDP_REDIRECT + 1 +struct bpf_map_def SEC("maps") exception_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = XDP_UNKNOWN + 1, +}; + /* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format * Code in: kernel/include/trace/events/xdp.h */ @@ -44,7 +52,7 @@ int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx) cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key); if (!cnt) - return 0; + return 1; *cnt += 1; return 0; /* Indicate event was filtered (no further processing)*/ @@ -82,3 +90,31 @@ int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx) { return xdp_redirect_collect_stat(ctx); } + +/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct xdp_exception_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int prog_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int ifindex; // offset:16; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_exception") +int trace_xdp_exception(struct xdp_exception_ctx *ctx) +{ + u64 *cnt;; + u32 key; + + key = ctx->act; + if (key > XDP_REDIRECT) + key = XDP_UNKNOWN; + + cnt = bpf_map_lookup_elem(&exception_cnt, &key); + if (!cnt) + return 1; + *cnt += 1; + + return 0; +} diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index c5ab8b776973..97c3456c11b2 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -89,6 +89,23 @@ static const char *err2str(int err) return redir_names[err]; return NULL; } +/* enum xdp_action */ +#define XDP_UNKNOWN XDP_REDIRECT + 1 +#define XDP_ACTION_MAX (XDP_UNKNOWN + 1) +static const char *xdp_action_names[XDP_ACTION_MAX] = { + [XDP_ABORTED] = "XDP_ABORTED", + [XDP_DROP] = "XDP_DROP", + [XDP_PASS] = "XDP_PASS", + [XDP_TX] = "XDP_TX", + [XDP_REDIRECT] = "XDP_REDIRECT", + [XDP_UNKNOWN] = "XDP_UNKNOWN", +}; +static const char *action2str(int action) +{ + if (action < XDP_ACTION_MAX) + return xdp_action_names[action]; + return NULL; +} struct record { __u64 counter; @@ -97,6 +114,7 @@ struct record { struct stats_record { struct record xdp_redir[REDIR_RES_MAX]; + struct record xdp_exception[XDP_ACTION_MAX]; }; static void stats_print_headers(bool err_only) @@ -104,39 +122,72 @@ static void stats_print_headers(bool err_only) if (err_only) printf("\n%s\n", __doc_err_only__); - printf("%-14s %-10s %-18s %-9s\n", - "XDP_REDIRECT", "pps ", "pps-human-readable", "measure-period"); + printf("%-14s %-11s %-10s %-18s %-9s\n", + "ACTION", "result", "pps ", "pps-human-readable", "measure-period"); +} + +static double calc_period(struct record *r, struct record *p) +{ + double period_ = 0; + __u64 period = 0; + + period = r->timestamp - p->timestamp; + if (period > 0) + period_ = ((double) period / NANOSEC_PER_SEC); + + return period_; +} + +static double calc_pps(struct record *r, struct record *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + 
if (period > 0) { + packets = r->counter - p->counter; + pps = packets / period; + } + return pps; } static void stats_print(struct stats_record *rec, struct stats_record *prev, bool err_only) { + double period = 0, pps = 0; + struct record *r, *p; int i = 0; + char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n"; + + /* tracepoint: xdp:xdp_redirect_* */ if (err_only) i = REDIR_ERROR; for (; i < REDIR_RES_MAX; i++) { - struct record *r = &rec->xdp_redir[i]; - struct record *p = &prev->xdp_redir[i]; - __u64 period = 0; - __u64 packets = 0; - double pps = 0; - double period_ = 0; + r = &rec->xdp_redir[i]; + p = &prev->xdp_redir[i]; if (p->timestamp) { - packets = r->counter - p->counter; - period = r->timestamp - p->timestamp; - if (period > 0) { - period_ = ((double) period / NANOSEC_PER_SEC); - pps = packets / period_; - } + period = calc_period(r, p); + pps = calc_pps(r, p, period); } - - printf("%-14s %-10.0f %'-18.0f %f\n", - err2str(i), pps, pps, period_); + printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period); } + + /* tracepoint: xdp:xdp_exception */ + for (i = 0; i < XDP_ACTION_MAX; i++) { + r = &rec->xdp_exception[i]; + p = &prev->xdp_exception[i]; + if (p->timestamp) { + period = calc_period(r, p); + pps = calc_pps(r, p, period); + } + if (pps > 0) + printf(fmt, action2str(i), "Exception", + pps, pps, period); + } + printf("\n"); } static __u64 get_key32_value64_percpu(int fd, __u32 key) @@ -160,25 +211,33 @@ static __u64 get_key32_value64_percpu(int fd, __u32 key) return sum; } -static bool stats_collect(int fd, struct stats_record *rec) +static bool stats_collect(struct stats_record *rec) { + int fd; int i; /* TODO: Detect if someone unloaded the perf event_fd's, as * this can happen by someone running perf-record -e */ + fd = map_data[0].fd; /* map0: redirect_err_cnt */ for (i = 0; i < REDIR_RES_MAX; i++) { rec->xdp_redir[i].timestamp = gettime(); rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i); } + + fd = map_data[1].fd; /* map1: exception_cnt */ + for (i = 0; i < XDP_ACTION_MAX; i++) { + rec->xdp_exception[i].timestamp = gettime(); + rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i); + } + return true; } static void stats_poll(int interval, bool err_only) { struct stats_record rec, prev; - int map_fd; memset(&rec, 0, sizeof(rec)); @@ -190,16 +249,17 @@ static void stats_poll(int interval, bool err_only) printf("\n%s", __doc__); /* TODO Need more advanced stats on error types */ - if (verbose) - printf(" - Stats map: %s\n", map_data[0].name); - map_fd = map_data[0].fd; - - stats_print_headers(err_only); + if (verbose) { + printf(" - Stats map0: %s\n", map_data[0].name); + printf(" - Stats map1: %s\n", map_data[1].name); + printf("\n"); + } fflush(stdout); while (1) { memcpy(&prev, &rec, sizeof(rec)); - stats_collect(map_fd, &rec); + stats_collect(&rec); + stats_print_headers(err_only); stats_print(&rec, &prev, err_only); fflush(stdout); sleep(interval); From c4eb7f4643ce3741e63b7d3f9b70bb7a637e738a Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 6 Oct 2017 10:41:51 +0200 Subject: [PATCH 0625/2177] samples/bpf: xdp_monitor increase memory rlimit Other concurrent running programs, like perf or the XDP program what needed to be monitored, might take up part of the max locked memory limit. Thus, the xdp_monitor tool have to set the RLIMIT_MEMLOCK to RLIM_INFINITY, as it cannot determine a more sane limit. Using the man exit(3) specified EXIT_FAILURE return exit code, and correct other users too. 
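For illustration, the user-space pattern this amounts to is roughly the following condensed sketch (not a verbatim excerpt from the tool; the real change is in the diff below):

  #include <sys/resource.h>
  #include <stdio.h>
  #include <stdlib.h>

  int main(void)
  {
          /* BPF maps are charged against RLIMIT_MEMLOCK, so lift the
           * limit before loading the object; otherwise map creation can
           * fail once other users have consumed the default budget.
           */
          struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };

          if (setrlimit(RLIMIT_MEMLOCK, &r)) {
                  perror("setrlimit(RLIMIT_MEMLOCK)");
                  return EXIT_FAILURE;
          }

          /* ... load the BPF object and attach tracepoints here ... */
          return EXIT_SUCCESS;
  }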
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- samples/bpf/xdp_monitor_user.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index 97c3456c11b2..eaba165b3549 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -20,6 +20,7 @@ static const char *__doc_err_only__= #include #include +#include #include #include #include @@ -295,6 +296,7 @@ static void print_bpf_prog_info(void) int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int longindex = 0, opt; int ret = EXIT_SUCCESS; char bpf_obj_file[256]; @@ -325,13 +327,18 @@ int main(int argc, char **argv) } } + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return EXIT_FAILURE; + } + if (load_bpf_file(bpf_obj_file)) { printf("ERROR - bpf_log_buf: %s", bpf_log_buf); - return 1; + return EXIT_FAILURE; } if (!prog_fd[0]) { printf("ERROR - load_bpf_file: %s\n", strerror(errno)); - return 1; + return EXIT_FAILURE; } if (debug) { From d2746fe5380e9af79807994756672baaf42cb130 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 6 Oct 2017 06:00:30 -0500 Subject: [PATCH 0626/2177] bnx2x: Use pci_ari_enabled() instead of local copy Use pci_ari_enabled() from the PCI core instead of the identical local copy bnx2x_ari_enabled(). No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 9ca994d0bab6..3591077a5f6b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1074,11 +1074,6 @@ static void bnx2x_vf_set_bars(struct bnx2x *bp, struct bnx2x_virtf *vf) } } -static int bnx2x_ari_enabled(struct pci_dev *dev) -{ - return dev->bus->self && dev->bus->self->ari_enabled; -} - static int bnx2x_get_vf_igu_cam_info(struct bnx2x *bp) { @@ -1212,7 +1207,7 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, err = -EIO; /* verify ari is enabled */ - if (!bnx2x_ari_enabled(bp->pdev)) { + if (!pci_ari_enabled(bp->pdev->bus)) { BNX2X_ERR("ARI not supported (check pci bridge ARI forwarding), SRIOV can not be enabled\n"); return 0; } From 6e518111060c2290427d79c43d4add9600ad852b Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 5 Sep 2017 12:26:03 +0200 Subject: [PATCH 0627/2177] Bluetooth: btqcomsmd: Add support for BD address setup This patch implements the hdev setup function since wcnss-bt does not have persistent memory to store an allocated BD address. The device is therefore marked as unconfigured if no BD address has been previously retrieved. 
Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org --- drivers/bluetooth/btqcomsmd.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c index d00c4fdae924..bd810d01538a 100644 --- a/drivers/bluetooth/btqcomsmd.c +++ b/drivers/bluetooth/btqcomsmd.c @@ -26,6 +26,7 @@ struct btqcomsmd { struct hci_dev *hdev; + bdaddr_t bdaddr; struct rpmsg_endpoint *acl_channel; struct rpmsg_endpoint *cmd_channel; }; @@ -100,6 +101,38 @@ static int btqcomsmd_close(struct hci_dev *hdev) return 0; } +static int btqcomsmd_setup(struct hci_dev *hdev) +{ + struct btqcomsmd *btq = hci_get_drvdata(hdev); + struct sk_buff *skb; + int err; + + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) + return PTR_ERR(skb); + kfree_skb(skb); + + /* Devices do not have persistent storage for BD address. If no + * BD address has been retrieved during probe, mark the device + * as having an invalid BD address. + */ + if (!bacmp(&btq->bdaddr, BDADDR_ANY)) { + set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + return 0; + } + + /* When setting a configured BD address fails, mark the device + * as having an invalid BD address. + */ + err = qca_set_bdaddr_rome(hdev, &btq->bdaddr); + if (err) { + set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + return 0; + } + + return 0; +} + static int btqcomsmd_probe(struct platform_device *pdev) { struct btqcomsmd *btq; @@ -135,6 +168,7 @@ static int btqcomsmd_probe(struct platform_device *pdev) hdev->open = btqcomsmd_open; hdev->close = btqcomsmd_close; hdev->send = btqcomsmd_send; + hdev->setup = btqcomsmd_setup; hdev->set_bdaddr = qca_set_bdaddr_rome; ret = hci_register_dev(hdev); From 28517c02e1dd8fef7a0eab5e9f18013a0e297aba Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 8 Sep 2017 15:57:53 +0200 Subject: [PATCH 0628/2177] dt-bindings: net: document Bluetooth bindings in one place In the same way as Ethernet, gather the Bluetooth related bindings in one file. Introduce the bluetooth-bd-address property which can be used to store the assigned BD address. Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- Documentation/devicetree/bindings/net/bluetooth.txt | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/bluetooth.txt diff --git a/Documentation/devicetree/bindings/net/bluetooth.txt b/Documentation/devicetree/bindings/net/bluetooth.txt new file mode 100644 index 000000000000..94797df751b8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/bluetooth.txt @@ -0,0 +1,5 @@ +The following properties are common to the Bluetooth controllers: + +- local-bd-address: array of 6 bytes, specifies the BD address that was + uniquely assigned to the Bluetooth device, formatted with least significant + byte first (little-endian). From e7868a2f71bd9f81494f0c90a64e4dd454248850 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 8 Sep 2017 15:57:54 +0200 Subject: [PATCH 0629/2177] dt-bindings: soc: qcom: Add local-bd-address property to WCNSS-BT Add optional local-bd-address property which is a 6-byte array storing the assigned BD address. Since having a unique BD address is critical, a per-device property value should be allocated. This property is usually added by the boot loader which has access to the provisioned data. 
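To illustrate the byte order only (this snippet is not part of the patch): a controller provisioned with BD address 00:11:22:33:44:55 is described as local-bd-address = [ 55 44 33 22 11 00 ], least significant byte first, which lines up with the in-kernel bdaddr_t layout:

  /* Illustration: 00:11:22:33:44:55 stored LSB first, so the property
   * bytes can be copied straight into a bdaddr_t.
   */
  bdaddr_t addr = { .b = { 0x55, 0x44, 0x33, 0x22, 0x11, 0x00 } };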
Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt index 4ea39e9186a7..042a2e4159bd 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt @@ -37,6 +37,11 @@ The following properties are defined to the bluetooth node: Definition: must be: "qcom,wcnss-bt" +- local-bd-address: + Usage: optional + Value type: + Definition: see Documentation/devicetree/bindings/net/bluetooth.txt + == WiFi The following properties are defined to the WiFi node: @@ -91,6 +96,9 @@ smd { bt { compatible = "qcom,wcnss-bt"; + + /* BD address 00:11:22:33:44:55 */ + local-bd-address = [ 55 44 33 22 11 00 ]; }; wlan { From 766154b7d47b092605171df8930b864efc8ef5c8 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 8 Sep 2017 15:57:55 +0200 Subject: [PATCH 0630/2177] Bluetooth: btqcomsmd: retrieve BD address from DT property Retrieve BD address from the local-bd-address property. This address must be unique and is usually added in the DT by the bootloader which has access to the provisioned data. Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqcomsmd.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c index bd810d01538a..663bed63b871 100644 --- a/drivers/bluetooth/btqcomsmd.c +++ b/drivers/bluetooth/btqcomsmd.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include @@ -156,6 +158,15 @@ static int btqcomsmd_probe(struct platform_device *pdev) if (IS_ERR(btq->cmd_channel)) return PTR_ERR(btq->cmd_channel); + /* The local-bd-address property is usually injected by the + * bootloader which has access to the allocated BD address. + */ + if (!of_property_read_u8_array(pdev->dev.of_node, "local-bd-address", + (u8 *)&btq->bdaddr, sizeof(bdaddr_t))) { + dev_info(&pdev->dev, "BD address %pMR retrieved from device-tree", + &btq->bdaddr); + } + hdev = hci_alloc_dev(); if (!hdev) return -ENOMEM; From 753f5d91d35b4d709505b73e640e64448857be37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Sep 2017 12:16:24 +0200 Subject: [PATCH 0631/2177] ieee802154: fix gcc-4.9 warnings All older compiler versions up to gcc-4.9 produce these harmless warnings: drivers/net/ieee802154/ca8210.c: In function 'ca8210_skb_tx': drivers/net/ieee802154/ca8210.c:1947:9: warning: missing braces around initializer [-Wmissing-braces] This changes the syntax to something that works on all versions without warnings. 
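A reduced example, not taken from the driver, of what triggers the warning: when the first member of the struct is itself an aggregate, gcc <= 4.9 reads = { 0 } as initializing that member without its own braces, while an empty initializer list zero-initializes the whole object without complaint:

  struct inner { int a, b; };
  struct outer { struct inner in; int c; };

  struct outer noisy = { 0 };     /* -Wmissing-braces on gcc <= 4.9 */
  struct outer quiet = { };       /* no warning, still all zeroes   */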
Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: Arnd Bergmann Acked-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/ca8210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 24a1eabbbc9d..e6b8ce81a6c3 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -1944,7 +1944,7 @@ static int ca8210_skb_tx( ) { int status; - struct ieee802154_hdr header = { 0 }; + struct ieee802154_hdr header = { }; struct secspec secspec; unsigned int mac_len; From 24a3a32a99ca9dcd8b2cdbaf5c99e6d99878cdb0 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 25 Sep 2017 13:07:39 +0530 Subject: [PATCH 0632/2177] Bluetooth: btmrvl: *_err() and *_info() strings should end with newlines pr_err(), dev_err() and pr_info() messages should terminated with a new-line to avoid other messages being concatenated onto the end. Signed-off-by: Arvind Yadav Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btmrvl_sdio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index 03341ce98c32..7dbb4463b539 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -64,7 +64,7 @@ static irqreturn_t btmrvl_wake_irq_bt(int irq, void *priv) struct btmrvl_sdio_card *card = priv; struct btmrvl_plt_wake_cfg *cfg = card->plt_wake_cfg; - pr_info("%s: wake by bt", __func__); + pr_info("%s: wake by bt\n", __func__); cfg->wake_by_bt = true; disable_irq_nosync(irq); @@ -87,7 +87,7 @@ static int btmrvl_sdio_probe_of(struct device *dev, if (!dev->of_node || !of_match_node(btmrvl_sdio_of_match_table, dev->of_node)) { - pr_err("sdio platform data not available"); + pr_err("sdio platform data not available\n"); return -1; } @@ -99,7 +99,7 @@ static int btmrvl_sdio_probe_of(struct device *dev, if (cfg && card->plt_of_node) { cfg->irq_bt = irq_of_parse_and_map(card->plt_of_node, 0); if (!cfg->irq_bt) { - dev_err(dev, "fail to parse irq_bt from device tree"); + dev_err(dev, "fail to parse irq_bt from device tree\n"); cfg->irq_bt = -1; } else { ret = devm_request_irq(dev, cfg->irq_bt, From 47eb2ac809189e0a60ad78eec6db9e84004e11be Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 28 Sep 2017 17:14:51 +0300 Subject: [PATCH 0633/2177] Bluetooth: move ecdh allocation outside of ecdh_helper Before this change, a new crypto tfm was allocated, each time, for both key generation and shared secret computation. Allocate a single tfm for both cases. 
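The resulting calling convention, condensed into a sketch (helper return values and error handling trimmed; the exact call sites are in the diff below):

  static int ecdh_single_tfm_sketch(const u8 remote_pk[64], u8 secret[32])
  {
          u8 public_key[64], private_key[32];
          struct crypto_kpp *tfm;

          /* Allocate the ECDH transform once per context ... */
          tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
          if (IS_ERR(tfm))
                  return PTR_ERR(tfm);

          /* ... reuse it for both key generation and the shared secret ... */
          get_random_bytes(private_key, 32);
          generate_ecdh_keys(tfm, public_key, private_key);
          compute_ecdh_secret(tfm, remote_pk, private_key, secret);

          /* ... and free it together with the owning context. */
          crypto_free_kpp(tfm);
          return 0;
  }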
Signed-off-by: Tudor Ambarus Signed-off-by: Marcel Holtmann --- net/bluetooth/ecdh_helper.c | 32 ++++------------- net/bluetooth/ecdh_helper.h | 8 +++-- net/bluetooth/selftest.c | 29 +++++++++++----- net/bluetooth/smp.c | 68 ++++++++++++++++++++++++++++++------- 4 files changed, 87 insertions(+), 50 deletions(-) diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c index c7b1a9aee579..ac2c7087921d 100644 --- a/net/bluetooth/ecdh_helper.c +++ b/net/bluetooth/ecdh_helper.c @@ -23,7 +23,6 @@ #include "ecdh_helper.h" #include -#include #include struct ecdh_completion { @@ -50,10 +49,9 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) out[i] = __swab64(in[ndigits - 1 - i]); } -bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32], - u8 secret[32]) +bool compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], + const u8 private_key[32], u8 secret[32]) { - struct crypto_kpp *tfm; struct kpp_request *req; struct ecdh p; struct ecdh_completion result; @@ -66,16 +64,9 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32], if (!tmp) return false; - tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); - if (IS_ERR(tfm)) { - pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n", - PTR_ERR(tfm)); - goto free_tmp; - } - req = kpp_request_alloc(tfm, GFP_KERNEL); if (!req) - goto free_kpp; + goto free_tmp; init_completion(&result.completion); @@ -126,16 +117,14 @@ free_all: kzfree(buf); free_req: kpp_request_free(req); -free_kpp: - crypto_free_kpp(tfm); free_tmp: kfree(tmp); return (err == 0); } -bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]) +bool generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], + u8 private_key[32]) { - struct crypto_kpp *tfm; struct kpp_request *req; struct ecdh p; struct ecdh_completion result; @@ -150,16 +139,9 @@ bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]) if (!tmp) return false; - tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); - if (IS_ERR(tfm)) { - pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n", - PTR_ERR(tfm)); - goto free_tmp; - } - req = kpp_request_alloc(tfm, GFP_KERNEL); if (!req) - goto free_kpp; + goto free_tmp; init_completion(&result.completion); @@ -218,8 +200,6 @@ free_all: kzfree(buf); free_req: kpp_request_free(req); -free_kpp: - crypto_free_kpp(tfm); free_tmp: kfree(tmp); return (err == 0); diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h index 7a423faf76e5..5cde37d12fd9 100644 --- a/net/bluetooth/ecdh_helper.h +++ b/net/bluetooth/ecdh_helper.h @@ -20,8 +20,10 @@ * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS * SOFTWARE IS DISCLAIMED. 
*/ +#include #include -bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32], - u8 secret[32]); -bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]); +bool compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pub_a[64], + const u8 priv_b[32], u8 secret[32]); +bool generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], + u8 private_key[32]); diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index 34a1227f4391..126bdc5a77a7 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -138,9 +138,9 @@ static const u8 dhkey_3[32] __initconst = { 0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70, }; -static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], - const u8 pub_a[64], const u8 pub_b[64], - const u8 dhkey[32]) +static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32], + const u8 priv_b[32], const u8 pub_a[64], + const u8 pub_b[64], const u8 dhkey[32]) { u8 *tmp, *dhkey_a, *dhkey_b; int ret = 0; @@ -152,8 +152,8 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], dhkey_a = &tmp[0]; dhkey_b = &tmp[32]; - compute_ecdh_secret(pub_b, priv_a, dhkey_a); - compute_ecdh_secret(pub_a, priv_b, dhkey_b); + compute_ecdh_secret(tfm, pub_b, priv_a, dhkey_a); + compute_ecdh_secret(tfm, pub_a, priv_b, dhkey_b); if (memcmp(dhkey_a, dhkey, 32)) { ret = -EINVAL; @@ -185,30 +185,43 @@ static const struct file_operations test_ecdh_fops = { static int __init test_ecdh(void) { + struct crypto_kpp *tfm; ktime_t calltime, delta, rettime; unsigned long long duration; int err; calltime = ktime_get(); - err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); + tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(tfm)) { + BT_ERR("Unable to create ECDH crypto context"); + err = PTR_ERR(tfm); + goto done; + } + + err = test_ecdh_sample(tfm, priv_a_1, priv_b_1, pub_a_1, pub_b_1, + dhkey_1); if (err) { BT_ERR("ECDH sample 1 failed"); goto done; } - err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2); + err = test_ecdh_sample(tfm, priv_a_2, priv_b_2, pub_a_2, pub_b_2, + dhkey_2); if (err) { BT_ERR("ECDH sample 2 failed"); goto done; } - err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3); + err = test_ecdh_sample(tfm, priv_a_3, priv_a_3, pub_a_3, pub_a_3, + dhkey_3); if (err) { BT_ERR("ECDH sample 3 failed"); goto done; } + crypto_free_kpp(tfm); + rettime = ktime_get(); delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index a0ef89772c36..31b64bc3cf16 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -92,6 +93,7 @@ struct smp_dev { struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; + struct crypto_kpp *tfm_ecdh; }; struct smp_chan { @@ -131,6 +133,7 @@ struct smp_chan { struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; + struct crypto_kpp *tfm_ecdh; }; /* These debug key values are defined in the SMP section of the core @@ -574,7 +577,8 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) get_random_bytes(smp->local_sk, 32); /* Generate local key pair for Secure Connections */ - if (!generate_ecdh_keys(smp->local_pk, smp->local_sk)) + if (!generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk, + smp->local_sk)) return -EIO; /* This is unlikely, but we need to check that @@ -771,6 +775,7 @@ static void 
smp_chan_destroy(struct l2cap_conn *conn) crypto_free_cipher(smp->tfm_aes); crypto_free_shash(smp->tfm_cmac); + crypto_free_kpp(smp->tfm_ecdh); /* Ensure that we don't leave any debug key around if debug key * support hasn't been explicitly enabled. @@ -1391,16 +1396,19 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(smp->tfm_aes)) { BT_ERR("Unable to create AES crypto context"); - kzfree(smp); - return NULL; + goto zfree_smp; } smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0); if (IS_ERR(smp->tfm_cmac)) { BT_ERR("Unable to create CMAC crypto context"); - crypto_free_cipher(smp->tfm_aes); - kzfree(smp); - return NULL; + goto free_cipher; + } + + smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(smp->tfm_ecdh)) { + BT_ERR("Unable to create ECDH crypto context"); + goto free_shash; } smp->conn = conn; @@ -1413,6 +1421,14 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) hci_conn_hold(conn->hcon); return smp; + +free_shash: + crypto_free_shash(smp->tfm_cmac); +free_cipher: + crypto_free_cipher(smp->tfm_aes); +zfree_smp: + kzfree(smp); + return NULL; } static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16]) @@ -1903,7 +1919,8 @@ static u8 sc_send_public_key(struct smp_chan *smp) get_random_bytes(smp->local_sk, 32); /* Generate local key pair for Secure Connections */ - if (!generate_ecdh_keys(smp->local_pk, smp->local_sk)) + if (!generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk, + smp->local_sk)) return SMP_UNSPECIFIED; /* This is unlikely, but we need to check that @@ -2677,7 +2694,8 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk); SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32); - if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey)) + if (!compute_ecdh_secret(smp->tfm_ecdh, smp->remote_pk, smp->local_sk, + smp->dhkey)) return SMP_UNSPECIFIED; SMP_DBG("DHKey %32phN", smp->dhkey); @@ -3169,6 +3187,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) struct smp_dev *smp; struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; + struct crypto_kpp *tfm_ecdh; if (cid == L2CAP_CID_SMP_BREDR) { smp = NULL; @@ -3194,8 +3213,18 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) return ERR_CAST(tfm_cmac); } + tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(tfm_ecdh)) { + BT_ERR("Unable to create ECDH crypto context"); + crypto_free_shash(tfm_cmac); + crypto_free_cipher(tfm_aes); + kzfree(smp); + return ERR_CAST(tfm_ecdh); + } + smp->tfm_aes = tfm_aes; smp->tfm_cmac = tfm_cmac; + smp->tfm_ecdh = tfm_ecdh; smp->min_key_size = SMP_MIN_ENC_KEY_SIZE; smp->max_key_size = SMP_MAX_ENC_KEY_SIZE; @@ -3205,6 +3234,7 @@ create_chan: if (smp) { crypto_free_cipher(smp->tfm_aes); crypto_free_shash(smp->tfm_cmac); + crypto_free_kpp(smp->tfm_ecdh); kzfree(smp); } return ERR_PTR(-ENOMEM); @@ -3252,6 +3282,7 @@ static void smp_del_chan(struct l2cap_chan *chan) chan->data = NULL; crypto_free_cipher(smp->tfm_aes); crypto_free_shash(smp->tfm_cmac); + crypto_free_kpp(smp->tfm_ecdh); kzfree(smp); } @@ -3498,13 +3529,13 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) out[i] = __swab64(in[ndigits - 1 - i]); } -static int __init test_debug_key(void) +static int __init test_debug_key(struct crypto_kpp *tfm_ecdh) { u8 pk[64], sk[32]; swap_digits((u64 
*)debug_sk, (u64 *)sk, 4); - if (!generate_ecdh_keys(pk, sk)) + if (!generate_ecdh_keys(tfm_ecdh, pk, sk)) return -EINVAL; if (crypto_memneq(sk, debug_sk, 32)) @@ -3763,7 +3794,8 @@ static const struct file_operations test_smp_fops = { }; static int __init run_selftests(struct crypto_cipher *tfm_aes, - struct crypto_shash *tfm_cmac) + struct crypto_shash *tfm_cmac, + struct crypto_kpp *tfm_ecdh) { ktime_t calltime, delta, rettime; unsigned long long duration; @@ -3771,7 +3803,7 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes, calltime = ktime_get(); - err = test_debug_key(); + err = test_debug_key(tfm_ecdh); if (err) { BT_ERR("debug_key test failed"); goto done; } @@ -3848,6 +3880,7 @@ int __init bt_selftest_smp(void) { struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; + struct crypto_kpp *tfm_ecdh; int err; tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); @@ -3863,10 +3896,19 @@ int __init bt_selftest_smp(void) return PTR_ERR(tfm_cmac); } - err = run_selftests(tfm_aes, tfm_cmac); + tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(tfm_ecdh)) { + BT_ERR("Unable to create ECDH crypto context"); + crypto_free_shash(tfm_cmac); + crypto_free_cipher(tfm_aes); + return PTR_ERR(tfm_ecdh); + } + + err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh); crypto_free_shash(tfm_cmac); crypto_free_cipher(tfm_aes); + crypto_free_kpp(tfm_ecdh); return err; } From a297641610963bbb238ea77b32a2e958c4f7b184 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 28 Sep 2017 17:14:52 +0300 Subject: [PATCH 0634/2177] Bluetooth: ecdh_helper - reveal error codes ecdh_helper functions were hiding the error codes and chose to return the return value of a relational operator, "==". Remove the unnecessary comparison and reveal the error codes. While updating the return values, write the code in a way that compilers will warn in case of uninitialized err.
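For illustration, a minimal caller sketch of the int-returning helpers; the wrapper name derive_ecdh_secret and its placement are hypothetical, while the tfm handling mirrors the selftest code above:

#include <linux/err.h>
#include <crypto/kpp.h>
#include "ecdh_helper.h"

/* Hypothetical wrapper: allocate an ECDH KPP tfm, derive the shared secret
 * and hand the error code (0 or -errno) straight back to the caller instead
 * of collapsing it into a bool.
 */
static int derive_ecdh_secret(const u8 pub[64], const u8 priv[32], u8 secret[32])
{
	struct crypto_kpp *tfm;
	int err;

	tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = compute_ecdh_secret(tfm, pub, priv, secret);

	crypto_free_kpp(tfm);
	return err;
}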
Signed-off-by: Tudor Ambarus Signed-off-by: Marcel Holtmann --- net/bluetooth/ecdh_helper.c | 32 +++++++++++++++++++------------- net/bluetooth/ecdh_helper.h | 8 ++++---- net/bluetooth/smp.c | 17 ++++++++++------- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c index ac2c7087921d..22c8daa0b451 100644 --- a/net/bluetooth/ecdh_helper.c +++ b/net/bluetooth/ecdh_helper.c @@ -49,8 +49,8 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) out[i] = __swab64(in[ndigits - 1 - i]); } -bool compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], - const u8 private_key[32], u8 secret[32]) +int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], + const u8 private_key[32], u8 secret[32]) { struct kpp_request *req; struct ecdh p; @@ -58,15 +58,17 @@ bool compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], struct scatterlist src, dst; u8 *tmp, *buf; unsigned int buf_len; - int err = -ENOMEM; + int err; tmp = kmalloc(64, GFP_KERNEL); if (!tmp) - return false; + return -ENOMEM; req = kpp_request_alloc(tfm, GFP_KERNEL); - if (!req) + if (!req) { + err = -ENOMEM; goto free_tmp; + } init_completion(&result.completion); @@ -80,8 +82,10 @@ bool compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], p.curve_id = ECC_CURVE_NIST_P256; buf_len = crypto_ecdh_key_len(&p); buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) + if (!buf) { + err = -ENOMEM; goto free_req; + } crypto_ecdh_encode_key(buf, buf_len, &p); @@ -119,11 +123,11 @@ free_req: kpp_request_free(req); free_tmp: kfree(tmp); - return (err == 0); + return err; } -bool generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], - u8 private_key[32]) +int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], + u8 private_key[32]) { struct kpp_request *req; struct ecdh p; @@ -131,17 +135,19 @@ bool generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], struct scatterlist dst; u8 *tmp, *buf; unsigned int buf_len; - int err = -ENOMEM; + int err; const unsigned short max_tries = 16; unsigned short tries = 0; tmp = kmalloc(64, GFP_KERNEL); if (!tmp) - return false; + return -ENOMEM; req = kpp_request_alloc(tfm, GFP_KERNEL); - if (!req) + if (!req) { + err = -ENOMEM; goto free_tmp; + } init_completion(&result.completion); @@ -202,5 +208,5 @@ free_req: kpp_request_free(req); free_tmp: kfree(tmp); - return (err == 0); + return err; } diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h index 5cde37d12fd9..50e6676aebf5 100644 --- a/net/bluetooth/ecdh_helper.h +++ b/net/bluetooth/ecdh_helper.h @@ -23,7 +23,7 @@ #include #include -bool compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pub_a[64], - const u8 priv_b[32], u8 secret[32]); -bool generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], - u8 private_key[32]); +int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pub_a[64], + const u8 priv_b[32], u8 secret[32]); +int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], + u8 private_key[32]); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 31b64bc3cf16..af7e6100e55b 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -577,9 +577,10 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) get_random_bytes(smp->local_sk, 32); /* Generate local key pair for Secure Connections */ - if (!generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk, - smp->local_sk)) - return -EIO; + err = generate_ecdh_keys(smp->tfm_ecdh, 
smp->local_pk, + smp->local_sk); + if (err) + return err; /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. @@ -1919,8 +1920,8 @@ static u8 sc_send_public_key(struct smp_chan *smp) get_random_bytes(smp->local_sk, 32); /* Generate local key pair for Secure Connections */ - if (!generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk, - smp->local_sk)) + if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk, + smp->local_sk)) return SMP_UNSPECIFIED; /* This is unlikely, but we need to check that @@ -3532,11 +3533,13 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) static int __init test_debug_key(struct crypto_kpp *tfm_ecdh) { u8 pk[64], sk[32]; + int err; swap_digits((u64 *)debug_sk, (u64 *)sk, 4); - if (!generate_ecdh_keys(tfm_ecdh, pk, sk)) - return -EINVAL; + err = generate_ecdh_keys(tfm_ecdh, pk, sk); + if (err) + return err; if (crypto_memneq(sk, debug_sk, 32)) return -EINVAL; From 3814baf3f2473226e479fc1f4f48d01de2ce0a7b Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 28 Sep 2017 17:14:53 +0300 Subject: [PATCH 0635/2177] Bluetooth: selftest - check for errors when computing ZZ Signed-off-by: Tudor Ambarus Signed-off-by: Marcel Holtmann --- net/bluetooth/selftest.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index 126bdc5a77a7..ce99648ed870 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -143,7 +143,7 @@ static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32], const u8 pub_b[64], const u8 dhkey[32]) { u8 *tmp, *dhkey_a, *dhkey_b; - int ret = 0; + int ret; tmp = kmalloc(64, GFP_KERNEL); if (!tmp) @@ -152,8 +152,13 @@ static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32], dhkey_a = &tmp[0]; dhkey_b = &tmp[32]; - compute_ecdh_secret(tfm, pub_b, priv_a, dhkey_a); - compute_ecdh_secret(tfm, pub_a, priv_b, dhkey_b); + ret = compute_ecdh_secret(tfm, pub_b, priv_a, dhkey_a); + if (ret) + goto out; + + ret = compute_ecdh_secret(tfm, pub_a, priv_b, dhkey_b); + if (ret) + goto out; if (memcmp(dhkey_a, dhkey, 32)) { ret = -EINVAL; From 168ed65483a1777c2570f4c0a4a64e20a823cf25 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 28 Sep 2017 17:14:54 +0300 Subject: [PATCH 0636/2177] Bluetooth: ecdh_helper - fix leak of private key tmp buffer contains the swapped private key. In case the setkey call failed, the tmp buffer was freed without clearing the private key. Zeroize the temporary buffer so we don't leak the private key. Signed-off-by: Tudor Ambarus Signed-off-by: Marcel Holtmann --- net/bluetooth/ecdh_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c index 22c8daa0b451..16e022f5ab27 100644 --- a/net/bluetooth/ecdh_helper.c +++ b/net/bluetooth/ecdh_helper.c @@ -122,7 +122,7 @@ free_all: free_req: kpp_request_free(req); free_tmp: - kfree(tmp); + kzfree(tmp); return err; } From c0153b0b901a16663ff91504fea25fb51d57cc29 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 28 Sep 2017 17:14:55 +0300 Subject: [PATCH 0637/2177] Bluetooth: let the crypto subsystem generate the ecc privkey That Bluetooth SMP knows about the private key is pointless, since the detection of debug key usage is actually via the public key portion. 
With this patch, the Bluetooth SMP will stop keeping a copy of the ecdh private key and will let the crypto subsystem generate and handle the ecdh private key, potentially benefiting from hardware ecc private key generation and retention. The loop that tries to generate a correct private key is now removed and we trust the crypto subsystem to generate a correct private key. This backup logic should be done in crypto, if really needed. Signed-off-by: Tudor Ambarus Signed-off-by: Marcel Holtmann --- net/bluetooth/ecdh_helper.c | 188 ++++++++++++++++++++---------------- net/bluetooth/ecdh_helper.h | 9 +- net/bluetooth/selftest.c | 14 ++- net/bluetooth/smp.c | 66 ++++++------- 4 files changed, 148 insertions(+), 129 deletions(-) diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c index 16e022f5ab27..2155ce802877 100644 --- a/net/bluetooth/ecdh_helper.c +++ b/net/bluetooth/ecdh_helper.c @@ -49,15 +49,21 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) out[i] = __swab64(in[ndigits - 1 - i]); } +/* compute_ecdh_secret() - function assumes that the private key was + * already set. + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp(). + * @public_key: pair's ecc public key. + * secret: memory where the ecdh computed shared secret will be saved. + * + * Return: zero on success; error code in case of error. + */ int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], - const u8 private_key[32], u8 secret[32]) + u8 secret[32]) { struct kpp_request *req; - struct ecdh p; + u8 *tmp; struct ecdh_completion result; struct scatterlist src, dst; - u8 *tmp, *buf; - unsigned int buf_len; int err; tmp = kmalloc(64, GFP_KERNEL); @@ -72,28 +78,6 @@ int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], init_completion(&result.completion); - /* Security Manager Protocol holds digits in litte-endian order - * while ECC API expect big-endian data - */ - swap_digits((u64 *)private_key, (u64 *)tmp, 4); - p.key = (char *)tmp; - p.key_size = 32; - /* Set curve_id */ - p.curve_id = ECC_CURVE_NIST_P256; - buf_len = crypto_ecdh_key_len(&p); - buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) { - err = -ENOMEM; - goto free_req; - } - - crypto_ecdh_encode_key(buf, buf_len, &p); - - /* Set A private Key */ - err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len); - if (err) - goto free_all; - swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */ swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */ @@ -118,26 +102,76 @@ int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], memcpy(secret, tmp, 32); free_all: - kzfree(buf); -free_req: kpp_request_free(req); free_tmp: kzfree(tmp); return err; } -int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], - u8 private_key[32]) +/* set_ecdh_privkey() - set or generate ecc private key. + * + * Function generates an ecc private key in the crypto subsystem when receiving + * a NULL private key or sets the received key when not NULL. + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp(). + * @private_key: user's ecc private key. When not NULL, the key is expected + * in little endian format. + * + * Return: zero on success; error code in case of error. 
+ */ +int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32]) { - struct kpp_request *req; - struct ecdh p; - struct ecdh_completion result; - struct scatterlist dst; - u8 *tmp, *buf; + u8 *buf, *tmp = NULL; unsigned int buf_len; int err; - const unsigned short max_tries = 16; - unsigned short tries = 0; + struct ecdh p = {0}; + + p.curve_id = ECC_CURVE_NIST_P256; + + if (private_key) { + tmp = kmalloc(32, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + swap_digits((u64 *)private_key, (u64 *)tmp, 4); + p.key = tmp; + p.key_size = 32; + } + + buf_len = crypto_ecdh_key_len(&p); + buf = kmalloc(buf_len, GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto free_tmp; + } + + err = crypto_ecdh_encode_key(buf, buf_len, &p); + if (err) + goto free_all; + + err = crypto_kpp_set_secret(tfm, buf, buf_len); + /* fall through */ +free_all: + kzfree(buf); +free_tmp: + kzfree(tmp); + return err; +} + +/* generate_ecdh_public_key() - function assumes that the private key was + * already set. + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp(). + * @public_key: memory where the computed ecc public key will be saved. + * + * Return: zero on success; error code in case of error. + */ +int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]) +{ + struct kpp_request *req; + u8 *tmp; + struct ecdh_completion result; + struct scatterlist dst; + int err; tmp = kmalloc(64, GFP_KERNEL); if (!tmp) @@ -150,63 +184,47 @@ int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], } init_completion(&result.completion); + sg_init_one(&dst, tmp, 64); + kpp_request_set_input(req, NULL, 0); + kpp_request_set_output(req, &dst, 64); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + ecdh_complete, &result); - /* Set curve_id */ - p.curve_id = ECC_CURVE_NIST_P256; - p.key_size = 32; - buf_len = crypto_ecdh_key_len(&p); - buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) - goto free_req; + err = crypto_kpp_generate_public_key(req); + if (err == -EINPROGRESS) { + wait_for_completion(&result.completion); + err = result.err; + } + if (err < 0) + goto free_all; - do { - if (tries++ >= max_tries) - goto free_all; - - /* Set private Key */ - p.key = (char *)private_key; - crypto_ecdh_encode_key(buf, buf_len, &p); - err = crypto_kpp_set_secret(tfm, buf, buf_len); - if (err) - goto free_all; - - sg_init_one(&dst, tmp, 64); - kpp_request_set_input(req, NULL, 0); - kpp_request_set_output(req, &dst, 64); - kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - ecdh_complete, &result); - - err = crypto_kpp_generate_public_key(req); - - if (err == -EINPROGRESS) { - wait_for_completion(&result.completion); - err = result.err; - } - - /* Private key is not valid. Regenerate */ - if (err == -EINVAL) - continue; - - if (err < 0) - goto free_all; - else - break; - - } while (true); - - /* Keys are handed back in little endian as expected by Security - * Manager Protocol + /* The public key is handed back in little endian as expected by + * the Security Manager Protocol. */ swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */ swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */ - swap_digits((u64 *)private_key, (u64 *)tmp, 4); - memcpy(private_key, tmp, 32); free_all: - kzfree(buf); -free_req: kpp_request_free(req); free_tmp: kfree(tmp); return err; } + +/* generate_ecdh_keys() - generate ecc key pair. + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp(). + * @public_key: memory where the computed ecc public key will be saved. 
+ * + * Return: zero on success; error code in case of error. + */ +int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]) +{ + int err; + + err = set_ecdh_privkey(tfm, NULL); + if (err) + return err; + + return generate_ecdh_public_key(tfm, public_key); +} diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h index 50e6676aebf5..a6f8d03d4aaf 100644 --- a/net/bluetooth/ecdh_helper.h +++ b/net/bluetooth/ecdh_helper.h @@ -23,7 +23,8 @@ #include #include -int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pub_a[64], - const u8 priv_b[32], u8 secret[32]); -int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64], - u8 private_key[32]); +int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64], + u8 secret[32]); +int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key); +int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]); +int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]); diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index ce99648ed870..2d1519d0affa 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -152,11 +152,11 @@ static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32], dhkey_a = &tmp[0]; dhkey_b = &tmp[32]; - ret = compute_ecdh_secret(tfm, pub_b, priv_a, dhkey_a); + ret = set_ecdh_privkey(tfm, priv_a); if (ret) goto out; - ret = compute_ecdh_secret(tfm, pub_a, priv_b, dhkey_b); + ret = compute_ecdh_secret(tfm, pub_b, dhkey_a); if (ret) goto out; @@ -165,9 +165,17 @@ static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32], goto out; } + ret = set_ecdh_privkey(tfm, priv_b); + if (ret) + goto out; + + ret = compute_ecdh_secret(tfm, pub_a, dhkey_b); + if (ret) + goto out; + if (memcmp(dhkey_b, dhkey, 32)) ret = -EINVAL; - + /* fall through*/ out: kfree(tmp); return ret; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index af7e6100e55b..d41449b9e9d6 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -84,7 +84,6 @@ enum { struct smp_dev { /* Secure Connections OOB data */ u8 local_pk[64]; - u8 local_sk[32]; u8 local_rand[16]; bool debug_key; @@ -126,7 +125,6 @@ struct smp_chan { /* Secure Connections variables */ u8 local_pk[64]; - u8 local_sk[32]; u8 remote_pk[64]; u8 dhkey[32]; u8 mackey[16]; @@ -568,24 +566,22 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { BT_DBG("Using debug keys"); + err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk); + if (err) + return err; memcpy(smp->local_pk, debug_pk, 64); - memcpy(smp->local_sk, debug_sk, 32); smp->debug_key = true; } else { while (true) { - /* Seed private key with random number */ - get_random_bytes(smp->local_sk, 32); - - /* Generate local key pair for Secure Connections */ - err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk, - smp->local_sk); + /* Generate key pair for Secure Connections */ + err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk); if (err) return err; /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. 
*/ - if (crypto_memneq(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_pk, debug_pk, 64)) break; } smp->debug_key = false; @@ -593,7 +589,6 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) SMP_DBG("OOB Public Key X: %32phN", smp->local_pk); SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32); - SMP_DBG("OOB Private Key: %32phN", smp->local_sk); get_random_bytes(smp->local_rand, 16); @@ -1900,7 +1895,6 @@ static u8 sc_send_public_key(struct smp_chan *smp) smp_dev = chan->data; memcpy(smp->local_pk, smp_dev->local_pk, 64); - memcpy(smp->local_sk, smp_dev->local_sk, 32); memcpy(smp->lr, smp_dev->local_rand, 16); if (smp_dev->debug_key) @@ -1911,23 +1905,20 @@ static u8 sc_send_public_key(struct smp_chan *smp) if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { BT_DBG("Using debug keys"); + if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk)) + return SMP_UNSPECIFIED; memcpy(smp->local_pk, debug_pk, 64); - memcpy(smp->local_sk, debug_sk, 32); set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); } else { while (true) { - /* Seed private key with random number */ - get_random_bytes(smp->local_sk, 32); - - /* Generate local key pair for Secure Connections */ - if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk, - smp->local_sk)) + /* Generate key pair for Secure Connections */ + if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk)) return SMP_UNSPECIFIED; /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (crypto_memneq(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_pk, debug_pk, 64)) break; } } @@ -1935,7 +1926,6 @@ static u8 sc_send_public_key(struct smp_chan *smp) done: SMP_DBG("Local Public Key X: %32phN", smp->local_pk); SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32); - SMP_DBG("Local Private Key: %32phN", smp->local_sk); smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk); @@ -2663,6 +2653,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) struct l2cap_chan *chan = conn->smp; struct smp_chan *smp = chan->data; struct hci_dev *hdev = hcon->hdev; + struct crypto_kpp *tfm_ecdh; struct smp_cmd_pairing_confirm cfm; int err; @@ -2695,8 +2686,18 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk); SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32); - if (!compute_ecdh_secret(smp->tfm_ecdh, smp->remote_pk, smp->local_sk, - smp->dhkey)) + /* Compute the shared secret on the same crypto tfm on which the private + * key was set/generated. 
+ */ + if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) { + struct smp_dev *smp_dev = chan->data; + + tfm_ecdh = smp_dev->tfm_ecdh; + } else { + tfm_ecdh = smp->tfm_ecdh; + } + + if (compute_ecdh_secret(tfm_ecdh, smp->remote_pk, smp->dhkey)) return SMP_UNSPECIFIED; SMP_DBG("DHKey %32phN", smp->dhkey); @@ -3522,27 +3523,18 @@ void smp_unregister(struct hci_dev *hdev) #if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) -static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) -{ - int i; - - for (i = 0; i < ndigits; i++) - out[i] = __swab64(in[ndigits - 1 - i]); -} - static int __init test_debug_key(struct crypto_kpp *tfm_ecdh) { - u8 pk[64], sk[32]; + u8 pk[64]; int err; - swap_digits((u64 *)debug_sk, (u64 *)sk, 4); - - err = generate_ecdh_keys(tfm_ecdh, pk, sk); + err = set_ecdh_privkey(tfm_ecdh, debug_sk); if (err) return err; - if (crypto_memneq(sk, debug_sk, 32)) - return -EINVAL; + err = generate_ecdh_public_key(tfm_ecdh, pk); + if (err) + return err; if (crypto_memneq(pk, debug_pk, 64)) return -EINVAL; From 7841d554809b518a22349e7e39b6b63f8a48d0fb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:35 +0200 Subject: [PATCH 0638/2177] Bluetooth: hci_uart_set_flow_control: Fix NULL deref when using serdev Fix a NULL pointer deref (hu->tty) when calling hci_uart_set_flow_control on hci_uart-s using serdev. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_ldisc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index a746627e784e..eec95019f15c 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -298,6 +299,12 @@ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) unsigned int set = 0; unsigned int clear = 0; + if (hu->serdev) { + serdev_device_set_flow_control(hu->serdev, !enable); + serdev_device_set_rts(hu->serdev, !enable); + return; + } + if (enable) { /* Disable hardware flow control */ ktermios = tty->termios; From 227630cccdbb8f8a1b24ac26517b75079c9a69c9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:36 +0200 Subject: [PATCH 0639/2177] Bluetooth: hci_bcm: Fix setting of irq trigger type This commit fixes 2 issues with host-wake irq trigger type handling in hci_bcm: 1) bcm_setup_sleep sets sleep_params.host_wake_active based on bcm_device.irq_polarity, but bcm_request_irq was always requesting IRQF_TRIGGER_RISING as trigger type independent of irq_polarity. This was a problem when the irq is described as a GpioInt rather than an Interrupt in the DSDT as for GpioInt-s the value passed to request_irq is honored. This commit fixes this by requesting the correct trigger type depending on bcm_device.irq_polarity. 2) bcm_device.irq_polarity was used to directly store an ACPI polarity value (ACPI_ACTIVE_*). This is undesirable because hci_bcm is also used with device-tree and checking for something like ACPI_ACTIVE_LOW in a non-ACPI-specific function like bcm_request_irq feels wrong. This commit fixes this by renaming irq_polarity to irq_active_low and changing its type to a bool. 
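Condensed, both halves of the fix key off the same active-low bool: the host-wake IRQ is requested with a trigger that matches the polarity, and bcm_setup_sleep() tells the controller the same polarity via host_wake_active = !irq_active_low. The helper below is hypothetical (not part of the patch) and only sketches that mapping; the real call sites are in the hunks that follow.

#include <linux/interrupt.h>

/* Hypothetical helper: pick the request_irq() trigger type that matches the
 * host-wake line polarity described by ACPI or device-tree.
 */
static unsigned long bcm_host_wake_irqflags(bool irq_active_low)
{
	return irq_active_low ? IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING;
}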
Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index e2540113d0da..73d2d88ddc03 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -68,7 +68,7 @@ struct bcm_device { u32 init_speed; u32 oper_speed; int irq; - u8 irq_polarity; + bool irq_active_low; #ifdef CONFIG_PM struct hci_uart *hu; @@ -213,7 +213,9 @@ static int bcm_request_irq(struct bcm_data *bcm) } err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake, - IRQF_TRIGGER_RISING, "host_wake", bdev); + bdev->irq_active_low ? IRQF_TRIGGER_FALLING : + IRQF_TRIGGER_RISING, + "host_wake", bdev); if (err) goto unlock; @@ -253,7 +255,7 @@ static int bcm_setup_sleep(struct hci_uart *hu) struct sk_buff *skb; struct bcm_set_sleep_mode sleep_params = default_sleep_params; - sleep_params.host_wake_active = !bcm->dev->irq_polarity; + sleep_params.host_wake_active = !bcm->dev->irq_active_low; skb = __hci_cmd_sync(hu->hdev, 0xfc27, sizeof(sleep_params), &sleep_params, HCI_INIT_TIMEOUT); @@ -690,10 +692,8 @@ static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = { }; #ifdef CONFIG_ACPI -static u8 acpi_active_low = ACPI_ACTIVE_LOW; - /* IRQ polarity of some chipsets are not defined correctly in ACPI table. */ -static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { +static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = { { .ident = "Asus T100TA", .matches = { @@ -701,7 +701,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { "ASUSTeK COMPUTER INC."), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"), }, - .driver_data = &acpi_active_low, }, { .ident = "Asus T100CHI", @@ -710,7 +709,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { "ASUSTeK COMPUTER INC."), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"), }, - .driver_data = &acpi_active_low, }, { /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */ .ident = "Lenovo ThinkPad 8", @@ -718,7 +716,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"), }, - .driver_data = &acpi_active_low, }, { } }; @@ -733,13 +730,13 @@ static int bcm_resource(struct acpi_resource *ares, void *data) switch (ares->type) { case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: irq = &ares->data.extended_irq; - dev->irq_polarity = irq->polarity; + dev->irq_active_low = irq->polarity == ACPI_ACTIVE_LOW; break; case ACPI_RESOURCE_TYPE_GPIO: gpio = &ares->data.gpio; if (gpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT) - dev->irq_polarity = gpio->polarity; + dev->irq_active_low = gpio->polarity == ACPI_ACTIVE_LOW; break; case ACPI_RESOURCE_TYPE_SERIAL_BUS: @@ -834,11 +831,11 @@ static int bcm_acpi_probe(struct bcm_device *dev) return ret; acpi_dev_free_resource_list(&resources); - dmi_id = dmi_first_match(bcm_wrong_irq_dmi_table); + dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table); if (dmi_id) { bt_dev_warn(dev, "%s: Overwriting IRQ polarity to active low", dmi_id->ident); - dev->irq_polarity = *(u8 *)dmi_id->driver_data; + dev->irq_active_low = true; } return 0; From 201762e21f308ec23bebe8bc0c4c033afb2879d5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:37 +0200 Subject: [PATCH 0640/2177] Bluetooth: hci_bcm: Move bcm_platform_probe call out of bcm_acpi_probe Since bcm_acpi_probe calls bcm_platform_probe, 
bcm_probe always ends up calling bcm_platform_probe. This commit simplifies things by making bcm_probe always call bcm_platform_probe itself. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 73d2d88ddc03..1a9ce68b9a9b 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -820,10 +820,6 @@ static int bcm_acpi_probe(struct bcm_device *dev) if (ret) return ret; - ret = bcm_platform_probe(dev); - if (ret) - return ret; - /* Retrieve UART ACPI info */ ret = acpi_dev_get_resources(ACPI_COMPANION(&dev->pdev->dev), &resources, bcm_resource, dev); @@ -858,10 +854,13 @@ static int bcm_probe(struct platform_device *pdev) dev->pdev = pdev; - if (has_acpi_companion(&pdev->dev)) + if (has_acpi_companion(&pdev->dev)) { ret = bcm_acpi_probe(dev); - else - ret = bcm_platform_probe(dev); + if (ret) + return ret; + } + + ret = bcm_platform_probe(dev); if (ret) return ret; From 4a56f891efceee88d422af2e99d00c8321c671c1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:38 +0200 Subject: [PATCH 0641/2177] Bluetooth: hci_bcm: Move platform_get_irq call to bcm_probe The ACPI subsys is going to move over to instantiating ACPI enumerated HCIs as serdevs, rather than as platform devices. Most of the code in bcm_platform_probe is actually not platform specific and will work with any struct device passed to it, the one platform specific call in bcm_platform_probe is platform_get_irq. This commit moves the platform_get_irq call to the platform-driver's bcm_probe function; this is a preparation patch for adding (runtime)pm support to the serdev path. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 1a9ce68b9a9b..3cbd7dab112a 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -776,7 +776,6 @@ static int bcm_platform_probe(struct bcm_device *dev) return PTR_ERR(dev->shutdown); /* IRQ can be declared in ACPI table as Interrupt or GpioInt */ - dev->irq = platform_get_irq(pdev, 0); if (dev->irq <= 0) { struct gpio_desc *gpio; @@ -853,6 +852,7 @@ static int bcm_probe(struct platform_device *pdev) return -ENOMEM; dev->pdev = pdev; + dev->irq = platform_get_irq(pdev, 0); if (has_acpi_companion(&pdev->dev)) { ret = bcm_acpi_probe(dev); From c0d3ce580b7c8f0b7c14306f7d8654a4f30a665d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:39 +0200 Subject: [PATCH 0642/2177] Bluetooth: hci_bcm: Store device pointer instead of platform_device pointer The ACPI subsys is going to move over to instantiating ACPI enumerated HCIs as serdevs, rather than as platform devices. This means that the serdev driver paths of hci_bcm.c also need to start supporting (runtime)pm through GPIOs and a host-wake IRQ. The hci_bcm code is already mostly independent of how the HCI gets instantiated, but even though the code only cares about pdev->dev, it was storing pdev itself in struct bcm_device. This commit stores pdev->dev rather than pdev in struct bcm_device; this is a preparation patch for adding (runtime)pm support to the serdev path. 
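In outline, the stored pointer changes type so the runtime-PM helpers can be called on the generic device whichever bus instantiated the HCI. A condensed sketch of the touched field and one converted caller (the full conversion is in the hunks below; unrelated members are elided):

struct bcm_device {
	struct list_head list;

	struct device *dev;		/* was: struct platform_device *pdev */
	const char *name;
	/* GPIOs, IRQ, speeds and PM state as before */
};

static irqreturn_t bcm_host_wake(int irq, void *data)
{
	struct bcm_device *bdev = data;

	bt_dev_dbg(bdev, "Host wake IRQ");

	/* runtime-PM calls now take the generic struct device directly */
	pm_runtime_get(bdev->dev);
	pm_runtime_mark_last_busy(bdev->dev);
	pm_runtime_put_autosuspend(bdev->dev);

	return IRQ_HANDLED;
}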
Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 73 ++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 3cbd7dab112a..101b17ae9aa7 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -56,7 +56,7 @@ struct bcm_device { struct list_head list; - struct platform_device *pdev; + struct device *dev; const char *name; struct gpio_desc *device_wakeup; @@ -188,9 +188,9 @@ static irqreturn_t bcm_host_wake(int irq, void *data) bt_dev_dbg(bdev, "Host wake IRQ"); - pm_runtime_get(&bdev->pdev->dev); - pm_runtime_mark_last_busy(&bdev->pdev->dev); - pm_runtime_put_autosuspend(&bdev->pdev->dev); + pm_runtime_get(bdev->dev); + pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); return IRQ_HANDLED; } @@ -212,20 +212,20 @@ static int bcm_request_irq(struct bcm_data *bcm) goto unlock; } - err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake, + err = devm_request_irq(bdev->dev, bdev->irq, bcm_host_wake, bdev->irq_active_low ? IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING, "host_wake", bdev); if (err) goto unlock; - device_init_wakeup(&bdev->pdev->dev, true); + device_init_wakeup(bdev->dev, true); - pm_runtime_set_autosuspend_delay(&bdev->pdev->dev, + pm_runtime_set_autosuspend_delay(bdev->dev, BCM_AUTOSUSPEND_DELAY); - pm_runtime_use_autosuspend(&bdev->pdev->dev); - pm_runtime_set_active(&bdev->pdev->dev); - pm_runtime_enable(&bdev->pdev->dev); + pm_runtime_use_autosuspend(bdev->dev); + pm_runtime_set_active(bdev->dev); + pm_runtime_enable(bdev->dev); unlock: mutex_unlock(&bcm_device_lock); @@ -332,7 +332,7 @@ static int bcm_open(struct hci_uart *hu) * platform device (saved during device probe) and * parent of tty device used by hci_uart */ - if (hu->tty->dev->parent == dev->pdev->dev.parent) { + if (hu->tty->dev->parent == dev->dev->parent) { bcm->dev = dev; hu->init_speed = dev->init_speed; hu->oper_speed = dev->oper_speed; @@ -367,12 +367,12 @@ static int bcm_close(struct hci_uart *hu) if (bcm_device_exists(bdev)) { bcm_gpio_set_power(bdev, false); #ifdef CONFIG_PM - pm_runtime_disable(&bdev->pdev->dev); - pm_runtime_set_suspended(&bdev->pdev->dev); + pm_runtime_disable(bdev->dev); + pm_runtime_set_suspended(bdev->dev); - if (device_can_wakeup(&bdev->pdev->dev)) { - devm_free_irq(&bdev->pdev->dev, bdev->irq, bdev); - device_init_wakeup(&bdev->pdev->dev, false); + if (device_can_wakeup(bdev->dev)) { + devm_free_irq(bdev->dev, bdev->irq, bdev); + device_init_wakeup(bdev->dev, false); } bdev->hu = NULL; @@ -506,9 +506,9 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count) /* Delay auto-suspend when receiving completed packet */ mutex_lock(&bcm_device_lock); if (bcm->dev && bcm_device_exists(bcm->dev)) { - pm_runtime_get(&bcm->dev->pdev->dev); - pm_runtime_mark_last_busy(&bcm->dev->pdev->dev); - pm_runtime_put_autosuspend(&bcm->dev->pdev->dev); + pm_runtime_get(bcm->dev->dev); + pm_runtime_mark_last_busy(bcm->dev->dev); + pm_runtime_put_autosuspend(bcm->dev->dev); } mutex_unlock(&bcm_device_lock); } @@ -539,15 +539,15 @@ static struct sk_buff *bcm_dequeue(struct hci_uart *hu) if (bcm_device_exists(bcm->dev)) { bdev = bcm->dev; - pm_runtime_get_sync(&bdev->pdev->dev); + pm_runtime_get_sync(bdev->dev); /* Shall be resumed here */ } skb = skb_dequeue(&bcm->txq); if (bdev) { - pm_runtime_mark_last_busy(&bdev->pdev->dev); - pm_runtime_put_autosuspend(&bdev->pdev->dev); + 
pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); } mutex_unlock(&bcm_device_lock); @@ -623,7 +623,7 @@ static int bcm_suspend(struct device *dev) if (pm_runtime_active(dev)) bcm_suspend_device(dev); - if (device_may_wakeup(&bdev->pdev->dev)) { + if (device_may_wakeup(dev)) { error = enable_irq_wake(bdev->irq); if (!error) bt_dev_dbg(bdev, "BCM irq: enabled"); @@ -651,7 +651,7 @@ static int bcm_resume(struct device *dev) if (!bdev->hu) goto unlock; - if (device_may_wakeup(&bdev->pdev->dev)) { + if (device_may_wakeup(dev)) { disable_irq_wake(bdev->irq); bt_dev_dbg(bdev, "BCM irq: disabled"); } @@ -758,19 +758,17 @@ static int bcm_resource(struct acpi_resource *ares, void *data) static int bcm_platform_probe(struct bcm_device *dev) { - struct platform_device *pdev = dev->pdev; + dev->name = dev_name(dev->dev); - dev->name = dev_name(&pdev->dev); + dev->clk = devm_clk_get(dev->dev, NULL); - dev->clk = devm_clk_get(&pdev->dev, NULL); - - dev->device_wakeup = devm_gpiod_get_optional(&pdev->dev, + dev->device_wakeup = devm_gpiod_get_optional(dev->dev, "device-wakeup", GPIOD_OUT_LOW); if (IS_ERR(dev->device_wakeup)) return PTR_ERR(dev->device_wakeup); - dev->shutdown = devm_gpiod_get_optional(&pdev->dev, "shutdown", + dev->shutdown = devm_gpiod_get_optional(dev->dev, "shutdown", GPIOD_OUT_LOW); if (IS_ERR(dev->shutdown)) return PTR_ERR(dev->shutdown); @@ -779,7 +777,7 @@ static int bcm_platform_probe(struct bcm_device *dev) if (dev->irq <= 0) { struct gpio_desc *gpio; - gpio = devm_gpiod_get_optional(&pdev->dev, "host-wakeup", + gpio = devm_gpiod_get_optional(dev->dev, "host-wakeup", GPIOD_IN); if (IS_ERR(gpio)) return PTR_ERR(gpio); @@ -787,13 +785,13 @@ static int bcm_platform_probe(struct bcm_device *dev) dev->irq = gpiod_to_irq(gpio); } - dev_info(&pdev->dev, "BCM irq: %d\n", dev->irq); + dev_info(dev->dev, "BCM irq: %d\n", dev->irq); /* Make sure at-least one of the GPIO is defined and that * a name is specified for this instance */ if ((!dev->device_wakeup && !dev->shutdown) || !dev->name) { - dev_err(&pdev->dev, "invalid platform data\n"); + dev_err(dev->dev, "invalid platform data\n"); return -EINVAL; } @@ -803,7 +801,6 @@ static int bcm_platform_probe(struct bcm_device *dev) #ifdef CONFIG_ACPI static int bcm_acpi_probe(struct bcm_device *dev) { - struct platform_device *pdev = dev->pdev; LIST_HEAD(resources); const struct dmi_system_id *dmi_id; const struct acpi_gpio_mapping *gpio_mapping = acpi_bcm_int_last_gpios; @@ -811,16 +808,16 @@ static int bcm_acpi_probe(struct bcm_device *dev) int ret; /* Retrieve GPIO data */ - id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev); + id = acpi_match_device(dev->dev->driver->acpi_match_table, dev->dev); if (id) gpio_mapping = (const struct acpi_gpio_mapping *) id->driver_data; - ret = devm_acpi_dev_add_driver_gpios(&pdev->dev, gpio_mapping); + ret = devm_acpi_dev_add_driver_gpios(dev->dev, gpio_mapping); if (ret) return ret; /* Retrieve UART ACPI info */ - ret = acpi_dev_get_resources(ACPI_COMPANION(&dev->pdev->dev), + ret = acpi_dev_get_resources(ACPI_COMPANION(dev->dev), &resources, bcm_resource, dev); if (ret < 0) return ret; @@ -851,7 +848,7 @@ static int bcm_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; - dev->pdev = pdev; + dev->dev = &pdev->dev; dev->irq = platform_get_irq(pdev, 0); if (has_acpi_companion(&pdev->dev)) { From 42ef18f09f593e6dd84777948a5e8189502cba0c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:40 +0200 Subject: [PATCH 0643/2177] 
Bluetooth: hci_bcm: Rename bcm_platform_probe to bcm_get_resources After our previous changes, there is nothing platform specific about bcm_platform_probe anymore, rename it to bcm_get_resources. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 101b17ae9aa7..7f1971adeea8 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -756,7 +756,7 @@ static int bcm_resource(struct acpi_resource *ares, void *data) } #endif /* CONFIG_ACPI */ -static int bcm_platform_probe(struct bcm_device *dev) +static int bcm_get_resources(struct bcm_device *dev) { dev->name = dev_name(dev->dev); @@ -857,7 +857,7 @@ static int bcm_probe(struct platform_device *pdev) return ret; } - ret = bcm_platform_probe(dev); + ret = bcm_get_resources(dev); if (ret) return ret; From 9d54fd6a90ff1a85b66873e67004cfe61563408a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:41 +0200 Subject: [PATCH 0644/2177] Bluetooth: hci_bcm: Make acpi_probe get irq from ACPI resources The ACPI subsys is going to move over to instantiating ACPI enumerated HCIs as serdevs, rather then as platform devices. So we need to make bcm_acpi_probe() suitable for use on non platform- devices too, which means that we cannot rely on platform_get_irq() getting called. This commit modifies bcm_acpi_probe() to directly get the irq from the ACPI resources, this is a preparation patch for adding (runtime)pm support to the serdev path. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 7f1971adeea8..cadf6291c615 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -751,8 +751,7 @@ static int bcm_resource(struct acpi_resource *ares, void *data) break; } - /* Always tell the ACPI core to skip this resource */ - return 1; + return 0; } #endif /* CONFIG_ACPI */ @@ -805,6 +804,7 @@ static int bcm_acpi_probe(struct bcm_device *dev) const struct dmi_system_id *dmi_id; const struct acpi_gpio_mapping *gpio_mapping = acpi_bcm_int_last_gpios; const struct acpi_device_id *id; + struct resource_entry *entry; int ret; /* Retrieve GPIO data */ @@ -821,6 +821,13 @@ static int bcm_acpi_probe(struct bcm_device *dev) &resources, bcm_resource, dev); if (ret < 0) return ret; + + resource_list_for_each_entry(entry, &resources) { + if (resource_type(entry->res) == IORESOURCE_IRQ) { + dev->irq = entry->res->start; + break; + } + } acpi_dev_free_resource_list(&resources); dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table); From 78277d73714a1381a80d96de68074c9503b2c014 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:42 +0200 Subject: [PATCH 0645/2177] Bluetooth: hci_bcm: Make suspend/resume functions platform_dev independent Use dev_get_drvdata instead of platform_get_drvdata in the suspend / resume functions. This is a preparation patch for adding (runtime)pm support to the serdev path. 
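The reason this works for either bus, in a sketch (the suspend body is condensed; struct bcm_device and bt_dev_dbg are the driver's own symbols): both probe paths store the driver data on the embedded struct device, so a PM callback only needs the generic accessor.

/* platform_set_drvdata(pdev, bdev) and serdev_device_set_drvdata(serdev, bdev)
 * both end up in dev_set_drvdata() on the underlying struct device, hence:
 */
static int bcm_suspend_device(struct device *dev)
{
	struct bcm_device *bdev = dev_get_drvdata(dev);	/* bus-agnostic */

	bt_dev_dbg(bdev, "");
	/* device-specific suspend work continues as in the hunk below */
	return 0;
}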
Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index cadf6291c615..a9acb1af983c 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -558,7 +558,7 @@ static struct sk_buff *bcm_dequeue(struct hci_uart *hu) #ifdef CONFIG_PM static int bcm_suspend_device(struct device *dev) { - struct bcm_device *bdev = platform_get_drvdata(to_platform_device(dev)); + struct bcm_device *bdev = dev_get_drvdata(dev); bt_dev_dbg(bdev, ""); @@ -581,7 +581,7 @@ static int bcm_suspend_device(struct device *dev) static int bcm_resume_device(struct device *dev) { - struct bcm_device *bdev = platform_get_drvdata(to_platform_device(dev)); + struct bcm_device *bdev = dev_get_drvdata(dev); bt_dev_dbg(bdev, ""); @@ -606,7 +606,7 @@ static int bcm_resume_device(struct device *dev) /* Platform suspend callback */ static int bcm_suspend(struct device *dev) { - struct bcm_device *bdev = platform_get_drvdata(to_platform_device(dev)); + struct bcm_device *bdev = dev_get_drvdata(dev); int error; bt_dev_dbg(bdev, "suspend: is_suspended %d", bdev->is_suspended); @@ -638,7 +638,7 @@ unlock: /* Platform resume callback */ static int bcm_resume(struct device *dev) { - struct bcm_device *bdev = platform_get_drvdata(to_platform_device(dev)); + struct bcm_device *bdev = dev_get_drvdata(dev); bt_dev_dbg(bdev, "resume: is_suspended %d", bdev->is_suspended); From 8a92056837fd5168fce730c5e21eaf3af7cf0b16 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2017 20:43:43 +0200 Subject: [PATCH 0646/2177] Bluetooth: hci_bcm: Add (runtime)pm support to the serdev driver Make the serdev driver use struct bcm_device as its driver data and share all the pm / GPIO / IRQ related code paths with the platform driver. After this commit the 2 drivers are in essence the same and the serdev driver interface can be used for all ACPI enumerated HCI UARTs. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 118 +++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 50 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index a9acb1af983c..ab1455e63b92 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -52,8 +52,10 @@ #define BCM_AUTOSUSPEND_DELAY 5000 /* default autosleep delay */ -/* platform device driver resources */ +/* device driver resources */ struct bcm_device { + /* Must be the first member, hci_serdev.c expects this. 
*/ + struct hci_uart serdev_hu; struct list_head list; struct device *dev; @@ -76,11 +78,6 @@ struct bcm_device { #endif }; -/* serdev driver resources */ -struct bcm_serdev { - struct hci_uart hu; -}; - /* generic bcm uart resources */ struct bcm_data { struct sk_buff *rx_skb; @@ -155,6 +152,10 @@ static bool bcm_device_exists(struct bcm_device *device) { struct list_head *p; + /* Devices using serdev always exist */ + if (device && device->hu && device->hu->serdev) + return true; + list_for_each(p, &bcm_device_list) { struct bcm_device *dev = list_entry(p, struct bcm_device, list); @@ -200,7 +201,6 @@ static int bcm_request_irq(struct bcm_data *bcm) struct bcm_device *bdev = bcm->dev; int err; - /* If this is not a platform device, do not enable PM functionalities */ mutex_lock(&bcm_device_lock); if (!bcm_device_exists(bdev)) { err = -ENODEV; @@ -313,18 +313,17 @@ static int bcm_open(struct hci_uart *hu) hu->priv = bcm; - /* If this is a serdev defined device, then only use - * serdev open primitive and skip the rest. - */ + mutex_lock(&bcm_device_lock); + if (hu->serdev) { serdev_device_open(hu->serdev); + bcm->dev = serdev_device_get_drvdata(hu->serdev); goto out; } if (!hu->tty->dev) goto out; - mutex_lock(&bcm_device_lock); list_for_each(p, &bcm_device_list) { struct bcm_device *dev = list_entry(p, struct bcm_device, list); @@ -334,37 +333,45 @@ static int bcm_open(struct hci_uart *hu) */ if (hu->tty->dev->parent == dev->dev->parent) { bcm->dev = dev; - hu->init_speed = dev->init_speed; - hu->oper_speed = dev->oper_speed; #ifdef CONFIG_PM dev->hu = hu; #endif - bcm_gpio_set_power(bcm->dev, true); break; } } - mutex_unlock(&bcm_device_lock); out: + if (bcm->dev) { + hu->init_speed = bcm->dev->init_speed; + hu->oper_speed = bcm->dev->oper_speed; + bcm_gpio_set_power(bcm->dev, true); + } + + mutex_unlock(&bcm_device_lock); return 0; } static int bcm_close(struct hci_uart *hu) { struct bcm_data *bcm = hu->priv; - struct bcm_device *bdev = bcm->dev; + struct bcm_device *bdev = NULL; bt_dev_dbg(hu->hdev, "hu %p", hu); - /* If this is a serdev defined device, only use serdev - * close primitive and then continue as usual. 
- */ - if (hu->serdev) - serdev_device_close(hu->serdev); - /* Protect bcm->dev against removal of the device or driver */ mutex_lock(&bcm_device_lock); - if (bcm_device_exists(bdev)) { + + if (hu->serdev) { + serdev_device_close(hu->serdev); + bdev = serdev_device_get_drvdata(hu->serdev); + } else if (bcm_device_exists(bcm->dev)) { + bdev = bcm->dev; +#ifdef CONFIG_PM + bdev->hu = NULL; +#endif + } + + if (bdev) { bcm_gpio_set_power(bdev, false); #ifdef CONFIG_PM pm_runtime_disable(bdev->dev); @@ -374,8 +381,6 @@ static int bcm_close(struct hci_uart *hu) devm_free_irq(bdev->dev, bdev->irq, bdev); device_init_wakeup(bdev->dev, false); } - - bdev->hu = NULL; #endif } mutex_unlock(&bcm_device_lock); @@ -603,7 +608,7 @@ static int bcm_resume_device(struct device *dev) #endif #ifdef CONFIG_PM_SLEEP -/* Platform suspend callback */ +/* suspend callback */ static int bcm_suspend(struct device *dev) { struct bcm_device *bdev = dev_get_drvdata(dev); @@ -611,8 +616,10 @@ static int bcm_suspend(struct device *dev) bt_dev_dbg(bdev, "suspend: is_suspended %d", bdev->is_suspended); - /* bcm_suspend can be called at any time as long as platform device is - * bound, so it should use bcm_device_lock to protect access to hci_uart + /* + * When used with a device instantiated as platform_device, bcm_suspend + * can be called at any time as long as the platform device is bound, + * so it should use bcm_device_lock to protect access to hci_uart * and device_wake-up GPIO. */ mutex_lock(&bcm_device_lock); @@ -635,15 +642,17 @@ unlock: return 0; } -/* Platform resume callback */ +/* resume callback */ static int bcm_resume(struct device *dev) { struct bcm_device *bdev = dev_get_drvdata(dev); bt_dev_dbg(bdev, "resume: is_suspended %d", bdev->is_suspended); - /* bcm_resume can be called at any time as long as platform device is - * bound, so it should use bcm_device_lock to protect access to hci_uart + /* + * When used with a device instantiated as platform_device, bcm_resume + * can be called at any time as long as platform device is bound, + * so it should use bcm_device_lock to protect access to hci_uart * and device_wake-up GPIO. 
*/ mutex_lock(&bcm_device_lock); @@ -785,15 +794,6 @@ static int bcm_get_resources(struct bcm_device *dev) } dev_info(dev->dev, "BCM irq: %d\n", dev->irq); - - /* Make sure at-least one of the GPIO is defined and that - * a name is specified for this instance - */ - if ((!dev->device_wakeup && !dev->shutdown) || !dev->name) { - dev_err(dev->dev, "invalid platform data\n"); - return -EINVAL; - } - return 0; } @@ -846,6 +846,12 @@ static int bcm_acpi_probe(struct bcm_device *dev) } #endif /* CONFIG_ACPI */ +static int bcm_of_probe(struct bcm_device *bdev) +{ + device_property_read_u32(bdev->dev, "max-speed", &bdev->oper_speed); + return 0; +} + static int bcm_probe(struct platform_device *pdev) { struct bcm_device *dev; @@ -933,7 +939,7 @@ static const struct acpi_device_id bcm_acpi_match[] = { MODULE_DEVICE_TABLE(acpi, bcm_acpi_match); #endif -/* Platform suspend and resume callbacks */ +/* suspend and resume callbacks */ static const struct dev_pm_ops bcm_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(bcm_suspend, bcm_resume) SET_RUNTIME_PM_OPS(bcm_suspend_device, bcm_resume_device, NULL) @@ -951,29 +957,39 @@ static struct platform_driver bcm_driver = { static int bcm_serdev_probe(struct serdev_device *serdev) { - struct bcm_serdev *bcmdev; - u32 speed; + struct bcm_device *bcmdev; int err; bcmdev = devm_kzalloc(&serdev->dev, sizeof(*bcmdev), GFP_KERNEL); if (!bcmdev) return -ENOMEM; - bcmdev->hu.serdev = serdev; + bcmdev->dev = &serdev->dev; + bcmdev->hu = &bcmdev->serdev_hu; + bcmdev->serdev_hu.serdev = serdev; serdev_device_set_drvdata(serdev, bcmdev); - err = device_property_read_u32(&serdev->dev, "max-speed", &speed); - if (!err) - bcmdev->hu.oper_speed = speed; + if (has_acpi_companion(&serdev->dev)) + err = bcm_acpi_probe(bcmdev); + else + err = bcm_of_probe(bcmdev); + if (err) + return err; - return hci_uart_register_device(&bcmdev->hu, &bcm_proto); + err = bcm_get_resources(bcmdev); + if (err) + return err; + + bcm_gpio_set_power(bcmdev, false); + + return hci_uart_register_device(&bcmdev->serdev_hu, &bcm_proto); } static void bcm_serdev_remove(struct serdev_device *serdev) { - struct bcm_serdev *bcmdev = serdev_device_get_drvdata(serdev); + struct bcm_device *bcmdev = serdev_device_get_drvdata(serdev); - hci_uart_unregister_device(&bcmdev->hu); + hci_uart_unregister_device(&bcmdev->serdev_hu); } #ifdef CONFIG_OF @@ -990,6 +1006,8 @@ static struct serdev_device_driver bcm_serdev_driver = { .driver = { .name = "hci_uart_bcm", .of_match_table = of_match_ptr(bcm_bluetooth_of_match), + .acpi_match_table = ACPI_PTR(bcm_acpi_match), + .pm = &bcm_pm_ops, }, }; From 0435605289298a7311f78d02eb6a015cae7dbaf7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 17:54:29 -0700 Subject: [PATCH 0647/2177] Bluetooth: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. As already done in hci_qca, add struct hci_uart pointer to priv structure. 
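The shape of the conversion, with hypothetical names (my_priv, my_timeout and my_init stand in for each driver's own symbols; the hunks below apply this pattern to bluecard_cs, hci_bcsp, hci_h5 and hci_qca):

#include <linux/timer.h>
#include <net/bluetooth/bluetooth.h>

struct hci_uart;

struct my_priv {
	struct timer_list timer;
	struct hci_uart *hu;	/* back-pointer, as added to bcsp/h5 here */
};

/* new-style callback: receives the timer_list and recovers its container */
static void my_timeout(struct timer_list *t)
{
	struct my_priv *priv = from_timer(priv, t, timer);
	struct hci_uart *hu = priv->hu;

	BT_DBG("hu %p timed out", hu);
}

static void my_init(struct my_priv *priv, struct hci_uart *hu)
{
	priv->hu = hu;
	/* replaces setup_timer(&priv->timer, my_timeout, (u_long)hu) */
	timer_setup(&priv->timer, my_timeout, 0);
}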
Signed-off-by: Kees Cook Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bluecard_cs.c | 7 +++---- drivers/bluetooth/hci_bcsp.c | 10 ++++++---- drivers/bluetooth/hci_h5.c | 10 ++++++---- drivers/bluetooth/hci_qca.c | 17 ++++++++--------- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index b07ca9565291..d513ef4743dc 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -156,9 +156,9 @@ static void bluecard_detach(struct pcmcia_device *p_dev); /* ======================== LED handling routines ======================== */ -static void bluecard_activity_led_timeout(u_long arg) +static void bluecard_activity_led_timeout(struct timer_list *t) { - struct bluecard_info *info = (struct bluecard_info *)arg; + struct bluecard_info *info = from_timer(info, t, timer); unsigned int iobase = info->p_dev->resource[0]->start; if (test_bit(CARD_ACTIVITY, &(info->hw_state))) { @@ -691,8 +691,7 @@ static int bluecard_open(struct bluecard_info *info) spin_lock_init(&(info->lock)); - setup_timer(&(info->timer), &bluecard_activity_led_timeout, - (u_long)info); + timer_setup(&info->timer, bluecard_activity_led_timeout, 0); skb_queue_head_init(&(info->txq)); diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index d880f4e33c75..1a7f0c82fb36 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -65,6 +65,7 @@ struct bcsp_struct { u8 rxseq_txack; /* rxseq == txack. */ u8 rxack; /* Last packet sent by us that the peer ack'ed */ struct timer_list tbcsp; + struct hci_uart *hu; enum { BCSP_W4_PKT_DELIMITER, @@ -697,10 +698,10 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) } /* Arrange to retransmit all messages in the relq. 
*/ -static void bcsp_timed_event(unsigned long arg) +static void bcsp_timed_event(struct timer_list *t) { - struct hci_uart *hu = (struct hci_uart *)arg; - struct bcsp_struct *bcsp = hu->priv; + struct bcsp_struct *bcsp = from_timer(bcsp, t, tbcsp); + struct hci_uart *hu = bcsp->hu; struct sk_buff *skb; unsigned long flags; @@ -729,11 +730,12 @@ static int bcsp_open(struct hci_uart *hu) return -ENOMEM; hu->priv = bcsp; + bcsp->hu = hu; skb_queue_head_init(&bcsp->unack); skb_queue_head_init(&bcsp->rel); skb_queue_head_init(&bcsp->unrel); - setup_timer(&bcsp->tbcsp, bcsp_timed_event, (u_long)hu); + timer_setup(&bcsp->tbcsp, bcsp_timed_event, 0); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index c0e4e26dc30d..6a8d0d06aba7 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -78,6 +78,7 @@ struct h5 { int (*rx_func)(struct hci_uart *hu, u8 c); struct timer_list timer; /* Retransmission timer */ + struct hci_uart *hu; /* Parent HCI UART */ u8 tx_seq; /* Next seq number to send */ u8 tx_ack; /* Next ack number to send */ @@ -120,12 +121,12 @@ static u8 h5_cfg_field(struct h5 *h5) return h5->tx_win & 0x07; } -static void h5_timed_event(unsigned long arg) +static void h5_timed_event(struct timer_list *t) { const unsigned char sync_req[] = { 0x01, 0x7e }; unsigned char conf_req[3] = { 0x03, 0xfc }; - struct hci_uart *hu = (struct hci_uart *)arg; - struct h5 *h5 = hu->priv; + struct h5 *h5 = from_timer(h5, t, timer); + struct hci_uart *hu = h5->hu; struct sk_buff *skb; unsigned long flags; @@ -197,6 +198,7 @@ static int h5_open(struct hci_uart *hu) return -ENOMEM; hu->priv = h5; + h5->hu = hu; skb_queue_head_init(&h5->unack); skb_queue_head_init(&h5->rel); @@ -204,7 +206,7 @@ static int h5_open(struct hci_uart *hu) h5_reset_rx(h5); - setup_timer(&h5->timer, h5_timed_event, (unsigned long)hu); + timer_setup(&h5->timer, h5_timed_event, 0); h5->tx_win = H5_TX_WIN_MAX; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 392f412b4575..4a949bb60394 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -307,10 +307,10 @@ static void qca_wq_serial_tx_clock_vote_off(struct work_struct *work) serial_clock_vote(HCI_IBS_TX_VOTE_CLOCK_OFF, hu); } -static void hci_ibs_tx_idle_timeout(unsigned long arg) +static void hci_ibs_tx_idle_timeout(struct timer_list *t) { - struct hci_uart *hu = (struct hci_uart *)arg; - struct qca_data *qca = hu->priv; + struct qca_data *qca = from_timer(qca, t, tx_idle_timer); + struct hci_uart *hu = qca->hu; unsigned long flags; BT_DBG("hu %p idle timeout in %d state", hu, qca->tx_ibs_state); @@ -342,10 +342,10 @@ static void hci_ibs_tx_idle_timeout(unsigned long arg) spin_unlock_irqrestore(&qca->hci_ibs_lock, flags); } -static void hci_ibs_wake_retrans_timeout(unsigned long arg) +static void hci_ibs_wake_retrans_timeout(struct timer_list *t) { - struct hci_uart *hu = (struct hci_uart *)arg; - struct qca_data *qca = hu->priv; + struct qca_data *qca = from_timer(qca, t, wake_retrans_timer); + struct hci_uart *hu = qca->hu; unsigned long flags, retrans_delay; bool retransmit = false; @@ -438,11 +438,10 @@ static int qca_open(struct hci_uart *hu) hu->priv = qca; - setup_timer(&qca->wake_retrans_timer, hci_ibs_wake_retrans_timeout, - (u_long)hu); + timer_setup(&qca->wake_retrans_timer, hci_ibs_wake_retrans_timeout, 0); qca->wake_retrans = IBS_WAKE_RETRANS_TIMEOUT_MS; - setup_timer(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, (u_long)hu); + 
timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0); qca->tx_idle_delay = IBS_TX_IDLE_TIMEOUT_MS; BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u", From b49ef29d72bd2975b8b2cc84b1fa6b40aa2edd5f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 6 Oct 2017 20:42:45 +0200 Subject: [PATCH 0648/2177] Bluetooth: Fix compiler warning with selftest duration calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CC net/bluetooth/selftest.o net/bluetooth/selftest.c: In function ‘bt_selftest_init’: net/bluetooth/selftest.c:246:3: warning: ‘duration’ may be used uninitialized in this function [-Wmaybe-uninitialized] snprintf(test_ecdh_buffer, sizeof(test_ecdh_buffer), ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "PASS (%llu usecs)\n", duration); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/bluetooth/selftest.c:203:21: note: ‘duration’ was declared here unsigned long long duration; ^~~~~~~~ Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index 2d1519d0affa..03e3c89c3046 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -200,7 +200,7 @@ static int __init test_ecdh(void) { struct crypto_kpp *tfm; ktime_t calltime, delta, rettime; - unsigned long long duration; + unsigned long long duration = 0; int err; calltime = ktime_get(); From 18a4c0eab2623cc95be98a1e6af1ad18e7695977 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:21 -0700 Subject: [PATCH 0649/2177] net: add rb_to_skb() and other rb tree helpers Geeralize private netem_rb_to_skb() TCP rtx queue will soon be converted to rb-tree, so we will need skb_rbtree_walk() helpers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++++++++ net/ipv4/tcp_fastopen.c | 8 +++----- net/ipv4/tcp_input.c | 33 ++++++++++++--------------------- net/sched/sch_netem.c | 14 ++++---------- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 01a985937867..03634ec2f918 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3158,6 +3158,12 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) return __skb_grow(skb, len); } +#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) +#define skb_rb_first(root) rb_to_skb(rb_first(root)) +#define skb_rb_last(root) rb_to_skb(rb_last(root)) +#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) +#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ @@ -3172,6 +3178,18 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) +#define skb_rbtree_walk(skb, root) \ + for (skb = skb_rb_first(root); skb != NULL; \ + skb = skb_rb_next(skb)) + +#define skb_rbtree_walk_from(skb) \ + for (; skb != NULL; \ + skb = skb_rb_next(skb)) + +#define skb_rbtree_walk_from_safe(skb, tmp) \ + for (; tmp = skb ? 
skb_rb_next(skb) : NULL, (skb != NULL); \ + skb = tmp) + #define skb_queue_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 29fff14d5a53..7ee4aadcdd71 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -465,17 +465,15 @@ bool tcp_fastopen_active_should_disable(struct sock *sk) void tcp_fastopen_active_disable_ofo_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct rb_node *p; - struct sk_buff *skb; struct dst_entry *dst; + struct sk_buff *skb; if (!tp->syn_fastopen) return; if (!tp->data_segs_in) { - p = rb_first(&tp->out_of_order_queue); - if (p && !rb_next(p)) { - skb = rb_entry(p, struct sk_buff, rbnode); + skb = skb_rb_first(&tp->out_of_order_queue); + if (skb && !skb_rb_next(skb)) { if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { tcp_fastopen_active_disable(sk); return; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fb0d7ed84b94..90afe4143596 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4335,7 +4335,7 @@ static void tcp_ofo_queue(struct sock *sk) p = rb_first(&tp->out_of_order_queue); while (p) { - skb = rb_entry(p, struct sk_buff, rbnode); + skb = rb_to_skb(p); if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; @@ -4399,7 +4399,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct rb_node **p, *q, *parent; + struct rb_node **p, *parent; struct sk_buff *skb1; u32 seq, end_seq; bool fragstolen; @@ -4458,7 +4458,7 @@ coalesce_done: parent = NULL; while (*p) { parent = *p; - skb1 = rb_entry(parent, struct sk_buff, rbnode); + skb1 = rb_to_skb(parent); if (before(seq, TCP_SKB_CB(skb1)->seq)) { p = &parent->rb_left; continue; @@ -4503,9 +4503,7 @@ insert: merge_right: /* Remove other segments covered by skb. */ - while ((q = rb_next(&skb->rbnode)) != NULL) { - skb1 = rb_entry(q, struct sk_buff, rbnode); - + while ((skb1 = skb_rb_next(skb)) != NULL) { if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) break; if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { @@ -4520,7 +4518,7 @@ merge_right: tcp_drop(sk, skb1); } /* If there is no skb after us, we are the last_skb ! */ - if (!q) + if (!skb1) tp->ooo_last_skb = skb; add_sack: @@ -4706,7 +4704,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li if (list) return !skb_queue_is_last(list, skb) ? skb->next : NULL; - return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); + return skb_rb_next(skb); } static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, @@ -4735,7 +4733,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) while (*p) { parent = *p; - skb1 = rb_entry(parent, struct sk_buff, rbnode); + skb1 = rb_to_skb(parent); if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) p = &parent->rb_left; else @@ -4854,26 +4852,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb, *head; - struct rb_node *p; u32 start, end; - p = rb_first(&tp->out_of_order_queue); - skb = rb_entry_safe(p, struct sk_buff, rbnode); + skb = skb_rb_first(&tp->out_of_order_queue); new_range: if (!skb) { - p = rb_last(&tp->out_of_order_queue); - /* Note: This is possible p is NULL here. We do not - * use rb_entry_safe(), as ooo_last_skb is valid only - * if rbtree is not empty. 
- */ - tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode); + tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue); return; } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; for (head = skb;;) { - skb = tcp_skb_next(skb, NULL); + skb = skb_rb_next(skb); /* Range is terminated when we see a gap or when * we are at the queue end. @@ -4916,14 +4907,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk) do { prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); - tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); + tcp_drop(sk, rb_to_skb(node)); sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) break; node = prev; } while (node); - tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); + tp->ooo_last_skb = rb_to_skb(prev); /* Reset SACK state. A conforming SACK implementation will * do the same at a timeout based retransmit. When a connection diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5a4f10080290..db0228a65e8c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -148,12 +148,6 @@ struct netem_skb_cb { psched_time_t time_to_send; }; - -static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) -{ - return rb_entry(rb, struct sk_buff, rbnode); -} - static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { /* we assume we can use skb next/prev/tstamp as storage for rb_node */ @@ -364,7 +358,7 @@ static void tfifo_reset(struct Qdisc *sch) struct rb_node *p = rb_first(&q->t_root); while (p) { - struct sk_buff *skb = netem_rb_to_skb(p); + struct sk_buff *skb = rb_to_skb(p); p = rb_next(p); rb_erase(&skb->rbnode, &q->t_root); @@ -382,7 +376,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) struct sk_buff *skb; parent = *p; - skb = netem_rb_to_skb(parent); + skb = rb_to_skb(parent); if (tnext >= netem_skb_cb(skb)->time_to_send) p = &parent->rb_right; else @@ -538,7 +532,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *t_skb; struct netem_skb_cb *t_last; - t_skb = netem_rb_to_skb(rb_last(&q->t_root)); + t_skb = skb_rb_last(&q->t_root); t_last = netem_skb_cb(t_skb); if (!last || t_last->time_to_send > last->time_to_send) { @@ -617,7 +611,7 @@ deliver: if (p) { psched_time_t time_to_send; - skb = netem_rb_to_skb(p); + skb = rb_to_skb(p); /* if more time remaining? */ time_to_send = netem_skb_cb(skb)->time_to_send; From ac3f09ba3e496bd7cc780ead05b1d1bb5f33aedb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:22 -0700 Subject: [PATCH 0650/2177] tcp: uninline tcp_write_queue_purge() Since the upcoming rtx rbtree will add some extra code, it is time to not inline this fat function anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 15 +-------------- net/ipv4/tcp.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3b16f353b539..744559b72784 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1606,20 +1606,7 @@ static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb) skb->_skb_refdst = _save; \ } -/* write queue abstraction */ -static inline void tcp_write_queue_purge(struct sock *sk) -{ - struct sk_buff *skb; - - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { - tcp_skb_tsorted_anchor_cleanup(skb); - sk_wmem_free_skb(sk, skb); - } - INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); - sk_mem_reclaim(sk); - tcp_clear_all_retrans_hints(tcp_sk(sk)); -} +void tcp_write_queue_purge(struct sock *sk); static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8cf742fd4f99..f8ebae62f834 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2318,6 +2318,20 @@ static inline bool tcp_need_reset(int state) TCPF_FIN_WAIT2 | TCPF_SYN_RECV); } +void tcp_write_queue_purge(struct sock *sk) +{ + struct sk_buff *skb; + + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + tcp_skb_tsorted_anchor_cleanup(skb); + sk_wmem_free_skb(sk, skb); + } + INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); + sk_mem_reclaim(sk); + tcp_clear_all_retrans_hints(tcp_sk(sk)); +} + int tcp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); From 4e8cc22803080853a33bafc6748e133448fb8182 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:23 -0700 Subject: [PATCH 0651/2177] tcp: tcp_tx_timestamp() cleanup tcp_write_queue_tail() call can be factorized. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f8ebae62f834..b8d379c80936 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -469,8 +469,10 @@ void tcp_init_transfer(struct sock *sk, int bpf_op) tcp_init_buffer_space(sk); } -static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) +static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) { + struct sk_buff *skb = tcp_write_queue_tail(sk); + if (tsflags && skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -1041,7 +1043,7 @@ wait_for_memory: out: if (copied) { - tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk)); + tcp_tx_timestamp(sk, sk->sk_tsflags); if (!(flags & MSG_SENDPAGE_NOTLAST)) tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } @@ -1418,7 +1420,7 @@ wait_for_memory: out: if (copied) { - tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk)); + tcp_tx_timestamp(sk, sockc.tsflags); tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } out_nopush: From 5e76ee4b8e90384936a214cde5b7816424f70232 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:24 -0700 Subject: [PATCH 0652/2177] tcp: tcp_mark_head_lost() optimization It will be a bit more expensive to get the head of rtx queue once rtx queue is converted to an rb-tree. We can avoid this extra cost in case tp->lost_skb_hint is set. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 90afe4143596..abc3b1db81f8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2207,12 +2207,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; WARN_ON(packets > tp->packets_out); - if (tp->lost_skb_hint) { - skb = tp->lost_skb_hint; - cnt = tp->lost_cnt_hint; + skb = tp->lost_skb_hint; + if (skb) { /* Head already handled? */ - if (mark_head && skb != tcp_write_queue_head(sk)) + if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una)) return; + cnt = tp->lost_cnt_hint; } else { skb = tcp_write_queue_head(sk); cnt = 0; From 8ba6ddaaf86c4c6814774e4e4ef158b732bd9f9f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:25 -0700 Subject: [PATCH 0653/2177] tcp: reduce tcp_fastretrans_alert() verbosity With upcoming rb-tree implementation, the checks will trigger more often, and this is expected. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index abc3b1db81f8..be7644204cd4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2804,9 +2804,9 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - if (WARN_ON(!tp->packets_out && tp->sacked_out)) + if (!tp->packets_out && tp->sacked_out) tp->sacked_out = 0; - if (WARN_ON(!tp->sacked_out && tp->fackets_out)) + if (!tp->sacked_out && tp->fackets_out) tp->fackets_out = 0; /* Now state machine starts. From f33198163a0fbb03766444253edf6ea50685d725 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:26 -0700 Subject: [PATCH 0654/2177] tcp: pass previous skb to tcp_shifted_skb() No need to recompute previous skb, as it will be a bit more expensive when rtx queue is converted to RB tree. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be7644204cd4..72c4732ae2da 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1288,13 +1288,13 @@ static u8 tcp_sacktag_one(struct sock *sk, /* Shift newly-SACKed bytes from this skb to the immediately previous * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. 
*/ -static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, +static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, + struct sk_buff *skb, struct tcp_sacktag_state *state, unsigned int pcount, int shifted, int mss, bool dup_sack) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *prev = tcp_write_queue_prev(sk, skb); u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ @@ -1495,7 +1495,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if (!skb_shift(prev, skb, len)) goto fallback; - if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) + if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack)) goto out; /* Hole filled allows collapsing with the next as well, this is very @@ -1514,7 +1514,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, len = skb->len; if (skb_shift(prev, skb, len)) { pcount += tcp_skb_pcount(skb); - tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0); + tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb), + len, mss, 0); } out: From 75c119afe14f74b4dd967d75ed9f57ab6c0ef045 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:27 -0700 Subject: [PATCH 0655/2177] tcp: implement rb-tree based retransmit queue Using a linear list to store all skbs in write queue has been okay for quite a while : O(N) is not too bad when N < 500. Things get messy when N is the order of 100,000 : Modern TCP stacks want 10Gbit+ of throughput even with 200 ms RTT flows. 40 ns per cache line miss means a full scan can use 4 ms, blowing away CPU caches. SACK processing often can use various hints to avoid parsing whole retransmit queue. But with high packet losses and/or high reordering, hints no longer work. Sender has to process thousands of unfriendly SACK, accumulating a huge socket backlog, burning a cpu and massively dropping packets. Using an rb-tree for retransmit queue has been avoided for years because it added complexity and overhead, but now is the time to be more resistant and say no to quadratic behavior. 1) RTX queue is no longer part of the write queue : already sent skbs are stored in one rb-tree. 2) Since reaching the head of write queue no longer needs sk->sk_send_head, we added an union of sk_send_head and tcp_rtx_queue Tested: On receiver : netem on ingress : delay 150ms 200us loss 1 GRO disabled to force stress and SACK storms. for f in `seq 1 10` do ./netperf -H lpaa6 -l30 -- -K bbr -o THROUGHPUT|tail -1 done | awk '{print $0} {sum += $0} END {printf "%7u\n",sum}' Before patch : 323.87 351.48 339.59 338.62 306.72 204.07 304.93 291.88 202.47 176.88 2840 After patch: 1700.83 2207.98 2070.17 1544.26 2114.76 2124.89 1693.14 1080.91 2216.82 1299.94 18053 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 7 ++- include/net/tcp.h | 89 ++++++++++++++------------- net/ipv4/tcp.c | 41 ++++++++++--- net/ipv4/tcp_input.c | 133 +++++++++++++++++++++------------------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 137 +++++++++++++++++++++++------------------- net/ipv4/tcp_timer.c | 24 +++++--- 7 files changed, 245 insertions(+), 188 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index a6b9a8d1a6df..4827094f1db4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -60,7 +60,7 @@ #include #include #include - +#include #include #include #include @@ -397,7 +397,10 @@ struct sock { int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; - struct sk_buff *sk_send_head; + union { + struct sk_buff *sk_send_head; + struct rb_root tcp_rtx_queue; + }; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; diff --git a/include/net/tcp.h b/include/net/tcp.h index 744559b72784..5a95e5886b55 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -551,7 +551,13 @@ void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); void tcp_enter_recovery(struct sock *sk, bool ece_ack); int tcp_trim_head(struct sock *, struct sk_buff *, u32); -int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); +enum tcp_queue { + TCP_FRAG_IN_WRITE_QUEUE, + TCP_FRAG_IN_RTX_QUEUE, +}; +int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, + struct sk_buff *skb, u32 len, + unsigned int mss_now, gfp_t gfp); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); @@ -1608,6 +1614,11 @@ static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb) void tcp_write_queue_purge(struct sock *sk); +static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk) +{ + return skb_rb_first(&sk->tcp_rtx_queue); +} + static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) { return skb_peek(&sk->sk_write_queue); @@ -1630,18 +1641,12 @@ static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk, return skb_queue_prev(&sk->sk_write_queue, skb); } -#define tcp_for_write_queue(skb, sk) \ - skb_queue_walk(&(sk)->sk_write_queue, skb) - -#define tcp_for_write_queue_from(skb, sk) \ - skb_queue_walk_from(&(sk)->sk_write_queue, skb) - #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) static inline struct sk_buff *tcp_send_head(const struct sock *sk) { - return sk->sk_send_head; + return skb_peek(&sk->sk_write_queue); } static inline bool tcp_skb_is_last(const struct sock *sk, @@ -1650,29 +1655,30 @@ static inline bool tcp_skb_is_last(const struct sock *sk, return skb_queue_is_last(&sk->sk_write_queue, skb); } -static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb) +static inline bool tcp_write_queue_empty(const struct sock *sk) { - if (tcp_skb_is_last(sk, skb)) - sk->sk_send_head = NULL; - else - sk->sk_send_head = tcp_write_queue_next(sk, skb); + return skb_queue_empty(&sk->sk_write_queue); +} + +static inline bool tcp_rtx_queue_empty(const struct sock *sk) +{ + return RB_EMPTY_ROOT(&sk->tcp_rtx_queue); +} + +static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk) +{ + return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk); } static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) { - if (sk->sk_send_head == skb_unlinked) { - sk->sk_send_head = NULL; + if (tcp_write_queue_empty(sk)) 
tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - } + if (tcp_sk(sk)->highest_sack == skb_unlinked) tcp_sk(sk)->highest_sack = NULL; } -static inline void tcp_init_send_head(struct sock *sk) -{ - sk->sk_send_head = NULL; -} - static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) { __skb_queue_tail(&sk->sk_write_queue, skb); @@ -1683,8 +1689,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb __tcp_add_write_queue_tail(sk, skb); /* Queue it, remembering where we must start sending. */ - if (sk->sk_send_head == NULL) { - sk->sk_send_head = skb; + if (sk->sk_write_queue.next == skb) { tcp_chrono_start(sk, TCP_CHRONO_BUSY); if (tcp_sk(sk)->highest_sack == NULL) @@ -1697,35 +1702,32 @@ static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *s __skb_queue_head(&sk->sk_write_queue, skb); } -/* Insert buff after skb on the write queue of sk. */ -static inline void tcp_insert_write_queue_after(struct sk_buff *skb, - struct sk_buff *buff, - struct sock *sk) -{ - __skb_queue_after(&sk->sk_write_queue, skb, buff); -} - /* Insert new before skb on the write queue of sk. */ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sk_buff *skb, struct sock *sk) { __skb_queue_before(&sk->sk_write_queue, skb, new); - - if (sk->sk_send_head == skb) - sk->sk_send_head = new; } static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) { - list_del(&skb->tcp_tsorted_anchor); - tcp_skb_tsorted_anchor_cleanup(skb); __skb_unlink(skb, &sk->sk_write_queue); } -static inline bool tcp_write_queue_empty(struct sock *sk) +void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb); + +static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk) { - return skb_queue_empty(&sk->sk_write_queue); + tcp_skb_tsorted_anchor_cleanup(skb); + rb_erase(&skb->rbnode, &sk->tcp_rtx_queue); +} + +static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk) +{ + list_del(&skb->tcp_tsorted_anchor); + tcp_rtx_queue_unlink(skb, sk); + sk_wmem_free_skb(sk, skb); } static inline void tcp_push_pending_frames(struct sock *sk) @@ -1754,8 +1756,9 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb) { - tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL : - tcp_write_queue_next(sk, skb); + struct sk_buff *next = skb_rb_next(skb); + + tcp_sk(sk)->highest_sack = next ?: tcp_send_head(sk); } static inline struct sk_buff *tcp_highest_sack(struct sock *sk) @@ -1765,7 +1768,9 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk) static inline void tcp_highest_sack_reset(struct sock *sk) { - tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk); + struct sk_buff *skb = tcp_rtx_queue_head(sk); + + tcp_sk(sk)->highest_sack = skb ?: tcp_send_head(sk); } /* Called when old skb is about to be deleted (to be combined with new skb) */ @@ -1935,7 +1940,7 @@ extern void tcp_rack_reo_timeout(struct sock *sk); /* At how many usecs into the future should the RTO fire? 
*/ static inline s64 tcp_rto_delta_us(const struct sock *sk) { - const struct sk_buff *skb = tcp_write_queue_head(sk); + const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b8d379c80936..3b34850d361f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -413,6 +413,7 @@ void tcp_init_sock(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); tp->out_of_order_queue = RB_ROOT; + sk->tcp_rtx_queue = RB_ROOT; tcp_init_xmit_timers(sk); INIT_LIST_HEAD(&tp->tsq_node); INIT_LIST_HEAD(&tp->tsorted_sent_queue); @@ -701,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - if (!tcp_send_head(sk)) - return; - skb = tcp_write_queue_tail(sk); + if (!skb) + return; if (!(flags & MSG_MORE) || forced_push(tp)) tcp_mark_push(tp, skb); @@ -964,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, int copy, i; bool can_coalesce; - if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 || + if (!skb || (copy = size_goal - skb->len) <= 0 || !tcp_skb_can_collapse_to(skb)) { new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, - skb_queue_empty(&sk->sk_write_queue)); + tcp_rtx_and_write_queues_empty(sk)); if (!skb) goto wait_for_memory; @@ -1199,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) goto out_err; } - skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL; + skb = tcp_write_queue_tail(sk); uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb)); if (!uarg) { err = -ENOBUFS; @@ -1275,7 +1275,7 @@ restart: int max = size_goal; skb = tcp_write_queue_tail(sk); - if (tcp_send_head(sk)) { + if (skb) { if (skb->ip_summed == CHECKSUM_NONE) max = mss_now; copy = max - skb->len; @@ -1295,7 +1295,7 @@ new_segment: process_backlog = false; goto restart; } - first_skb = skb_queue_empty(&sk->sk_write_queue); + first_skb = tcp_rtx_and_write_queues_empty(sk); skb = sk_stream_alloc_skb(sk, select_size(sk, sg, first_skb), sk->sk_allocation, @@ -1521,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) /* XXX -- need to support SO_PEEK_OFF */ + skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { + err = skb_copy_datagram_msg(skb, 0, msg, skb->len); + if (err) + return err; + copied += skb->len; + } + skb_queue_walk(&sk->sk_write_queue, skb) { err = skb_copy_datagram_msg(skb, 0, msg, skb->len); if (err) @@ -2320,6 +2327,22 @@ static inline bool tcp_need_reset(int state) TCPF_FIN_WAIT2 | TCPF_SYN_RECV); } +static void tcp_rtx_queue_purge(struct sock *sk) +{ + struct rb_node *p = rb_first(&sk->tcp_rtx_queue); + + while (p) { + struct sk_buff *skb = rb_to_skb(p); + + p = rb_next(p); + /* Since we are deleting whole queue, no need to + * list_del(&skb->tcp_tsorted_anchor) + */ + tcp_rtx_queue_unlink(skb, sk); + sk_wmem_free_skb(sk, skb); + } +} + void tcp_write_queue_purge(struct sock *sk) { struct sk_buff *skb; @@ -2329,6 +2352,7 @@ void tcp_write_queue_purge(struct sock *sk) tcp_skb_tsorted_anchor_cleanup(skb); sk_wmem_free_skb(sk, skb); } + tcp_rtx_queue_purge(sk); INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); @@ -2392,7 +2416,6 @@ int tcp_disconnect(struct sock *sk, int flags) * issue in __tcp_select_window() */ icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; - tcp_init_send_head(sk); 
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); dst_release(sk->sk_rx_dst); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 72c4732ae2da..d0682ce2a5d6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1142,6 +1142,7 @@ struct tcp_sacktag_state { u64 last_sackt; struct rate_sample *rate; int flag; + unsigned int mss_now; }; /* Check if skb is fully within the SACK block. In presence of GSO skbs, @@ -1191,7 +1192,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, if (pkt_len >= skb->len && !in_sack) return 0; - err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); + err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, + pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; } @@ -1363,8 +1365,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp)) TCP_SKB_CB(prev)->tx.delivered_mstamp = 0; - tcp_unlink_write_queue(skb, sk); - sk_wmem_free_skb(sk, skb); + tcp_rtx_queue_unlink_and_free(skb, sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED); @@ -1414,9 +1415,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, goto fallback; /* Can only happen with delayed DSACK + discard craziness */ - if (unlikely(skb == tcp_write_queue_head(sk))) + prev = skb_rb_prev(skb); + if (!prev) goto fallback; - prev = tcp_write_queue_prev(sk, skb); if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) goto fallback; @@ -1501,12 +1502,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, /* Hole filled allows collapsing with the next as well, this is very * useful when hole on every nth skb pattern happens */ - if (prev == tcp_write_queue_tail(sk)) + skb = skb_rb_next(prev); + if (!skb) goto out; - skb = tcp_write_queue_next(sk, prev); if (!skb_can_shift(skb) || - (skb == tcp_send_head(sk)) || ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) || (mss != tcp_skb_seglen(skb))) goto out; @@ -1539,13 +1539,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *tmp; - tcp_for_write_queue_from(skb, sk) { + skb_rbtree_walk_from(skb) { int in_sack = 0; bool dup_sack = dup_sack_in; - if (skb == tcp_send_head(sk)) - break; - /* queue is in-order => we can short-circuit the walk early */ if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; @@ -1607,23 +1604,44 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, return skb; } -/* Avoid all extra work that is being done by sacktag while walking in - * a normal way - */ +static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, + struct tcp_sacktag_state *state, + u32 seq) +{ + struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node; + struct sk_buff *skb; + int unack_bytes; + + while (*p) { + parent = *p; + skb = rb_to_skb(parent); + if (before(seq, TCP_SKB_CB(skb)->seq)) { + p = &parent->rb_left; + continue; + } + if (!before(seq, TCP_SKB_CB(skb)->end_seq)) { + p = &parent->rb_right; + continue; + } + + state->fack_count = 0; + unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una; + if (state->mss_now && unack_bytes > 0) + state->fack_count = unack_bytes / state->mss_now; + + return skb; + } + return NULL; +} + static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, struct tcp_sacktag_state *state, u32 skip_to_seq) { - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; + if (skb && 
after(TCP_SKB_CB(skb)->seq, skip_to_seq)) + return skb; - if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) - break; - - state->fack_count += tcp_skb_pcount(skb); - } - return skb; + return tcp_sacktag_bsearch(sk, state, skip_to_seq); } static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, @@ -1745,8 +1763,9 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } } - skb = tcp_write_queue_head(sk); + state->mss_now = tcp_current_mss(sk); state->fack_count = 0; + skb = NULL; i = 0; if (!tp->sacked_out) { @@ -1970,7 +1989,7 @@ void tcp_enter_loss(struct sock *sk) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); @@ -1979,10 +1998,7 @@ void tcp_enter_loss(struct sock *sk) } tcp_clear_all_retrans_hints(tp); - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - + skb_rbtree_walk_from(skb) { mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || is_reneg); if (mark_lost) @@ -2215,13 +2231,11 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) return; cnt = tp->lost_cnt_hint; } else { - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); cnt = 0; } - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; + skb_rbtree_walk_from(skb) { /* TODO: do this better */ /* this is not the most efficient way to do this... */ tp->lost_skb_hint = skb; @@ -2245,7 +2259,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) /* If needed, chop off the prefix to mark as lost. */ lost = (packets - oldcnt) * mss; if (lost < skb->len && - tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0) + tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, + lost, mss, GFP_ATOMIC) < 0) break; cnt = packets; } @@ -2329,7 +2344,7 @@ static bool tcp_any_retrans_done(const struct sock *sk) if (tp->retrans_out) return true; - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) return true; @@ -2370,9 +2385,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) if (unmark_loss) { struct sk_buff *skb; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; + skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } tp->lost_out = 0; @@ -2617,9 +2630,7 @@ void tcp_simple_retransmit(struct sock *sk) unsigned int mss = tcp_current_mss(sk); u32 prior_lost = tp->lost_out; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; + skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { if (tcp_skb_seglen(skb) > mss && !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { @@ -2713,7 +2724,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, * is updated in tcp_ack()). Otherwise fall back to * the conventional recovery. 
*/ - if (tcp_send_head(sk) && + if (!tcp_write_queue_empty(sk) && after(tcp_wnd_end(tp), tp->snd_nxt)) { *rexmit = REXMIT_NEW; return; @@ -3077,11 +3088,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; + struct sk_buff *skb, *next; bool fully_acked = true; long sack_rtt_us = -1L; long seq_rtt_us = -1L; long ca_rtt_us = -1L; - struct sk_buff *skb; u32 pkts_acked = 0; u32 last_in_flight = 0; bool rtt_update; @@ -3089,7 +3100,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, first_ackt = 0; - while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { + for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); u8 sacked = scb->sacked; u32 acked_pcount; @@ -3107,8 +3118,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, break; fully_acked = false; } else { - /* Speedup tcp_unlink_write_queue() and next loop */ - prefetchw(skb->next); acked_pcount = tcp_skb_pcount(skb); } @@ -3160,12 +3169,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (!fully_acked) break; - tcp_unlink_write_queue(skb, sk); - sk_wmem_free_skb(sk, skb); + next = skb_rb_next(skb); if (unlikely(skb == tp->retransmit_skb_hint)) tp->retransmit_skb_hint = NULL; if (unlikely(skb == tp->lost_skb_hint)) tp->lost_skb_hint = NULL; + tcp_rtx_queue_unlink_and_free(skb, sk); } if (!skb) @@ -3257,12 +3266,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, static void tcp_ack_probe(struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *head = tcp_send_head(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Was it a usable window open? */ - - if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) { + if (!head) + return; + if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) { icsk->icsk_backoff = 0; inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). @@ -3382,7 +3393,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 tp->pred_flags = 0; tcp_fast_path_check(sk); - if (tcp_send_head(sk)) + if (!tcp_write_queue_empty(sk)) tcp_slow_start_after_idle_check(sk); if (nwin > tp->max_window) { @@ -3567,8 +3578,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) sack_state.first_sackt = 0; sack_state.rate = &rs; - /* We very likely will need to access write queue head. */ - prefetchw(sk->sk_write_queue.next); + /* We very likely will need to access rtx queue. */ + prefetch(sk->tcp_rtx_queue.rb_node); /* If the ack is older than previous acks * then we can probably ignore it. @@ -3682,8 +3693,7 @@ no_queue: * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. 
*/ - if (tcp_send_head(sk)) - tcp_ack_probe(sk); + tcp_ack_probe(sk); if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@ -4726,7 +4736,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, } /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */ -static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) +void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -5530,7 +5540,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, struct tcp_fastopen_cookie *cookie) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; + struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL; u16 mss = tp->rx_opt.mss_clamp, try_exp = 0; bool syn_drop = false; @@ -5565,9 +5575,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); if (data) { /* Retransmit unacked data in SYN */ - tcp_for_write_queue_from(data, sk) { - if (data == tcp_send_head(sk) || - __tcp_retransmit_skb(sk, data, 1)) + skb_rbtree_walk_from(data) { + if (__tcp_retransmit_skb(sk, data, 1)) break; } tcp_rearm_rto(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c7460fd90884..5418ecf03b78 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -480,7 +480,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) TCP_TIMEOUT_INIT; icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); BUG_ON(!skb); tcp_mstamp_refresh(tp); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8162e2880178..696b0a168f16 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -66,15 +66,17 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); /* Account for new data that has been sent to the network. */ -static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) +static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; - tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + __skb_unlink(skb, &sk->sk_write_queue); + tcp_rbtree_insert(&sk->tcp_rtx_queue, skb); + tp->packets_out += tcp_skb_pcount(skb); if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -1249,12 +1251,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2) TCP_SKB_CB(skb)->eor = 0; } +/* Insert buff after skb on the write or rtx queue of sk. */ +static void tcp_insert_write_queue_after(struct sk_buff *skb, + struct sk_buff *buff, + struct sock *sk, + enum tcp_queue tcp_queue) +{ + if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE) + __skb_queue_after(&sk->sk_write_queue, skb, buff); + else + tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. * Remember, these are still headerless SKBs at this point. 
*/ -int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, +int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, + struct sk_buff *skb, u32 len, unsigned int mss_now, gfp_t gfp) { struct tcp_sock *tp = tcp_sk(sk); @@ -1337,7 +1352,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, /* Link BUFF into the send queue. */ __skb_header_release(buff); - tcp_insert_write_queue_after(skb, buff, sk); + tcp_insert_write_queue_after(skb, buff, sk, tcp_queue); list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor); return 0; @@ -1625,10 +1640,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) * is caused by insufficient sender buffer: * 1) just sent some data (see tcp_write_xmit) * 2) not cwnd limited (this else condition) - * 3) no more data to send (null tcp_send_head ) + * 3) no more data to send (tcp_write_queue_empty()) * 4) application is hitting buffer limit (SOCK_NOSPACE) */ - if (!tcp_send_head(sk) && sk->sk_socket && + if (tcp_write_queue_empty(sk) && sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); @@ -1824,7 +1839,8 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp, * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ -static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, +static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, + struct sk_buff *skb, unsigned int len, unsigned int mss_now, gfp_t gfp) { struct sk_buff *buff; @@ -1833,7 +1849,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* All of a TSO frame must be composed of paged data. */ if (skb->len != skb->data_len) - return tcp_fragment(sk, skb, len, mss_now, gfp); + return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp); buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) @@ -1869,7 +1885,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* Link BUFF into the send queue. */ __skb_header_release(buff); - tcp_insert_write_queue_after(skb, buff, sk); + tcp_insert_write_queue_after(skb, buff, sk, tcp_queue); return 0; } @@ -1939,8 +1955,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, goto send_now; } - head = tcp_write_queue_head(sk); - + /* TODO : use tsorted_sent_queue ? */ + head = tcp_rtx_queue_head(sk); + if (!head) + goto send_now; age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp); /* If next ACK is likely to come too late (half srtt), do not defer */ if (age < (tp->srtt_us >> 4)) @@ -2158,13 +2176,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit <<= factor; if (refcount_read(&sk->sk_wmem_alloc) > limit) { - /* Always send the 1st or 2nd skb in write queue. + /* Always send skb if rtx queue is empty. * No need to wait for TX completion to call us back, * after softirq/tasklet schedule. * This helps when TX completions are delayed too much. */ - if (skb == sk->sk_write_queue.next || - skb->prev == sk->sk_write_queue.next) + if (tcp_rtx_queue_empty(sk)) return false; set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); @@ -2215,7 +2232,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) * it's the "most interesting" or current chrono we are * tracking and starts busy chrono if we have pending data. 
*/ - if (tcp_write_queue_empty(sk)) + if (tcp_rtx_and_write_queues_empty(sk)) tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); else if (type == tp->chrono_type) tcp_chrono_set(tp, TCP_CHRONO_BUSY); @@ -2310,7 +2327,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, nonagle); if (skb->len > limit && - unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) + unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, + skb, limit, mss_now, gfp))) break; if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) @@ -2350,7 +2368,7 @@ repair: tcp_cwnd_validate(sk, is_cwnd_limited); return false; } - return !tp->packets_out && tcp_send_head(sk); + return !tp->packets_out && !tcp_write_queue_empty(sk); } bool tcp_schedule_loss_probe(struct sock *sk) @@ -2374,7 +2392,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) return false; if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && - tcp_send_head(sk)) + !tcp_write_queue_empty(sk)) return false; /* Probe timeout is 2*rtt. Add minimum RTO to account @@ -2427,18 +2445,14 @@ void tcp_send_loss_probe(struct sock *sk) int mss = tcp_current_mss(sk); skb = tcp_send_head(sk); - if (skb) { - if (tcp_snd_wnd_test(tp, skb, mss)) { - pcount = tp->packets_out; - tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); - if (tp->packets_out > pcount) - goto probe_sent; - goto rearm_timer; - } - skb = tcp_write_queue_prev(sk, skb); - } else { - skb = tcp_write_queue_tail(sk); + if (skb && tcp_snd_wnd_test(tp, skb, mss)) { + pcount = tp->packets_out; + tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); + if (tp->packets_out > pcount) + goto probe_sent; + goto rearm_timer; } + skb = skb_rb_last(&sk->tcp_rtx_queue); /* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) @@ -2456,10 +2470,11 @@ void tcp_send_loss_probe(struct sock *sk) goto rearm_timer; if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { - if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss, + if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, + (pcount - 1) * mss, mss, GFP_ATOMIC))) goto rearm_timer; - skb = tcp_write_queue_next(sk, skb); + skb = skb_rb_next(skb); } if (WARN_ON(!skb || !tcp_skb_pcount(skb))) @@ -2659,7 +2674,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); + struct sk_buff *next_skb = skb_rb_next(skb); int skb_size, next_skb_size; skb_size = skb->len; @@ -2676,8 +2691,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) } tcp_highest_sack_combine(sk, next_skb, skb); - tcp_unlink_write_queue(next_skb, sk); - if (next_skb->ip_summed == CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_PARTIAL; @@ -2705,7 +2718,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) tcp_skb_collapse_tstamp(skb, next_skb); - sk_wmem_free_skb(sk, next_skb); + tcp_rtx_queue_unlink_and_free(next_skb, sk); return true; } @@ -2716,8 +2729,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) return false; if (skb_cloned(skb)) return false; - if (skb == tcp_send_head(sk)) - return false; /* Some heuristics for collapsing over SACK'd could be invented */ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) return false; @@ -2740,7 +2751,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; - tcp_for_write_queue_from_safe(skb, tmp, sk) { + 
skb_rbtree_walk_from_safe(skb, tmp) { if (!tcp_can_collapse(sk, skb)) break; @@ -2815,7 +2826,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) len = cur_mss * segs; if (skb->len > len) { - if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC)) + if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len, + cur_mss, GFP_ATOMIC)) return -ENOMEM; /* We'll try again later. */ } else { if (skb_unclone(skb, GFP_ATOMIC)) @@ -2906,29 +2918,24 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) void tcp_xmit_retransmit_queue(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *skb, *rtx_head = NULL, *hole = NULL; struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - struct sk_buff *hole = NULL; u32 max_segs; int mib_idx; if (!tp->packets_out) return; - if (tp->retransmit_skb_hint) { - skb = tp->retransmit_skb_hint; - } else { - skb = tcp_write_queue_head(sk); + skb = tp->retransmit_skb_hint; + if (!skb) { + rtx_head = tcp_rtx_queue_head(sk); + skb = rtx_head; } - max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); - tcp_for_write_queue_from(skb, sk) { + skb_rbtree_walk_from(skb) { __u8 sacked; int segs; - if (skb == tcp_send_head(sk)) - break; - if (tcp_pacing_check(sk)) break; @@ -2973,7 +2980,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); - if (skb == tcp_write_queue_head(sk) && + if (skb == rtx_head && icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, @@ -3015,12 +3022,15 @@ void tcp_send_fin(struct sock *sk) * Note: in the latter case, FIN packet will be sent after a timeout, * as TCP stack thinks it has already been transmitted. */ - if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) { + if (!tskb && tcp_under_memory_pressure(sk)) + tskb = skb_rb_last(&sk->tcp_rtx_queue); + + if (tskb) { coalesce: TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; - if (!tcp_send_head(sk)) { + if (tcp_write_queue_empty(sk)) { /* This means tskb was already sent. * Pretend we included the FIN on previous transmit. 
* We need to set tp->snd_nxt to the value it would have @@ -3086,9 +3096,9 @@ int tcp_send_synack(struct sock *sk) { struct sk_buff *skb; - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { - pr_debug("%s: wrong queue state\n", __func__); + pr_err("%s: wrong queue state\n", __func__); return -EFAULT; } if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { @@ -3101,10 +3111,9 @@ int tcp_send_synack(struct sock *sk) if (!nskb) return -ENOMEM; INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor); - tcp_unlink_write_queue(skb, sk); + tcp_rtx_queue_unlink_and_free(skb, sk); __skb_header_release(nskb); - __tcp_add_write_queue_head(sk, nskb); - sk_wmem_free_skb(sk, skb); + tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); sk->sk_wmem_queued += nskb->truesize; sk_mem_charge(sk, nskb->truesize); skb = nskb; @@ -3327,7 +3336,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) tcb->end_seq += skb->len; __skb_header_release(skb); - __tcp_add_write_queue_tail(sk, skb); sk->sk_wmem_queued += skb->truesize; sk_mem_charge(sk, skb->truesize); tp->write_seq = tcb->end_seq; @@ -3405,12 +3413,13 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; if (!err) { tp->syn_data = (fo->copied > 0); + tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); goto done; } - /* data was not sent, this is our new send_head */ - sk->sk_send_head = syn_data; + /* data was not sent, put it in write_queue */ + __skb_queue_tail(&sk->sk_write_queue, syn_data); tp->packets_out -= tcp_skb_pcount(syn_data); fallback: @@ -3453,6 +3462,7 @@ int tcp_connect(struct sock *sk) tp->retrans_stamp = tcp_time_stamp(tp); tcp_connect_queue_skb(sk, buff); tcp_ecn_send_syn(sk, buff); + tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); /* Send off SYN; include data in Fast Open. */ err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : @@ -3647,7 +3657,8 @@ int tcp_write_wakeup(struct sock *sk, int mib) skb->len > mss) { seg_size = min(seg_size, mss); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; - if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC)) + if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, + skb, seg_size, mss, GFP_ATOMIC)) return -1; } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(skb, mss); @@ -3677,7 +3688,7 @@ void tcp_send_probe0(struct sock *sk) err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); - if (tp->packets_out || !tcp_send_head(sk)) { + if (tp->packets_out || tcp_write_queue_empty(sk)) { /* Cancel probe timer, if it is not required. 
*/ icsk->icsk_probes_out = 0; icsk->icsk_backoff = 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 655dd8d7f064..7014cc00c74c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -156,8 +156,13 @@ static bool retransmits_timed_out(struct sock *sk, return false; start_ts = tcp_sk(sk)->retrans_stamp; - if (unlikely(!start_ts)) - start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk)); + if (unlikely(!start_ts)) { + struct sk_buff *head = tcp_rtx_queue_head(sk); + + if (!head) + return false; + start_ts = tcp_skb_timestamp(head); + } if (likely(timeout == 0)) { linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); @@ -304,11 +309,12 @@ static void tcp_delack_timer(unsigned long data) static void tcp_probe_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *skb = tcp_send_head(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; u32 start_ts; - if (tp->packets_out || !tcp_send_head(sk)) { + if (tp->packets_out || !skb) { icsk->icsk_probes_out = 0; return; } @@ -321,9 +327,9 @@ static void tcp_probe_timer(struct sock *sk) * corresponding system limit. We also implement similar policy when * we use RTO to probe window in tcp_retransmit_timer(). */ - start_ts = tcp_skb_timestamp(tcp_send_head(sk)); + start_ts = tcp_skb_timestamp(skb); if (!start_ts) - tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp; + skb->skb_mstamp = tp->tcp_mstamp; else if (icsk->icsk_user_timeout && (s32)(tcp_time_stamp(tp) - start_ts) > jiffies_to_msecs(icsk->icsk_user_timeout)) @@ -408,7 +414,7 @@ void tcp_retransmit_timer(struct sock *sk) if (!tp->packets_out) goto out; - WARN_ON(tcp_write_queue_empty(sk)); + WARN_ON(tcp_rtx_queue_empty(sk)); tp->tlp_high_seq = 0; @@ -441,7 +447,7 @@ void tcp_retransmit_timer(struct sock *sk) goto out; } tcp_enter_loss(sk); - tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); + tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1); __sk_dst_reset(sk); goto out_reset_timer; } @@ -473,7 +479,7 @@ void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk); - if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) { + if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * do not backoff. */ @@ -647,7 +653,7 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp); /* It is alive without keepalive 8) */ - if (tp->packets_out || tcp_send_head(sk)) + if (tp->packets_out || !tcp_write_queue_empty(sk)) goto resched; elapsed = keepalive_time_elapsed(tp); From 0d7b70e83642f01c451a52faa3908e7b054ff7c6 Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Fri, 6 Oct 2017 15:48:30 -0400 Subject: [PATCH 0656/2177] bnxt_en: don't consider building bnxt_tc.o if option not enabled Instead of zeroing out bnxt_tc.c with a #ifdef foo, instead don't compile the file when the option is not enabled. Now make and the preprocessor do not have to waste time compiling a no-op. Signed-off-by: Jonathan Toppins Acked-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/Makefile | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index 4f0cb8e1ffc0..457201f409a7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile +++ b/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_BNXT) += bnxt_en.o -bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_tc.o +bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o +bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 7dd3d131043a..4730c048ed9b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -23,8 +23,6 @@ #include "bnxt_tc.h" #include "bnxt_vfr.h" -#ifdef CONFIG_BNXT_FLOWER_OFFLOAD - #define BNXT_FID_INVALID 0xffff #define VLAN_TCI(vid, prio) ((vid) | ((prio) << VLAN_PRIO_SHIFT)) @@ -833,6 +831,3 @@ void bnxt_shutdown_tc(struct bnxt *bp) rhashtable_destroy(&tc_info->flow_table); rhashtable_destroy(&tc_info->l2_table); } - -#else -#endif From 180ca444b985c42948fa26abd278e616b5ce7eb2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:05:56 -0700 Subject: [PATCH 0657/2177] ipv6: introduce a new function fib6_update_sernum() This function takes a route as input and tries to update the sernum in the fib6_node this route is associated with. It will be used in later commit when adding a cached route into the exception table under that route. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/ip6_fib.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index d060d711a624..152b7b14a5a5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -358,6 +358,8 @@ void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb); +void fib6_update_sernum(struct rt6_info *rt); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e5308d7cbd75..0ba4fbb2f855 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -110,6 +110,20 @@ enum { FIB6_NO_SERNUM_CHANGE = 0, }; +void fib6_update_sernum(struct rt6_info *rt) +{ + struct fib6_table *table = rt->rt6i_table; + struct net *net = dev_net(rt->dst.dev); + struct fib6_node *fn; + + write_lock_bh(&table->tb6_lock); + fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&table->tb6_lock)); + if (fn) + fn->fn_sernum = fib6_new_sernum(net); + write_unlock_bh(&table->tb6_lock); +} + /* * Auxiliary address test functions for the radix tree. * From 35732d01fe311ec13c4e42936878b782b8e7ea85 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:05:57 -0700 Subject: [PATCH 0658/2177] ipv6: introduce a hash table to store dst cache Add a hash table into struct rt6_info in order to store dst caches created by pmtu discovery and ip redirect in ipv6 routing code. APIs to add dst cache, delete dst cache, find dst cache and update dst cache in the hash table are implemented and will be used in later commits. 
This is a preparation work to move all cache routes into the exception table instead of getting inserted into the fib6 tree. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 19 +++ include/net/ip6_route.h | 3 + net/ipv6/route.c | 341 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 363 insertions(+) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 152b7b14a5a5..c4864c1e8f13 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -98,6 +98,22 @@ struct rt6key { struct fib6_table; +struct rt6_exception_bucket { + struct hlist_head chain; + int depth; +}; + +struct rt6_exception { + struct hlist_node hlist; + struct rt6_info *rt6i; + unsigned long stamp; + struct rcu_head rcu; +}; + +#define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10 +#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) +#define FIB6_MAX_DEPTH 5 + struct rt6_info { struct dst_entry dst; @@ -134,12 +150,15 @@ struct rt6_info { struct inet6_dev *rt6i_idev; struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; u32 rt6i_metric; u32 rt6i_pmtu; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; u8 rt6i_protocol; + u8 exception_bucket_flushed:1, + unused:7; }; static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ee96f402cb75..3315605f34c9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -95,6 +95,9 @@ int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); +void rt6_flush_exceptions(struct rt6_info *rt); +int rt6_remove_exception_rt(struct rt6_info *rt); + static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, const struct in6_addr *daddr, unsigned int prefs, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 26cc9f483b6d..dc5e70975966 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -104,6 +105,9 @@ static int rt6_fill_node(struct net *net, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); +static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, + struct in6_addr *daddr, + struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -392,6 +396,7 @@ EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_exception_bucket *bucket; struct dst_entry *from = dst->from; struct inet6_dev *idev; @@ -404,6 +409,11 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1); + if (bucket) { + rt->rt6i_exception_bucket = NULL; + kfree(bucket); + } dst->from = NULL; dst_release(from); @@ -1091,6 +1101,337 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) return pcpu_rt; } +/* exception hash table implementation + */ +static DEFINE_SPINLOCK(rt6_exception_lock); + +/* Remove rt6_ex from hash table and free the memory + * Caller must hold rt6_exception_lock + */ +static void rt6_remove_exception(struct rt6_exception_bucket *bucket, + struct rt6_exception *rt6_ex) +{ + if (!bucket || !rt6_ex) + return; + 
rt6_ex->rt6i->rt6i_node = NULL; + hlist_del_rcu(&rt6_ex->hlist); + rt6_release(rt6_ex->rt6i); + kfree_rcu(rt6_ex, rcu); + WARN_ON_ONCE(!bucket->depth); + bucket->depth--; +} + +/* Remove oldest rt6_ex in bucket and free the memory + * Caller must hold rt6_exception_lock + */ +static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) +{ + struct rt6_exception *rt6_ex, *oldest = NULL; + + if (!bucket) + return; + + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + if (!oldest || time_before(rt6_ex->stamp, oldest->stamp)) + oldest = rt6_ex; + } + rt6_remove_exception(bucket, oldest); +} + +static u32 rt6_exception_hash(const struct in6_addr *dst, + const struct in6_addr *src) +{ + static u32 seed __read_mostly; + u32 val; + + net_get_random_once(&seed, sizeof(seed)); + val = jhash(dst, sizeof(*dst), seed); + +#ifdef CONFIG_IPV6_SUBTREES + if (src) + val = jhash(src, sizeof(*src), val); +#endif + return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); +} + +/* Helper function to find the cached rt in the hash table + * and update bucket pointer to point to the bucket for this + * (daddr, saddr) pair + * Caller must hold rt6_exception_lock + */ +static struct rt6_exception * +__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket, + const struct in6_addr *daddr, + const struct in6_addr *saddr) +{ + struct rt6_exception *rt6_ex; + u32 hval; + + if (!(*bucket) || !daddr) + return NULL; + + hval = rt6_exception_hash(daddr, saddr); + *bucket += hval; + + hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) { + struct rt6_info *rt6 = rt6_ex->rt6i; + bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); + +#ifdef CONFIG_IPV6_SUBTREES + if (matched && saddr) + matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); +#endif + if (matched) + return rt6_ex; + } + return NULL; +} + +/* Helper function to find the cached rt in the hash table + * and update bucket pointer to point to the bucket for this + * (daddr, saddr) pair + * Caller must hold rcu_read_lock() + */ +static struct rt6_exception * +__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, + const struct in6_addr *daddr, + const struct in6_addr *saddr) +{ + struct rt6_exception *rt6_ex; + u32 hval; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!(*bucket) || !daddr) + return NULL; + + hval = rt6_exception_hash(daddr, saddr); + *bucket += hval; + + hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) { + struct rt6_info *rt6 = rt6_ex->rt6i; + bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); + +#ifdef CONFIG_IPV6_SUBTREES + if (matched && saddr) + matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); +#endif + if (matched) + return rt6_ex; + } + return NULL; +} + +static int rt6_insert_exception(struct rt6_info *nrt, + struct rt6_info *ort) +{ + struct rt6_exception_bucket *bucket; + struct in6_addr *src_key = NULL; + struct rt6_exception *rt6_ex; + int err = 0; + + /* ort can't be a cache or pcpu route */ + if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) + ort = (struct rt6_info *)ort->dst.from; + WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); + + spin_lock_bh(&rt6_exception_lock); + + if (ort->exception_bucket_flushed) { + err = -EINVAL; + goto out; + } + + bucket = rcu_dereference_protected(ort->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + if (!bucket) { + bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), + GFP_ATOMIC); + if (!bucket) { + err = -ENOMEM; + goto out; + } + rcu_assign_pointer(ort->rt6i_exception_bucket, bucket); + } + 
+#ifdef CONFIG_IPV6_SUBTREES + /* rt6i_src.plen != 0 indicates ort is in subtree + * and exception table is indexed by a hash of + * both rt6i_dst and rt6i_src. + * Otherwise, the exception table is indexed by + * a hash of only rt6i_dst. + */ + if (ort->rt6i_src.plen) + src_key = &nrt->rt6i_src.addr; +#endif + rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr, + src_key); + if (rt6_ex) + rt6_remove_exception(bucket, rt6_ex); + + rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC); + if (!rt6_ex) { + err = -ENOMEM; + goto out; + } + rt6_ex->rt6i = nrt; + rt6_ex->stamp = jiffies; + atomic_inc(&nrt->rt6i_ref); + nrt->rt6i_node = ort->rt6i_node; + hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); + bucket->depth++; + + if (bucket->depth > FIB6_MAX_DEPTH) + rt6_exception_remove_oldest(bucket); + +out: + spin_unlock_bh(&rt6_exception_lock); + + /* Update fn->fn_sernum to invalidate all cached dst */ + if (!err) + fib6_update_sernum(ort); + + return err; +} + +void rt6_flush_exceptions(struct rt6_info *rt) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + struct hlist_node *tmp; + int i; + + spin_lock_bh(&rt6_exception_lock); + /* Prevent rt6_insert_exception() to recreate the bucket list */ + rt->exception_bucket_flushed = 1; + + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + if (!bucket) + goto out; + + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) + rt6_remove_exception(bucket, rt6_ex); + WARN_ON_ONCE(bucket->depth); + bucket++; + } + +out: + spin_unlock_bh(&rt6_exception_lock); +} + +/* Find cached rt in the hash table inside passed in rt + * Caller has to hold rcu_read_lock() + */ +static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, + struct in6_addr *daddr, + struct in6_addr *saddr) +{ + struct rt6_exception_bucket *bucket; + struct in6_addr *src_key = NULL; + struct rt6_exception *rt6_ex; + struct rt6_info *res = NULL; + + bucket = rcu_dereference(rt->rt6i_exception_bucket); + +#ifdef CONFIG_IPV6_SUBTREES + /* rt6i_src.plen != 0 indicates rt is in subtree + * and exception table is indexed by a hash of + * both rt6i_dst and rt6i_src. + * Otherwise, the exception table is indexed by + * a hash of only rt6i_dst. + */ + if (rt->rt6i_src.plen) + src_key = saddr; +#endif + rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); + + if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) + res = rt6_ex->rt6i; + + return res; +} + +/* Remove the passed in cached rt from the hash table that contains it */ +int rt6_remove_exception_rt(struct rt6_info *rt) +{ + struct rt6_info *from = (struct rt6_info *)rt->dst.from; + struct rt6_exception_bucket *bucket; + struct in6_addr *src_key = NULL; + struct rt6_exception *rt6_ex; + int err; + + if (!from || + !(rt->rt6i_flags | RTF_CACHE)) + return -EINVAL; + + if (!rcu_access_pointer(from->rt6i_exception_bucket)) + return -ENOENT; + + spin_lock_bh(&rt6_exception_lock); + bucket = rcu_dereference_protected(from->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); +#ifdef CONFIG_IPV6_SUBTREES + /* rt6i_src.plen != 0 indicates 'from' is in subtree + * and exception table is indexed by a hash of + * both rt6i_dst and rt6i_src. + * Otherwise, the exception table is indexed by + * a hash of only rt6i_dst. 
+ */ + if (from->rt6i_src.plen) + src_key = &rt->rt6i_src.addr; +#endif + rt6_ex = __rt6_find_exception_spinlock(&bucket, + &rt->rt6i_dst.addr, + src_key); + if (rt6_ex) { + rt6_remove_exception(bucket, rt6_ex); + err = 0; + } else { + err = -ENOENT; + } + + spin_unlock_bh(&rt6_exception_lock); + return err; +} + +/* Find rt6_ex which contains the passed in rt cache and + * refresh its stamp + */ +static void rt6_update_exception_stamp_rt(struct rt6_info *rt) +{ + struct rt6_info *from = (struct rt6_info *)rt->dst.from; + struct rt6_exception_bucket *bucket; + struct in6_addr *src_key = NULL; + struct rt6_exception *rt6_ex; + + if (!from || + !(rt->rt6i_flags | RTF_CACHE)) + return; + + rcu_read_lock(); + bucket = rcu_dereference(from->rt6i_exception_bucket); + +#ifdef CONFIG_IPV6_SUBTREES + /* rt6i_src.plen != 0 indicates 'from' is in subtree + * and exception table is indexed by a hash of + * both rt6i_dst and rt6i_src. + * Otherwise, the exception table is indexed by + * a hash of only rt6i_dst. + */ + if (from->rt6i_src.plen) + src_key = &rt->rt6i_src.addr; +#endif + rt6_ex = __rt6_find_exception_rcu(&bucket, + &rt->rt6i_dst.addr, + src_key); + if (rt6_ex) + rt6_ex->stamp = jiffies; + + rcu_read_unlock(); +} + struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { From 60006a4825f9e71adf770745e17790c6e6c97a89 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:05:58 -0700 Subject: [PATCH 0659/2177] ipv6: prepare fib6_remove_prefsrc() for exception table After we move cached dst entries into the exception table under its parent route, current fib6_remove_prefsrc() no longer can access them. This commit makes fib6_remove_prefsrc() also go through all routes in the exception table to remove the pref src. This is a preparation patch in order to move all cached dst into the exception table. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc5e70975966..f52ac57dcc99 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1264,6 +1264,12 @@ static int rt6_insert_exception(struct rt6_info *nrt, if (ort->rt6i_src.plen) src_key = &nrt->rt6i_src.addr; #endif + + /* Update rt6i_prefsrc as it could be changed + * in rt6_remove_prefsrc() + */ + nrt->rt6i_prefsrc = ort->rt6i_prefsrc; + rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr, src_key); if (rt6_ex) @@ -1432,6 +1438,25 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) rcu_read_unlock(); } +static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + int i; + + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + + if (bucket) { + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + rt6_ex->rt6i->rt6i_prefsrc.plen = 0; + } + bucket++; + } + } +} + struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { @@ -3159,8 +3184,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) if (((void *)rt->dst.dev == dev || !dev) && rt != net->ipv6.ip6_null_entry && ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { + spin_lock_bh(&rt6_exception_lock); /* remove prefsrc entry */ rt->rt6i_prefsrc.plen = 0; + /* need to update cache as well */ + rt6_exceptions_remove_prefsrc(rt); + spin_unlock_bh(&rt6_exception_lock); } return 0; } From f5bbe7ee79c2842ca69f25afd0b6b65a9011b735 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:05:59 -0700 Subject: [PATCH 0660/2177] ipv6: prepare rt6_mtu_change() for exception table If we move all cached dst into the exception table under the main route, current rt6_mtu_change() will no longer be able to access them. This commit makes rt6_mtu_change_route() function to also go through all cached routes in the exception table under the main route and do proper updates on the mtu. This is a preparation in order to move all cached routes into the exception table. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f52ac57dcc99..d9805a857809 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1269,6 +1269,14 @@ static int rt6_insert_exception(struct rt6_info *nrt, * in rt6_remove_prefsrc() */ nrt->rt6i_prefsrc = ort->rt6i_prefsrc; + /* rt6_mtu_change() might lower mtu on ort. + * Only insert this exception route if its mtu + * is less than ort's mtu value. 
+ */ + if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) { + err = -EINVAL; + goto out; + } rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr, src_key); @@ -1457,6 +1465,32 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) } } +static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + int i; + + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + + if (bucket) { + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + struct rt6_info *entry = rt6_ex->rt6i; + /* For RTF_CACHE with rt6i_pmtu == 0 + * (i.e. a redirected route), + * the metrics of its rt->dst.from has already + * been updated. + */ + if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu) + entry->rt6i_pmtu = mtu; + } + bucket++; + } + } +} + struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { @@ -3296,6 +3330,10 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) if (rt->dst.dev == arg->dev && dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { + spin_lock_bh(&rt6_exception_lock); + /* This case will be removed once the exception table + * is hooked up. + */ if (rt->rt6i_flags & RTF_CACHE) { /* For RTF_CACHE with rt6i_pmtu == 0 * (i.e. a redirected route), @@ -3309,6 +3347,8 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) dst_mtu(&rt->dst) == idev->cnf.mtu6)) { dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); } + rt6_exceptions_update_pmtu(rt, arg->mtu); + spin_unlock_bh(&rt6_exception_lock); } return 0; } From b16cb459d77800dcb886b5e73e1beafd3d596897 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:00 -0700 Subject: [PATCH 0661/2177] ipv6: prepare rt6_clean_tohost() for exception table If we move all cached dst into the exception table under the main route, current rt6_clean_tohost() will no longer be able to access them. This commit makes fib6_clean_tohost() to also go through all cached routes in exception table and removes cached gateway routes to the passed in gateway. This is a preparation in order to move all cached routes into the exception table. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9805a857809..e8e901589564 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1491,6 +1491,43 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) } } +#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) + +static void rt6_exceptions_clean_tohost(struct rt6_info *rt, + struct in6_addr *gateway) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + struct hlist_node *tmp; + int i; + + if (!rcu_access_pointer(rt->rt6i_exception_bucket)) + return; + + spin_lock_bh(&rt6_exception_lock); + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + + if (bucket) { + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry_safe(rt6_ex, tmp, + &bucket->chain, hlist) { + struct rt6_info *entry = rt6_ex->rt6i; + + if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) == + RTF_CACHE_GATEWAY && + ipv6_addr_equal(gateway, + &entry->rt6i_gateway)) { + rt6_remove_exception(bucket, rt6_ex); + } + } + bucket++; + } + } + + spin_unlock_bh(&rt6_exception_lock); +} + struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { @@ -3240,18 +3277,27 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) } #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) -#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) /* Remove routers and update dst entries when gateway turn into host. */ static int fib6_clean_tohost(struct rt6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; + /* RTF_CACHE_GATEWAY case will be removed once the exception + * table is hooked up to store all cached routes. + */ if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { return -1; } + + /* Further clean up cached routes in exception table. + * This is needed because cached route may have a different + * gateway than its 'parent' in the case of an ip redirect. + */ + rt6_exceptions_clean_tohost(rt, gateway); + return 0; } From c757faa8bfa26a0dd24b41ff783e0da042156887 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:01 -0700 Subject: [PATCH 0662/2177] ipv6: prepare fib6_age() for exception table If all dst cache entries are stored in the exception table under the main route, we have to go through them during fib6_age() when doing garbage collecting. Introduce a new function rt6_age_exception() which goes through all dst entries in the exception table and remove those entries that are expired. This function is called in fib6_age() so that all dst caches are also garbage collected. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 13 +++++++++ include/net/ip6_route.h | 2 ++ net/ipv6/ip6_fib.c | 26 +++++++----------- net/ipv6/route.c | 60 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 17 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c4864c1e8f13..11a79ef87a28 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -29,6 +29,14 @@ #define FIB6_TABLE_HASHSZ 1 #endif +#define RT6_DEBUG 2 + +#if RT6_DEBUG >= 3 +#define RT6_TRACE(x...) pr_debug(x) +#else +#define RT6_TRACE(x...) 
do { ; } while (0) +#endif + struct rt6_info; struct fib6_config { @@ -75,6 +83,11 @@ struct fib6_node { struct rcu_head rcu; }; +struct fib6_gc_args { + int timeout; + int more; +}; + #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL #else diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 3315605f34c9..a0087fb9864b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -97,6 +97,8 @@ int ip6_del_rt(struct rt6_info *); void rt6_flush_exceptions(struct rt6_info *rt); int rt6_remove_exception_rt(struct rt6_info *rt); +void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, const struct in6_addr *daddr, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0ba4fbb2f855..3afbe50f2779 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -38,14 +38,6 @@ #include #include -#define RT6_DEBUG 2 - -#if RT6_DEBUG >= 3 -#define RT6_TRACE(x...) pr_debug(x) -#else -#define RT6_TRACE(x...) do { ; } while (0) -#endif - static struct kmem_cache *fib6_node_kmem __read_mostly; struct fib6_cleaner { @@ -1890,12 +1882,6 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -struct fib6_gc_args -{ - int timeout; - int more; -}; - static int fib6_age(struct rt6_info *rt, void *arg) { struct fib6_gc_args *gc_args = arg; @@ -1904,9 +1890,6 @@ static int fib6_age(struct rt6_info *rt, void *arg) /* * check addrconf expiration here. * Routes are expired even if they are in use. - * - * Also age clones. Note, that clones are aged out - * only if they are not in use now. */ if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { @@ -1915,6 +1898,9 @@ static int fib6_age(struct rt6_info *rt, void *arg) return -1; } gc_args->more++; + /* The following part will soon be removed when the exception + * table is hooked up to store all cached routes. + */ } else if (rt->rt6i_flags & RTF_CACHE) { if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1940,6 +1926,12 @@ static int fib6_age(struct rt6_info *rt, void *arg) gc_args->more++; } + /* Also age clones in the exception table. + * Note, that clones are aged out + * only if they are not in use now. 
+ */ + rt6_age_exceptions(rt, gc_args, now); + return 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8e901589564..d2dd55f58b5d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1528,6 +1528,66 @@ static void rt6_exceptions_clean_tohost(struct rt6_info *rt, spin_unlock_bh(&rt6_exception_lock); } +static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, + struct rt6_exception *rt6_ex, + struct fib6_gc_args *gc_args, + unsigned long now) +{ + struct rt6_info *rt = rt6_ex->rt6i; + + if (atomic_read(&rt->dst.__refcnt) == 1 && + time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { + RT6_TRACE("aging clone %p\n", rt); + rt6_remove_exception(bucket, rt6_ex); + return; + } else if (rt->rt6i_flags & RTF_GATEWAY) { + struct neighbour *neigh; + __u8 neigh_flags = 0; + + neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); + if (neigh) { + neigh_flags = neigh->flags; + neigh_release(neigh); + } + if (!(neigh_flags & NTF_ROUTER)) { + RT6_TRACE("purging route %p via non-router but gateway\n", + rt); + rt6_remove_exception(bucket, rt6_ex); + return; + } + } + gc_args->more++; +} + +void rt6_age_exceptions(struct rt6_info *rt, + struct fib6_gc_args *gc_args, + unsigned long now) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + struct hlist_node *tmp; + int i; + + if (!rcu_access_pointer(rt->rt6i_exception_bucket)) + return; + + spin_lock_bh(&rt6_exception_lock); + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + + if (bucket) { + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry_safe(rt6_ex, tmp, + &bucket->chain, hlist) { + rt6_age_examine_exception(bucket, rt6_ex, + gc_args, now); + } + bucket++; + } + } + spin_unlock_bh(&rt6_exception_lock); +} + struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { From 38fbeeeeccdb38d0635398e8e344d245f6d8dc52 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:02 -0700 Subject: [PATCH 0663/2177] ipv6: prepare fib6_locate() for exception table fib6_locate() is used to find the fib6_node according to the passed in prefix address key. It currently tries to find the fib6_node with the exact match of the passed in key. However, when we move cached routes into the exception table, fib6_locate() will fail to find the fib6_node for it as the cached routes will be stored in the exception table under the fib6_node with the longest prefix match of the cache's dst addr key. This commit adds a new parameter to let the caller specify if it needs exact match or longest prefix match. Right now, all callers still does exact match when calling fib6_locate(). It will be changed in later commit where exception table is hooked up to store cached routes. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 3 ++- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 30 +++++++++++++++++++++++------- net/ipv6/route.c | 5 +++-- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 11a79ef87a28..4497a1eb4d41 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -357,7 +357,8 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, - const struct in6_addr *saddr, int src_len); + const struct in6_addr *saddr, int src_len, + bool exact_match); void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 837418ff2d4b..3ccaf52824c9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2322,7 +2322,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, return NULL; read_lock_bh(&table->tb6_lock); - fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); + fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true); if (!fn) goto out; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 3afbe50f2779..b3e4cf0962f8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1343,14 +1343,21 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad /* * Get node with specified destination prefix (and source prefix, * if subtrees are used) + * exact_match == true means we try to find fn with exact match of + * the passed in prefix addr + * exact_match == false means we try to find fn with longest prefix + * match of the passed in prefix addr. This is useful for finding fn + * for cached route as it will be stored in the exception table under + * the node with longest prefix length. 
*/ static struct fib6_node *fib6_locate_1(struct fib6_node *root, const struct in6_addr *addr, - int plen, int offset) + int plen, int offset, + bool exact_match) { - struct fib6_node *fn; + struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); @@ -1360,11 +1367,13 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, */ if (plen < fn->fn_bit || !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) - return NULL; + goto out; if (plen == fn->fn_bit) return fn; + prev = fn; + /* * We have more bits to go */ @@ -1373,24 +1382,31 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, else fn = fn->left; } - return NULL; +out: + if (exact_match) + return NULL; + else + return prev; } struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, - const struct in6_addr *saddr, int src_len) + const struct in6_addr *saddr, int src_len, + bool exact_match) { struct fib6_node *fn; fn = fib6_locate_1(root, daddr, dst_len, - offsetof(struct rt6_info, rt6i_dst)); + offsetof(struct rt6_info, rt6i_dst), + exact_match); #ifdef CONFIG_IPV6_SUBTREES if (src_len) { WARN_ON(saddr == NULL); if (fn && fn->subtree) fn = fib6_locate_1(fn->subtree, saddr, src_len, - offsetof(struct rt6_info, rt6i_src)); + offsetof(struct rt6_info, rt6i_src), + exact_match); } #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d2dd55f58b5d..855b4ceec349 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2800,7 +2800,8 @@ static int ip6_route_del(struct fib6_config *cfg, fn = fib6_locate(&table->tb6_root, &cfg->fc_dst, cfg->fc_dst_len, - &cfg->fc_src, cfg->fc_src_len); + &cfg->fc_src, cfg->fc_src_len, + true); if (fn) { for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { @@ -3009,7 +3010,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, return NULL; read_lock_bh(&table->tb6_lock); - fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0); + fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true); if (!fn) goto out; From 2b760fcf5cfb34e8610df56d83745b2b74ae1379 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:03 -0700 Subject: [PATCH 0664/2177] ipv6: hook up exception table to store dst cache This commit makes use of the exception hash table implementation to store dst caches created by pmtu discovery and ip redirect into the hash table under the rt_info and no longer inserts these routes into fib6 tree. This makes the fib6 tree only contain static configured routes and could now be protected by rcu instead of a rw lock. With this change, in the route lookup related functions, after finding the rt6_info with the longest prefix, we also need to search for the exception table before doing backtracking. In the route delete function, if the route being deleted is not a dst cache, deletion of this route also need to flush the whole hash table under it. If it is a dst cache, then only delete the cached dst in the hash table. Note: for fib6_walk_continue() function, w->root now is always pointing to a root node considering that fib6_prune_clones() is removed from the code. So we add a WARN_ON() msg to make sure w->root always points to a root node and also removed the update of w->root in fib6_repair_tree(). This is a prerequisite for later patch because we don't need to make w->root as rcu protected when replacing rwlock with RCU. Also, we remove all prune related variables as it is no longer used. 
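As a rough illustration of the lookup order this change produces — backtrack past nodes with no usable route, then prefer a cached clone from the chosen route's exception table over the route itself — here is a small stand-alone sketch. The toy_route type and its fields are invented for illustration only; they are not the fib6 structures or kernel APIs.

#include <stdio.h>
#include <stddef.h>

struct toy_route {
	const char *name;		/* NULL means "no usable route at this node" */
	struct toy_route *cached;	/* stand-in for a hit in the exception table */
	struct toy_route *parent;	/* stand-in for backtracking in the fib6 tree */
};

static struct toy_route *toy_lookup(struct toy_route *best_match)
{
	struct toy_route *rt;

	for (rt = best_match; rt; rt = rt->parent) {
		if (!rt->name)
			continue;	/* nothing usable here: backtrack */
		/* prefer a cached clone (pmtu/redirect) over the route itself */
		return rt->cached ? rt->cached : rt;
	}
	return NULL;
}

int main(void)
{
	struct toy_route def    = { .name = "::/0 via gateway" };
	struct toy_route clone  = { .name = "pmtu clone for 2001:db8::1" };
	struct toy_route prefix = { .name = "2001:db8::/64", .cached = &clone,
				    .parent = &def };

	printf("%s\n", toy_lookup(&prefix)->name);	/* prints the clone */
	return 0;
}

The delete path mirrors this split: removing an RTF_CACHE entry only unlinks it from the hash table via rt6_remove_exception_rt(), while deleting the parent route flushes the whole table hanging off it via rt6_flush_exceptions().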
Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 1 - net/ipv6/addrconf.c | 1 - net/ipv6/ip6_fib.c | 95 +++++++------------------------------ net/ipv6/route.c | 108 +++++++++++++++++++++--------------------- 4 files changed, 72 insertions(+), 133 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 4497a1eb4d41..d0b7283073e3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -280,7 +280,6 @@ struct fib6_walker { struct fib6_node *root, *node; struct rt6_info *leaf; enum fib6_walk_state state; - bool prune; unsigned int skip; unsigned int count; int (*func)(struct fib6_walker *); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3ccaf52824c9..873afafddfc4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2326,7 +2326,6 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, if (!fn) goto out; - noflags |= RTF_CACHE; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->dst.dev->ifindex != dev->ifindex) continue; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b3e4cf0962f8..9c8e704e6af7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -54,7 +54,6 @@ struct fib6_cleaner { #define FWS_INIT FWS_L #endif -static void fib6_prune_clones(struct net *net, struct fib6_node *fn); static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); static int fib6_walk(struct net *net, struct fib6_walker *w); @@ -1101,6 +1100,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) return -EINVAL; + if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE)) + return -EINVAL; if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -1192,11 +1193,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, #endif err = fib6_add_rt2node(fn, rt, info, mxc); - if (!err) { + if (!err) fib6_start_gc(info->nl_net, rt); - if (!(rt->rt6i_flags & RTF_CACHE)) - fib6_prune_clones(info->nl_net, pn); - } out: if (err) { @@ -1511,19 +1509,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net, read_lock(&net->ipv6.fib6_walker_lock); FOR_WALKERS(net, w) { if (!child) { - if (w->root == fn) { - w->root = w->node = NULL; - RT6_TRACE("W %p adjusted by delroot 1\n", w); - } else if (w->node == fn) { + if (w->node == fn) { RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); w->node = pn; w->state = nstate; } } else { - if (w->root == fn) { - w->root = child; - RT6_TRACE("W %p adjusted by delroot 2\n", w); - } if (w->node == fn) { w->node = child; if (children&2) { @@ -1557,12 +1548,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, RT6_TRACE("fib6_del_route\n"); + WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); + /* Unlink it */ *rtp = rt->dst.rt6_next; rt->rt6i_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; + /* Flush all cached dst in exception table */ + rt6_flush_exceptions(rt); + /* Reset round-robin state, if necessary */ if (fn->rr_ptr == rt) fn->rr_ptr = NULL; @@ -1625,18 +1621,9 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - if (!(rt->rt6i_flags & RTF_CACHE)) { - struct fib6_node *pn = fn; -#ifdef CONFIG_IPV6_SUBTREES - /* clones of this route might be in another subtree */ - if (rt->rt6i_src.plen) { - while 
(!(pn->fn_flags & RTN_ROOT)) - pn = pn->parent; - pn = pn->parent; - } -#endif - fib6_prune_clones(info->nl_net, pn); - } + /* remove cached dst from exception table */ + if (rt->rt6i_flags & RTF_CACHE) + return rt6_remove_exception_rt(rt); /* * Walk the leaf entries looking for ourself @@ -1679,16 +1666,14 @@ static int fib6_walk_continue(struct fib6_walker *w) { struct fib6_node *fn, *pn; + /* w->root should always be table->tb6_root */ + WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT)); + for (;;) { fn = w->node; if (!fn) return 0; - if (w->prune && fn != w->root && - fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { - w->state = FWS_C; - w->leaf = fn->leaf; - } switch (w->state) { #ifdef CONFIG_IPV6_SUBTREES case FWS_S: @@ -1820,20 +1805,16 @@ static int fib6_clean_node(struct fib6_walker *w) * func is called on each route. * It may return -1 -> delete this route. * 0 -> continue walking - * - * prune==1 -> only immediate children of node (certainly, - * ignoring pure split nodes) will be scanned. */ static void fib6_clean_tree(struct net *net, struct fib6_node *root, int (*func)(struct rt6_info *, void *arg), - bool prune, int sernum, void *arg) + int sernum, void *arg) { struct fib6_cleaner c; c.w.root = root; c.w.func = fib6_clean_node; - c.w.prune = prune; c.w.count = 0; c.w.skip = 0; c.func = func; @@ -1858,7 +1839,7 @@ static void __fib6_clean_all(struct net *net, hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, false, sernum, arg); + func, sernum, arg); write_unlock_bh(&table->tb6_lock); } } @@ -1871,22 +1852,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *), __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); } -static int fib6_prune_clone(struct rt6_info *rt, void *arg) -{ - if (rt->rt6i_flags & RTF_CACHE) { - RT6_TRACE("pruning clone %p\n", rt); - return -1; - } - - return 0; -} - -static void fib6_prune_clones(struct net *net, struct fib6_node *fn) -{ - fib6_clean_tree(net, fn, fib6_prune_clone, true, - FIB6_NO_SERNUM_CHANGE, NULL); -} - static void fib6_flush_trees(struct net *net) { int new_sernum = fib6_new_sernum(net); @@ -1914,32 +1879,6 @@ static int fib6_age(struct rt6_info *rt, void *arg) return -1; } gc_args->more++; - /* The following part will soon be removed when the exception - * table is hooked up to store all cached routes. - */ - } else if (rt->rt6i_flags & RTF_CACHE) { - if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) - rt->dst.obsolete = DST_OBSOLETE_KILL; - if (atomic_read(&rt->dst.__refcnt) == 1 && - rt->dst.obsolete == DST_OBSOLETE_KILL) { - RT6_TRACE("aging clone %p\n", rt); - return -1; - } else if (rt->rt6i_flags & RTF_GATEWAY) { - struct neighbour *neigh; - __u8 neigh_flags = 0; - - neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); - if (neigh) { - neigh_flags = neigh->flags; - neigh_release(neigh); - } - if (!(neigh_flags & NTF_ROUTER)) { - RT6_TRACE("purging route %p via non-router but gateway\n", - rt); - return -1; - } - } - gc_args->more++; } /* Also age clones in the exception table. 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 855b4ceec349..65130dde276a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -878,8 +878,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, int flags) { + struct rt6_info *rt, *rt_cache; struct fib6_node *fn; - struct rt6_info *rt; read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); @@ -893,6 +893,11 @@ restart: if (fn) goto restart; } + /* Search through exception table */ + rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); + if (rt_cache) + rt = rt_cache; + dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); @@ -1592,7 +1597,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt; + struct rt6_info *rt, *rt_cache; int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -1624,6 +1629,10 @@ redo_rt6_select: } } + /*Search through exception table */ + rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); + if (rt_cache) + rt = rt_cache; if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { dst_use(&rt->dst, jiffies); @@ -1988,23 +1997,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (!rt6_cache_allowed_for_pmtu(rt6)) { rt6_do_update_pmtu(rt6, mtu); + /* update rt6_ex->stamp for cache */ + if (rt6->rt6i_flags & RTF_CACHE) + rt6_update_exception_stamp_rt(rt6); } else if (daddr) { struct rt6_info *nrt6; nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); - - /* ip6_ins_rt(nrt6) will bump the - * rt6->rt6i_node->fn_sernum - * which will fail the next rt6_check() and - * invalidate the sk->sk_dst_cache. - */ - ip6_ins_rt(nrt6); - /* Release the reference taken in - * ip6_rt_cache_alloc() - */ - dst_release(&nrt6->dst); + if (rt6_insert_exception(nrt6, rt6)) + dst_release_immediate(&nrt6->dst); } } } @@ -2068,7 +2071,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt; + struct rt6_info *rt, *rt_cache; struct fib6_node *fn; /* Get the "current" route for this destination and @@ -2093,8 +2096,23 @@ restart: continue; if (fl6->flowi6_oif != rt->dst.dev->ifindex) continue; - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) + /* rt_cache's gateway might be different from its 'parent' + * in the case of an ip redirect. + * So we keep searching in the exception table if the gateway + * is different. 
+ */ + if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { + rt_cache = rt6_find_cached_rt(rt, + &fl6->daddr, + &fl6->saddr); + if (rt_cache && + ipv6_addr_equal(&rdfl->gateway, + &rt_cache->rt6i_gateway)) { + rt = rt_cache; + break; + } continue; + } break; } @@ -2785,9 +2803,9 @@ out_put: static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { + struct rt6_info *rt, *rt_cache; struct fib6_table *table; struct fib6_node *fn; - struct rt6_info *rt; int err = -ESRCH; table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); @@ -2801,13 +2819,17 @@ static int ip6_route_del(struct fib6_config *cfg, fn = fib6_locate(&table->tb6_root, &cfg->fc_dst, cfg->fc_dst_len, &cfg->fc_src, cfg->fc_src_len, - true); + !(cfg->fc_flags & RTF_CACHE)); if (fn) { for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - if ((rt->rt6i_flags & RTF_CACHE) && - !(cfg->fc_flags & RTF_CACHE)) - continue; + if (cfg->fc_flags & RTF_CACHE) { + rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, + &cfg->fc_src); + if (!rt_cache) + continue; + rt = rt_cache; + } if (cfg->fc_ifindex && (!rt->dst.dev || rt->dst.dev->ifindex != cfg->fc_ifindex)) @@ -2933,8 +2955,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - if (ip6_ins_rt(nrt)) - goto out_release; + /* No need to remove rt from the exception table if rt is + * a cached route because rt6_insert_exception() will + * takes care of it + */ + if (rt6_insert_exception(nrt, rt)) { + dst_release_immediate(&nrt->dst); + goto out; + } netevent.old = &rt->dst; netevent.new = &nrt->dst; @@ -2942,17 +2970,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu netevent.neigh = neigh; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); - if (rt->rt6i_flags & RTF_CACHE) { - rt = (struct rt6_info *) dst_clone(&rt->dst); - ip6_del_rt(rt); - } - -out_release: - /* Release the reference taken in - * ip6_rt_cache_alloc() - */ - dst_release(&nrt->dst); - out: neigh_release(neigh); } @@ -3344,12 +3361,8 @@ static int fib6_clean_tohost(struct rt6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; - /* RTF_CACHE_GATEWAY case will be removed once the exception - * table is hooked up to store all cached routes. - */ - if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || - ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && - ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && + ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { return -1; } @@ -3438,20 +3451,9 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { spin_lock_bh(&rt6_exception_lock); - /* This case will be removed once the exception table - * is hooked up. - */ - if (rt->rt6i_flags & RTF_CACHE) { - /* For RTF_CACHE with rt6i_pmtu == 0 - * (i.e. a redirected route), - * the metrics of its rt->dst.from has already - * been updated. 
- */ - if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) - rt->rt6i_pmtu = arg->mtu; - } else if (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + if (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6)) { dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); } rt6_exceptions_update_pmtu(rt, arg->mtu); From a94b9367e044ba672c9f4105eb1516ff6ff4948a Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:04 -0700 Subject: [PATCH 0665/2177] ipv6: grab rt->rt6i_ref before allocating pcpu rt After rwlock is replaced with rcu and spinlock, ip6_pol_route() will be called with only rcu held. That means rt6 route deletion could happen simultaneously with rt6_make_pcpu_rt(). This could potentially cause memory leak if rt6_release() is called right before rt6_make_pcpu_rt() on the same route. This patch grabs rt->rt6i_ref safely before calling rt6_make_pcpu_rt() to make sure rt6_release() will not get triggered while rt6_make_pcpu_rt() is in progress. And rt6_release() is called after rt6_make_pcpu_rt() is finished. Note: As we are incrementing rt->rt6i_ref in ip6_pol_route(), there is a very slim chance that fib6_purge_rt() will be triggered unnecessarily when deleting a route if ip6_pol_route() running on another thread picks this route as well and tries to make pcpu cache for it. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 60 ++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 65130dde276a..941c062389d2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1070,7 +1070,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) { - struct fib6_table *table = rt->rt6i_table; struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); @@ -1081,28 +1080,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) return net->ipv6.ip6_null_entry; } - read_lock_bh(&table->tb6_lock); - if (rt->rt6i_pcpu) { - p = this_cpu_ptr(rt->rt6i_pcpu); - prev = cmpxchg(p, NULL, pcpu_rt); - if (prev) { - /* If someone did it before us, return prev instead */ - dst_release_immediate(&pcpu_rt->dst); - pcpu_rt = prev; - } - } else { - /* rt has been removed from the fib6 tree - * before we have a chance to acquire the read_lock. - * In this case, don't brother to create a pcpu rt - * since rt is going away anyway. The next - * dst_check() will trigger a re-lookup. - */ - dst_release_immediate(&pcpu_rt->dst); - pcpu_rt = rt; - } dst_hold(&pcpu_rt->dst); + p = this_cpu_ptr(rt->rt6i_pcpu); + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + /* release refcnt taken by ip6_rt_pcpu_alloc() */ + dst_release_immediate(&pcpu_rt->dst); + /* release refcnt taken by above dst_hold() */ + dst_release_immediate(&pcpu_rt->dst); + dst_hold(&prev->dst); + pcpu_rt = prev; + } + rt6_dst_from_metrics_check(pcpu_rt); - read_unlock_bh(&table->tb6_lock); return pcpu_rt; } @@ -1683,19 +1674,28 @@ redo_rt6_select: if (pcpu_rt) { read_unlock_bh(&table->tb6_lock); } else { - /* We have to do the read_unlock first - * because rt6_make_pcpu_route() may trigger - * ip6_dst_gc() which will take the write_lock. 
- */ - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - pcpu_rt = rt6_make_pcpu_route(rt); - dst_release(&rt->dst); + /* atomic_inc_not_zero() is needed when using rcu */ + if (atomic_inc_not_zero(&rt->rt6i_ref)) { + /* We have to do the read_unlock first + * because rt6_make_pcpu_route() may trigger + * ip6_dst_gc() which will take the write_lock. + * + * No dst_hold() on rt is needed because grabbing + * rt->rt6i_ref makes sure rt can't be released. + */ + read_unlock_bh(&table->tb6_lock); + pcpu_rt = rt6_make_pcpu_route(rt); + rt6_release(rt); + } else { + /* rt is already removed from tree */ + read_unlock_bh(&table->tb6_lock); + pcpu_rt = net->ipv6.ip6_null_entry; + dst_hold(&pcpu_rt->dst); + } } trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); return pcpu_rt; - } } EXPORT_SYMBOL_GPL(ip6_pol_route); From 51e398e86d61b69b9a4be49ff7f6afeb87530df1 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:05 -0700 Subject: [PATCH 0666/2177] ipv6: don't release rt->rt6i_pcpu memory during rt6_release() After rwlock is replaced with rcu and spinlock, route lookup can happen simultanously with route deletion. This patch removes the call to free_percpu(rt->rt6i_pcpu) from rt6_release() to avoid the race condition between rt6_release() and rt6_get_pcpu_route(). And as free_percpu(rt->rt6i_pcpu) is already called in ip6_dst_destroy() after the rcu grace period, it is safe to do this change. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9c8e704e6af7..eee392f7b1f6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -190,9 +190,6 @@ void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) *ppcpu_rt = NULL; } } - - free_percpu(non_pcpu_rt->rt6i_pcpu); - non_pcpu_rt->rt6i_pcpu = NULL; } EXPORT_SYMBOL_GPL(rt6_free_pcpu); From d3843fe5fd45be0e04a251a2cc68893c859a31bd Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:06 -0700 Subject: [PATCH 0667/2177] ipv6: replace dst_hold() with dst_hold_safe() in routing code With rwlock, it is safe to call dst_hold() in the read thread because read thread is guaranteed to be separated from write thread. However, after we replace rwlock with rcu, it is no longer safe to use dst_hold(). A dst might already have been deleted but is waiting for the rcu grace period to pass before freeing the memory when a read thread is trying to do dst_hold(). This could potentially cause double free issue. So this commit replaces all dst_hold() with dst_hold_safe() in all read thread to avoid this double free issue. And in order to make the code more compact, a new function ip6_hold_safe() is introduced. It calls dst_hold_safe() first, and if that fails, it will either fall back to hold and return net->ipv6.ip6_null_entry or set rt to NULL according to the caller's need. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 3 +- net/ipv6/route.c | 71 +++++++++++++++++++++++++++++++++------------ 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 873afafddfc4..f86e931d555e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2333,7 +2333,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, continue; if ((rt->rt6i_flags & noflags) != 0) continue; - dst_hold(&rt->dst); + if (!dst_hold_safe(&rt->dst)) + rt = NULL; break; } out: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 941c062389d2..aeb349aea429 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -874,6 +874,23 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn, } } +static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, + bool null_fallback) +{ + struct rt6_info *rt = *prt; + + if (dst_hold_safe(&rt->dst)) + return true; + if (null_fallback) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } else { + rt = NULL; + } + *prt = rt; + return false; +} + static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, int flags) @@ -898,7 +915,9 @@ restart: if (rt_cache) rt = rt_cache; - dst_use(&rt->dst, jiffies); + if (ip6_hold_safe(net, &rt, true)) + dst_use_noref(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); @@ -1061,10 +1080,9 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt) { - dst_hold(&pcpu_rt->dst); + if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false)) rt6_dst_from_metrics_check(pcpu_rt); - } + return pcpu_rt; } @@ -1625,12 +1643,17 @@ redo_rt6_select: if (rt_cache) rt = rt_cache; - if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { - dst_use(&rt->dst, jiffies); + if (rt == net->ipv6.ip6_null_entry) { + read_unlock_bh(&table->tb6_lock); + dst_hold(&rt->dst); + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + return rt; + } else if (rt->rt6i_flags & RTF_CACHE) { + if (ip6_hold_safe(net, &rt, true)) { + dst_use_noref(&rt->dst, jiffies); + rt6_dst_from_metrics_check(rt); + } read_unlock_bh(&table->tb6_lock); - - rt6_dst_from_metrics_check(rt); - trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && @@ -1643,7 +1666,13 @@ redo_rt6_select: struct rt6_info *uncached_rt; - dst_use(&rt->dst, jiffies); + if (ip6_hold_safe(net, &rt, true)) { + dst_use_noref(&rt->dst, jiffies); + } else { + read_unlock_bh(&table->tb6_lock); + uncached_rt = rt; + goto uncached_rt_out; + } read_unlock_bh(&table->tb6_lock); uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); @@ -1659,6 +1688,7 @@ redo_rt6_select: dst_hold(&uncached_rt->dst); } +uncached_rt_out: trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); return uncached_rt; @@ -1667,8 +1697,7 @@ redo_rt6_select: struct rt6_info *pcpu_rt; - rt->dst.lastuse = jiffies; - rt->dst.__use++; + dst_use_noref(&rt->dst, jiffies); pcpu_rt = rt6_get_pcpu_route(rt); if (pcpu_rt) { @@ -2130,7 +2159,7 @@ restart: } out: - dst_hold(&rt->dst); + ip6_hold_safe(net, &rt, true); read_unlock_bh(&table->tb6_lock); @@ -2841,7 +2870,8 @@ static int ip6_route_del(struct fib6_config *cfg, continue; if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) continue; - dst_hold(&rt->dst); + if (!dst_hold_safe(&rt->dst)) + break; read_unlock_bh(&table->tb6_lock); /* if 
gateway was specified only delete the one hop */ @@ -3038,7 +3068,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, continue; if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) continue; - dst_hold(&rt->dst); + ip6_hold_safe(NULL, &rt, false); break; } out: @@ -3096,7 +3126,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev break; } if (rt) - dst_hold(&rt->dst); + ip6_hold_safe(NULL, &rt, false); read_unlock_bh(&table->tb6_lock); return rt; } @@ -3139,9 +3169,12 @@ restart: for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt); + if (dst_hold_safe(&rt->dst)) { + read_unlock_bh(&table->tb6_lock); + ip6_del_rt(rt); + } else { + read_unlock_bh(&table->tb6_lock); + } goto restart; } } From bbd63f06d114a52be33f6982fc89ca2768cdeb62 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:07 -0700 Subject: [PATCH 0668/2177] ipv6: update fn_sernum after route is inserted to tree fib6_add() logic currently calls fib6_add_1() to figure out what node should be used for the newly added route and then call fib6_add_rt2node() to insert the route to the node. And during the call of fib6_add_1(), fn_sernum is updated for all nodes that share the same prefix as the new route. This does not have issue in the current code because reader thread will not be able to access the tree while writer thread is inserting new route to it. However, it is not the case once we transition to use RCU. Reader thread could potentially see the new fn_sernum before the new route is inserted. As a result, reader thread's route lookup will return a stale route with the new fn_sernum. In order to solve this issue, we remove all the update of fn_sernum in fib6_add_1(), and instead, introduce a new function that updates fn_sernum for all related nodes and call this functions once the route is successfully inserted to the tree. Also, smp_wmb() is used after a route is successfully inserted into the fib tree and right before the updated of fn->sernum. And smp_rmb() is used right after fn->sernum is accessed in rt6_get_cookie_safe(). This is to guarantee that when the reader thread sees the new fn->sernum, the new route is already inserted in the tree in memory. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/ip6_fib.c | 39 +++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index d0b7283073e3..6bf929b50951 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -220,6 +220,8 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, if (fn) { *cookie = fn->fn_sernum; + /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */ + smp_rmb(); status = true; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index eee392f7b1f6..f604b311cc3e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -585,7 +585,7 @@ out: static struct fib6_node *fib6_add_1(struct fib6_node *root, struct in6_addr *addr, int plen, int offset, int allow_create, - int replace_required, int sernum, + int replace_required, struct netlink_ext_ack *extack) { struct fib6_node *fn, *in, *ln; @@ -631,8 +631,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, fn->leaf = NULL; } - fn->fn_sernum = sernum; - return fn; } @@ -641,7 +639,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, */ /* Try to walk down on tree. */ - fn->fn_sernum = sernum; dir = addr_bit_set(addr, fn->fn_bit); pn = fn; fn = dir ? fn->right : fn->left; @@ -677,7 +674,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, ln->fn_bit = plen; ln->parent = pn; - ln->fn_sernum = sernum; if (dir) pn->right = ln; @@ -737,8 +733,6 @@ insert_above: in->leaf = fn->leaf; atomic_inc(&in->leaf->rt6i_ref); - in->fn_sernum = sernum; - /* update parent pointer */ if (dir) pn->right = in; @@ -750,8 +744,6 @@ insert_above: ln->parent = in; fn->parent = in; - ln->fn_sernum = sernum; - if (addr_bit_set(addr, bit)) { in->right = ln; in->left = fn; @@ -776,8 +768,6 @@ insert_above: ln->parent = pn; - ln->fn_sernum = sernum; - if (dir) pn->right = ln; else @@ -1079,6 +1069,20 @@ void fib6_force_start_gc(struct net *net) jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } +static void fib6_update_sernum_upto_root(struct rt6_info *rt, + int sernum) +{ + struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); + + /* paired with smp_rmb() in rt6_get_cookie_safe() */ + smp_wmb(); + while (fn) { + fn->fn_sernum = sernum; + fn = fn->parent; + } +} + /* * Add routing information to the routing tree. 
* <destination addr>/<source addr> @@ -1111,7 +1115,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), allow_create, - replace_required, sernum, extack); + replace_required, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); fn = NULL; @@ -1145,15 +1149,13 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, sfn->leaf = info->nl_net->ipv6.ip6_null_entry; atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); sfn->fn_flags = RTN_ROOT; - sfn->fn_sernum = sernum; /* Now add the first leaf node to new subtree */ sn = fib6_add_1(sfn, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required, sernum, - extack); + allow_create, replace_required, extack); if (IS_ERR(sn)) { /* If it is failed, discard just allocated @@ -1172,8 +1174,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required, sernum, - extack); + allow_create, replace_required, extack); if (IS_ERR(sn)) { err = PTR_ERR(sn); @@ -1190,8 +1191,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, #endif err = fib6_add_rt2node(fn, rt, info, mxc); - if (!err) + if (!err) { + fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); + } out: if (err) { From 8d1040e808bb2a5aeb4f0791b32bc7356a01ab11 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:08 -0700 Subject: [PATCH 0669/2177] ipv6: check fn->leaf before it is used If rwlock is replaced with rcu and spinlock, it is possible that the reader thread will see fn->leaf as NULL in the following scenarios: 1. fib6_add() is in progress and we have already inserted a new node but not yet inserted the route. 2. fib6_del_route() is in progress and we have already set fn->leaf to NULL but not yet freed the node because of the rcu grace period. This patch makes sure all the reader threads check fn->leaf first before using it. And together with a later patch that grabs rcu_read_lock() and uses rcu_dereference() on fn->leaf, it makes sure reader threads are safe when accessing fn->leaf. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 23 ++++++++++++++++++----- net/ipv6/route.c | 20 ++++++++++++-------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f604b311cc3e..cf6137e81408 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1279,10 +1279,13 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, while (fn) { if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { + struct rt6_info *leaf = fn->leaf; struct rt6key *key; - key = (struct rt6key *) ((u8 *) fn->leaf + - args->offset); + if (!leaf) + goto backtrack; + + key = (struct rt6key *) ((u8 *)leaf + args->offset); if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES @@ -1299,9 +1302,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, return fn; } } -#ifdef CONFIG_IPV6_SUBTREES backtrack: -#endif if (fn->fn_flags & RTN_ROOT) break; @@ -1358,7 +1359,18 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { - struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); + struct rt6_info *leaf = fn->leaf; + struct rt6key *key; + + /* This node is being deleted */ + if (!leaf) { + if (plen <= fn->fn_bit) + goto out; + else + goto next; + } + + key = (struct rt6key *)((u8 *)leaf + offset); /* * Prefix match @@ -1372,6 +1384,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, prev = fn; +next: /* * We have more bits to go */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index aeb349aea429..05dc450af441 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -712,6 +712,7 @@ out: } static struct rt6_info *find_rr_leaf(struct fib6_node *fn, + struct rt6_info *leaf, struct rt6_info *rr_head, u32 metric, int oif, int strict, bool *do_rr) @@ -730,7 +731,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = find_match(rt, oif, strict, &mpri, match, do_rr); } - for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { + for (rt = leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { if (rt->rt6i_metric != metric) { cont = rt; break; @@ -748,17 +749,21 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, return match; } -static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) +static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, + int oif, int strict) { + struct rt6_info *leaf = fn->leaf; struct rt6_info *match, *rt0; - struct net *net; bool do_rr = false; + if (!leaf) + return net->ipv6.ip6_null_entry; + rt0 = fn->rr_ptr; if (!rt0) - fn->rr_ptr = rt0 = fn->leaf; + fn->rr_ptr = rt0 = leaf; - match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, + match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, &do_rr); if (do_rr) { @@ -766,13 +771,12 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) - next = fn->leaf; + next = leaf; if (next != rt0) fn->rr_ptr = next; } - net = dev_net(rt0->dst.dev); return match ? 
match : net->ipv6.ip6_null_entry; } @@ -1623,7 +1627,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, oif = 0; redo_rt6_select: - rt = rt6_select(fn, oif, strict); + rt = rt6_select(net, fn, oif, strict); if (rt->rt6i_nsiblings) rt = rt6_multipath_select(rt, fl6, oif, strict); if (rt == net->ipv6.ip6_null_entry) { From 17ecf590b3cba19d9ecb410e340aa78128382abb Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:09 -0700 Subject: [PATCH 0670/2177] ipv6: add key length check into rt6_select() After rwlock is replaced with rcu and spinlock, fib6_lookup() could potentially return an intermediate node if another thread is doing fib6_del() on a route which is the only route on the node, so that fib6_repair_tree() will be called on this node and potentially assigns fn->leaf to its child's fn->leaf. In order to detect this situation in rt6_select(), we have to check if fn->fn_bit is consistent with the key length stored in the route. And depending on whether the fn is in the subtree or not, the key is either rt->rt6i_dst or rt->rt6i_src. If any inconsistency is found, that means the node no longer holds valid routes, so net->ipv6.ip6_null_entry is returned. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 05dc450af441..24b80f43bbfb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -755,6 +755,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, struct rt6_info *leaf = fn->leaf; struct rt6_info *match, *rt0; bool do_rr = false; + int key_plen; if (!leaf) return net->ipv6.ip6_null_entry; @@ -763,6 +764,19 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, if (!rt0) fn->rr_ptr = rt0 = leaf; + /* Double check to make sure fn is not an intermediate node + * and fn->leaf does not points to its child's leaf + * (This might happen if all routes under fn are deleted from + * the tree and fib6_repair_tree() is called on the node.) + */ + key_plen = rt0->rt6i_dst.plen; +#ifdef CONFIG_IPV6_SUBTREES + if (rt0->rt6i_src.plen) + key_plen = rt0->rt6i_src.plen; +#endif + if (fn->fn_bit != key_plen) + return net->ipv6.ip6_null_entry; + match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, &do_rr); From 66f5d6ce53e665477d2a33e8f539d4fa4ca81c83 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:10 -0700 Subject: [PATCH 0671/2177] ipv6: replace rwlock with rcu and spinlock in fib6_table With all the preceding preparation work, we are now ready to replace rwlock with rcu and spinlock in fib6_table. That means all fib6_node in fib6_table are now protected by rcu. And when freeing fib6_node, call_rcu() is used to wait for the rcu grace period before releasing the memory. When accessing fib6_node, corresponding rcu APIs need to be used. And all previous sections protected by the write lock will now be protected by the per-table spin lock. All previous sections protected by the read lock will now be protected by rcu_read_lock(). A couple of things to note here: 1. As part of the work of replacing rwlock with rcu, the linked list of fn->leaf now has to be rcu protected as well. So both fn->leaf and rt->dst.rt6_next are now __rcu tagged and corresponding rcu APIs are used when manipulating them. 2. 
For fn->rr_ptr, first of all, it also needs to be rcu protected now, so it is tagged with __rcu and rcu APIs are used in the corresponding places. Secondly, fn->rr_ptr is changed in rt6_select(), which runs in a reader thread. This makes the issue a bit complicated. We think a valid solution is to let rt6_select() grab the tb6_lock if it decides to change it. As this is not the normal operation and only happens when there is no valid neighbor cache for the route, we think the performance impact should be low. 3. fib6_walk_continue() has to be called with tb6_lock held even in the route dumping related functions, e.g. inet6_dump_fib(), fib6_tables_dump() and ipv6_route_seq_ops. This is because fib6_walk_continue() makes modifications to the walker structure, and so do fib6_repair_tree() and fib6_del_route(). In order to do proper syncing between them, we need to let fib6_walk_continue() hold the lock. We may be able to further improve the way we do the tree walk to get rid of the need for holding the spin lock, but not for now. 4. When fib6_del_route() removes a route from the tree, we no longer set rt->dst.rt6_next to NULL, so that a simultaneous reader is still able to traverse the rest of the list under rcu. However, rt->dst.rt6_next is only valid within this same rcu period. No one should access it later. 5. All atomic_inc(rt->rt6i_ref) operations are changed to be performed before we publish this route (either by linking it to fn->leaf or inserting it in the list pointed to by fn->leaf), just to be safe, because as soon as we publish the route, some reader thread will be able to access it. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- include/net/ip6_fib.h | 24 ++- net/ipv6/addrconf.c | 11 +- net/ipv6/ip6_fib.c | 405 +++++++++++++++++++++++++----------------- net/ipv6/route.c | 121 +++++++------ 5 files changed, 333 insertions(+), 230 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 06a6765da074..204c19e25456 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -101,7 +101,7 @@ struct dst_entry { union { struct dst_entry *next; struct rtable __rcu *rt_next; - struct rt6_info *rt6_next; + struct rt6_info __rcu *rt6_next; struct dn_route __rcu *dn_next; }; }; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6bf929b50951..0b438b9bcb10 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -68,18 +68,18 @@ struct fib6_config { }; struct fib6_node { - struct fib6_node *parent; - struct fib6_node *left; - struct fib6_node *right; + struct fib6_node __rcu *parent; + struct fib6_node __rcu *left; + struct fib6_node __rcu *right; #ifdef CONFIG_IPV6_SUBTREES - struct fib6_node *subtree; + struct fib6_node __rcu *subtree; #endif - struct rt6_info *leaf; + struct rt6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; - struct rt6_info *rr_ptr; + struct rt6_info __rcu *rr_ptr; struct rcu_head rcu; }; @@ -91,7 +91,7 @@ struct fib6_gc_args { #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL #else -#define FIB6_SUBTREE(fn) ((fn)->subtree) +#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif struct mx6_config { @@ -174,6 +174,14 @@ struct rt6_info { unused:7; }; +#define for_each_fib6_node_rt_rcu(fn) \ + for (rt = rcu_dereference((fn)->leaf); rt; \ + rt = rcu_dereference(rt->dst.rt6_next)) + +#define for_each_fib6_walker_rt(w) \ + for (rt = (w)->leaf; rt; \ + rt = rcu_dereference_protected(rt->dst.rt6_next, 
1)) + static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; @@ -310,7 +318,7 @@ struct rt6_statistics { struct fib6_table { struct hlist_node tb6_hlist; u32 tb6_id; - rwlock_t tb6_lock; + spinlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f86e931d555e..9854d93e45bb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2321,12 +2321,12 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, if (!table) return NULL; - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true); if (!fn) goto out; - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_node_rt_rcu(fn) { if (rt->dst.dev->ifindex != dev->ifindex) continue; if ((rt->rt6i_flags & flags) != flags) @@ -2338,7 +2338,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, break; } out: - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); return rt; } @@ -5898,10 +5898,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) spin_lock(&ifa->lock); if (ifa->rt) { struct rt6_info *rt = ifa->rt; - struct fib6_table *table = rt->rt6i_table; int cpu; - read_lock(&table->tb6_lock); + rcu_read_lock(); addrconf_set_nopolicy(ifa->rt, val); if (rt->rt6i_pcpu) { for_each_possible_cpu(cpu) { @@ -5911,7 +5910,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) addrconf_set_nopolicy(*rtp, val); } } - read_unlock(&table->tb6_lock); + rcu_read_unlock(); } spin_unlock(&ifa->lock); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index cf6137e81408..3f95908b39c3 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -54,8 +54,12 @@ struct fib6_cleaner { #define FWS_INIT FWS_L #endif -static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); -static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); +static struct rt6_info *fib6_find_prefix(struct net *net, + struct fib6_table *table, + struct fib6_node *fn); +static struct fib6_node *fib6_repair_tree(struct net *net, + struct fib6_table *table, + struct fib6_node *fn); static int fib6_walk(struct net *net, struct fib6_walker *w); static int fib6_walk_continue(struct fib6_walker *w); @@ -107,12 +111,12 @@ void fib6_update_sernum(struct rt6_info *rt) struct net *net = dev_net(rt->dst.dev); struct fib6_node *fn; - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); fn = rcu_dereference_protected(rt->rt6i_node, lockdep_is_held(&table->tb6_lock)); if (fn) fn->fn_sernum = fib6_new_sernum(net); - write_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); } /* @@ -207,8 +211,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb) * Initialize table lock at a single place to give lockdep a key, * tables aren't visible prior to being linked to the list. 
*/ - rwlock_init(&tb->tb6_lock); - + spin_lock_init(&tb->tb6_lock); h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); /* @@ -227,7 +230,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) table = kzalloc(sizeof(*table), GFP_ATOMIC); if (table) { table->tb6_id = id; - table->tb6_root.leaf = net->ipv6.ip6_null_entry; + rcu_assign_pointer(table->tb6_root.leaf, + net->ipv6.ip6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); } @@ -324,11 +328,8 @@ unsigned int fib6_tables_seq_read(struct net *net) struct hlist_head *head = &net->ipv6.fib_table_hash[h]; struct fib6_table *tb; - hlist_for_each_entry_rcu(tb, head, tb6_hlist) { - read_lock_bh(&tb->tb6_lock); + hlist_for_each_entry_rcu(tb, head, tb6_hlist) fib_seq += tb->fib_seq; - read_unlock_bh(&tb->tb6_lock); - } } rcu_read_unlock(); @@ -374,7 +375,7 @@ static int fib6_node_dump(struct fib6_walker *w) { struct rt6_info *rt; - for (rt = w->leaf; rt; rt = rt->dst.rt6_next) + for_each_fib6_walker_rt(w) fib6_rt_dump(rt, w->args); w->leaf = NULL; return 0; @@ -384,9 +385,9 @@ static void fib6_table_dump(struct net *net, struct fib6_table *tb, struct fib6_walker *w) { w->root = &tb->tb6_root; - read_lock_bh(&tb->tb6_lock); + spin_lock_bh(&tb->tb6_lock); fib6_walk(net, w); - read_unlock_bh(&tb->tb6_lock); + spin_unlock_bh(&tb->tb6_lock); } /* Called with rcu_read_lock() */ @@ -423,7 +424,7 @@ static int fib6_dump_node(struct fib6_walker *w) int res; struct rt6_info *rt; - for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_walker_rt(w) { res = rt6_dump_route(rt, w->args); if (res < 0) { /* Frame is full, suspend walking */ @@ -482,9 +483,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, w->count = 0; w->skip = 0; - read_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); res = fib6_walk(net, w); - read_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; cb->args[5] = w->root->fn_sernum; @@ -499,9 +500,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, } else w->skip = 0; - read_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); res = fib6_walk_continue(w); - read_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); if (res <= 0) { fib6_walker_unlink(net, w); cb->args[4] = 0; @@ -582,11 +583,12 @@ out: * node. 
*/ -static struct fib6_node *fib6_add_1(struct fib6_node *root, - struct in6_addr *addr, int plen, - int offset, int allow_create, - int replace_required, - struct netlink_ext_ack *extack) +static struct fib6_node *fib6_add_1(struct fib6_table *table, + struct fib6_node *root, + struct in6_addr *addr, int plen, + int offset, int allow_create, + int replace_required, + struct netlink_ext_ack *extack) { struct fib6_node *fn, *in, *ln; struct fib6_node *pn = NULL; @@ -601,7 +603,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, fn = root; do { - key = (struct rt6key *)((u8 *)fn->leaf + offset); + struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&table->tb6_lock)); + key = (struct rt6key *)((u8 *)leaf + offset); /* * Prefix match @@ -627,8 +631,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, if (plen == fn->fn_bit) { /* clean up an intermediate node */ if (!(fn->fn_flags & RTN_RTINFO)) { - rt6_release(fn->leaf); - fn->leaf = NULL; + RCU_INIT_POINTER(fn->leaf, NULL); + rt6_release(leaf); } return fn; @@ -641,7 +645,11 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, /* Try to walk down on tree. */ dir = addr_bit_set(addr, fn->fn_bit); pn = fn; - fn = dir ? fn->right : fn->left; + fn = dir ? + rcu_dereference_protected(fn->right, + lockdep_is_held(&table->tb6_lock)) : + rcu_dereference_protected(fn->left, + lockdep_is_held(&table->tb6_lock)); } while (fn); if (!allow_create) { @@ -672,13 +680,12 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, if (!ln) return ERR_PTR(-ENOMEM); ln->fn_bit = plen; - - ln->parent = pn; + RCU_INIT_POINTER(ln->parent, pn); if (dir) - pn->right = ln; + rcu_assign_pointer(pn->right, ln); else - pn->left = ln; + rcu_assign_pointer(pn->left, ln); return ln; @@ -692,7 +699,8 @@ insert_above: * and the current */ - pn = fn->parent; + pn = rcu_dereference_protected(fn->parent, + lockdep_is_held(&table->tb6_lock)); /* find 1st bit in difference between the 2 addrs. 
@@ -729,27 +737,28 @@ insert_above: in->fn_bit = bit; - in->parent = pn; + RCU_INIT_POINTER(in->parent, pn); in->leaf = fn->leaf; - atomic_inc(&in->leaf->rt6i_ref); + atomic_inc(&rcu_dereference_protected(in->leaf, + lockdep_is_held(&table->tb6_lock))->rt6i_ref); /* update parent pointer */ if (dir) - pn->right = in; + rcu_assign_pointer(pn->right, in); else - pn->left = in; + rcu_assign_pointer(pn->left, in); ln->fn_bit = plen; - ln->parent = in; - fn->parent = in; + RCU_INIT_POINTER(ln->parent, in); + rcu_assign_pointer(fn->parent, in); if (addr_bit_set(addr, bit)) { - in->right = ln; - in->left = fn; + rcu_assign_pointer(in->right, ln); + rcu_assign_pointer(in->left, fn); } else { - in->left = ln; - in->right = fn; + rcu_assign_pointer(in->left, ln); + rcu_assign_pointer(in->right, fn); } } else { /* plen <= bit */ @@ -766,19 +775,19 @@ insert_above: ln->fn_bit = plen; - ln->parent = pn; - - if (dir) - pn->right = ln; - else - pn->left = ln; + RCU_INIT_POINTER(ln->parent, pn); if (addr_bit_set(&key->addr, plen)) - ln->right = fn; + RCU_INIT_POINTER(ln->right, fn); else - ln->left = fn; + RCU_INIT_POINTER(ln->left, fn); - fn->parent = ln; + rcu_assign_pointer(fn->parent, ln); + + if (dir) + rcu_assign_pointer(pn->right, ln); + else + rcu_assign_pointer(pn->left, ln); } return ln; } @@ -824,6 +833,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, struct net *net) { + struct fib6_table *table = rt->rt6i_table; + if (atomic_read(&rt->rt6i_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, @@ -832,12 +843,17 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * to still alive ones. 
*/ while (fn) { - if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { - fn->leaf = fib6_find_prefix(net, fn); - atomic_inc(&fn->leaf->rt6i_ref); + struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&table->tb6_lock)); + struct rt6_info *new_leaf; + if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { + new_leaf = fib6_find_prefix(net, table, fn); + atomic_inc(&new_leaf->rt6i_ref); + rcu_assign_pointer(fn->leaf, new_leaf); rt6_release(rt); } - fn = fn->parent; + fn = rcu_dereference_protected(fn->parent, + lockdep_is_held(&table->tb6_lock)); } } } @@ -849,9 +865,11 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, struct nl_info *info, struct mx6_config *mxc) { + struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); struct rt6_info *iter = NULL; - struct rt6_info **ins; - struct rt6_info **fallback_ins = NULL; + struct rt6_info __rcu **ins; + struct rt6_info __rcu **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); int add = (!info->nlh || @@ -866,7 +884,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &fn->leaf; - for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { + for (iter = leaf; iter; + iter = rcu_dereference_protected(iter->dst.rt6_next, + lockdep_is_held(&rt->rt6i_table->tb6_lock))) { /* * Search for duplicates */ @@ -928,7 +948,8 @@ next_iter: if (fallback_ins && !found) { /* No ECMP-able route found, replace first non-ECMP one */ ins = fallback_ins; - iter = *ins; + iter = rcu_dereference_protected(*ins, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); found++; } @@ -942,7 +963,7 @@ next_iter: struct rt6_info *sibling, *temp_sibling; /* Find the first route that have the same metric */ - sibling = fn->leaf; + sibling = leaf; while (sibling) { if (sibling->rt6i_metric == rt->rt6i_metric && rt6_qualify_for_ecmp(sibling)) { @@ -950,7 +971,8 @@ next_iter: &sibling->rt6i_siblings); break; } - sibling = sibling->dst.rt6_next; + sibling = rcu_dereference_protected(sibling->dst.rt6_next, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); } /* For each sibling in the list, increment the counter of * siblings. 
BUG() if counters does not match, list of siblings @@ -979,10 +1001,10 @@ add: if (err) return err; - rt->dst.rt6_next = iter; - *ins = rt; - rcu_assign_pointer(rt->rt6i_node, fn); + rcu_assign_pointer(rt->dst.rt6_next, iter); atomic_inc(&rt->rt6i_ref); + rcu_assign_pointer(rt->rt6i_node, fn); + rcu_assign_pointer(*ins, rt); call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, rt); if (!info->skip_notify) @@ -1008,10 +1030,10 @@ add: if (err) return err; - *ins = rt; + atomic_inc(&rt->rt6i_ref); rcu_assign_pointer(rt->rt6i_node, fn); rt->dst.rt6_next = iter->dst.rt6_next; - atomic_inc(&rt->rt6i_ref); + rcu_assign_pointer(*ins, rt); call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt); if (!info->skip_notify) @@ -1023,14 +1045,15 @@ add: nsiblings = iter->rt6i_nsiblings; iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); - if (fn->rr_ptr == iter) + if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; rt6_release(iter); if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ ins = &rt->dst.rt6_next; - iter = *ins; + iter = rcu_dereference_protected(*ins, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); while (iter) { if (iter->rt6i_metric > rt->rt6i_metric) break; @@ -1038,14 +1061,15 @@ add: *ins = iter->dst.rt6_next; iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); - if (fn->rr_ptr == iter) + if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; rt6_release(iter); nsiblings--; } else { ins = &iter->dst.rt6_next; } - iter = *ins; + iter = rcu_dereference_protected(*ins, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); } WARN_ON(nsiblings != 0); } @@ -1079,7 +1103,8 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt, smp_wmb(); while (fn) { fn->fn_sernum = sernum; - fn = fn->parent; + fn = rcu_dereference_protected(fn->parent, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); } } @@ -1087,12 +1112,14 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt, * Add routing information to the routing tree. 
* / * with source addr info in sub-trees + * Need to own table->tb6_lock */ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, struct mx6_config *mxc, struct netlink_ext_ack *extack) { + struct fib6_table *table = rt->rt6i_table; struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; int allow_create = 1; @@ -1113,7 +1140,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!allow_create && !replace_required) pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); - fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, + fn = fib6_add_1(table, root, + &rt->rt6i_dst.addr, rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), allow_create, replace_required, extack); if (IS_ERR(fn)) { @@ -1128,7 +1156,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (rt->rt6i_src.plen) { struct fib6_node *sn; - if (!fn->subtree) { + if (!rcu_access_pointer(fn->subtree)) { struct fib6_node *sfn; /* @@ -1146,13 +1174,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!sfn) goto failure; - sfn->leaf = info->nl_net->ipv6.ip6_null_entry; atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); + rcu_assign_pointer(sfn->leaf, + info->nl_net->ipv6.ip6_null_entry); sfn->fn_flags = RTN_ROOT; /* Now add the first leaf node to new subtree */ - sn = fib6_add_1(sfn, &rt->rt6i_src.addr, + sn = fib6_add_1(table, sfn, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), allow_create, replace_required, extack); @@ -1168,10 +1197,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } /* Now link new subtree to main tree */ - sfn->parent = fn; - fn->subtree = sfn; + rcu_assign_pointer(sfn->parent, fn); + rcu_assign_pointer(fn->subtree, sfn); } else { - sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, + sn = fib6_add_1(table, FIB6_SUBTREE(fn), &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), allow_create, replace_required, extack); @@ -1182,9 +1211,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } } - if (!fn->leaf) { - fn->leaf = rt; + if (!rcu_access_pointer(fn->leaf)) { atomic_inc(&rt->rt6i_ref); + rcu_assign_pointer(fn->leaf, rt); } fn = sn; } @@ -1203,19 +1232,23 @@ out: * If fib6_add_1 has cleared the old leaf pointer in the * super-tree leaf node we have to find a new one for it. */ - if (pn != fn && pn->leaf == rt) { - pn->leaf = NULL; + struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, + lockdep_is_held(&table->tb6_lock)); + if (pn != fn && pn_leaf == rt) { + pn_leaf = NULL; + RCU_INIT_POINTER(pn->leaf, NULL); atomic_dec(&rt->rt6i_ref); } - if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn->leaf = fib6_find_prefix(info->nl_net, pn); + if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { + pn_leaf = fib6_find_prefix(info->nl_net, table, pn); #if RT6_DEBUG >= 2 - if (!pn->leaf) { - WARN_ON(pn->leaf == NULL); - pn->leaf = info->nl_net->ipv6.ip6_null_entry; + if (!pn_leaf) { + WARN_ON(!pn_leaf); + pn_leaf = info->nl_net->ipv6.ip6_null_entry; } #endif - atomic_inc(&pn->leaf->rt6i_ref); + atomic_inc(&pn_leaf->rt6i_ref); + rcu_assign_pointer(pn->leaf, pn_leaf); } #endif goto failure; @@ -1230,7 +1263,7 @@ failure: * fn->leaf. 
*/ if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) - fib6_repair_tree(info->nl_net, fn); + fib6_repair_tree(info->nl_net, table, fn); /* Always release dst as dst->__refcnt is guaranteed * to be taken before entering this function */ @@ -1268,7 +1301,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, dir = addr_bit_set(args->addr, fn->fn_bit); - next = dir ? fn->right : fn->left; + next = dir ? rcu_dereference(fn->right) : + rcu_dereference(fn->left); if (next) { fn = next; @@ -1278,8 +1312,10 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, } while (fn) { - if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { - struct rt6_info *leaf = fn->leaf; + struct fib6_node *subtree = FIB6_SUBTREE(fn); + + if (subtree || fn->fn_flags & RTN_RTINFO) { + struct rt6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; if (!leaf) @@ -1289,10 +1325,9 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) { + if (subtree) { struct fib6_node *sfn; - sfn = fib6_lookup_1(fn->subtree, - args + 1); + sfn = fib6_lookup_1(subtree, args + 1); if (!sfn) goto backtrack; fn = sfn; @@ -1306,12 +1341,14 @@ backtrack: if (fn->fn_flags & RTN_ROOT) break; - fn = fn->parent; + fn = rcu_dereference(fn->parent); } return NULL; } +/* called with rcu_read_lock() held + */ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr) { @@ -1359,7 +1396,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { - struct rt6_info *leaf = fn->leaf; + struct rt6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; /* This node is being deleted */ @@ -1389,9 +1426,9 @@ next: * We have more bits to go */ if (addr_bit_set(addr, fn->fn_bit)) - fn = fn->right; + fn = rcu_dereference(fn->right); else - fn = fn->left; + fn = rcu_dereference(fn->left); } out: if (exact_match) @@ -1413,9 +1450,11 @@ struct fib6_node *fib6_locate(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (src_len) { + struct fib6_node *subtree = FIB6_SUBTREE(fn); + WARN_ON(saddr == NULL); - if (fn && fn->subtree) - fn = fib6_locate_1(fn->subtree, saddr, src_len, + if (fn && subtree) + fn = fib6_locate_1(subtree, saddr, src_len, offsetof(struct rt6_info, rt6i_src), exact_match); } @@ -1433,16 +1472,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root, * */ -static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) +static struct rt6_info *fib6_find_prefix(struct net *net, + struct fib6_table *table, + struct fib6_node *fn) { + struct fib6_node *child_left, *child_right; + if (fn->fn_flags & RTN_ROOT) return net->ipv6.ip6_null_entry; while (fn) { - if (fn->left) - return fn->left->leaf; - if (fn->right) - return fn->right->leaf; + child_left = rcu_dereference_protected(fn->left, + lockdep_is_held(&table->tb6_lock)); + child_right = rcu_dereference_protected(fn->right, + lockdep_is_held(&table->tb6_lock)); + if (child_left) + return rcu_dereference_protected(child_left->leaf, + lockdep_is_held(&table->tb6_lock)); + if (child_right) + return rcu_dereference_protected(child_right->leaf, + lockdep_is_held(&table->tb6_lock)); fn = FIB6_SUBTREE(fn); } @@ -1452,31 +1501,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) /* * Called to trim the tree of intermediate nodes when possible. 
"fn" * is the node we want to try and remove. + * Need to own table->tb6_lock */ static struct fib6_node *fib6_repair_tree(struct net *net, - struct fib6_node *fn) + struct fib6_table *table, + struct fib6_node *fn) { int children; int nstate; - struct fib6_node *child, *pn; + struct fib6_node *child; struct fib6_walker *w; int iter = 0; for (;;) { + struct fib6_node *fn_r = rcu_dereference_protected(fn->right, + lockdep_is_held(&table->tb6_lock)); + struct fib6_node *fn_l = rcu_dereference_protected(fn->left, + lockdep_is_held(&table->tb6_lock)); + struct fib6_node *pn = rcu_dereference_protected(fn->parent, + lockdep_is_held(&table->tb6_lock)); + struct fib6_node *pn_r = rcu_dereference_protected(pn->right, + lockdep_is_held(&table->tb6_lock)); + struct fib6_node *pn_l = rcu_dereference_protected(pn->left, + lockdep_is_held(&table->tb6_lock)); + struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&table->tb6_lock)); + struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, + lockdep_is_held(&table->tb6_lock)); + struct rt6_info *new_fn_leaf; + RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; WARN_ON(fn->fn_flags & RTN_RTINFO); WARN_ON(fn->fn_flags & RTN_TL_ROOT); - WARN_ON(fn->leaf); + WARN_ON(fn_leaf); children = 0; child = NULL; - if (fn->right) - child = fn->right, children |= 1; - if (fn->left) - child = fn->left, children |= 2; + if (fn_r) + child = fn_r, children |= 1; + if (fn_l) + child = fn_l, children |= 2; if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES @@ -1484,36 +1551,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net, || (children && fn->fn_flags & RTN_ROOT) #endif ) { - fn->leaf = fib6_find_prefix(net, fn); + new_fn_leaf = fib6_find_prefix(net, table, fn); #if RT6_DEBUG >= 2 - if (!fn->leaf) { - WARN_ON(!fn->leaf); - fn->leaf = net->ipv6.ip6_null_entry; + if (!new_fn_leaf) { + WARN_ON(!new_fn_leaf); + new_fn_leaf = net->ipv6.ip6_null_entry; } #endif - atomic_inc(&fn->leaf->rt6i_ref); - return fn->parent; + atomic_inc(&new_fn_leaf->rt6i_ref); + rcu_assign_pointer(fn->leaf, new_fn_leaf); + return pn; } - pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { WARN_ON(!(fn->fn_flags & RTN_ROOT)); - FIB6_SUBTREE(pn) = NULL; + RCU_INIT_POINTER(pn->subtree, NULL); nstate = FWS_L; } else { WARN_ON(fn->fn_flags & RTN_ROOT); #endif - if (pn->right == fn) - pn->right = child; - else if (pn->left == fn) - pn->left = child; + if (pn_r == fn) + rcu_assign_pointer(pn->right, child); + else if (pn_l == fn) + rcu_assign_pointer(pn->left, child); #if RT6_DEBUG >= 2 else WARN_ON(1); #endif if (child) - child->parent = pn; + rcu_assign_pointer(child->parent, pn); nstate = FWS_R; #ifdef CONFIG_IPV6_SUBTREES } @@ -1546,17 +1613,18 @@ static struct fib6_node *fib6_repair_tree(struct net *net, if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; - rt6_release(pn->leaf); - pn->leaf = NULL; + RCU_INIT_POINTER(pn->leaf, NULL); + rt6_release(pn_leaf); fn = pn; } } -static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, - struct nl_info *info) +static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, + struct rt6_info __rcu **rtp, struct nl_info *info) { struct fib6_walker *w; - struct rt6_info *rt = *rtp; + struct rt6_info *rt = rcu_dereference_protected(*rtp, + lockdep_is_held(&table->tb6_lock)); struct net *net = info->nl_net; RT6_TRACE("fib6_del_route\n"); @@ -1573,7 +1641,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info 
**rtp, rt6_flush_exceptions(rt); /* Reset round-robin state, if necessary */ - if (fn->rr_ptr == rt) + if (rcu_access_pointer(fn->rr_ptr) == rt) fn->rr_ptr = NULL; /* Remove this entry from other siblings */ @@ -1592,20 +1660,19 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, FOR_WALKERS(net, w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rt->dst.rt6_next; + w->leaf = rcu_dereference_protected(rt->dst.rt6_next, + lockdep_is_held(&table->tb6_lock)); if (!w->leaf) w->state = FWS_U; } } read_unlock(&net->ipv6.fib6_walker_lock); - rt->dst.rt6_next = NULL; - /* If it was last route, expunge its radix tree node */ - if (!fn->leaf) { + if (!rcu_access_pointer(fn->leaf)) { fn->fn_flags &= ~RTN_RTINFO; net->ipv6.rt6_stats->fib_route_nodes--; - fn = fib6_repair_tree(net, fn); + fn = fib6_repair_tree(net, table, fn); } fib6_purge_rt(rt, fn, net); @@ -1616,12 +1683,15 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, rt6_release(rt); } +/* Need to own table->tb6_lock */ int fib6_del(struct rt6_info *rt, struct nl_info *info) { struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, lockdep_is_held(&rt->rt6i_table->tb6_lock)); + struct fib6_table *table = rt->rt6i_table; struct net *net = info->nl_net; - struct rt6_info **rtp; + struct rt6_info __rcu **rtp; + struct rt6_info __rcu **rtp_next; #if RT6_DEBUG >= 2 if (rt->dst.obsolete > 0) { @@ -1642,11 +1712,14 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * Walk the leaf entries looking for ourself */ - for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { - if (*rtp == rt) { - fib6_del_route(fn, rtp, info); + for (rtp = &fn->leaf; *rtp; rtp = rtp_next) { + struct rt6_info *cur = rcu_dereference_protected(*rtp, + lockdep_is_held(&table->tb6_lock)); + if (rt == cur) { + fib6_del_route(table, fn, rtp, info); return 0; } + rtp_next = &cur->dst.rt6_next; } return -ENOENT; } @@ -1673,11 +1746,13 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * 0 -> walk is complete. * >0 -> walk is incomplete (i.e. suspended) * <0 -> walk is terminated by an error. + * + * This function is called with tb6_lock held. 
*/ static int fib6_walk_continue(struct fib6_walker *w) { - struct fib6_node *fn, *pn; + struct fib6_node *fn, *pn, *left, *right; /* w->root should always be table->tb6_root */ WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT)); @@ -1697,20 +1772,22 @@ static int fib6_walk_continue(struct fib6_walker *w) w->state = FWS_L; #endif case FWS_L: - if (fn->left) { - w->node = fn->left; + left = rcu_dereference_protected(fn->left, 1); + if (left) { + w->node = left; w->state = FWS_INIT; continue; } w->state = FWS_R; case FWS_R: - if (fn->right) { - w->node = fn->right; + right = rcu_dereference_protected(fn->right, 1); + if (right) { + w->node = right; w->state = FWS_INIT; continue; } w->state = FWS_C; - w->leaf = fn->leaf; + w->leaf = rcu_dereference_protected(fn->leaf, 1); case FWS_C: if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; @@ -1732,7 +1809,9 @@ skip: case FWS_U: if (fn == w->root) return 0; - pn = fn->parent; + pn = rcu_dereference_protected(fn->parent, 1); + left = rcu_dereference_protected(pn->left, 1); + right = rcu_dereference_protected(pn->right, 1); w->node = pn; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { @@ -1741,13 +1820,13 @@ skip: continue; } #endif - if (pn->left == fn) { + if (left == fn) { w->state = FWS_R; continue; } - if (pn->right == fn) { + if (right == fn) { w->state = FWS_C; - w->leaf = w->node->leaf; + w->leaf = rcu_dereference_protected(w->node->leaf, 1); continue; } #if RT6_DEBUG >= 2 @@ -1790,7 +1869,7 @@ static int fib6_clean_node(struct fib6_walker *w) return 0; } - for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_walker_rt(w) { res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; @@ -1850,10 +1929,10 @@ static void __fib6_clean_all(struct net *net, for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, sernum, arg); - write_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); } } rcu_read_unlock(); @@ -1967,7 +2046,8 @@ static int __net_init fib6_net_init(struct net *net) goto out_fib_table_hash; net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; - net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; + rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf, + net->ipv6.ip6_null_entry); net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); @@ -1978,7 +2058,8 @@ static int __net_init fib6_net_init(struct net *net) if (!net->ipv6.fib6_local_tbl) goto out_fib6_main_tbl; net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; - net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; + rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf, + net->ipv6.ip6_null_entry); net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); @@ -2108,7 +2189,9 @@ static int ipv6_route_yield(struct fib6_walker *w) return 1; do { - iter->w.leaf = iter->w.leaf->dst.rt6_next; + iter->w.leaf = rcu_dereference_protected( + iter->w.leaf->dst.rt6_next, + lockdep_is_held(&iter->tbl->tb6_lock)); iter->skip--; if (!iter->skip && iter->w.leaf) return 1; @@ -2173,7 +2256,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!v) goto iter_table; - n = ((struct rt6_info *)v)->dst.rt6_next; + n = rcu_dereference(((struct rt6_info 
*)v)->dst.rt6_next); if (n) { ++*pos; return n; @@ -2181,9 +2264,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter_table: ipv6_route_check_sernum(iter); - read_lock(&iter->tbl->tb6_lock); + spin_lock_bh(&iter->tbl->tb6_lock); r = fib6_walk_continue(&iter->w); - read_unlock(&iter->tbl->tb6_lock); + spin_unlock_bh(&iter->tbl->tb6_lock); if (r > 0) { if (v) ++*pos; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 24b80f43bbfb..cf44d0994b1e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -488,7 +488,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, } /* - * Route lookup. Any table->tb6_lock is implied. + * Route lookup. rcu_read_lock() should be held. */ static inline struct rt6_info *rt6_device_match(struct net *net, @@ -503,7 +503,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (!oif && ipv6_addr_any(saddr)) goto out; - for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { + for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) { struct net_device *dev = sprt->dst.dev; if (oif) { @@ -722,7 +722,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; cont = NULL; - for (rt = rr_head; rt; rt = rt->dst.rt6_next) { + for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) { if (rt->rt6i_metric != metric) { cont = rt; break; @@ -731,7 +731,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = find_match(rt, oif, strict, &mpri, match, do_rr); } - for (rt = leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { + for (rt = leaf; rt && rt != rr_head; + rt = rcu_dereference(rt->dst.rt6_next)) { if (rt->rt6i_metric != metric) { cont = rt; break; @@ -743,7 +744,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, if (match || !cont) return match; - for (rt = cont; rt; rt = rt->dst.rt6_next) + for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next)) match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; @@ -752,7 +753,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, int oif, int strict) { - struct rt6_info *leaf = fn->leaf; + struct rt6_info *leaf = rcu_dereference(fn->leaf); struct rt6_info *match, *rt0; bool do_rr = false; int key_plen; @@ -760,9 +761,9 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, if (!leaf) return net->ipv6.ip6_null_entry; - rt0 = fn->rr_ptr; + rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) - fn->rr_ptr = rt0 = leaf; + rt0 = leaf; /* Double check to make sure fn is not an intermediate node * and fn->leaf does not points to its child's leaf @@ -781,14 +782,19 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, &do_rr); if (do_rr) { - struct rt6_info *next = rt0->dst.rt6_next; + struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next); /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) next = leaf; - if (next != rt0) - fn->rr_ptr = next; + if (next != rt0) { + spin_lock_bh(&leaf->rt6i_table->tb6_lock); + /* make sure next is not being deleted from the tree */ + if (next->rt6i_node) + rcu_assign_pointer(fn->rr_ptr, next); + spin_unlock_bh(&leaf->rt6i_table->tb6_lock); + } } return match ? 
match : net->ipv6.ip6_null_entry; @@ -878,13 +884,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, static struct fib6_node* fib6_backtrack(struct fib6_node *fn, struct in6_addr *saddr) { - struct fib6_node *pn; + struct fib6_node *pn, *sn; while (1) { if (fn->fn_flags & RTN_TL_ROOT) return NULL; - pn = fn->parent; - if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) - fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); + pn = rcu_dereference(fn->parent); + sn = FIB6_SUBTREE(pn); + if (sn && sn != fn) + fn = fib6_lookup(sn, NULL, saddr); else fn = pn; if (fn->fn_flags & RTN_RTINFO) @@ -916,13 +923,19 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct rt6_info *rt, *rt_cache; struct fib6_node *fn; - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - rt = fn->leaf; - rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); - if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); + rt = rcu_dereference(fn->leaf); + if (!rt) { + rt = net->ipv6.ip6_null_entry; + } else { + rt = rt6_device_match(net, rt, &fl6->saddr, + fl6->flowi6_oif, flags); + if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) + rt = rt6_multipath_select(rt, fl6, + fl6->flowi6_oif, flags); + } if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) @@ -936,7 +949,7 @@ restart: if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); @@ -990,9 +1003,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, struct fib6_table *table; table = rt->rt6i_table; - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); err = fib6_add(&table->tb6_root, rt, info, mxc, extack); - write_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); return err; } @@ -1090,7 +1103,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) return pcpu_rt; } -/* It should be called with read_lock_bh(&tb6_lock) acquired */ +/* It should be called with rcu_read_lock() acquired */ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) { struct rt6_info *pcpu_rt, **p; @@ -1632,7 +1645,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; @@ -1662,7 +1675,7 @@ redo_rt6_select: rt = rt_cache; if (rt == net->ipv6.ip6_null_entry) { - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); dst_hold(&rt->dst); trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; @@ -1671,7 +1684,7 @@ redo_rt6_select: dst_use_noref(&rt->dst, jiffies); rt6_dst_from_metrics_check(rt); } - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && @@ -1687,11 +1700,11 @@ redo_rt6_select: if (ip6_hold_safe(net, &rt, true)) { dst_use_noref(&rt->dst, jiffies); } else { - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); uncached_rt = rt; goto uncached_rt_out; } - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); dst_release(&rt->dst); @@ -1719,7 +1732,7 @@ uncached_rt_out: pcpu_rt = 
rt6_get_pcpu_route(rt); if (pcpu_rt) { - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); } else { /* atomic_inc_not_zero() is needed when using rcu */ if (atomic_inc_not_zero(&rt->rt6i_ref)) { @@ -1730,12 +1743,12 @@ uncached_rt_out: * No dst_hold() on rt is needed because grabbing * rt->rt6i_ref makes sure rt can't be released. */ - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); pcpu_rt = rt6_make_pcpu_route(rt); rt6_release(rt); } else { /* rt is already removed from tree */ - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); pcpu_rt = net->ipv6.ip6_null_entry; dst_hold(&pcpu_rt->dst); } @@ -2131,10 +2144,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, * routes. */ - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_node_rt_rcu(fn) { if (rt6_check_expired(rt)) continue; if (rt->dst.error) @@ -2179,7 +2192,7 @@ restart: out: ip6_hold_safe(net, &rt, true); - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; @@ -2778,9 +2791,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) } table = rt->rt6i_table; - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); - write_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); out: ip6_rt_put(rt); @@ -2806,7 +2819,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) if (rt == net->ipv6.ip6_null_entry) goto out_put; table = rt->rt6i_table; - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { struct rt6_info *sibling, *next_sibling; @@ -2836,7 +2849,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) err = fib6_del(rt, info); out_unlock: - write_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); out_put: ip6_rt_put(rt); @@ -2861,7 +2874,7 @@ static int ip6_route_del(struct fib6_config *cfg, return err; } - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_locate(&table->tb6_root, &cfg->fc_dst, cfg->fc_dst_len, @@ -2869,7 +2882,7 @@ static int ip6_route_del(struct fib6_config *cfg, !(cfg->fc_flags & RTF_CACHE)); if (fn) { - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_node_rt_rcu(fn) { if (cfg->fc_flags & RTF_CACHE) { rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, &cfg->fc_src); @@ -2890,7 +2903,7 @@ static int ip6_route_del(struct fib6_config *cfg, continue; if (!dst_hold_safe(&rt->dst)) break; - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); /* if gateway was specified only delete the one hop */ if (cfg->fc_flags & RTF_GATEWAY) @@ -2899,7 +2912,7 @@ static int ip6_route_del(struct fib6_config *cfg, return __ip6_del_rt_siblings(rt, cfg); } } - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); return err; } @@ -3074,12 +3087,12 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!table) return NULL; - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true); if (!fn) goto out; - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_node_rt_rcu(fn) { if (rt->dst.dev->ifindex != ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) @@ -3090,7 +3103,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, break; } 
out: - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); return rt; } @@ -3136,8 +3149,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev if (!table) return NULL; - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { + rcu_read_lock(); + for_each_fib6_node_rt_rcu(&table->tb6_root) { if (dev == rt->dst.dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) @@ -3145,7 +3158,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev } if (rt) ip6_hold_safe(NULL, &rt, false); - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); return rt; } @@ -3183,20 +3196,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table) struct rt6_info *rt; restart: - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { + rcu_read_lock(); + for_each_fib6_node_rt_rcu(&table->tb6_root) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { if (dst_hold_safe(&rt->dst)) { - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); ip6_del_rt(rt); } else { - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); } goto restart; } } - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; } From 81eb8447daae3b62247aa66bb17b82f8fef68249 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 6 Oct 2017 12:06:11 -0700 Subject: [PATCH 0672/2177] ipv6: take care of rt6_stats Currently, most of the rt6_stats are not hooked up correctly. As the last part of this patch series, hook up all existing rt6_stats and add one new stat fib_rt_uncache to indicate the number of routes in the uncached list. For details of the stats, please refer to the comments added in include/net/ip6_fib.h. Note: fib_rt_alloc and fib_rt_uncache are not guaranteed to be modified under a lock. So atomic_t is used for them. Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 15 +++++++++------ net/ipv6/ip6_fib.c | 42 ++++++++++++++++++++++++------------------ net/ipv6/route.c | 16 ++++++++++++++-- 3 files changed, 47 insertions(+), 26 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0b438b9bcb10..10c913816032 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -297,12 +297,15 @@ struct fib6_walker { }; struct rt6_statistics { - __u32 fib_nodes; - __u32 fib_route_nodes; - __u32 fib_rt_alloc; /* permanent routes */ - __u32 fib_rt_entries; /* rt entries in table */ - __u32 fib_rt_cache; /* cache routes */ - __u32 fib_discarded_routes; + __u32 fib_nodes; /* all fib6 nodes */ + __u32 fib_route_nodes; /* intermediate nodes */ + __u32 fib_rt_entries; /* rt entries in fib table */ + __u32 fib_rt_cache; /* cached rt entries in exception table */ + __u32 fib_discarded_routes; /* total number of routes delete */ + + /* The following stats are not protected by any lock */ + atomic_t fib_rt_alloc; /* total number of routes alloced */ + atomic_t fib_rt_uncache; /* rt entries in uncached list */ }; #define RTN_TL_ROOT 0x0001 diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 3f95908b39c3..52a29ba32928 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -149,18 +149,21 @@ static __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } -static struct fib6_node *node_alloc(void) +static struct fib6_node *node_alloc(struct net *net) { struct fib6_node *fn; fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); + if (fn) + net->ipv6.rt6_stats->fib_nodes++; return fn; } -static void node_free_immediate(struct fib6_node *fn) +static void node_free_immediate(struct net *net, struct fib6_node *fn) { kmem_cache_free(fib6_node_kmem, fn); + net->ipv6.rt6_stats->fib_nodes--; } static void node_free_rcu(struct rcu_head *head) @@ -170,9 +173,10 @@ static void node_free_rcu(struct rcu_head *head) kmem_cache_free(fib6_node_kmem, fn); } -static void node_free(struct fib6_node *fn) +static void node_free(struct net *net, struct fib6_node *fn) { call_rcu(&fn->rcu, node_free_rcu); + net->ipv6.rt6_stats->fib_nodes--; } void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) @@ -583,7 +587,8 @@ out: * node. */ -static struct fib6_node *fib6_add_1(struct fib6_table *table, +static struct fib6_node *fib6_add_1(struct net *net, + struct fib6_table *table, struct fib6_node *root, struct in6_addr *addr, int plen, int offset, int allow_create, @@ -675,7 +680,7 @@ static struct fib6_node *fib6_add_1(struct fib6_table *table, * Create new leaf node without children. 
*/ - ln = node_alloc(); + ln = node_alloc(net); if (!ln) return ERR_PTR(-ENOMEM); @@ -716,14 +721,14 @@ insert_above: * (new leaf node)[ln] (old node)[fn] */ if (plen > bit) { - in = node_alloc(); - ln = node_alloc(); + in = node_alloc(net); + ln = node_alloc(net); if (!in || !ln) { if (in) - node_free_immediate(in); + node_free_immediate(net, in); if (ln) - node_free_immediate(ln); + node_free_immediate(net, ln); return ERR_PTR(-ENOMEM); } @@ -768,7 +773,7 @@ insert_above: * (old node)[fn] NULL */ - ln = node_alloc(); + ln = node_alloc(net); if (!ln) return ERR_PTR(-ENOMEM); @@ -1065,6 +1070,7 @@ add: fn->rr_ptr = NULL; rt6_release(iter); nsiblings--; + info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { ins = &iter->dst.rt6_next; } @@ -1140,7 +1146,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!allow_create && !replace_required) pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); - fn = fib6_add_1(table, root, + fn = fib6_add_1(info->nl_net, table, root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), allow_create, replace_required, extack); @@ -1170,7 +1176,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, */ /* Create subtree root node */ - sfn = node_alloc(); + sfn = node_alloc(info->nl_net); if (!sfn) goto failure; @@ -1181,8 +1187,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, /* Now add the first leaf node to new subtree */ - sn = fib6_add_1(table, sfn, &rt->rt6i_src.addr, - rt->rt6i_src.plen, + sn = fib6_add_1(info->nl_net, table, sfn, + &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), allow_create, replace_required, extack); @@ -1191,7 +1197,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, root, and then (in failure) stale node in main tree. 
*/ - node_free_immediate(sfn); + node_free_immediate(info->nl_net, sfn); err = PTR_ERR(sn); goto failure; } @@ -1200,8 +1206,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, rcu_assign_pointer(sfn->parent, fn); rcu_assign_pointer(fn->subtree, sfn); } else { - sn = fib6_add_1(table, FIB6_SUBTREE(fn), &rt->rt6i_src.addr, - rt->rt6i_src.plen, + sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn), + &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), allow_create, replace_required, extack); @@ -1609,7 +1615,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, } read_unlock(&net->ipv6.fib6_walker_lock); - node_free(fn); + node_free(net, fn); if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cf44d0994b1e..399d1bceec4a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -143,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->rt6i_uncached)) { struct uncached_list *ul = rt->rt6i_uncached_list; + struct net *net = dev_net(rt->dst.dev); spin_lock_bh(&ul->lock); list_del(&rt->rt6i_uncached); + atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache); spin_unlock_bh(&ul->lock); } } @@ -359,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, flags); - if (rt) + if (rt) { rt6_info_init(rt); + atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); + } return rt; } @@ -1156,6 +1160,8 @@ static DEFINE_SPINLOCK(rt6_exception_lock); static void rt6_remove_exception(struct rt6_exception_bucket *bucket, struct rt6_exception *rt6_ex) { + struct net *net = dev_net(rt6_ex->rt6i->dst.dev); + if (!bucket || !rt6_ex) return; rt6_ex->rt6i->rt6i_node = NULL; @@ -1164,6 +1170,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; + net->ipv6.rt6_stats->fib_rt_cache--; } /* Remove oldest rt6_ex in bucket and free the memory @@ -1270,6 +1277,7 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, static int rt6_insert_exception(struct rt6_info *nrt, struct rt6_info *ort) { + struct net *net = dev_net(ort->dst.dev); struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; @@ -1339,6 +1347,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, nrt->rt6i_node = ort->rt6i_node; hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); bucket->depth++; + net->ipv6.rt6_stats->fib_rt_cache++; if (bucket->depth > FIB6_MAX_DEPTH) rt6_exception_remove_oldest(bucket); @@ -1714,6 +1723,7 @@ redo_rt6_select: * No need for another dst_hold() */ rt6_uncached_list_add(uncached_rt); + atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); } else { uncached_rt = net->ipv6.ip6_null_entry; dst_hold(&uncached_rt->dst); @@ -1894,6 +1904,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori DST_OBSOLETE_NONE, 0); if (rt) { rt6_info_init(rt); + atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); new = &rt->dst; new->__use = 1; @@ -2341,6 +2352,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, * do proper release of the net_device */ rt6_uncached_list_add(rt); + atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); @@ -4422,7 +4434,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 
net->ipv6.rt6_stats->fib_nodes, net->ipv6.rt6_stats->fib_route_nodes, - net->ipv6.rt6_stats->fib_rt_alloc, + atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc), net->ipv6.rt6_stats->fib_rt_entries, net->ipv6.rt6_stats->fib_rt_cache, dst_entries_get_slow(&net->ipv6.ip6_dst_ops), From bdc476413dcdb5c38a7dec90fb2bca327021273a Mon Sep 17 00:00:00 2001 From: Amine Kherbouche Date: Wed, 4 Oct 2017 19:35:57 +0200 Subject: [PATCH 0673/2177] ip_tunnel: add mpls over gre support This commit introduces the MPLSoGRE support (RFC 4023), using ip tunnel API by simply adding ipgre_tunnel_encap_(add|del)_mpls_ops() and the new tunnel type TUNNEL_ENCAP_MPLS. Signed-off-by: Amine Kherbouche Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 1 + net/mpls/af_mpls.c | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 2e520883c054..a2f48c01365e 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -84,6 +84,7 @@ enum tunnel_encap_types { TUNNEL_ENCAP_NONE, TUNNEL_ENCAP_FOU, TUNNEL_ENCAP_GUE, + TUNNEL_ENCAP_MPLS, }; #define TUNNEL_ENCAP_FLAG_CSUM (1<<0) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c5b9ce41d66f..9745e8f69810 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_IPV6) #include @@ -39,6 +40,36 @@ static int one = 1; static int label_limit = (1 << 20) - 1; static int ttl_max = 255; +#if IS_ENABLED(CONFIG_NET_IP_TUNNEL) +size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e) +{ + return sizeof(struct mpls_shim_hdr); +} + +static const struct ip_tunnel_encap_ops mpls_iptun_ops = { + .encap_hlen = ipgre_mpls_encap_hlen, +}; + +static int ipgre_tunnel_encap_add_mpls_ops(void) +{ + return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS); +} + +static void ipgre_tunnel_encap_del_mpls_ops(void) +{ + ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS); +} +#else +static int ipgre_tunnel_encap_add_mpls_ops(void) +{ + return 0; +} + +static void ipgre_tunnel_encap_del_mpls_ops(void) +{ +} +#endif + static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, struct nlmsghdr *nlh, struct net *net, u32 portid, unsigned int nlm_flags); @@ -2485,6 +2516,10 @@ static int __init mpls_init(void) 0); rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, mpls_netconf_dump_devconf, 0); + err = ipgre_tunnel_encap_add_mpls_ops(); + if (err) + pr_err("Can't add mpls over gre tunnel ops\n"); + err = 0; out: return err; @@ -2502,6 +2537,7 @@ static void __exit mpls_exit(void) dev_remove_pack(&mpls_packet_type); unregister_netdevice_notifier(&mpls_dev_notifier); unregister_pernet_subsys(&mpls_net_ops); + ipgre_tunnel_encap_del_mpls_ops(); } module_exit(mpls_exit); From 97562633bcbac4a07d605ae628d7655fa71caaf5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:19 -0700 Subject: [PATCH 0674/2177] bpf: perf event change needed for subsequent bpf helpers This patch does not impact existing functionalities. It contains the changes in perf event area needed for subsequent bpf_perf_event_read_value and bpf_perf_prog_read_value helpers. Signed-off-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Signed-off-by: David S. 
Miller --- include/linux/perf_event.h | 7 +++++-- kernel/bpf/arraymap.c | 2 +- kernel/events/core.c | 15 +++++++++++++-- kernel/trace/bpf_trace.c | 2 +- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..79b18a20cf5d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -806,6 +806,7 @@ struct perf_output_handle { struct bpf_perf_event_data_kern { struct pt_regs *regs; struct perf_sample_data *data; + struct perf_event *event; }; #ifdef CONFIG_CGROUP_PERF @@ -884,7 +885,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value); +int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1286,7 +1288,8 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event * { return ERR_PTR(-EINVAL); } -static inline int perf_event_read_local(struct perf_event *event, u64 *value) +static inline int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running) { return -EINVAL; } diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 98c0f00c3f5e..68d866628be0 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -492,7 +492,7 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, ee = ERR_PTR(-EOPNOTSUPP); event = perf_file->private_data; - if (perf_event_read_local(event, &value) == -EOPNOTSUPP) + if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP) goto err_out; ee = bpf_event_entry_gen(perf_file, map_file); diff --git a/kernel/events/core.c b/kernel/events/core.c index 6bc21e202ae4..902149f05381 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3684,10 +3684,12 @@ static inline u64 perf_event_count(struct perf_event *event) * will not be local and we cannot read them atomically * - must not have a pmu::count method */ -int perf_event_read_local(struct perf_event *event, u64 *value) +int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running) { unsigned long flags; int ret = 0; + u64 now; /* * Disabling interrupts avoids all counter scheduling (context @@ -3718,13 +3720,21 @@ int perf_event_read_local(struct perf_event *event, u64 *value) goto out; } + now = event->shadow_ctx_time + perf_clock(); + if (enabled) + *enabled = now - event->tstamp_enabled; /* * If the event is currently on this CPU, its either a per-task event, * or local to this CPU. Furthermore it means its ACTIVE (otherwise * oncpu == -1). 
*/ - if (event->oncpu == smp_processor_id()) + if (event->oncpu == smp_processor_id()) { event->pmu->read(event); + if (running) + *running = now - event->tstamp_running; + } else if (running) { + *running = event->total_time_running; + } *value = local64_read(&event->count); out: @@ -8072,6 +8082,7 @@ static void bpf_overflow_handler(struct perf_event *event, struct bpf_perf_event_data_kern ctx = { .data = data, .regs = regs, + .event = event, }; int ret = 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index dc498b605d5d..95888ae6c263 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -275,7 +275,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) if (!ee) return -ENOENT; - err = perf_event_read_local(ee->event, &value); + err = perf_event_read_local(ee->event, &value, NULL, NULL); /* * this api is ugly since we miss [-22..-2] range of valid * counter values, but that's uapi From 908432ca84fc229e906ba164219e9ad0fe56f755 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:20 -0700 Subject: [PATCH 0675/2177] bpf: add helper bpf_perf_event_read_value for perf event array map Hardware pmu counters are limited resources. When more pmu based perf events are opened than available counters, the kernel will multiplex these events so each event gets a certain percentage (but not 100%) of the pmu time. When multiplexing happens, the number of samples or the counter value does not reflect what a run without multiplexing would report. This makes comparison between different runs difficult. Typically, the number of samples or counter value should be normalized before comparing to other experiments. The typical normalization is done like: normalized_num_samples = num_samples * time_enabled / time_running normalized_counter_value = counter_value * time_enabled / time_running where time_enabled is the time the event was enabled and time_running is the time the event was actually running since the last normalization. This patch adds the helper bpf_perf_event_read_value for kprobe based perf event array maps, to read the perf counter and enabled/running time. The enabled/running time is accumulated since the perf event open. To compute the scaling factor between two bpf invocations, users can use cpu_id as the key (which is typical for the perf array usage model) to remember the previous value and do the calculation inside the bpf program. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- include/uapi/linux/bpf.h | 21 +++++++++++++++++-- kernel/bpf/verifier.c | 4 +++- kernel/trace/bpf_trace.c | 45 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6082faf5fd2a..7b57a212c7d7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -641,6 +641,14 @@ union bpf_attr { * @xdp_md: pointer to xdp_md * @delta: An positive/negative integer to be added to xdp_md.data_meta * Return: 0 on success or negative on error + * + * int bpf_perf_event_read_value(map, flags, buf, buf_size) + * read perf event counter value and perf event enabled/running time + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * @buf: buf to fill + * @buf_size: size of the buf + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -697,7 +705,8 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ - FN(xdp_adjust_meta), + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -741,7 +750,9 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ +/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and + * BPF_FUNC_perf_event_read_value flags. + */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK /* BPF_FUNC_perf_event_output for sk_buff input context. */ @@ -934,4 +945,10 @@ enum { #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ #define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +struct bpf_perf_event_value { + __u64 counter; + __u64 enabled; + __u64 running; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 52b022310f6a..590125e29161 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1552,7 +1552,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) break; case BPF_MAP_TYPE_PERF_EVENT_ARRAY: if (func_id != BPF_FUNC_perf_event_read && - func_id != BPF_FUNC_perf_event_output) + func_id != BPF_FUNC_perf_event_output && + func_id != BPF_FUNC_perf_event_read_value) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -1595,6 +1596,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) break; case BPF_FUNC_perf_event_read: case BPF_FUNC_perf_event_output: + case BPF_FUNC_perf_event_read_value: if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 95888ae6c263..0be86cc0130e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -255,14 +255,14 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) +static __always_inline int +get_map_perf_counter(struct bpf_map *map, u64 flags, + u64 *value, u64 *enabled, u64 *running) { struct bpf_array *array = container_of(map, struct bpf_array, map); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; struct bpf_event_entry *ee; - u64 value = 0; - int err; if (unlikely(flags & ~(BPF_F_INDEX_MASK))) return -EINVAL; @@ -275,7 +275,15 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, 
map, u64, flags) if (!ee) return -ENOENT; - err = perf_event_read_local(ee->event, &value, NULL, NULL); + return perf_event_read_local(ee->event, value, enabled, running); +} + +BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) +{ + u64 value = 0; + int err; + + err = get_map_perf_counter(map, flags, &value, NULL, NULL); /* * this api is ugly since we miss [-22..-2] range of valid * counter values, but that's uapi @@ -293,6 +301,33 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags, + struct bpf_perf_event_value *, buf, u32, size) +{ + int err = -EINVAL; + + if (unlikely(size != sizeof(struct bpf_perf_event_value))) + goto clear; + err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled, + &buf->running); + if (unlikely(err)) + goto clear; + return 0; +clear: + memset(buf, 0, size); + return err; +} + +static const struct bpf_func_proto bpf_perf_event_read_value_proto = { + .func = bpf_perf_event_read_value, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); static __always_inline u64 @@ -499,6 +534,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_perf_event_output_proto; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto; + case BPF_FUNC_perf_event_read_value: + return &bpf_perf_event_read_value_proto; default: return tracing_func_proto(func_id); } From 020a32d9581ac824d038b0b4e24e977e3cc8589f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:21 -0700 Subject: [PATCH 0676/2177] bpf: add a test case for helper bpf_perf_event_read_value The bpf sample program tracex6 is enhanced to use the new helper to read enabled/running time as well. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- samples/bpf/tracex6_kern.c | 26 +++++++++++++++++++++++ samples/bpf/tracex6_user.c | 13 +++++++++++- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 3 +++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c index e7d180305974..46c557afac73 100644 --- a/samples/bpf/tracex6_kern.c +++ b/samples/bpf/tracex6_kern.c @@ -15,6 +15,12 @@ struct bpf_map_def SEC("maps") values = { .value_size = sizeof(u64), .max_entries = 64, }; +struct bpf_map_def SEC("maps") values2 = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_perf_event_value), + .max_entries = 64, +}; SEC("kprobe/htab_map_get_next_key") int bpf_prog1(struct pt_regs *ctx) @@ -37,5 +43,25 @@ int bpf_prog1(struct pt_regs *ctx) return 0; } +SEC("kprobe/htab_map_lookup_elem") +int bpf_prog2(struct pt_regs *ctx) +{ + u32 key = bpf_get_smp_processor_id(); + struct bpf_perf_event_value *val, buf; + int error; + + error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf)); + if (error) + return 0; + + val = bpf_map_lookup_elem(&values2, &key); + if (val) + *val = buf; + else + bpf_map_update_elem(&values2, &key, &buf, BPF_NOEXIST); + + return 0; +} + char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index a05a99a0752f..3341a96fc046 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -22,6 +22,7 @@ static void check_on_cpu(int cpu, struct perf_event_attr *attr) { + struct bpf_perf_event_value value2; int pmu_fd, error = 0; cpu_set_t set; __u64 value; @@ -46,8 +47,18 @@ static void check_on_cpu(int cpu, struct perf_event_attr *attr) fprintf(stderr, "Value missing for CPU %d\n", cpu); error = 1; goto on_exit; + } else { + fprintf(stderr, "CPU %d: %llu\n", cpu, value); + } + /* The above bpf_map_lookup_elem should trigger the second kprobe */ + if (bpf_map_lookup_elem(map_fd[2], &cpu, &value2)) { + fprintf(stderr, "Value2 missing for CPU %d\n", cpu); + error = 1; + goto on_exit; + } else { + fprintf(stderr, "CPU %d: counter: %llu, enabled: %llu, running: %llu\n", cpu, + value2.counter, value2.enabled, value2.running); } - fprintf(stderr, "CPU %d: %llu\n", cpu, value); on_exit: assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cb2b9f95160a..cdf6c4f50b0f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -697,7 +697,8 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ - FN(xdp_adjust_meta), + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index a56053db26f5..c15ca83dbbd9 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -72,6 +72,9 @@ static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = (void *) BPF_FUNC_sock_map_update; +static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, + void *buf, unsigned int buf_size) = + (void *) BPF_FUNC_perf_event_read_value; /* llvm builtin functions that eBPF C program 
may use to From 4bebdc7a85aa400c0222b5329861e4ad9252f1e5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:22 -0700 Subject: [PATCH 0677/2177] bpf: add helper bpf_perf_prog_read_value This patch adds the helper bpf_perf_prog_read_value for perf event based bpf programs, to read the event counter and enabled/running time. The enabled/running time is accumulated since the perf event open. The typical use case for a perf event based bpf program is to attach itself to a single event. In such cases, if it is desirable to get the scaling factor between two bpf invocations, users can save the time values in a map, and use the value from the map and the current value to calculate the scaling factor. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 +++++++++- kernel/trace/bpf_trace.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7b57a212c7d7..5bbbec17aa5a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -649,6 +649,13 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return: 0 on success or negative error code + * + * int bpf_perf_prog_read_value(ctx, buf, buf_size) + * read perf prog attached perf event counter and enabled/running time + * @ctx: pointer to ctx + * @buf: buf to fill + * @buf_size: size of the buf + * Return : 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -706,7 +713,8 @@ union bpf_attr { FN(sk_redirect_map), \ FN(sock_map_update), \ FN(xdp_adjust_meta), \ - FN(perf_event_read_value), + FN(perf_event_read_value), \ + FN(perf_prog_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0be86cc0130e..04ea5314f2bc 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -613,6 +613,32 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx, + struct bpf_perf_event_value *, buf, u32, size) +{ + int err = -EINVAL; + + if (unlikely(size != sizeof(struct bpf_perf_event_value))) + goto clear; + err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled, + &buf->running); + if (unlikely(err)) + goto clear; + return 0; +clear: + memset(buf, 0, size); + return err; +} + +static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = { + .func = bpf_perf_prog_read_value_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -620,6 +646,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) return &bpf_perf_event_output_proto_tp; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_tp; + case BPF_FUNC_perf_prog_read_value: + return &bpf_perf_prog_read_value_proto_tp; default: return tracing_func_proto(func_id); } From 81b9cf8028a17bdbdaa0da80b735b32150d4e89e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:23 -0700 Subject: [PATCH 0678/2177] bpf: add a test case for helper bpf_perf_prog_read_value The bpf sample program trace_event is
enhanced to use the new helper to print out enabled/running time. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/trace_event_kern.c | 10 ++++++++++ samples/bpf/trace_event_user.c | 13 ++++++++----- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 3 +++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c index 41b6115a32eb..a77a583d94d4 100644 --- a/samples/bpf/trace_event_kern.c +++ b/samples/bpf/trace_event_kern.c @@ -37,10 +37,14 @@ struct bpf_map_def SEC("maps") stackmap = { SEC("perf_event") int bpf_prog1(struct bpf_perf_event_data *ctx) { + char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu"; + char time_fmt2[] = "Get Time Failed, ErrCode: %d"; char fmt[] = "CPU-%d period %lld ip %llx"; u32 cpu = bpf_get_smp_processor_id(); + struct bpf_perf_event_value value_buf; struct key_t key; u64 *val, one = 1; + int ret; if (ctx->sample_period < 10000) /* ignore warmup */ @@ -54,6 +58,12 @@ int bpf_prog1(struct bpf_perf_event_data *ctx) return 0; } + ret = bpf_perf_prog_read_value(ctx, (void *)&value_buf, sizeof(struct bpf_perf_event_value)); + if (!ret) + bpf_trace_printk(time_fmt1, sizeof(time_fmt1), value_buf.enabled, value_buf.running); + else + bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret); + val = bpf_map_lookup_elem(&counts, &key); if (val) (*val)++; diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 7bd827b84a67..bf4f1b6d9a52 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -127,6 +127,9 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) int *pmu_fd = malloc(nr_cpus * sizeof(int)); int i, error = 0; + /* system wide perf event, no need to inherit */ + attr->inherit = 0; + /* open perf_event on all cpus */ for (i = 0; i < nr_cpus; i++) { pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); @@ -154,6 +157,11 @@ static void test_perf_event_task(struct perf_event_attr *attr) { int pmu_fd; + /* per task perf event, enable inherit so the "dd ..." command can be traced properly. + * Enabling inherit will cause bpf_perf_prog_read_time helper failure. 
+ */ + attr->inherit = 1; + /* open task bound event */ pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); if (pmu_fd < 0) { @@ -175,14 +183,12 @@ static void test_bpf_perf_event(void) .freq = 1, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, - .inherit = 1, }; struct perf_event_attr attr_type_sw = { .sample_freq = SAMPLE_FREQ, .freq = 1, .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, - .inherit = 1, }; struct perf_event_attr attr_hw_cache_l1d = { .sample_freq = SAMPLE_FREQ, @@ -192,7 +198,6 @@ static void test_bpf_perf_event(void) PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), - .inherit = 1, }; struct perf_event_attr attr_hw_cache_branch_miss = { .sample_freq = SAMPLE_FREQ, @@ -202,7 +207,6 @@ static void test_bpf_perf_event(void) PERF_COUNT_HW_CACHE_BPU | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), - .inherit = 1, }; struct perf_event_attr attr_type_raw = { .sample_freq = SAMPLE_FREQ, @@ -210,7 +214,6 @@ static void test_bpf_perf_event(void) .type = PERF_TYPE_RAW, /* Intel Instruction Retired */ .config = 0xc0, - .inherit = 1, }; printf("Test HW_CPU_CYCLES\n"); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cdf6c4f50b0f..0894fd20b12b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -698,7 +698,8 @@ union bpf_attr { FN(sk_redirect_map), \ FN(sock_map_update), \ FN(xdp_adjust_meta), \ - FN(perf_event_read_value), + FN(perf_event_read_value), \ + FN(perf_prog_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index c15ca83dbbd9..e25dbf6038cf 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -75,6 +75,9 @@ static int (*bpf_sock_map_update)(void *map, void *key, void *value, static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_event_read_value; +static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, + unsigned int buf_size) = + (void *) BPF_FUNC_perf_prog_read_value; /* llvm builtin functions that eBPF C program may use to From 64237470ddf97b63155fbd272c9e743e01d5f514 Mon Sep 17 00:00:00 2001 From: Lin Zhang Date: Fri, 6 Oct 2017 01:37:29 +0800 Subject: [PATCH 0679/2177] net: phonet: mark header_ops as const Signed-off-by: Lin Zhang Signed-off-by: David S. 
Miller --- include/linux/if_phonet.h | 2 +- net/phonet/af_phonet.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h index bbcdb0a767d8..a118ee4a8428 100644 --- a/include/linux/if_phonet.h +++ b/include/linux/if_phonet.h @@ -10,5 +10,5 @@ #include -extern struct header_ops phonet_header_ops; +extern const struct header_ops phonet_header_ops; #endif diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index f925753668a7..b12142e55d19 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr) return 1; } -struct header_ops phonet_header_ops = { +const struct header_ops phonet_header_ops = { .create = pn_header_create, .parse = pn_header_parse, }; From 548ec114705bb8f0879a0da12abec17f17a7cc26 Mon Sep 17 00:00:00 2001 From: Lin Zhang Date: Fri, 6 Oct 2017 01:40:35 +0800 Subject: [PATCH 0680/2177] net: phonet: mark phonet_protocol as const The phonet_protocol structs don't need to be written by anyone and so can be marked as const. Signed-off-by: Lin Zhang Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 6 ++++-- net/phonet/af_phonet.c | 15 ++++++++------- net/phonet/datagram.c | 2 +- net/phonet/pep.c | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 039cc29cb4a8..51e1a2a45d02 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -108,8 +108,10 @@ struct phonet_protocol { int sock_type; }; -int phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp); -void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp); +int phonet_proto_register(unsigned int protocol, + const struct phonet_protocol *pp); +void phonet_proto_unregister(unsigned int protocol, + const struct phonet_protocol *pp); int phonet_sysctl_init(void); void phonet_sysctl_exit(void); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index b12142e55d19..3b0ef691f5b1 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -35,11 +35,11 @@ #include /* Transport protocol registration */ -static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; -static struct phonet_protocol *phonet_proto_get(unsigned int protocol) +static const struct phonet_protocol *phonet_proto_get(unsigned int protocol) { - struct phonet_protocol *pp; + const struct phonet_protocol *pp; if (protocol >= PHONET_NPROTO) return NULL; @@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol) return pp; } -static inline void phonet_proto_put(struct phonet_protocol *pp) +static inline void phonet_proto_put(const struct phonet_protocol *pp) { module_put(pp->prot->owner); } @@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol, { struct sock *sk; struct pn_sock *pn; - struct phonet_protocol *pnp; + const struct phonet_protocol *pnp; int err; if (!capable(CAP_SYS_ADMIN)) @@ -470,7 +470,7 @@ static struct packet_type phonet_packet_type __read_mostly = { static DEFINE_MUTEX(proto_tab_lock); int __init_or_module phonet_proto_register(unsigned int protocol, - struct phonet_protocol *pp) + const struct phonet_protocol *pp) { int err = 0; @@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol, } 
EXPORT_SYMBOL(phonet_proto_register); -void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp) +void phonet_proto_unregister(unsigned int protocol, + const struct phonet_protocol *pp) { mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[protocol] != pp); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 5e710435ffa9..b44fb9018fb8 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -195,7 +195,7 @@ static struct proto pn_proto = { .name = "PHONET", }; -static struct phonet_protocol pn_dgram_proto = { +static const struct phonet_protocol pn_dgram_proto = { .ops = &phonet_dgram_ops, .prot = &pn_proto, .sock_type = SOCK_DGRAM, diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e81537991ddf..9fc76b19cd3c 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1351,7 +1351,7 @@ static struct proto pep_proto = { .name = "PNPIPE", }; -static struct phonet_protocol pep_pn_proto = { +static const struct phonet_protocol pep_pn_proto = { .ops = &phonet_stream_ops, .prot = &pep_proto, .sock_type = SOCK_SEQPACKET, From f192970de860d3ab90aa9e2a22853201a57bde78 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 5 Oct 2017 12:07:12 -0700 Subject: [PATCH 0681/2177] ip_gre: check packet length and mtu correctly in erspan tx Similarly to early patch for erspan_xmit(), the ARPHDR_ETHER device is the length of the whole ether packet. So skb->len should subtract the dev->hard_header_len. Fixes: 1a66a836da63 ("gre: add collect_md mode to ERSPAN tunnel") Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Signed-off-by: William Tu Cc: Xin Long Cc: David Laight Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index dc2317776499..c105a315b1a3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -579,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, if (gre_handle_offloads(skb, false)) goto err_free_rt; - if (skb->len > dev->mtu) { - pskb_trim(skb, dev->mtu); + if (skb->len > dev->mtu + dev->hard_header_len) { + pskb_trim(skb, dev->mtu + dev->hard_header_len); truncate = true; } @@ -731,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; - if (skb->len - dev->hard_header_len > dev->mtu) { - pskb_trim(skb, dev->mtu); + if (skb->len > dev->mtu + dev->hard_header_len) { + pskb_trim(skb, dev->mtu + dev->hard_header_len); truncate = true; } From 473d97343f94ff20f5196078314e4dd83156d3a2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 5 Oct 2017 21:52:11 -0700 Subject: [PATCH 0682/2177] bpf: Change bpf_obj_name_cpy() to better ensure map's name is init by 0 During get_info_by_fd, the prog/map name is memcpy-ed. It depends on the prog->aux->name and map->name to be zero initialized. bpf_prog_aux is easy to guarantee that aux->name is zero init. The name in bpf_map may be harder to be guaranteed in the future when new map type is added. Hence, this patch makes bpf_obj_name_cpy() to always zero init the prog/map name. Suggested-by: Daniel Borkmann Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- kernel/bpf/syscall.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0048cb24ba7b..d124e702e040 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -322,6 +322,8 @@ static int bpf_obj_name_cpy(char *dst, const char *src) { const char *end = src + BPF_OBJ_NAME_LEN; + memset(dst, 0, BPF_OBJ_NAME_LEN); + /* Copy all isalnum() and '_' char */ while (src < end && *src) { if (!isalnum(*src) && *src != '_') @@ -333,9 +335,6 @@ static int bpf_obj_name_cpy(char *dst, const char *src) if (src == end) return -EINVAL; - /* '\0' terminates dst */ - *dst = 0; - return 0; } From 067cae47771c864604969fd902efe10916e0d79c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 5 Oct 2017 21:52:12 -0700 Subject: [PATCH 0683/2177] bpf: Use char in prog and map name Instead of u8, use char for prog and map name. It can avoid the userspace tool getting compiler's signess warning. The bpf_prog_aux, bpf_map, bpf_attr, bpf_prog_info and bpf_map_info are changed. Signed-off-by: Martin KaFai Lau Cc: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++-- include/uapi/linux/bpf.h | 8 ++++---- tools/include/uapi/linux/bpf.h | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a67daea731ab..bc7da2ddfcaf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,7 +56,7 @@ struct bpf_map { struct work_struct work; atomic_t usercnt; struct bpf_map *inner_map_meta; - u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; }; /* function argument constraints */ @@ -189,7 +189,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ - u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5bbbec17aa5a..6db9e1d679cd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -230,7 +230,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). */ - __u8 map_name[BPF_OBJ_NAME_LEN]; + char map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -253,7 +253,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; - __u8 prog_name[BPF_OBJ_NAME_LEN]; + char prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -888,7 +888,7 @@ struct bpf_prog_info { __u32 created_by_uid; __u32 nr_map_ids; __aligned_u64 map_ids; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -898,7 +898,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0894fd20b12b..fb4fb81ce5b0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -230,7 +230,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). 
*/ - __u8 map_name[BPF_OBJ_NAME_LEN]; + char map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -253,7 +253,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; - __u8 prog_name[BPF_OBJ_NAME_LEN]; + char prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -871,7 +871,7 @@ struct bpf_prog_info { __u32 created_by_uid; __u32 nr_map_ids; __aligned_u64 map_ids; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -881,7 +881,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops From 368211fb920a0b789c238942c6af0414539f79d6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 5 Oct 2017 21:52:13 -0700 Subject: [PATCH 0684/2177] bpf: Append prog->aux->name in bpf_get_prog_name() This patch makes the bpf_prog's name available in kallsyms. The new format is bpf_prog_tag[_name]. Sample kallsyms from running selftests/bpf/test_progs: [root@arch-fb-vm1 ~]# egrep ' bpf_prog_[0-9a-fA-F]{16}' /proc/kallsyms ffffffffa0048000 t bpf_prog_dabf0207d1992486_test_obj_id ffffffffa0038000 t bpf_prog_a04f5eef06a7f555__123456789ABCDE ffffffffa0050000 t bpf_prog_a04f5eef06a7f555 Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/core.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c6be15ae83ee..248961af2421 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -309,12 +309,25 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog, static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) { + const char *end = sym + KSYM_NAME_LEN; + BUILD_BUG_ON(sizeof("bpf_prog_") + - sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN); + sizeof(prog->tag) * 2 + + /* name has been null terminated. + * We should need +1 for the '_' preceding + * the name. However, the null character + * is double counted between the name and the + * sizeof("bpf_prog_") above, so we omit + * the +1 here. + */ + sizeof(prog->aux->name) > KSYM_NAME_LEN); sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); - *sym = 0; + if (prog->aux->name[0]) + snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name); + else + *sym = 0; } static __always_inline unsigned long From c1f2c6d025d365d961dcdf555ecf0f60066f38a3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 8 Oct 2017 11:57:55 +0200 Subject: [PATCH 0685/2177] mlxsw: spectrum: Add extack for VLAN enslavements Similar to physical ports, enslavement of VLAN devices can also fail. Use extack to indicate why the enslavement failed. $ ip link add link enp1s0np1 name enp1s0np1.10 type vlan id 10 $ ip link add name bond0 type bond mode 802.3ad $ ip link set dev enp1s0np1.10 master bond0 Error: spectrum: VLAN devices only support bridge and VRF uppers. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 5cd4df08ce97..5ab4fd74a325 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -4389,18 +4390,25 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; struct net_device *upper_dev; int err = 0; + extack = netdev_notifier_info_to_extack(&info->info); + switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!netif_is_bridge_master(upper_dev)) + if (!netif_is_bridge_master(upper_dev)) { + NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers"); return -EINVAL; + } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; + } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; From 9b63ef88d3a16d67afbe7916625289650e9f30d9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 8 Oct 2017 11:57:56 +0200 Subject: [PATCH 0686/2177] mlxsw: spectrum: Propagate extack further for bridge enslavements The code that actually takes care of bridge offload introduces a few more non-trivial constraints with regards to bridge enslavements. Propagate extack there to indicate the reason. $ ip link add link enp1s0np1 name enp1s0np1.10 type vlan id 10 $ ip link add link enp1s0np1 name enp1s0np1.20 type vlan id 20 $ ip link add name br0 type bridge $ ip link set dev enp1s0np1.10 master br0 $ ip link set dev enp1s0np1.20 master br0 Error: spectrum: Can not bridge VLAN uppers of the same port. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 6 +++-- .../net/ethernet/mellanox/mlxsw/spectrum.h | 3 ++- .../mellanox/mlxsw/spectrum_switchdev.c | 25 +++++++++++++------ 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 5ab4fd74a325..321988ac57cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4299,7 +4299,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, lower_dev, - upper_dev); + upper_dev, + extack); else mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lower_dev, @@ -4416,7 +4417,8 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, vlan_dev, - upper_dev); + upper_dev, + extack); else mlxsw_sp_port_bridge_leave(mlxsw_sp_port, vlan_dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index ae67e6046098..8e45183dc9bb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -326,7 +326,8 @@ void mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, - struct net_device *br_dev); + struct net_device *br_dev, + struct netlink_ext_ack *extack); void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, struct net_device *br_dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 0f9eac5f4ebf..2cfdf22a145f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include "spectrum.h" @@ -107,7 +108,8 @@ struct mlxsw_sp_bridge_vlan { struct mlxsw_sp_bridge_ops { int (*port_join)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port); + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack); void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port); @@ -1735,12 +1737,15 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { static int mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port) + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - if (is_vlan_dev(bridge_port->dev)) + if (is_vlan_dev(bridge_port->dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge"); return -EINVAL; + } mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1); if (WARN_ON(!mlxsw_sp_port_vlan)) @@ -1797,13 +1802,16 @@ mlxsw_sp_port_is_br_member(const struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port) + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) { struct 
mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; u16 vid; - if (!is_vlan_dev(bridge_port->dev)) + if (!is_vlan_dev(bridge_port->dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge"); return -EINVAL; + } vid = vlan_dev_vlan_id(bridge_port->dev); mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); @@ -1811,7 +1819,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, return -EINVAL; if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) { - netdev_err(mlxsw_sp_port->dev, "Can't bridge VLAN uppers of the same port\n"); + NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port"); return -EINVAL; } @@ -1854,7 +1862,8 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, - struct net_device *br_dev) + struct net_device *br_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_bridge_device *bridge_device; @@ -1867,7 +1876,7 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, bridge_device = bridge_port->bridge_device; err = bridge_device->ops->port_join(bridge_device, bridge_port, - mlxsw_sp_port); + mlxsw_sp_port, extack); if (err) goto err_port_join; From 486e3981057cacdafd62ba0618612193ff12d1dd Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 6 Oct 2017 08:33:57 -0700 Subject: [PATCH 0687/2177] hv_netvsc: Change the hash level variable to bit flags This simplifies the logic and make it easier to add more options. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 11 ++++- drivers/net/hyperv/netvsc_drv.c | 73 ++++++++++++++++++++++----------- 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 6f550e15a41c..a81335e8ebe8 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -704,6 +704,14 @@ struct netvsc_reconfig { u32 event; }; +/* L4 hash bits for different protocols */ +#define HV_TCP4_L4HASH 1 +#define HV_TCP6_L4HASH 2 +#define HV_UDP4_L4HASH 4 +#define HV_UDP6_L4HASH 8 +#define HV_DEFAULT_L4HASH (HV_TCP4_L4HASH | HV_TCP6_L4HASH | HV_UDP4_L4HASH | \ + HV_UDP6_L4HASH) + /* The context of the netvsc device */ struct net_device_context { /* point back to our device context */ @@ -726,10 +734,9 @@ struct net_device_context { u32 tx_send_table[VRSS_SEND_TAB_SIZE]; /* Ethtool settings */ - bool udp4_l4_hash; - bool udp6_l4_hash; u8 duplex; u32 speed; + u32 l4_hash; /* L4 hash settings */ struct netvsc_ethtool_stats eth_stats; /* State to manage the associated VF interface. 
*/ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index dfb986421ec6..9bc7dbab9506 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -203,7 +203,7 @@ static inline u32 netvsc_get_hash( const struct net_device_context *ndc) { struct flow_keys flow; - u32 hash; + u32 hash, pkt_proto = 0; static u32 hashrnd __read_mostly; net_get_random_once(&hashrnd, sizeof(hashrnd)); @@ -211,11 +211,25 @@ static inline u32 netvsc_get_hash( if (!skb_flow_dissect_flow_keys(skb, &flow, 0)) return 0; - if (flow.basic.ip_proto == IPPROTO_TCP || - (flow.basic.ip_proto == IPPROTO_UDP && - ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) || - (flow.basic.n_proto == htons(ETH_P_IPV6) && - ndc->udp6_l4_hash)))) { + switch (flow.basic.ip_proto) { + case IPPROTO_TCP: + if (flow.basic.n_proto == htons(ETH_P_IP)) + pkt_proto = HV_TCP4_L4HASH; + else if (flow.basic.n_proto == htons(ETH_P_IPV6)) + pkt_proto = HV_TCP6_L4HASH; + + break; + + case IPPROTO_UDP: + if (flow.basic.n_proto == htons(ETH_P_IP)) + pkt_proto = HV_UDP4_L4HASH; + else if (flow.basic.n_proto == htons(ETH_P_IPV6)) + pkt_proto = HV_UDP6_L4HASH; + + break; + } + + if (pkt_proto & ndc->l4_hash) { return skb_get_hash(skb); } else { if (flow.basic.n_proto == htons(ETH_P_IP)) @@ -898,8 +912,7 @@ static void netvsc_init_settings(struct net_device *dev) { struct net_device_context *ndc = netdev_priv(dev); - ndc->udp4_l4_hash = true; - ndc->udp6_l4_hash = true; + ndc->l4_hash = HV_DEFAULT_L4HASH; ndc->speed = SPEED_UNKNOWN; ndc->duplex = DUPLEX_FULL; @@ -1245,23 +1258,25 @@ static int netvsc_get_rss_hash_opts(struct net_device_context *ndc, struct ethtool_rxnfc *info) { + const u32 l4_flag = RXH_L4_B_0_1 | RXH_L4_B_2_3; + info->data = RXH_IP_SRC | RXH_IP_DST; switch (info->flow_type) { case TCP_V4_FLOW: case TCP_V6_FLOW: - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + info->data |= l4_flag; break; case UDP_V4_FLOW: - if (ndc->udp4_l4_hash) - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + if (ndc->l4_hash & HV_UDP4_L4HASH) + info->data |= l4_flag; break; case UDP_V6_FLOW: - if (ndc->udp6_l4_hash) - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + if (ndc->l4_hash & HV_UDP6_L4HASH) + info->data |= l4_flag; break; @@ -1302,23 +1317,35 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc, { if (info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - if (info->flow_type == UDP_V4_FLOW) - ndc->udp4_l4_hash = true; - else if (info->flow_type == UDP_V6_FLOW) - ndc->udp6_l4_hash = true; - else + switch (info->flow_type) { + case UDP_V4_FLOW: + ndc->l4_hash |= HV_UDP4_L4HASH; + break; + + case UDP_V6_FLOW: + ndc->l4_hash |= HV_UDP6_L4HASH; + break; + + default: return -EOPNOTSUPP; + } return 0; } if (info->data == (RXH_IP_SRC | RXH_IP_DST)) { - if (info->flow_type == UDP_V4_FLOW) - ndc->udp4_l4_hash = false; - else if (info->flow_type == UDP_V6_FLOW) - ndc->udp6_l4_hash = false; - else + switch (info->flow_type) { + case UDP_V4_FLOW: + ndc->l4_hash &= ~HV_UDP4_L4HASH; + break; + + case UDP_V6_FLOW: + ndc->l4_hash &= ~HV_UDP6_L4HASH; + break; + + default: return -EOPNOTSUPP; + } return 0; } From 0518ec4f9d8804a9b3ab4306b4b10828f35f715b Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 6 Oct 2017 08:33:58 -0700 Subject: [PATCH 0688/2177] hv_netvsc: Add ethtool handler to set and get TCP hash levels The patch supports the options to switch TCP hash level between L3 and L4 by ethtool command. TCP over IPv4 and v6 can be set differently. 
The default hash level is L4. We currently only allow switching TX hash level from within the guests. For example, for TCP over IPv4 on eth0: To include TCP port numbers in hashing: ethtool -N eth0 rx-flow-hash tcp4 sdfn To exclude TCP port numbers in hashing: ethtool -N eth0 rx-flow-hash tcp4 sd To show TCP hash level: ethtool -n eth0 rx-flow-hash tcp4 Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9bc7dbab9506..44746de3dd4c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1264,8 +1264,15 @@ netvsc_get_rss_hash_opts(struct net_device_context *ndc, switch (info->flow_type) { case TCP_V4_FLOW: + if (ndc->l4_hash & HV_TCP4_L4HASH) + info->data |= l4_flag; + + break; + case TCP_V6_FLOW: - info->data |= l4_flag; + if (ndc->l4_hash & HV_TCP6_L4HASH) + info->data |= l4_flag; + break; case UDP_V4_FLOW: @@ -1318,6 +1325,14 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc, if (info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)) { switch (info->flow_type) { + case TCP_V4_FLOW: + ndc->l4_hash |= HV_TCP4_L4HASH; + break; + + case TCP_V6_FLOW: + ndc->l4_hash |= HV_TCP6_L4HASH; + break; + case UDP_V4_FLOW: ndc->l4_hash |= HV_UDP4_L4HASH; break; @@ -1335,6 +1350,14 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc, if (info->data == (RXH_IP_SRC | RXH_IP_DST)) { switch (info->flow_type) { + case TCP_V4_FLOW: + ndc->l4_hash &= ~HV_TCP4_L4HASH; + break; + + case TCP_V6_FLOW: + ndc->l4_hash &= ~HV_TCP6_L4HASH; + break; + case UDP_V4_FLOW: ndc->l4_hash &= ~HV_UDP4_L4HASH; break; From 78005d91c11ea45a93e68c7d59079784f4c46828 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 6 Oct 2017 08:33:59 -0700 Subject: [PATCH 0689/2177] hv_netvsc: Update netvsc Document for TCP hash level setting Update Documentation/networking/netvsc.txt for TCP hash level setting and related info. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- Documentation/networking/netvsc.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/netvsc.txt b/Documentation/networking/netvsc.txt index 93560fb1170a..92f5b31392fa 100644 --- a/Documentation/networking/netvsc.txt +++ b/Documentation/networking/netvsc.txt @@ -19,12 +19,12 @@ Features Receive Side Scaling -------------------- - Hyper-V supports receive side scaling. For TCP, packets are - distributed among available queues based on IP address and port + Hyper-V supports receive side scaling. For TCP & UDP, packets can + be distributed among available queues based on IP address and port number. - For UDP, we can switch UDP hash level between L3 and L4 by ethtool - command. UDP over IPv4 and v6 can be set differently. The default + For TCP & UDP, we can switch hash level between L3 and L4 by ethtool + command. TCP/UDP over IPv4 and v6 can be set differently. The default hash level is L4. We currently only allow switching TX hash level from within the guests. From 951f788a80ff8b6339c5c1ab888b0d4b4352efd8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Oct 2017 21:07:18 -0700 Subject: [PATCH 0690/2177] ipv6: fix a BUG in rt6_get_pcpu_route() Ido reported following splat and provided a patch. 
[ 122.221814] BUG: using smp_processor_id() in preemptible [00000000] code: sshd/2672 [ 122.221845] caller is debug_smp_processor_id+0x17/0x20 [ 122.221866] CPU: 0 PID: 2672 Comm: sshd Not tainted 4.14.0-rc3-idosch-next-custom #639 [ 122.221880] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 122.221893] Call Trace: [ 122.221919] dump_stack+0xb1/0x10c [ 122.221946] ? _atomic_dec_and_lock+0x124/0x124 [ 122.221974] ? ___ratelimit+0xfe/0x240 [ 122.222020] check_preemption_disabled+0x173/0x1b0 [ 122.222060] debug_smp_processor_id+0x17/0x20 [ 122.222083] ip6_pol_route+0x1482/0x24a0 ... I believe we can simplify this code path a bit, since we no longer hold a read_lock and need to release it to avoid a dead lock. By disabling BH, we make sure we'll prevent code re-entry and rt6_get_pcpu_route()/rt6_make_pcpu_route() run on the same cpu. Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Reported-by: Ido Schimmel Signed-off-by: Eric Dumazet Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv6/route.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 399d1bceec4a..606e80325b21 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1136,15 +1136,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) dst_hold(&pcpu_rt->dst); p = this_cpu_ptr(rt->rt6i_pcpu); prev = cmpxchg(p, NULL, pcpu_rt); - if (prev) { - /* If someone did it before us, return prev instead */ - /* release refcnt taken by ip6_rt_pcpu_alloc() */ - dst_release_immediate(&pcpu_rt->dst); - /* release refcnt taken by above dst_hold() */ - dst_release_immediate(&pcpu_rt->dst); - dst_hold(&prev->dst); - pcpu_rt = prev; - } + BUG_ON(prev); rt6_dst_from_metrics_check(pcpu_rt); return pcpu_rt; @@ -1739,31 +1731,25 @@ uncached_rt_out: struct rt6_info *pcpu_rt; dst_use_noref(&rt->dst, jiffies); + local_bh_disable(); pcpu_rt = rt6_get_pcpu_route(rt); - if (pcpu_rt) { - rcu_read_unlock(); - } else { + if (!pcpu_rt) { /* atomic_inc_not_zero() is needed when using rcu */ if (atomic_inc_not_zero(&rt->rt6i_ref)) { - /* We have to do the read_unlock first - * because rt6_make_pcpu_route() may trigger - * ip6_dst_gc() which will take the write_lock. - * - * No dst_hold() on rt is needed because grabbing + /* No dst_hold() on rt is needed because grabbing * rt->rt6i_ref makes sure rt can't be released. */ - rcu_read_unlock(); pcpu_rt = rt6_make_pcpu_route(rt); rt6_release(rt); } else { /* rt is already removed from tree */ - rcu_read_unlock(); pcpu_rt = net->ipv6.ip6_null_entry; dst_hold(&pcpu_rt->dst); } } - + local_bh_enable(); + rcu_read_unlock(); trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); return pcpu_rt; } From 821f1b21cabb46827ce39ddf82e2789680b5042a Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 6 Oct 2017 22:12:37 -0700 Subject: [PATCH 0691/2177] bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood This patch adds a new bridge port flag BR_NEIGH_SUPPRESS to suppress arp and nd flood on bridge ports. It implements rfc7432, section 10. https://tools.ietf.org/html/rfc7432#section-10 for ethernet VPN deployments. It is similar to the existing BR_PROXYARP* flags but has a few semantic differences to conform to EVPN standard. Unlike the existing flags, this new flag suppresses flood of all neigh discovery packets (arp and nd) to tunnel ports. Supports both vlan filtering and non-vlan filtering bridges. 
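For example, for a bridge port named vxlan100 (the name is only illustrative), the flag can be toggled through the per-port sysfs attribute this patch introduces or, with an iproute2 release new enough to know about the new netlink attribute, via the bridge command:

  # read the current state of the flag (0 = off, the default)
  cat /sys/class/net/vxlan100/brport/neigh_suppress
  # enable arp/nd suppression on this bridge port
  echo 1 > /sys/class/net/vxlan100/brport/neigh_suppress
  # equivalent via iproute2, where supported
  bridge link set dev vxlan100 neigh_suppress on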
In case of EVPN, it is mainly used to avoid flooding of arp and nd packets to tunnel ports like vxlan. This patch adds netlink and sysfs support to set this bridge port flag. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + include/uapi/linux/if_link.h | 1 + net/bridge/Makefile | 2 +- net/bridge/br_arp_nd_proxy.c | 32 ++++++++++++++++++++++++++++++++ net/bridge/br_forward.c | 2 +- net/bridge/br_if.c | 5 +++++ net/bridge/br_netlink.c | 10 +++++++++- net/bridge/br_private.h | 2 ++ net/bridge/br_sysfs_if.c | 2 ++ 9 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 net/bridge/br_arp_nd_proxy.c diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 3cd18ac0697f..316ee113a220 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -49,6 +49,7 @@ struct br_ip_list { #define BR_MULTICAST_TO_UNICAST BIT(12) #define BR_VLAN_TUNNEL BIT(13) #define BR_BCAST_FLOOD BIT(14) +#define BR_NEIGH_SUPPRESS BIT(15) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cd580fc0e58f..b037e0ab1975 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -327,6 +327,7 @@ enum { IFLA_BRPORT_VLAN_TUNNEL, IFLA_BRPORT_BCAST_FLOOD, IFLA_BRPORT_GROUP_FWD_MASK, + IFLA_BRPORT_NEIGH_SUPPRESS, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 40b1ede527ca..4aee55fdcc92 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ br_ioctl.o br_stp.o br_stp_bpdu.o \ br_stp_if.o br_stp_timer.o br_netlink.o \ - br_netlink_tunnel.o + br_netlink_tunnel.o br_arp_nd_proxy.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c new file mode 100644 index 000000000000..f889ad5f0048 --- /dev/null +++ b/net/bridge/br_arp_nd_proxy.c @@ -0,0 +1,32 @@ +/* + * Handle bridge arp/nd proxy/suppress + * + * Copyright (C) 2017 Cumulus Networks + * Copyright (c) 2017 Roopa Prabhu + * + * Authors: + * Roopa Prabhu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include "br_private.h" + +void br_recalculate_neigh_suppress_enabled(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool neigh_suppress = false; + + list_for_each_entry(p, &br->port_list, list) { + if (p->flags & BR_NEIGH_SUPPRESS) { + neigh_suppress = true; + break; + } + } + + br->neigh_suppress_enabled = neigh_suppress; +} diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 48fb17417fac..b4eed113d2ec 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -204,7 +204,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) continue; - if ((p->flags & BR_PROXYARP_WIFI) && + if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) && BR_INPUT_SKB_CB(skb)->proxyarp_replied) continue; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 59a74a414e20..ae38547bbf91 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -310,6 +310,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_recalculate_neigh_suppress_enabled(br); + br_fdb_delete_by_port(br, NULL, 0, 1); cancel_delayed_work_sync(&br->gc_work); @@ -660,4 +662,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) if (mask & BR_AUTO_MASK) nbp_update_port_count(br); + + if (mask & BR_NEIGH_SUPPRESS) + br_recalculate_neigh_suppress_enabled(br); } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index dea88a255d26..f0e82682e071 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ + + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ @@ -210,7 +211,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & BR_VLAN_TUNNEL)) || - nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask)) + nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) || + nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS, + !!(p->flags & BR_NEIGH_SUPPRESS))) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -785,6 +788,11 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) p->group_fwd_mask = fwd_mask; } + err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, + BR_NEIGH_SUPPRESS); + if (err) + return err; + br_port_flags_change(p, old_flags ^ p->flags); return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ab4df24f7bba..00fa371b1fb2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -404,6 +404,7 @@ struct net_bridge { #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif + bool neigh_suppress_enabled; }; struct br_input_skb_cb { @@ -1139,4 +1140,5 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb) } #endif /* CONFIG_NET_SWITCHDEV */ +void br_recalculate_neigh_suppress_enabled(struct net_bridge *br); #endif diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 9110d5e56085..0a1fa9ccd8b7 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c 
@@ -191,6 +191,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); +BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -241,6 +242,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_flood, &brport_attr_broadcast_flood, &brport_attr_group_fwd_mask, + &brport_attr_neigh_suppress, NULL }; From 057658cb33fbf4d4309f01fe8845903b1cd07fad Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 6 Oct 2017 22:12:38 -0700 Subject: [PATCH 0692/2177] bridge: suppress arp pkts on BR_NEIGH_SUPPRESS ports This patch avoids flooding and proxies arp packets for BR_NEIGH_SUPPRESS ports. Moves existing br_do_proxy_arp to br_do_proxy_suppress_arp to support both proxy arp and neigh suppress. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br_arp_nd_proxy.c | 188 +++++++++++++++++++++++++++++++++++ net/bridge/br_device.c | 9 ++ net/bridge/br_input.c | 63 +----------- net/bridge/br_private.h | 3 + 4 files changed, 205 insertions(+), 58 deletions(-) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index f889ad5f0048..a79c1824e163 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -14,6 +14,14 @@ */ #include +#include +#include +#include +#include +#include +#include +#include + #include "br_private.h" void br_recalculate_neigh_suppress_enabled(struct net_bridge *br) @@ -30,3 +38,183 @@ void br_recalculate_neigh_suppress_enabled(struct net_bridge *br) br->neigh_suppress_enabled = neigh_suppress; } + +#if IS_ENABLED(CONFIG_INET) +static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p, + struct net_device *dev, __be32 dest_ip, __be32 src_ip, + const unsigned char *dest_hw, + const unsigned char *src_hw, + const unsigned char *target_hw, + __be16 vlan_proto, u16 vlan_tci) +{ + struct net_bridge_vlan_group *vg; + struct sk_buff *skb; + u16 pvid; + + netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n", + dev->name, &dest_ip, dest_hw, &src_ip, src_hw); + + if (!vlan_tci) { + arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip, + dest_hw, src_hw, target_hw); + return; + } + + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip, + dest_hw, src_hw, target_hw); + if (!skb) + return; + + if (p) + vg = nbp_vlan_group_rcu(p); + else + vg = br_vlan_group_rcu(br); + pvid = br_get_pvid(vg); + if (pvid == (vlan_tci & VLAN_VID_MASK)) + vlan_tci = 0; + + if (vlan_tci) + __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); + + if (p) { + arp_xmit(skb); + } else { + skb_reset_mac_header(skb); + __skb_pull(skb, skb_network_offset(skb)); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_HOST; + + netif_rx_ni(skb); + } +} + +static int br_chk_addr_ip(struct net_device *dev, void *data) +{ + __be32 ip = *(__be32 *)data; + struct in_device *in_dev; + __be32 addr = 0; + + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + addr = inet_confirm_addr(dev_net(dev), in_dev, 0, ip, + RT_SCOPE_HOST); + + if (addr == ip) + return 1; + + return 0; +} + +static bool br_is_local_ip(struct net_device *dev, __be32 ip) +{ + if (br_chk_addr_ip(dev, &ip)) + return true; + + /* check if ip is configured on upper dev */ + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip)) + return true; + + return false; +} + +void 
br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p) +{ + struct net_device *dev = br->dev; + struct net_device *vlandev = dev; + struct neighbour *n; + struct arphdr *parp; + u8 *arpptr, *sha; + __be32 sip, tip; + + BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + + if ((dev->flags & IFF_NOARP) || + !pskb_may_pull(skb, arp_hdr_len(dev))) + return; + + parp = arp_hdr(skb); + + if (parp->ar_pro != htons(ETH_P_IP) || + parp->ar_hln != dev->addr_len || + parp->ar_pln != 4) + return; + + arpptr = (u8 *)parp + sizeof(struct arphdr); + sha = arpptr; + arpptr += dev->addr_len; /* sha */ + memcpy(&sip, arpptr, sizeof(sip)); + arpptr += sizeof(sip); + arpptr += dev->addr_len; /* tha */ + memcpy(&tip, arpptr, sizeof(tip)); + + if (ipv4_is_loopback(tip) || + ipv4_is_multicast(tip)) + return; + + if (br->neigh_suppress_enabled) { + if (p && (p->flags & BR_NEIGH_SUPPRESS)) + return; + if (ipv4_is_zeronet(sip) || sip == tip) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + } + + if (parp->ar_op != htons(ARPOP_REQUEST)) + return; + + if (vid != 0) { + vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto, + vid); + if (!vlandev) + return; + } + + if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) { + /* its our local ip, so don't proxy reply + * and don't forward to neigh suppress ports + */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + + n = neigh_lookup(&arp_tbl, &tip, vlandev); + if (n) { + struct net_bridge_fdb_entry *f; + + if (!(n->nud_state & NUD_VALID)) { + neigh_release(n); + return; + } + + f = br_fdb_find_rcu(br, n->ha, vid); + if (f) { + bool replied = false; + + if ((p && (p->flags & BR_PROXYARP)) || + (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI | + BR_NEIGH_SUPPRESS)))) { + if (!vid) + br_arp_send(br, p, skb->dev, sip, tip, + sha, n->ha, sha, 0, 0); + else + br_arp_send(br, p, skb->dev, sip, tip, + sha, n->ha, sha, + skb->vlan_proto, + skb_vlan_tag_get(skb)); + replied = true; + } + + /* If we have replied or as long as we know the + * mac, indicate to arp replied + */ + if (replied || br->neigh_suppress_enabled) + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + } + + neigh_release(n); + } +} +#endif diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 7acb77c9bd65..eb30c6a274c3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); const struct nf_br_ops *nf_ops; const unsigned char *dest; + struct ethhdr *eth; u16 vid = 0; rcu_read_lock(); @@ -57,11 +58,19 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); + eth = eth_hdr(skb); skb_pull(skb, ETH_HLEN); if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) goto out; + if (IS_ENABLED(CONFIG_INET) && + (eth->h_proto == htons(ETH_P_ARP) || + eth->h_proto == htons(ETH_P_RARP)) && + br->neigh_suppress_enabled) { + br_do_proxy_suppress_arp(skb, br, vid, NULL); + } + dest = eth_hdr(skb)->h_dest; if (is_broadcast_ether_addr(dest)) { br_flood(br, skb, BR_PKT_BROADCAST, false, true); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7cb613776b31..4b8d2ec2fa23 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -71,62 +71,6 @@ static int br_pass_frame_up(struct sk_buff *skb) br_netif_receive_skb); } 
-static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, - u16 vid, struct net_bridge_port *p) -{ - struct net_device *dev = br->dev; - struct neighbour *n; - struct arphdr *parp; - u8 *arpptr, *sha; - __be32 sip, tip; - - BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; - - if ((dev->flags & IFF_NOARP) || - !pskb_may_pull(skb, arp_hdr_len(dev))) - return; - - parp = arp_hdr(skb); - - if (parp->ar_pro != htons(ETH_P_IP) || - parp->ar_op != htons(ARPOP_REQUEST) || - parp->ar_hln != dev->addr_len || - parp->ar_pln != 4) - return; - - arpptr = (u8 *)parp + sizeof(struct arphdr); - sha = arpptr; - arpptr += dev->addr_len; /* sha */ - memcpy(&sip, arpptr, sizeof(sip)); - arpptr += sizeof(sip); - arpptr += dev->addr_len; /* tha */ - memcpy(&tip, arpptr, sizeof(tip)); - - if (ipv4_is_loopback(tip) || - ipv4_is_multicast(tip)) - return; - - n = neigh_lookup(&arp_tbl, &tip, dev); - if (n) { - struct net_bridge_fdb_entry *f; - - if (!(n->nud_state & NUD_VALID)) { - neigh_release(n); - return; - } - - f = br_fdb_find_rcu(br, n->ha, vid); - if (f && ((p->flags & BR_PROXYARP) || - (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { - arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, - sha, n->ha, sha); - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; - } - - neigh_release(n); - } -} - /* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -171,8 +115,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb BR_INPUT_SKB_CB(skb)->brdev = br->dev; - if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) - br_do_proxy_arp(skb, br, vid, p); + if (IS_ENABLED(CONFIG_INET) && + (skb->protocol == htons(ETH_P_ARP) || + skb->protocol == htons(ETH_P_RARP))) { + br_do_proxy_suppress_arp(skb, br, vid, p); + } switch (pkt_type) { case BR_PKT_MULTICAST: diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 00fa371b1fb2..4e6b25be14d0 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1140,5 +1140,8 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb) } #endif /* CONFIG_NET_SWITCHDEV */ +/* br_arp_nd_proxy.c */ void br_recalculate_neigh_suppress_enabled(struct net_bridge *br); +void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p); #endif From ed842faeb2bd49256f00485402f3113205f91d30 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 6 Oct 2017 22:12:39 -0700 Subject: [PATCH 0693/2177] bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports This patch avoids flooding and proxies ndisc packets for BR_NEIGH_SUPPRESS ports. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/bridge/br_arp_nd_proxy.c | 249 +++++++++++++++++++++++++++++++++++ net/bridge/br_device.c | 11 ++ net/bridge/br_input.c | 11 ++ net/bridge/br_private.h | 3 + 4 files changed, 274 insertions(+) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index a79c1824e163..2cf7716254be 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -21,6 +21,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif #include "br_private.h" @@ -218,3 +221,249 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, } } #endif + +#if IS_ENABLED(CONFIG_IPV6) +struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg) +{ + struct nd_msg *m; + + m = skb_header_pointer(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr), sizeof(*msg), msg); + if (!m) + return NULL; + + if (m->icmph.icmp6_code != 0 || + (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION && + m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT)) + return NULL; + + return m; +} + +static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, + struct sk_buff *request, struct neighbour *n, + __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns) +{ + struct net_device *dev = request->dev; + struct net_bridge_vlan_group *vg; + struct sk_buff *reply; + struct nd_msg *na; + struct ipv6hdr *pip6; + int na_olen = 8; /* opt hdr + ETH_ALEN for target */ + int ns_olen; + int i, len; + u8 *daddr; + u16 pvid; + + if (!dev) + return; + + len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) + + sizeof(*na) + na_olen + dev->needed_tailroom; + + reply = alloc_skb(len, GFP_ATOMIC); + if (!reply) + return; + + reply->protocol = htons(ETH_P_IPV6); + reply->dev = dev; + skb_reserve(reply, LL_RESERVED_SPACE(dev)); + skb_push(reply, sizeof(struct ethhdr)); + skb_set_mac_header(reply, 0); + + daddr = eth_hdr(request)->h_source; + + /* Do we need option processing ? */ + ns_olen = request->len - (skb_network_offset(request) + + sizeof(struct ipv6hdr)) - sizeof(*ns); + for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) { + if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + break; + } + } + + /* Ethernet header */ + ether_addr_copy(eth_hdr(reply)->h_dest, daddr); + ether_addr_copy(eth_hdr(reply)->h_source, n->ha); + eth_hdr(reply)->h_proto = htons(ETH_P_IPV6); + reply->protocol = htons(ETH_P_IPV6); + + skb_pull(reply, sizeof(struct ethhdr)); + skb_set_network_header(reply, 0); + skb_put(reply, sizeof(struct ipv6hdr)); + + /* IPv6 header */ + pip6 = ipv6_hdr(reply); + memset(pip6, 0, sizeof(struct ipv6hdr)); + pip6->version = 6; + pip6->priority = ipv6_hdr(request)->priority; + pip6->nexthdr = IPPROTO_ICMPV6; + pip6->hop_limit = 255; + pip6->daddr = ipv6_hdr(request)->saddr; + pip6->saddr = *(struct in6_addr *)n->primary_key; + + skb_pull(reply, sizeof(struct ipv6hdr)); + skb_set_transport_header(reply, 0); + + na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen); + + /* Neighbor Advertisement */ + memset(na, 0, sizeof(*na) + na_olen); + na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + na->icmph.icmp6_router = 0; /* XXX: should be 1 ? 
*/ + na->icmph.icmp6_override = 1; + na->icmph.icmp6_solicited = 1; + na->target = ns->target; + ether_addr_copy(&na->opt[2], n->ha); + na->opt[0] = ND_OPT_TARGET_LL_ADDR; + na->opt[1] = na_olen >> 3; + + na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr, + &pip6->daddr, + sizeof(*na) + na_olen, + IPPROTO_ICMPV6, + csum_partial(na, sizeof(*na) + na_olen, 0)); + + pip6->payload_len = htons(sizeof(*na) + na_olen); + + skb_push(reply, sizeof(struct ipv6hdr)); + skb_push(reply, sizeof(struct ethhdr)); + + reply->ip_summed = CHECKSUM_UNNECESSARY; + + if (p) + vg = nbp_vlan_group_rcu(p); + else + vg = br_vlan_group_rcu(br); + pvid = br_get_pvid(vg); + if (pvid == (vlan_tci & VLAN_VID_MASK)) + vlan_tci = 0; + + if (vlan_tci) + __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci); + + netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n", + dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha); + + if (p) { + dev_queue_xmit(reply); + } else { + skb_reset_mac_header(reply); + __skb_pull(reply, skb_network_offset(reply)); + reply->ip_summed = CHECKSUM_UNNECESSARY; + reply->pkt_type = PACKET_HOST; + + netif_rx_ni(reply); + } +} + +static int br_chk_addr_ip6(struct net_device *dev, void *data) +{ + struct in6_addr *addr = (struct in6_addr *)data; + + if (ipv6_chk_addr(dev_net(dev), addr, dev, 0)) + return 1; + + return 0; +} + +static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr) + +{ + if (br_chk_addr_ip6(dev, addr)) + return true; + + /* check if ip is configured on upper dev */ + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr)) + return true; + + return false; +} + +void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p, struct nd_msg *msg) +{ + struct net_device *dev = br->dev; + struct net_device *vlandev = NULL; + struct in6_addr *saddr, *daddr; + struct ipv6hdr *iphdr; + struct neighbour *n; + + BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + + if (p && (p->flags & BR_NEIGH_SUPPRESS)) + return; + + if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT && + !msg->icmph.icmp6_solicited) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return; + + iphdr = ipv6_hdr(skb); + saddr = &iphdr->saddr; + daddr = &iphdr->daddr; + + if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + + if (vid != 0) { + /* build neigh table lookup on the vlan device */ + vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto, + vid); + if (!vlandev) + return; + } else { + vlandev = dev; + } + + if (br_is_local_ip6(vlandev, &msg->target)) { + /* its our own ip, so don't proxy reply + * and don't forward to arp suppress ports + */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + + n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev); + if (n) { + struct net_bridge_fdb_entry *f; + + if (!(n->nud_state & NUD_VALID)) { + neigh_release(n); + return; + } + + f = br_fdb_find_rcu(br, n->ha, vid); + if (f) { + bool replied = false; + + if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) { + if (vid != 0) + br_nd_send(br, p, skb, n, + skb->vlan_proto, + skb_vlan_tag_get(skb), msg); + else + br_nd_send(br, p, skb, n, 0, 0, msg); + replied = true; + } + + /* If we have replied or as long as we know the + * mac, indicate to NEIGH_SUPPRESS ports that we 
+ * have replied + */ + if (replied || br->neigh_suppress_enabled) + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + } + neigh_release(n); + } +} +#endif diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eb30c6a274c3..28bb22186fa0 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -69,6 +69,17 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) eth->h_proto == htons(ETH_P_RARP)) && br->neigh_suppress_enabled) { br_do_proxy_suppress_arp(skb, br, vid, NULL); + } else if (IS_ENABLED(CONFIG_IPV6) && + skb->protocol == htons(ETH_P_IPV6) && + br->neigh_suppress_enabled && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && + ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { + struct nd_msg *msg, _msg; + + msg = br_is_nd_neigh_msg(skb, &_msg); + if (msg) + br_do_suppress_nd(skb, br, vid, NULL, msg); } dest = eth_hdr(skb)->h_dest; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 4b8d2ec2fa23..a096d3e189da 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -119,6 +119,17 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb (skb->protocol == htons(ETH_P_ARP) || skb->protocol == htons(ETH_P_RARP))) { br_do_proxy_suppress_arp(skb, br, vid, p); + } else if (IS_ENABLED(CONFIG_IPV6) && + skb->protocol == htons(ETH_P_IPV6) && + br->neigh_suppress_enabled && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && + ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { + struct nd_msg *msg, _msg; + + msg = br_is_nd_neigh_msg(skb, &_msg); + if (msg) + br_do_suppress_nd(skb, br, vid, p, msg); } switch (pkt_type) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 4e6b25be14d0..fa0039f44818 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1144,4 +1144,7 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb) void br_recalculate_neigh_suppress_enabled(struct net_bridge *br); void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, u16 vid, struct net_bridge_port *p); +void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p, struct nd_msg *msg); +struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m); #endif From 8ef802aa8ec87a0fec20d28f8f51cad964265902 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Oct 2017 19:30:23 -0700 Subject: [PATCH 0694/2177] ipv6: prepare RCU lookups for idev->addr_list inet6_ifa_finish_destroy() already uses kfree_rcu() to free inet6_ifaddr structs. We need to use proper list additions/deletions in order to allow readers to use RCU instead of idev->lock rwlock. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9854d93e45bb..d1ff0955b709 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -945,7 +945,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) break; } - list_add_tail(&ifp->if_list, p); + list_add_tail_rcu(&ifp->if_list, p); } static u32 inet6_addr_hash(const struct in6_addr *addr) @@ -1204,7 +1204,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) action = check_cleanup_prefix_route(ifp, &expires); - list_del_init(&ifp->if_list); + list_del_rcu(&ifp->if_list); __in6_ifa_put(ifp); write_unlock_bh(&ifp->idev->lock); @@ -3562,7 +3562,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; - struct list_head del_list; int _keep_addr; bool keep_addr; int state, i; @@ -3654,7 +3653,6 @@ restart: */ keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); - INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { struct rt6_info *rt = NULL; bool keep; @@ -3663,8 +3661,6 @@ restart: keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr); - if (!keep) - list_move(&ifa->if_list, &del_list); write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); @@ -3698,19 +3694,14 @@ restart: } write_lock_bh(&idev->lock); + if (!keep) { + list_del_rcu(&ifa->if_list); + in6_ifa_put(ifa); + } } write_unlock_bh(&idev->lock); - /* now clean up addresses to be removed */ - while (!list_empty(&del_list)) { - ifa = list_first_entry(&del_list, - struct inet6_ifaddr, if_list); - list_del(&ifa->if_list); - - in6_ifa_put(ifa); - } - /* Step 5: Discard anycast and multicast list */ if (how) { ipv6_ac_destroy_dev(idev); From d9bf82c2f61be534873a37a0631b58761b6d6506 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Oct 2017 19:30:24 -0700 Subject: [PATCH 0695/2177] ipv6: ipv6_count_addresses() rcu conversion Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1ff0955b709..2e029c8be1f2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -152,7 +152,7 @@ static void ipv6_regen_rndid(struct inet6_dev *idev); static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); -static int ipv6_count_addresses(struct inet6_dev *idev); +static int ipv6_count_addresses(const struct inet6_dev *idev); static int ipv6_generate_stable_address(struct in6_addr *addr, u8 dad_count, const struct inet6_dev *idev); @@ -1785,15 +1785,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, return err; } -static int ipv6_count_addresses(struct inet6_dev *idev) +static int ipv6_count_addresses(const struct inet6_dev *idev) { + const struct inet6_ifaddr *ifp; int cnt = 0; - struct inet6_ifaddr *ifp; - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) + rcu_read_lock(); + list_for_each_entry_rcu(ifp, &idev->addr_list, if_list) cnt++; - read_unlock_bh(&idev->lock); + rcu_read_unlock(); return cnt; } From 47e26941f78d6ec03c5dd547425409f1f7852ff5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Oct 2017 19:30:25 -0700 Subject: [PATCH 0696/2177] ipv6: ipv6_chk_custom_prefix() rcu conversion Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2e029c8be1f2..33ee84c2512b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1859,20 +1859,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, bool ipv6_chk_custom_prefix(const struct in6_addr *addr, const unsigned int prefix_len, struct net_device *dev) { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; + const struct inet6_ifaddr *ifa; + const struct inet6_dev *idev; bool ret = false; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - list_for_each_entry(ifa, &idev->addr_list, if_list) { + list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); if (ret) break; } - read_unlock_bh(&idev->lock); } rcu_read_unlock(); From 24ba333b2c1a609fc17214ceb215e19bd9d54951 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Oct 2017 19:30:26 -0700 Subject: [PATCH 0697/2177] ipv6: ipv6_chk_prefix() rcu conversion Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 33ee84c2512b..ea63442209bf 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1880,22 +1880,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix); int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; + const struct inet6_ifaddr *ifa; + const struct inet6_dev *idev; int onlink; onlink = 0; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - list_for_each_entry(ifa, &idev->addr_list, if_list) { + list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { onlink = ipv6_prefix_equal(addr, &ifa->addr, ifa->prefix_len); if (onlink) break; } - read_unlock_bh(&idev->lock); } rcu_read_unlock(); return onlink; From f59c031e9134bb16c38980196a73d7ba40979baa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Oct 2017 19:30:27 -0700 Subject: [PATCH 0698/2177] ipv6: __ipv6_dev_get_saddr() rcu conversion Callers hold rcu_read_lock(), so we do not need the rcu_read_lock()/rcu_read_unlock() pair. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ea63442209bf..20c3ca777529 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1558,8 +1558,7 @@ static int __ipv6_dev_get_saddr(struct net *net, { struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx]; - read_lock_bh(&idev->lock); - list_for_each_entry(score->ifa, &idev->addr_list, if_list) { + list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) { int i; /* @@ -1625,7 +1624,6 @@ static int __ipv6_dev_get_saddr(struct net *net, } } out: - read_unlock_bh(&idev->lock); return hiscore_idx; } From cc429c8f6fa7cc1c9109bb4a1ad63a6d1f232f0e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Oct 2017 19:30:28 -0700 Subject: [PATCH 0699/2177] ipv6: avoid cache line dirtying in ipv6_dev_get_saddr() By extending the rcu section a bit, we can avoid these very expensive in6_ifa_put()/in6_ifa_hold() calls done in __ipv6_dev_get_saddr() and ipv6_dev_get_saddr() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 20c3ca777529..cab3faad2bf1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1608,11 +1608,6 @@ static int __ipv6_dev_get_saddr(struct net *net, } break; } else if (minihiscore < miniscore) { - if (hiscore->ifa) - in6_ifa_put(hiscore->ifa); - - in6_ifa_hold(score->ifa); - swap(hiscore, score); hiscore_idx = 1 - hiscore_idx; @@ -1660,6 +1655,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, int dst_type; bool use_oif_addr = false; int hiscore_idx = 0; + int ret = 0; dst_type = __ipv6_addr_type(daddr); dst.addr = daddr; @@ -1735,15 +1731,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, } out: - rcu_read_unlock(); - hiscore = &scores[hiscore_idx]; if (!hiscore->ifa) - return -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + else + *saddr = hiscore->ifa->addr; - *saddr = hiscore->ifa->addr; - in6_ifa_put(hiscore->ifa); - return 0; + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ipv6_dev_get_saddr); From c778c32118167adcfe6b40063c49bfeac6bc1cf1 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Sun, 8 Oct 2017 20:13:49 +0100 Subject: [PATCH 0700/2177] net: ethernet: stmmac: Clean up dead code Many macros in dwmac-ipq806x are unused and should be removed. Moreover gmac->id is an unsigned variable and therefore checking whether it is less than zero is redundant. Signed-off-by: Christos Gkekas Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 866444b6c82f..2c6d7c69c8f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -51,15 +51,11 @@ #define NSS_COMMON_CLK_SRC_CTRL_RGMII(x) 1 #define NSS_COMMON_CLK_SRC_CTRL_SGMII(x) ((x >= 2) ? 1 : 0) -#define NSS_COMMON_MACSEC_CTL 0x28 -#define NSS_COMMON_MACSEC_CTL_EXT_BYPASS_EN(x) (1 << x) - #define NSS_COMMON_GMAC_CTL(x) (0x30 + (x * 4)) #define NSS_COMMON_GMAC_CTL_CSYS_REQ BIT(19) #define NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL BIT(16) #define NSS_COMMON_GMAC_CTL_IFG_LIMIT_OFFSET 8 #define NSS_COMMON_GMAC_CTL_IFG_OFFSET 0 -#define NSS_COMMON_GMAC_CTL_IFG_MASK 0x3f #define NSS_COMMON_CLK_DIV_RGMII_1000 1 #define NSS_COMMON_CLK_DIV_RGMII_100 9 @@ -68,9 +64,6 @@ #define NSS_COMMON_CLK_DIV_SGMII_100 4 #define NSS_COMMON_CLK_DIV_SGMII_10 49 -#define QSGMII_PCS_MODE_CTL 0x68 -#define QSGMII_PCS_MODE_CTL_AUTONEG_EN(x) BIT((x * 8) + 7) - #define QSGMII_PCS_CAL_LCKDT_CTL 0x120 #define QSGMII_PCS_CAL_LCKDT_CTL_RST BIT(19) @@ -83,15 +76,10 @@ #define QSGMII_PHY_TX_DRIVER_EN BIT(3) #define QSGMII_PHY_QSGMII_EN BIT(7) #define QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET 12 -#define QSGMII_PHY_PHASE_LOOP_GAIN_MASK 0x7 #define QSGMII_PHY_RX_DC_BIAS_OFFSET 18 -#define QSGMII_PHY_RX_DC_BIAS_MASK 0x3 #define QSGMII_PHY_RX_INPUT_EQU_OFFSET 20 -#define QSGMII_PHY_RX_INPUT_EQU_MASK 0x3 #define QSGMII_PHY_CDR_PI_SLEW_OFFSET 22 -#define QSGMII_PHY_CDR_PI_SLEW_MASK 0x3 #define QSGMII_PHY_TX_DRV_AMP_OFFSET 28 -#define QSGMII_PHY_TX_DRV_AMP_MASK 0xf struct ipq806x_gmac { struct platform_device *pdev; @@ -217,7 +205,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) * code and keep it consistent with the Linux convention, we'll number * them from 0 to 3 here. 
*/ - if (gmac->id < 0 || gmac->id > 3) { + if (gmac->id > 3) { dev_err(dev, "invalid gmac id\n"); return -EINVAL; } From c49c777f9c87749b73bc888f097f8a4178382449 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Sun, 8 Oct 2017 23:46:47 +0100 Subject: [PATCH 0701/2177] qed: Delete redundant check on dcb_app priority dcb_app priority is unsigned thus checking whether it is less than zero is redundant. Signed-off-by: Christos Gkekas Acked-By: Tomer Tayar Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 8f6ccc0c39e5..6e15d3c10ebf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -2308,7 +2308,7 @@ static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) DP_VERBOSE(hwfn, QED_MSG_DCB, "selector = %d protocol = %d pri = %d\n", app->selector, app->protocol, app->priority); - if (app->priority < 0 || app->priority >= QED_MAX_PFC_PRIORITIES) { + if (app->priority >= QED_MAX_PFC_PRIORITIES) { DP_INFO(hwfn, "Invalid priority %d\n", app->priority); return -EINVAL; } From 2e22a75c55c1e70f36e21d4fa4e165af7f356aa9 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 5 Jul 2017 15:57:30 -0700 Subject: [PATCH 0702/2177] ixgbe: Clear SWFW_SYNC register during init Added clearing of SW resource bits in the SW/FW synchronization register to ixgbe_init_swfw_sync_X540(). Updated ixgbe_acquire_swfw_sync_X540 SW Manageability host interface resource bit error case to match the error handling of the other SW resource bits. Which is to release the SW resource bits if SW times out while attempting to acquire the resource. This allows the driver to load in cases where the semaphore bits could be stuck after a reset or a crash. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index 6ea0d6a5fb90..b8c5fd2a2115 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -619,12 +619,6 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) usleep_range(5000, 10000); } - /* Failed to get SW only semaphore */ - if (swmask == IXGBE_GSSR_SW_MNG_SM) { - hw_dbg(hw, "Failed to get SW only semaphore\n"); - return IXGBE_ERR_SWFW_SYNC; - } - /* If the resource is not released by the FW/HW the SW can assume that * the FW/HW malfunctions. In that case the SW should set the SW bit(s) * of the requested resource(s) while ignoring the corresponding FW/HW @@ -647,7 +641,8 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) */ if (swfw_sync & swmask) { u32 rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM | - IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM; + IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM | + IXGBE_GSSR_SW_MNG_SM; if (swi2c_mask) rmask |= IXGBE_GSSR_I2C_MASK; @@ -763,6 +758,8 @@ static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw) **/ void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw) { + u32 rmask; + /* First try to grab the semaphore but we don't need to bother * looking to see whether we got the lock or not since we do * the same thing regardless of whether we got the lock or not. 
@@ -771,6 +768,14 @@ void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw) */ ixgbe_get_swfw_sync_semaphore(hw); ixgbe_release_swfw_sync_semaphore(hw); + + /* Acquire and release all software resources. */ + rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM | + IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM | + IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_I2C_MASK; + + ixgbe_acquire_swfw_sync_X540(hw, rmask); + ixgbe_release_swfw_sync_X540(hw, rmask); } /** From 2e033eace7557a4b67ba1cb2746bd87d6ca2620b Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 17 Jul 2017 20:31:18 +0530 Subject: [PATCH 0703/2177] ixgbe: declare ixgbe_mac_operations structures as const Declare ixgbe_mac_operations structures as const as they are only stored in the mac_ops field of ixgbe_info structure. This field is of type const and therefore ixgbe_mac_operations structure can be made const too. Signed-off-by: Bhumika Goyal Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 19fbb2f28ea4..933c5070f1b6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3884,7 +3884,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x_fw = { .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550, }; -static struct ixgbe_mac_operations mac_ops_x550em_a = { +static const struct ixgbe_mac_operations mac_ops_x550em_a = { X550_COMMON_MAC .led_on = ixgbe_led_on_t_x550em, .led_off = ixgbe_led_off_t_x550em, @@ -3905,7 +3905,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = { .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a, }; -static struct ixgbe_mac_operations mac_ops_x550em_a_fw = { +static const struct ixgbe_mac_operations mac_ops_x550em_a_fw = { X550_COMMON_MAC .led_on = ixgbe_led_on_generic, .led_off = ixgbe_led_off_generic, From f5a71caa1763cc8fd1b108234689d6a7e4fe9d2f Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 15 Aug 2017 08:59:54 -0700 Subject: [PATCH 0704/2177] ixgbe: restore normal RSS after last macvlan offload is removed Just like when the last VF is removed, we need to restore normal operations after the last macvlan offload is removed, else we get stuck in single queue operations. To test: ethtool -l eth1 # note the number of queues in use, ~= cpus ethtool -K eth1 l2-fwd-offload on ip link add mv1 link eth1 type macvlan mode bridge ip link set dev mv1 up ip link del mv1 ethtool -l eth1 # are we back to the same # of queues, or stuck on 1? 
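The same before/after comparison can be scripted; this is only a sketch of the manual steps above, and it assumes the port is still called eth1 and that the driver reports its queue count on the "Combined:" line of ethtool -l (as ixgbe does):

  before=$(ethtool -l eth1 | awk '/^Combined:/ {print $2}' | tail -n1)  # current queue count
  ethtool -K eth1 l2-fwd-offload on
  ip link add mv1 link eth1 type macvlan mode bridge
  ip link set dev mv1 up
  ip link del mv1                                                       # last offloaded macvlan removed
  after=$(ethtool -l eth1 | awk '/^Combined:/ {print $2}' | tail -n1)
  [ "$after" = "$before" ] && echo "RSS restored: $after queues" || echo "still limited: $after queues"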
Signed-off-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3942c6208745..d83cc9d34de3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9758,6 +9758,17 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv) limit = find_last_bit(&adapter->fwd_bitmask, 32); adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter); + + /* go back to full RSS if we're done with our VMQs */ + if (adapter->ring_feature[RING_F_VMDQ].limit == 1) { + int rss = min_t(int, ixgbe_max_rss_indices(adapter), + num_online_cpus()); + + adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; + adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; + adapter->ring_feature[RING_F_RSS].limit = rss; + } + ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev)); netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n", fwd_adapter->pool, adapter->num_rx_pools, From c69be946d687a99dbc891ebc66539c1c2f082c1d Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 18 Aug 2017 15:48:02 -0700 Subject: [PATCH 0705/2177] ixgbe: add error checks when initializing the PHY Ignoring errors when attempting to identify the PHY can lead to a crash. Specifically in the case of FW controlled PHYs where the PHY read/write operations are set to NULL. Removed redundant comment. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 933c5070f1b6..8cea53b62e1b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3192,6 +3192,9 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* Identify the PHY or SFP module */ ret_val = phy->ops.identify(hw); + if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED || + ret_val == IXGBE_ERR_PHY_ADDR_INVALID) + return ret_val; /* Setup function pointers based on detected hardware */ ixgbe_init_mac_link_ops_X550em(hw); @@ -3394,9 +3397,10 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) ixgbe_clear_tx_pending(hw); /* PHY ops must be identified and initialized prior to reset */ - - /* Identify PHY and related function pointers */ status = hw->phy.ops.init(hw); + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || + status == IXGBE_ERR_PHY_ADDR_INVALID) + return status; /* start the external PHY */ if (hw->phy.type == ixgbe_phy_x550em_ext_t) { From 761c2a48c70d871b0622dccaa20ccad024101a51 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Tue, 29 Aug 2017 12:21:48 -0700 Subject: [PATCH 0706/2177] ixgbe: split Tx/Rx ring clearing for ethtool loopback test Commit: fed21bcee7a5 ("ixgbe: Don't bother clearing buffer memory for descriptor rings) exposed some issues with the logic in the current implementation of ixgbe_clean_test_rings() that are being addressed in this patch: - Split the clearing of the Tx and Rx rings in separate loops. Previously both Tx and Rx rings were cleared in a rx_desc->wb.upper.length based loop which could lead to issues if for w/e reason packets were received outside of the frames transmitted for the loopback test. 
- Add check for IXGBE_TXD_STAT_DD to avoid clearing the rings if the transmits have not comlpeted by the time we enter ixgbe_clean_test_rings() - Exit early on ixgbe_check_lbtest_frame() failure. This change fixes a crash during ethtool diagnostic (ethtool -t). Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 61 ++++++++++++------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 72c565712a5f..6d89f28cae06 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1916,8 +1916,6 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, unsigned int size) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbe_rx_buffer *rx_buffer; - struct ixgbe_tx_buffer *tx_buffer; u16 rx_ntc, tx_ntc, count = 0; /* initialize next to clean and descriptor values */ @@ -1925,25 +1923,15 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, tx_ntc = tx_ring->next_to_clean; rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc); - while (rx_desc->wb.upper.length) { - /* check Rx buffer */ - rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; + while (tx_ntc != tx_ring->next_to_use) { + union ixgbe_adv_tx_desc *tx_desc; + struct ixgbe_tx_buffer *tx_buffer; - /* sync Rx buffer for CPU read */ - dma_sync_single_for_cpu(rx_ring->dev, - rx_buffer->dma, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + tx_desc = IXGBE_TX_DESC(tx_ring, tx_ntc); - /* verify contents of skb */ - if (ixgbe_check_lbtest_frame(rx_buffer, size)) - count++; - - /* sync Rx buffer for device write */ - dma_sync_single_for_device(rx_ring->dev, - rx_buffer->dma, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + /* if DD is not set transmit has not completed */ + if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + return count; /* unmap buffer on Tx side */ tx_buffer = &tx_ring->tx_buffer_info[tx_ntc]; @@ -1958,13 +1946,40 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, DMA_TO_DEVICE); dma_unmap_len_set(tx_buffer, len, 0); - /* increment Rx/Tx next to clean counters */ - rx_ntc++; - if (rx_ntc == rx_ring->count) - rx_ntc = 0; + /* increment Tx next to clean counter */ tx_ntc++; if (tx_ntc == tx_ring->count) tx_ntc = 0; + } + + while (rx_desc->wb.upper.length) { + struct ixgbe_rx_buffer *rx_buffer; + + /* check Rx buffer */ + rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; + + /* sync Rx buffer for CPU read */ + dma_sync_single_for_cpu(rx_ring->dev, + rx_buffer->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + + /* verify contents of skb */ + if (ixgbe_check_lbtest_frame(rx_buffer, size)) + count++; + else + break; + + /* sync Rx buffer for device write */ + dma_sync_single_for_device(rx_ring->dev, + rx_buffer->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + + /* increment Rx next to clean counter */ + rx_ntc++; + if (rx_ntc == rx_ring->count) + rx_ntc = 0; /* fetch next descriptor */ rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc); From 5bca3b94dfbf9259d972a5be91333dda6eb9f350 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 9 Oct 2017 15:43:55 +0800 Subject: [PATCH 0707/2177] net: hns3: Cleanup for shifting true in hns3 driver This patch fixes a shifting true in hclge_main module. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 1a13614af3de..5c1bf12beade 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1687,7 +1687,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev, req->buf_num[i] = cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S); req->buf_num[i] |= - cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B); + cpu_to_le16(1 << HCLGE_TC0_PRI_BUF_EN_B); } req->shared_buf = From 9780cb97afd868fb11500f38826b7f30d554535c Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 9 Oct 2017 15:43:56 +0800 Subject: [PATCH 0708/2177] net: hns3: Add hns3_get_handle macro in hns3 driver There are many places that will need to get the handle of netdev, so add a macro to get the handle of netdev. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hns3_dcbnl.c | 18 +++------ .../hisilicon/hns3/hns3pf/hns3_enet.c | 39 +++++++------------ .../hisilicon/hns3/hns3pf/hns3_enet.h | 3 ++ .../hisilicon/hns3/hns3pf/hns3_ethtool.c | 38 +++++++----------- 4 files changed, 35 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c index 9832172bfb08..925619a7c50a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c @@ -13,8 +13,7 @@ static int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) { - struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(ndev); if (h->kinfo.dcb_ops->ieee_getets) return h->kinfo.dcb_ops->ieee_getets(h, ets); @@ -25,8 +24,7 @@ int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) static int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) { - struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(ndev); if (h->kinfo.dcb_ops->ieee_setets) return h->kinfo.dcb_ops->ieee_setets(h, ets); @@ -37,8 +35,7 @@ int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) static int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) { - struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(ndev); if (h->kinfo.dcb_ops->ieee_getpfc) return h->kinfo.dcb_ops->ieee_getpfc(h, pfc); @@ -49,8 +46,7 @@ int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) static int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) { - struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(ndev); if (h->kinfo.dcb_ops->ieee_setpfc) return h->kinfo.dcb_ops->ieee_setpfc(h, pfc); @@ -61,8 +57,7 @@ int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) /* DCBX configuration */ static u8 hns3_dcbnl_getdcbx(struct net_device *ndev) { - struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(ndev); if (h->kinfo.dcb_ops->getdcbx) return h->kinfo.dcb_ops->getdcbx(h); @@ 
-73,8 +68,7 @@ static u8 hns3_dcbnl_getdcbx(struct net_device *ndev) /* return 0 if successful, otherwise fail */ static u8 hns3_dcbnl_setdcbx(struct net_device *ndev, u8 mode) { - struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(ndev); if (h->kinfo.dcb_ops->setdcbx) return h->kinfo.dcb_ops->setdcbx(h, mode); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index c31506514e5d..c2a0537c649f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -198,8 +198,7 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector) static int hns3_nic_set_real_num_queue(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); struct hnae3_knic_private_info *kinfo = &h->kinfo; unsigned int queue_size = kinfo->rss_size * kinfo->num_tc; int ret; @@ -307,8 +306,7 @@ static int hns3_nic_net_stop(struct net_device *netdev) void hns3_set_multicast_list(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); struct netdev_hw_addr *ha = NULL; if (h->ae_algo->ops->set_mc_addr) { @@ -321,8 +319,7 @@ void hns3_set_multicast_list(struct net_device *netdev) static int hns3_nic_uc_sync(struct net_device *netdev, const unsigned char *addr) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->add_uc_addr) return h->ae_algo->ops->add_uc_addr(h, addr); @@ -333,8 +330,7 @@ static int hns3_nic_uc_sync(struct net_device *netdev, static int hns3_nic_uc_unsync(struct net_device *netdev, const unsigned char *addr) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->rm_uc_addr) return h->ae_algo->ops->rm_uc_addr(h, addr); @@ -345,8 +341,7 @@ static int hns3_nic_uc_unsync(struct net_device *netdev, static int hns3_nic_mc_sync(struct net_device *netdev, const unsigned char *addr) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->add_mc_addr) return h->ae_algo->ops->add_mc_addr(h, addr); @@ -357,8 +352,7 @@ static int hns3_nic_mc_sync(struct net_device *netdev, static int hns3_nic_mc_unsync(struct net_device *netdev, const unsigned char *addr) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->rm_mc_addr) return h->ae_algo->ops->rm_mc_addr(h, addr); @@ -368,8 +362,7 @@ static int hns3_nic_mc_unsync(struct net_device *netdev, void hns3_nic_set_rx_mode(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->set_promisc_mode) { if (netdev->flags & IFF_PROMISC) @@ -1025,8 +1018,7 @@ out_net_tx_busy: static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct 
hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); struct sockaddr *mac_addr = p; int ret; @@ -1208,8 +1200,7 @@ static void hns3_nic_udp_tunnel_del(struct net_device *netdev, static int hns3_setup_tc(struct net_device *netdev, u8 tc) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); struct hnae3_knic_private_info *kinfo = &h->kinfo; unsigned int i; int ret; @@ -1259,8 +1250,7 @@ static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type, static int hns3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vlan_filter) @@ -1272,8 +1262,7 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev, static int hns3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vlan_filter) @@ -1285,8 +1274,7 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev, static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vf_vlan_filter) @@ -1298,8 +1286,7 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); bool if_running = netif_running(netdev); int ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h index 481eada73e2d..dd8d40ca1dcc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h @@ -587,6 +587,9 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value) #define hns3_for_each_ring(pos, head) \ for (pos = (head).ring; pos; pos = pos->next) +#define hns3_get_handle(ndev) \ + (((struct hns3_nic_priv *)netdev_priv(ndev))->ae_handle) + void hns3_ethtool_set_ops(struct net_device *netdev); int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index d636399232fb..a892a157f346 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -102,8 +102,7 @@ static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd, static int hns3_get_sset_count(struct net_device *netdev, int stringset) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops = h->ae_algo->ops; if (!ops->get_sset_count) @@ -164,8 +163,7 @@ static u8 *hns3_get_strings_tqps(struct hnae3_handle *handle, u8 *data) static void hns3_get_strings(struct net_device *netdev, u32 stringset, 
u8 *data) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops = h->ae_algo->ops; char *buff = (char *)data; @@ -220,8 +218,7 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data) void hns3_get_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); u64 *p = data; if (!h->ae_algo->ops->get_stats || !h->ae_algo->ops->update_stats) { @@ -262,10 +259,7 @@ static void hns3_get_drvinfo(struct net_device *netdev, static u32 hns3_get_link(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h; - - h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status) return h->ae_algo->ops->get_status(h); @@ -277,7 +271,8 @@ static void hns3_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *param) { struct hns3_nic_priv *priv = netdev_priv(netdev); - int queue_num = priv->ae_handle->kinfo.num_tqps; + struct hnae3_handle *h = priv->ae_handle; + int queue_num = h->kinfo.num_tqps; param->tx_max_pending = HNS3_RING_MAX_PENDING; param->rx_max_pending = HNS3_RING_MAX_PENDING; @@ -289,8 +284,7 @@ static void hns3_get_ringparam(struct net_device *netdev, static void hns3_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *param) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam) h->ae_algo->ops->get_pauseparam(h, ¶m->autoneg, @@ -300,8 +294,7 @@ static void hns3_get_pauseparam(struct net_device *netdev, static int hns3_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); u32 supported_caps; u32 advertised_caps; u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN; @@ -392,8 +385,7 @@ static int hns3_get_link_ksettings(struct net_device *netdev, static u32 hns3_get_rss_key_size(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss_key_size) @@ -404,8 +396,7 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev) static u32 hns3_get_rss_indir_size(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss_indir_size) @@ -417,8 +408,7 @@ static u32 hns3_get_rss_indir_size(struct net_device *netdev) static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss) return -EOPNOTSUPP; @@ -429,8 +419,7 @@ static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key, static int hns3_set_rss(struct net_device 
*netdev, const u32 *indir, const u8 *key, const u8 hfunc) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss) return -EOPNOTSUPP; @@ -454,8 +443,7 @@ static int hns3_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_tc_size) return -EOPNOTSUPP; From 56cf68c73019ec3f04b1ae69f76f524918fb22cb Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 9 Oct 2017 15:43:57 +0800 Subject: [PATCH 0709/2177] net: hns3: Cleanup indentation for Kconfig in the hisilicon folder This patch fixes a few indentation issues in the Kconfig file in the hisilicon folder. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 9d7cb0387bf7..30000b6aa7b8 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -78,7 +78,7 @@ config HNS_ENET config HNS3 tristate "Hisilicon Network Subsystem Support HNS3 (Framework)" - depends on PCI + depends on PCI ---help--- This selects the framework support for Hisilicon Network Subsystem 3. This layer facilitates clients like ENET, RoCE and user-space ethernet @@ -87,7 +87,7 @@ config HNS3 config HNS3_HCLGE tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support" - depends on PCI_MSI + depends on PCI_MSI depends on HNS3 ---help--- This selects the HNS3_HCLGE network acceleration engine & its hardware @@ -96,7 +96,7 @@ config HNS3_ENET tristate "Hisilicon HNS3 Ethernet Device Support" - depends on 64BIT && PCI + depends on 64BIT && PCI depends on HNS3 && HNS3_HCLGE ---help--- This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08 From 5392902d332b85a93e3be2755f7f6df183e5cafc Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 9 Oct 2017 15:43:58 +0800 Subject: [PATCH 0710/2177] net: hns3: Consistently using GENMASK in hns3 driver This patch uses GENMASK to generate bit masks whenever possible in the hns3 driver. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller
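For context, a minimal sketch of what the GENMASK() conversion buys (illustrative EXAMPLE_* names, not part of the patch): GENMASK(h, l), from linux/bitops.h, expands to a mask with bits h through l set, so the covered bit range is stated directly instead of being implied by a magic constant and a separate shift:

	/* contiguous mask starting at bit 0 */
	#define EXAMPLE_CFG_OFFSET_M_OLD	0xfffff			/* bits 19..0 */
	#define EXAMPLE_CFG_OFFSET_M_NEW	GENMASK(19, 0)		/* same value, explicit range */

	/* shifted mask paired with its _S shift define */
	#define EXAMPLE_CFG_RD_LEN_S		24
	#define EXAMPLE_CFG_RD_LEN_M_OLD	(0xf << EXAMPLE_CFG_RD_LEN_S)	/* 0x0f000000 */
	#define EXAMPLE_CFG_RD_LEN_M_NEW	GENMASK(27, 24)			/* 0x0f000000 */

Both pairs are bit-for-bit identical; only the notation changes.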
--- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 48 +++++++++---------- .../hisilicon/hns3/hns3pf/hclge_main.h | 4 +- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 6b6d28eff664..9cff7dbca5dd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -250,11 +250,11 @@ struct hclge_ctrl_vector_chain { u8 int_vector_id; u8 int_cause_num; #define HCLGE_INT_TYPE_S 0 -#define HCLGE_INT_TYPE_M 0x3 +#define HCLGE_INT_TYPE_M GENMASK(1, 0) #define HCLGE_TQP_ID_S 2 -#define HCLGE_TQP_ID_M (0x7ff << HCLGE_TQP_ID_S) +#define HCLGE_TQP_ID_M GENMASK(12, 2) #define HCLGE_INT_GL_IDX_S 13 -#define HCLGE_INT_GL_IDX_M (0x3 << HCLGE_INT_GL_IDX_S) +#define HCLGE_INT_GL_IDX_M GENMASK(14, 13) __le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD]; u8 vfid; u8 rsv; @@ -372,28 +372,28 @@ struct hclge_pf_res { }; #define HCLGE_CFG_OFFSET_S 0 -#define HCLGE_CFG_OFFSET_M 0xfffff /* Byte (8-10.3) */ +#define HCLGE_CFG_OFFSET_M GENMASK(19, 0) #define HCLGE_CFG_RD_LEN_S 24 -#define HCLGE_CFG_RD_LEN_M (0xf << HCLGE_CFG_RD_LEN_S) +#define HCLGE_CFG_RD_LEN_M GENMASK(27, 24) #define HCLGE_CFG_RD_LEN_BYTES 16 #define HCLGE_CFG_RD_LEN_UNIT 4 #define HCLGE_CFG_VMDQ_S 0 -#define HCLGE_CFG_VMDQ_M (0xff << HCLGE_CFG_VMDQ_S) +#define HCLGE_CFG_VMDQ_M GENMASK(7, 0) #define HCLGE_CFG_TC_NUM_S 8 -#define HCLGE_CFG_TC_NUM_M (0xff << HCLGE_CFG_TC_NUM_S) +#define HCLGE_CFG_TC_NUM_M GENMASK(15, 8) #define HCLGE_CFG_TQP_DESC_N_S 16 -#define HCLGE_CFG_TQP_DESC_N_M (0xffff << HCLGE_CFG_TQP_DESC_N_S) +#define HCLGE_CFG_TQP_DESC_N_M GENMASK(31, 16) #define HCLGE_CFG_PHY_ADDR_S 0 -#define HCLGE_CFG_PHY_ADDR_M (0x1f << HCLGE_CFG_PHY_ADDR_S) +#define HCLGE_CFG_PHY_ADDR_M GENMASK(4, 0) #define HCLGE_CFG_MEDIA_TP_S 8 -#define HCLGE_CFG_MEDIA_TP_M (0xff << HCLGE_CFG_MEDIA_TP_S) +#define HCLGE_CFG_MEDIA_TP_M GENMASK(15, 8) #define HCLGE_CFG_RX_BUF_LEN_S 16 -#define HCLGE_CFG_RX_BUF_LEN_M (0xffff << HCLGE_CFG_RX_BUF_LEN_S) +#define HCLGE_CFG_RX_BUF_LEN_M GENMASK(31, 16) #define HCLGE_CFG_MAC_ADDR_H_S 0 -#define HCLGE_CFG_MAC_ADDR_H_M (0xffff << HCLGE_CFG_MAC_ADDR_H_S) +#define HCLGE_CFG_MAC_ADDR_H_M GENMASK(15, 0) #define HCLGE_CFG_DEFAULT_SPEED_S 16 -#define HCLGE_CFG_DEFAULT_SPEED_M (0xff << HCLGE_CFG_DEFAULT_SPEED_S) +#define HCLGE_CFG_DEFAULT_SPEED_M GENMASK(23, 16) struct hclge_cfg_param { __le32 offset; @@ -441,9 +441,9 @@ struct hclge_rss_indirection_table { }; #define HCLGE_RSS_TC_OFFSET_S 0 -#define HCLGE_RSS_TC_OFFSET_M (0x3ff << HCLGE_RSS_TC_OFFSET_S) +#define HCLGE_RSS_TC_OFFSET_M GENMASK(9, 0) #define HCLGE_RSS_TC_SIZE_S 12 -#define HCLGE_RSS_TC_SIZE_M (0x7 << HCLGE_RSS_TC_SIZE_S) +#define HCLGE_RSS_TC_SIZE_M GENMASK(14, 12) #define HCLGE_RSS_TC_VALID_B 15 struct hclge_rss_tc_mode { u16 rss_tc_mode[HCLGE_MAX_TC_NUM]; u8 rsv[8]; }; @@ -501,7 +501,7 @@ struct hclge_config_mac_mode { }; #define HCLGE_CFG_SPEED_S 0 -#define HCLGE_CFG_SPEED_M (0x3f << HCLGE_CFG_SPEED_S) +#define HCLGE_CFG_SPEED_M GENMASK(5, 0) #define HCLGE_CFG_DUPLEX_B 7 #define HCLGE_CFG_DUPLEX_M BIT(HCLGE_CFG_DUPLEX_B) @@ -518,7 +518,7 @@ struct hclge_config_mac_speed_dup { #define HCLGE_QUERY_AN_B 0 #define HCLGE_QUERY_DUPLEX_B 2 -#define HCLGE_QUERY_SPEED_M (0x1f << HCLGE_QUERY_SPEED_S) +#define HCLGE_QUERY_SPEED_M GENMASK(4, 0) #define HCLGE_QUERY_AN_M BIT(HCLGE_QUERY_AN_B) #define HCLGE_QUERY_DUPLEX_M BIT(HCLGE_QUERY_DUPLEX_B)
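/*
 * Illustrative sketch only, not part of the patch: the _M/_S pairs above are
 * consumed as a mask plus a shift, which is what the hnae_set_field() and
 * hnae_get_field() helpers used later in this series do. A hand-rolled
 * equivalent of that pattern, with hypothetical example_* names:
 */
static inline u32 example_get_field(u32 word, u32 mask, u32 shift)
{
	return (word & mask) >> shift;
}

static inline void example_set_field(u32 *word, u32 mask, u32 shift, u32 val)
{
	*word &= ~mask;
	*word |= (val << shift) & mask;
}

/*
 * e.g. example_get_field(cfg, HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S) extracts
 * the TC count from bits 15..8 of the config word, and works the same whether
 * the mask is written as (0xff << 8) or as GENMASK(15, 8).
 */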
@@ -528,7 +528,7 @@ struct hclge_query_an_speed_dup { u8 rsv[23]; }; -#define HCLGE_RING_ID_MASK 0x3ff +#define HCLGE_RING_ID_MASK GENMASK(9, 0) #define HCLGE_TQP_ENABLE_B 0 #define HCLGE_MAC_CFG_AN_EN_B 0 @@ -565,9 +565,9 @@ enum hclge_mac_vlan_tbl_opcode { #define HCLGE_MAC_EPORT_SW_EN_B 0xc #define HCLGE_MAC_EPORT_TYPE_B 0xb #define HCLGE_MAC_EPORT_VFID_S 0x3 -#define HCLGE_MAC_EPORT_VFID_M (0xff << HCLGE_MAC_EPORT_VFID_S) +#define HCLGE_MAC_EPORT_VFID_M GENMASK(10, 3) #define HCLGE_MAC_EPORT_PFID_S 0x0 -#define HCLGE_MAC_EPORT_PFID_M (0x7 << HCLGE_MAC_EPORT_PFID_S) +#define HCLGE_MAC_EPORT_PFID_M GENMASK(2, 0) struct hclge_mac_vlan_tbl_entry { u8 flags; u8 resp_code; @@ -583,7 +583,7 @@ struct hclge_mac_vlan_tbl_entry { }; #define HCLGE_CFG_MTA_MAC_SEL_S 0x0 -#define HCLGE_CFG_MTA_MAC_SEL_M (0x3 << HCLGE_CFG_MTA_MAC_SEL_S) +#define HCLGE_CFG_MTA_MAC_SEL_M GENMASK(1, 0) #define HCLGE_CFG_MTA_MAC_EN_B 0x7 struct hclge_mta_filter_mode { u8 dmac_sel_en; /* Use lowest 2 bit as sel_mode, bit 7 as enable */ u8 rsv[23]; }; @@ -599,7 +599,7 @@ struct hclge_cfg_func_mta_filter { #define HCLGE_CFG_MTA_ITEM_ACCEPT_B 0x0 #define HCLGE_CFG_MTA_ITEM_IDX_S 0x0 -#define HCLGE_CFG_MTA_ITEM_IDX_M (0xfff << HCLGE_CFG_MTA_ITEM_IDX_S) +#define HCLGE_CFG_MTA_ITEM_IDX_M GENMASK(11, 0) struct hclge_cfg_func_mta_item { u16 item_idx; /* Only used lowest 12 bit */ u8 accept; /* Only used lowest 1 bit */ @@ -670,10 +670,10 @@ struct hclge_cfg_tx_queue_pointer { }; #define HCLGE_TSO_MSS_MIN_S 0 -#define HCLGE_TSO_MSS_MIN_M (0x3FFF << HCLGE_TSO_MSS_MIN_S) +#define HCLGE_TSO_MSS_MIN_M GENMASK(13, 0) #define HCLGE_TSO_MSS_MAX_S 16 -#define HCLGE_TSO_MSS_MAX_M (0x3FFF << HCLGE_TSO_MSS_MAX_S) +#define HCLGE_TSO_MSS_MAX_M GENMASK(29, 16) struct hclge_cfg_tso_status { __le16 tso_mss_min; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 7c66c00e8a3e..79c1a06cb941 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -32,7 +32,7 @@ #define HCLGE_VECTOR_VF_OFFSET 0x100000 #define HCLGE_RSS_IND_TBL_SIZE 512 -#define HCLGE_RSS_SET_BITMAP_MSK 0xffff +#define HCLGE_RSS_SET_BITMAP_MSK GENMASK(15, 0) #define HCLGE_RSS_KEY_SIZE 40 #define HCLGE_RSS_HASH_ALGO_TOEPLITZ 0 #define HCLGE_RSS_HASH_ALGO_SIMPLE 1 @@ -65,7 +65,7 @@ #define HCLGE_PHY_CSS_REG 17 #define HCLGE_PHY_MDIX_CTRL_S (5) -#define HCLGE_PHY_MDIX_CTRL_M (3 << HCLGE_PHY_MDIX_CTRL_S) +#define HCLGE_PHY_MDIX_CTRL_M GENMASK(6, 5) #define HCLGE_PHY_MDIX_STATUS_B (6) #define HCLGE_PHY_SPEED_DUP_RESOLVE_B (11) From d44f9b631fbc137b1b16a22318ceb136f934db48 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 9 Oct 2017 15:43:59 +0800 Subject: [PATCH 0711/2177] net: hns3: Cleanup for struct that used to send cmd to firmware The hclge_tm module has already added a _cmd suffix to the structs used to send commands to the firmware, which helps with finding endian issues. This patch adds the same _cmd suffix to the command structs in the hclge_main module. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.c | 4 +- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 72 ++++---- .../hisilicon/hns3/hns3pf/hclge_main.c | 163 +++++++++--------- 3 files changed, 120 insertions(+), 119 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 8b511e6e0ce9..648b64bc363a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -279,12 +279,12 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw, u32 *version) { - struct hclge_query_version *resp; + struct hclge_query_version_cmd *resp; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, 1); - resp = (struct hclge_query_version *)desc.data; + resp = (struct hclge_query_version_cmd *)desc.data; ret = hclge_cmd_send(hw, &desc, 1); if (!ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 9cff7dbca5dd..d2ff0e37f9f5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -221,12 +221,12 @@ enum hclge_opcode_type { #define HCLGE_RCB_INIT_QUERY_TIMEOUT 10 #define HCLGE_RCB_INIT_FLAG_EN_B 0 #define HCLGE_RCB_INIT_FLAG_FINI_B 8 -struct hclge_config_rcb_init { +struct hclge_config_rcb_init_cmd { __le16 rcb_init_flag; u8 rsv[22]; }; -struct hclge_tqp_map { +struct hclge_tqp_map_cmd { __le16 tqp_id; /* Absolute tqp id for in this pf */ u8 tqp_vf; /* VF id */ #define HCLGE_TQP_MAP_TYPE_PF 0 @@ -246,7 +246,7 @@ enum hclge_int_type { HCLGE_INT_EVENT, }; -struct hclge_ctrl_vector_chain { +struct hclge_ctrl_vector_chain_cmd { u8 int_vector_id; u8 int_cause_num; #define HCLGE_INT_TYPE_S 0 @@ -263,18 +263,18 @@ struct hclge_ctrl_vector_chain { #define HCLGE_TC_NUM 8 #define HCLGE_TC0_PRI_BUF_EN_B 15 /* Bit 15 indicate enable or not */ #define HCLGE_BUF_UNIT_S 7 /* Buf size is united by 128 bytes */ -struct hclge_tx_buff_alloc { +struct hclge_tx_buff_alloc_cmd { __le16 tx_pkt_buff[HCLGE_TC_NUM]; u8 tx_buff_rsv[8]; }; -struct hclge_rx_priv_buff { +struct hclge_rx_priv_buff_cmd { __le16 buf_num[HCLGE_TC_NUM]; __le16 shared_buf; u8 rsv[6]; }; -struct hclge_query_version { +struct hclge_query_version_cmd { __le32 firmware; __le32 firmware_rsv[5]; }; @@ -328,14 +328,14 @@ struct hclge_pkt_buf_alloc { }; #define HCLGE_RX_COM_WL_EN_B 15 -struct hclge_rx_com_wl_buf { +struct hclge_rx_com_wl_buf_cmd { __le16 high_wl; __le16 low_wl; u8 rsv[20]; }; #define HCLGE_RX_PKT_EN_B 15 -struct hclge_rx_pkt_buf { +struct hclge_rx_pkt_buf_cmd { __le16 high_pkt; __le16 low_pkt; u8 rsv[20]; @@ -348,7 +348,7 @@ struct hclge_rx_pkt_buf { #define HCLGE_PF_MAC_NUM_MASK 0x3 #define HCLGE_PF_STATE_MAIN BIT(HCLGE_PF_STATE_MAIN_B) #define HCLGE_PF_STATE_DONE BIT(HCLGE_PF_STATE_DONE_B) -struct hclge_func_status { +struct hclge_func_status_cmd { __le32 vf_rst_state[4]; u8 pf_state; u8 mac_id; @@ -359,7 +359,7 @@ struct hclge_func_status { u8 rsv[2]; }; -struct hclge_pf_res { +struct hclge_pf_res_cmd { __le16 tqp_num; __le16 buf_size; __le16 msixcap_localid_ba_nic; @@ -395,7 +395,7 @@ struct hclge_pf_res { #define HCLGE_CFG_DEFAULT_SPEED_S 16 #define HCLGE_CFG_DEFAULT_SPEED_M GENMASK(23, 16) -struct hclge_cfg_param { +struct hclge_cfg_param_cmd { __le32 offset; __le32 rsv; __le32 param[4]; @@ -405,7 +405,7 @@ struct 
hclge_cfg_param { #define HCLGE_DESC_NUM 0x40 #define HCLGE_ALLOC_VALID_B 0 -struct hclge_vf_num { +struct hclge_vf_num_cmd { u8 alloc_valid; u8 rsv[23]; }; @@ -413,13 +413,13 @@ struct hclge_vf_num { #define HCLGE_RSS_DEFAULT_OUTPORT_B 4 #define HCLGE_RSS_HASH_KEY_OFFSET_B 4 #define HCLGE_RSS_HASH_KEY_NUM 16 -struct hclge_rss_config { +struct hclge_rss_config_cmd { u8 hash_config; u8 rsv[7]; u8 hash_key[HCLGE_RSS_HASH_KEY_NUM]; }; -struct hclge_rss_input_tuple { +struct hclge_rss_input_tuple_cmd { u8 ipv4_tcp_en; u8 ipv4_udp_en; u8 ipv4_sctp_en; @@ -433,7 +433,7 @@ struct hclge_rss_input_tuple { #define HCLGE_RSS_CFG_TBL_SIZE 16 -struct hclge_rss_indirection_table { +struct hclge_rss_indirection_table_cmd { u16 start_table_index; u16 rss_set_bitmap; u8 rsv[4]; @@ -445,14 +445,14 @@ struct hclge_rss_indirection_table { #define HCLGE_RSS_TC_SIZE_S 12 #define HCLGE_RSS_TC_SIZE_M GENMASK(14, 12) #define HCLGE_RSS_TC_VALID_B 15 -struct hclge_rss_tc_mode { +struct hclge_rss_tc_mode_cmd { u16 rss_tc_mode[HCLGE_MAX_TC_NUM]; u8 rsv[8]; }; #define HCLGE_LINK_STS_B 0 #define HCLGE_LINK_STATUS BIT(HCLGE_LINK_STS_B) -struct hclge_link_status { +struct hclge_link_status_cmd { u8 status; u8 rsv[23]; }; @@ -467,7 +467,7 @@ struct hclge_promisc_param { #define HCLGE_PROMISC_EN_UC 0x1 #define HCLGE_PROMISC_EN_MC 0x2 #define HCLGE_PROMISC_EN_BC 0x4 -struct hclge_promisc_cfg { +struct hclge_promisc_cfg_cmd { u8 flag; u8 vf_id; __le16 rsv0; @@ -495,7 +495,7 @@ enum hclge_promisc_type { #define HCLGE_MAC_TX_UNDER_MIN_ERR_B 21 #define HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B 22 -struct hclge_config_mac_mode { +struct hclge_config_mac_mode_cmd { __le32 txrx_pad_fcs_loop_en; u8 rsv[20]; }; @@ -506,7 +506,7 @@ struct hclge_config_mac_mode { #define HCLGE_CFG_DUPLEX_B 7 #define HCLGE_CFG_DUPLEX_M BIT(HCLGE_CFG_DUPLEX_B) -struct hclge_config_mac_speed_dup { +struct hclge_config_mac_speed_dup_cmd { u8 speed_dup; #define HCLGE_CFG_MAC_SPEED_CHANGE_EN_B 0 @@ -522,7 +522,7 @@ struct hclge_config_mac_speed_dup { #define HCLGE_QUERY_AN_M BIT(HCLGE_QUERY_AN_B) #define HCLGE_QUERY_DUPLEX_M BIT(HCLGE_QUERY_DUPLEX_B) -struct hclge_query_an_speed_dup { +struct hclge_query_an_speed_dup_cmd { u8 an_syn_dup_speed; u8 pause; u8 rsv[23]; @@ -539,7 +539,7 @@ struct hclge_query_an_speed_dup { #define HCLGE_MAC_CFG_AN_EN BIT(HCLGE_MAC_CFG_AN_EN_B) -struct hclge_config_auto_neg { +struct hclge_config_auto_neg_cmd { __le32 cfg_an_cmd_flag; u8 rsv[20]; }; @@ -548,7 +548,7 @@ struct hclge_config_auto_neg { #define HCLGE_MAC_MAX_MTU 9728 #define HCLGE_MAC_UPLINK_PORT 0x100 -struct hclge_config_max_frm_size { +struct hclge_config_max_frm_size_cmd { __le16 max_frm_size; u8 rsv[22]; }; @@ -568,7 +568,7 @@ enum hclge_mac_vlan_tbl_opcode { #define HCLGE_MAC_EPORT_VFID_M GENMASK(10, 3) #define HCLGE_MAC_EPORT_PFID_S 0x0 #define HCLGE_MAC_EPORT_PFID_M GENMASK(2, 0) -struct hclge_mac_vlan_tbl_entry { +struct hclge_mac_vlan_tbl_entry_cmd { u8 flags; u8 resp_code; __le16 vlan_tag; @@ -585,13 +585,13 @@ struct hclge_mac_vlan_tbl_entry { #define HCLGE_CFG_MTA_MAC_SEL_S 0x0 #define HCLGE_CFG_MTA_MAC_SEL_M GENMASK(1, 0) #define HCLGE_CFG_MTA_MAC_EN_B 0x7 -struct hclge_mta_filter_mode { +struct hclge_mta_filter_mode_cmd { u8 dmac_sel_en; /* Use lowest 2 bit as sel_mode, bit 7 as enable */ u8 rsv[23]; }; #define HCLGE_CFG_FUNC_MTA_ACCEPT_B 0x0 -struct hclge_cfg_func_mta_filter { +struct hclge_cfg_func_mta_filter_cmd { u8 accept; /* Only used lowest 1 bit */ u8 function_id; u8 rsv[22]; @@ -600,13 +600,13 @@ struct hclge_cfg_func_mta_filter { #define 
HCLGE_CFG_MTA_ITEM_ACCEPT_B 0x0 #define HCLGE_CFG_MTA_ITEM_IDX_S 0x0 #define HCLGE_CFG_MTA_ITEM_IDX_M GENMASK(11, 0) -struct hclge_cfg_func_mta_item { +struct hclge_cfg_func_mta_item_cmd { u16 item_idx; /* Only used lowest 12 bit */ u8 accept; /* Only used lowest 1 bit */ u8 rsv[21]; }; -struct hclge_mac_vlan_add { +struct hclge_mac_vlan_add_cmd { __le16 flags; __le16 mac_addr_hi16; __le32 mac_addr_lo32; @@ -619,7 +619,7 @@ struct hclge_mac_vlan_add { }; #define HNS3_MAC_VLAN_CFG_FLAG_BIT 0 -struct hclge_mac_vlan_remove { +struct hclge_mac_vlan_remove_cmd { __le16 flags; __le16 mac_addr_hi16; __le32 mac_addr_lo32; @@ -631,20 +631,20 @@ struct hclge_mac_vlan_remove { u8 rsv[4]; }; -struct hclge_vlan_filter_ctrl { +struct hclge_vlan_filter_ctrl_cmd { u8 vlan_type; u8 vlan_fe; u8 rsv[22]; }; -struct hclge_vlan_filter_pf_cfg { +struct hclge_vlan_filter_pf_cfg_cmd { u8 vlan_offset; u8 vlan_cfg; u8 rsv[2]; u8 vlan_offset_bitmap[20]; }; -struct hclge_vlan_filter_vf_cfg { +struct hclge_vlan_filter_vf_cfg_cmd { u16 vlan_id; u8 resp_code; u8 rsv; @@ -653,14 +653,14 @@ struct hclge_vlan_filter_vf_cfg { u8 vf_bitmap[16]; }; -struct hclge_cfg_com_tqp_queue { +struct hclge_cfg_com_tqp_queue_cmd { __le16 tqp_id; __le16 stream_id; u8 enable; u8 rsv[19]; }; -struct hclge_cfg_tx_queue_pointer { +struct hclge_cfg_tx_queue_pointer_cmd { __le16 tqp_id; __le16 tx_tail; __le16 tx_head; @@ -675,7 +675,7 @@ struct hclge_cfg_tx_queue_pointer { #define HCLGE_TSO_MSS_MAX_S 16 #define HCLGE_TSO_MSS_MAX_M GENMASK(29, 16) -struct hclge_cfg_tso_status { +struct hclge_cfg_tso_status_cmd { __le16 tso_mss_min; __le16 tso_mss_max; u8 rsv[20]; @@ -685,7 +685,7 @@ struct hclge_cfg_tso_status { #define HCLGE_TSO_MSS_MAX 9668 #define HCLGE_TQP_RESET_B 0 -struct hclge_reset_tqp_queue { +struct hclge_reset_tqp_queue_cmd { __le16 tqp_id; u8 reset_req; u8 ready_to_reset; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 5c1bf12beade..5103f234d1c3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -820,7 +820,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data) } static int hclge_parse_func_status(struct hclge_dev *hdev, - struct hclge_func_status *status) + struct hclge_func_status_cmd *status) { if (!(status->pf_state & HCLGE_PF_STATE_DONE)) return -EINVAL; @@ -837,13 +837,13 @@ static int hclge_parse_func_status(struct hclge_dev *hdev, static int hclge_query_function_status(struct hclge_dev *hdev) { - struct hclge_func_status *req; + struct hclge_func_status_cmd *req; struct hclge_desc desc; int timeout = 0; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FUNC_STATUS, true); - req = (struct hclge_func_status *)desc.data; + req = (struct hclge_func_status_cmd *)desc.data; do { ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -868,7 +868,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev) static int hclge_query_pf_resource(struct hclge_dev *hdev) { - struct hclge_pf_res *req; + struct hclge_pf_res_cmd *req; struct hclge_desc desc; int ret; @@ -880,7 +880,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) return ret; } - req = (struct hclge_pf_res *)desc.data; + req = (struct hclge_pf_res_cmd *)desc.data; hdev->num_tqps = __le16_to_cpu(req->tqp_num); hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S; @@ -938,12 +938,12 @@ static int hclge_parse_speed(int speed_cmd, int *speed) static void 
hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) { - struct hclge_cfg_param *req; + struct hclge_cfg_param_cmd *req; u64 mac_addr_tmp_high; u64 mac_addr_tmp; int i; - req = (struct hclge_cfg_param *)desc[0].data; + req = (struct hclge_cfg_param_cmd *)desc[0].data; /* get the configuration */ cfg->vmdq_vport_num = hnae_get_field(__le32_to_cpu(req->param[0]), @@ -978,7 +978,7 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) for (i = 0; i < ETH_ALEN; i++) cfg->mac_addr[i] = (mac_addr_tmp >> (8 * i)) & 0xff; - req = (struct hclge_cfg_param *)desc[1].data; + req = (struct hclge_cfg_param_cmd *)desc[1].data; cfg->numa_node_map = __le32_to_cpu(req->param[0]); } @@ -989,11 +989,11 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg) { struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM]; - struct hclge_cfg_param *req; + struct hclge_cfg_param_cmd *req; int i, ret; for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) { - req = (struct hclge_cfg_param *)desc[i].data; + req = (struct hclge_cfg_param_cmd *)desc[i].data; hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_CFG_PARAM, true); hnae_set_field(req->offset, HCLGE_CFG_OFFSET_M, @@ -1099,12 +1099,12 @@ static int hclge_configure(struct hclge_dev *hdev) static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min, int tso_mss_max) { - struct hclge_cfg_tso_status *req; + struct hclge_cfg_tso_status_cmd *req; struct hclge_desc desc; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false); - req = (struct hclge_cfg_tso_status *)desc.data; + req = (struct hclge_cfg_tso_status_cmd *)desc.data; hnae_set_field(req->tso_mss_min, HCLGE_TSO_MSS_MIN_M, HCLGE_TSO_MSS_MIN_S, tso_mss_min); hnae_set_field(req->tso_mss_max, HCLGE_TSO_MSS_MIN_M, @@ -1144,13 +1144,13 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev) static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id, u16 tqp_pid, u16 tqp_vid, bool is_pf) { - struct hclge_tqp_map *req; + struct hclge_tqp_map_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SET_TQP_MAP, false); - req = (struct hclge_tqp_map *)desc.data; + req = (struct hclge_tqp_map_cmd *)desc.data; req->tqp_id = cpu_to_le16(tqp_pid); req->tqp_vf = cpu_to_le16(func_id); req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B | @@ -1340,12 +1340,12 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, /* TX buffer size is unit by 128 byte */ #define HCLGE_BUF_SIZE_UNIT_SHIFT 7 #define HCLGE_BUF_SIZE_UPDATE_EN_MSK BIT(15) - struct hclge_tx_buff_alloc *req; + struct hclge_tx_buff_alloc_cmd *req; struct hclge_desc desc; int ret; u8 i; - req = (struct hclge_tx_buff_alloc *)desc.data; + req = (struct hclge_tx_buff_alloc_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0); for (i = 0; i < HCLGE_TC_NUM; i++) { @@ -1672,13 +1672,13 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) { - struct hclge_rx_priv_buff *req; + struct hclge_rx_priv_buff_cmd *req; struct hclge_desc desc; int ret; int i; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_PRIV_BUFF_ALLOC, false); - req = (struct hclge_rx_priv_buff *)desc.data; + req = (struct hclge_rx_priv_buff_cmd *)desc.data; /* Alloc private buffer TCs */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { @@ -2000,11 +2000,11 @@ static void hclge_check_speed_dup(struct hclge_dev *hdev, int duplex, 
int speed) int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex) { - struct hclge_config_mac_speed_dup *req; + struct hclge_config_mac_speed_dup_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_config_mac_speed_dup *)desc.data; + req = (struct hclge_config_mac_speed_dup_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false); @@ -2075,12 +2075,12 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed, static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed, u8 *duplex) { - struct hclge_query_an_speed_dup *req; + struct hclge_query_an_speed_dup_cmd *req; struct hclge_desc desc; int speed_tmp; int ret; - req = (struct hclge_query_an_speed_dup *)desc.data; + req = (struct hclge_query_an_speed_dup_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2108,11 +2108,11 @@ static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed, static int hclge_query_autoneg_result(struct hclge_dev *hdev) { struct hclge_mac *mac = &hdev->hw.mac; - struct hclge_query_an_speed_dup *req; + struct hclge_query_an_speed_dup_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_query_an_speed_dup *)desc.data; + req = (struct hclge_query_an_speed_dup_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2129,13 +2129,13 @@ static int hclge_query_autoneg_result(struct hclge_dev *hdev) static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) { - struct hclge_config_auto_neg *req; + struct hclge_config_auto_neg_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false); - req = (struct hclge_config_auto_neg *)desc.data; + req = (struct hclge_config_auto_neg_cmd *)desc.data; hnae_set_bit(req->cfg_an_cmd_flag, HCLGE_MAC_CFG_AN_EN_B, !!enable); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2214,7 +2214,7 @@ static void hclge_task_schedule(struct hclge_dev *hdev) static int hclge_get_mac_link_status(struct hclge_dev *hdev) { - struct hclge_link_status *req; + struct hclge_link_status_cmd *req; struct hclge_desc desc; int link_status; int ret; @@ -2227,7 +2227,7 @@ static int hclge_get_mac_link_status(struct hclge_dev *hdev) return ret; } - req = (struct hclge_link_status *)desc.data; + req = (struct hclge_link_status_cmd *)desc.data; link_status = req->status & HCLGE_LINK_STATUS; return !!link_status; @@ -2451,7 +2451,7 @@ static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle) static int hclge_get_rss_algo(struct hclge_dev *hdev) { - struct hclge_rss_config *req; + struct hclge_rss_config_cmd *req; struct hclge_desc desc; int rss_hash_algo; int ret; @@ -2465,7 +2465,7 @@ static int hclge_get_rss_algo(struct hclge_dev *hdev) return ret; } - req = (struct hclge_rss_config *)desc.data; + req = (struct hclge_rss_config_cmd *)desc.data; rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK); if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ) @@ -2477,13 +2477,13 @@ static int hclge_get_rss_algo(struct hclge_dev *hdev) static int hclge_set_rss_algo_key(struct hclge_dev *hdev, const u8 hfunc, const u8 *key) { - struct hclge_rss_config *req; + struct hclge_rss_config_cmd *req; struct hclge_desc desc; int key_offset; int key_size; int ret; - req = (struct hclge_rss_config *)desc.data; + req = (struct hclge_rss_config_cmd *)desc.data; for (key_offset = 0; key_offset < 
3; key_offset++) { hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, @@ -2514,12 +2514,12 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev, static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir) { - struct hclge_rss_indirection_table *req; + struct hclge_rss_indirection_table_cmd *req; struct hclge_desc desc; int i, j; int ret; - req = (struct hclge_rss_indirection_table *)desc.data; + req = (struct hclge_rss_indirection_table_cmd *)desc.data; for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) { hclge_cmd_setup_basic_desc @@ -2546,13 +2546,13 @@ static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir) static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid, u16 *tc_size, u16 *tc_offset) { - struct hclge_rss_tc_mode *req; + struct hclge_rss_tc_mode_cmd *req; struct hclge_desc desc; int ret; int i; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_TC_MODE, false); - req = (struct hclge_rss_tc_mode *)desc.data; + req = (struct hclge_rss_tc_mode_cmd *)desc.data; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { hnae_set_bit(req->rss_tc_mode[i], HCLGE_RSS_TC_VALID_B, @@ -2577,13 +2577,13 @@ static int hclge_set_rss_input_tuple(struct hclge_dev *hdev) { #define HCLGE_RSS_INPUT_TUPLE_OTHER 0xf #define HCLGE_RSS_INPUT_TUPLE_SCTP 0x1f - struct hclge_rss_input_tuple *req; + struct hclge_rss_input_tuple_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false); - req = (struct hclge_rss_input_tuple *)desc.data; + req = (struct hclge_rss_input_tuple_cmd *)desc.data; req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP; @@ -2750,7 +2750,7 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id, struct hnae3_ring_chain_node *ring_chain) { struct hclge_dev *hdev = vport->back; - struct hclge_ctrl_vector_chain *req; + struct hclge_ctrl_vector_chain_cmd *req; struct hnae3_ring_chain_node *node; struct hclge_desc desc; int ret; @@ -2758,7 +2758,7 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ADD_RING_TO_VECTOR, false); - req = (struct hclge_ctrl_vector_chain *)desc.data; + req = (struct hclge_ctrl_vector_chain_cmd *)desc.data; req->int_vector_id = vector_id; i = 0; @@ -2831,7 +2831,7 @@ static int hclge_unmap_ring_from_vector( { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct hclge_ctrl_vector_chain *req; + struct hclge_ctrl_vector_chain_cmd *req; struct hnae3_ring_chain_node *node; struct hclge_desc desc; int i, vector_id; @@ -2846,7 +2846,7 @@ static int hclge_unmap_ring_from_vector( hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_DEL_RING_TO_VECTOR, false); - req = (struct hclge_ctrl_vector_chain *)desc.data; + req = (struct hclge_ctrl_vector_chain_cmd *)desc.data; req->int_vector_id = vector_id; i = 0; @@ -2898,13 +2898,13 @@ static int hclge_unmap_ring_from_vector( int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, struct hclge_promisc_param *param) { - struct hclge_promisc_cfg *req; + struct hclge_promisc_cfg_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PROMISC_MODE, false); - req = (struct hclge_promisc_cfg *)desc.data; + req = (struct hclge_promisc_cfg_cmd *)desc.data; req->vf_id = param->vf_id; req->flag = (param->enable << HCLGE_PROMISC_EN_B); @@ -2946,8 +2946,8 @@ static void 
hclge_set_promisc_mode(struct hnae3_handle *handle, u32 en) static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) { struct hclge_desc desc; - struct hclge_config_mac_mode *req = - (struct hclge_config_mac_mode *)desc.data; + struct hclge_config_mac_mode_cmd *req = + (struct hclge_config_mac_mode_cmd *)desc.data; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false); @@ -2980,8 +2980,8 @@ static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id, int stream_id, bool enable) { struct hclge_desc desc; - struct hclge_cfg_com_tqp_queue *req = - (struct hclge_cfg_com_tqp_queue *)desc.data; + struct hclge_cfg_com_tqp_queue_cmd *req = + (struct hclge_cfg_com_tqp_queue_cmd *)desc.data; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); @@ -3174,7 +3174,7 @@ static bool hclge_is_all_function_id_zero(struct hclge_desc *desc) return true; } -static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req, +static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry_cmd *new_req, const u8 *addr) { const unsigned char *mac_addr = addr; @@ -3201,11 +3201,11 @@ static int hclge_set_mta_filter_mode(struct hclge_dev *hdev, enum hclge_mta_dmac_sel_type mta_mac_sel, bool enable) { - struct hclge_mta_filter_mode *req; + struct hclge_mta_filter_mode_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_mta_filter_mode *)desc.data; + req = (struct hclge_mta_filter_mode_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_MODE_CFG, false); hnae_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B, @@ -3228,11 +3228,11 @@ int hclge_cfg_func_mta_filter(struct hclge_dev *hdev, u8 func_id, bool enable) { - struct hclge_cfg_func_mta_filter *req; + struct hclge_cfg_func_mta_filter_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_cfg_func_mta_filter *)desc.data; + req = (struct hclge_cfg_func_mta_filter_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_FUNC_CFG, false); hnae_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B, @@ -3255,11 +3255,11 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport, bool enable) { struct hclge_dev *hdev = vport->back; - struct hclge_cfg_func_mta_item *req; + struct hclge_cfg_func_mta_item_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_cfg_func_mta_item *)desc.data; + req = (struct hclge_cfg_func_mta_item_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false); hnae_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable); @@ -3279,7 +3279,7 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport, } static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport, - struct hclge_mac_vlan_tbl_entry *req) + struct hclge_mac_vlan_tbl_entry_cmd *req) { struct hclge_dev *hdev = vport->back; struct hclge_desc desc; @@ -3288,7 +3288,7 @@ static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_REMOVE, false); - memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry)); + memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -3304,7 +3304,7 @@ static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport, } static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, - struct hclge_mac_vlan_tbl_entry *req, + struct hclge_mac_vlan_tbl_entry_cmd *req, struct hclge_desc *desc, bool is_mc) { @@ -3317,7 +3317,7 @@ static int 
hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); memcpy(desc[0].data, req, - sizeof(struct hclge_mac_vlan_tbl_entry)); + sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_MAC_VLAN_ADD, true); @@ -3329,7 +3329,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, } else { memcpy(desc[0].data, req, - sizeof(struct hclge_mac_vlan_tbl_entry)); + sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, desc, 1); } if (ret) { @@ -3345,7 +3345,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, } static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, - struct hclge_mac_vlan_tbl_entry *req, + struct hclge_mac_vlan_tbl_entry_cmd *req, struct hclge_desc *mc_desc) { struct hclge_dev *hdev = vport->back; @@ -3359,7 +3359,8 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_ADD, false); - memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry)); + memcpy(desc.data, req, + sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, &desc, 1); resp_code = (desc.data[0] >> 8) & 0xff; cfg_status = hclge_get_mac_vlan_cmd_status(vport, desc.retval, @@ -3373,7 +3374,7 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_NEXT); memcpy(mc_desc[0].data, req, - sizeof(struct hclge_mac_vlan_tbl_entry)); + sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, mc_desc, 3); resp_code = (mc_desc[0].data[0] >> 8) & 0xff; cfg_status = hclge_get_mac_vlan_cmd_status(vport, @@ -3404,7 +3405,7 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { struct hclge_dev *hdev = vport->back; - struct hclge_mac_vlan_tbl_entry req; + struct hclge_mac_vlan_tbl_entry_cmd req; enum hclge_cmd_status status; /* mac addr check */ @@ -3454,7 +3455,7 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { struct hclge_dev *hdev = vport->back; - struct hclge_mac_vlan_tbl_entry req; + struct hclge_mac_vlan_tbl_entry_cmd req; enum hclge_cmd_status status; /* mac addr check */ @@ -3488,7 +3489,7 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { struct hclge_dev *hdev = vport->back; - struct hclge_mac_vlan_tbl_entry req; + struct hclge_mac_vlan_tbl_entry_cmd req; struct hclge_desc desc[3]; u16 tbl_idx; int status; @@ -3539,7 +3540,7 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { struct hclge_dev *hdev = vport->back; - struct hclge_mac_vlan_tbl_entry req; + struct hclge_mac_vlan_tbl_entry_cmd req; enum hclge_cmd_status status; struct hclge_desc desc[3]; u16 tbl_idx; @@ -3622,13 +3623,13 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p) static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type, bool filter_en) { - struct hclge_vlan_filter_ctrl *req; + struct hclge_vlan_filter_ctrl_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false); - req = (struct hclge_vlan_filter_ctrl *)desc.data; + req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data; req->vlan_type = vlan_type; req->vlan_fe = filter_en; @@ -3646,8 +3647,8 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, bool is_kill, u16 vlan, u8 qos, __be16 proto) { #define 
HCLGE_MAX_VF_BYTES 16 - struct hclge_vlan_filter_vf_cfg *req0; - struct hclge_vlan_filter_vf_cfg *req1; + struct hclge_vlan_filter_vf_cfg_cmd *req0; + struct hclge_vlan_filter_vf_cfg_cmd *req1; struct hclge_desc desc[2]; u8 vf_byte_val; u8 vf_byte_off; @@ -3663,8 +3664,8 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, vf_byte_off = vfid / 8; vf_byte_val = 1 << (vfid % 8); - req0 = (struct hclge_vlan_filter_vf_cfg *)desc[0].data; - req1 = (struct hclge_vlan_filter_vf_cfg *)desc[1].data; + req0 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[0].data; + req1 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[1].data; req0->vlan_id = vlan; req0->vlan_cfg = is_kill; @@ -3707,7 +3708,7 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct hclge_vlan_filter_pf_cfg *req; + struct hclge_vlan_filter_pf_cfg_cmd *req; struct hclge_desc desc; u8 vlan_offset_byte_val; u8 vlan_offset_byte; @@ -3720,7 +3721,7 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle, vlan_offset_byte = (vlan_id % 160) / 8; vlan_offset_byte_val = 1 << (vlan_id % 8); - req = (struct hclge_vlan_filter_pf_cfg *)desc.data; + req = (struct hclge_vlan_filter_pf_cfg_cmd *)desc.data; req->vlan_offset = vlan_offset_160; req->vlan_cfg = is_kill; req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val; @@ -3782,7 +3783,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_config_max_frm_size *req; + struct hclge_config_max_frm_size_cmd *req; struct hclge_dev *hdev = vport->back; struct hclge_desc desc; int ret; @@ -3793,7 +3794,7 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) hdev->mps = new_mtu; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAX_FRM_SIZE, false); - req = (struct hclge_config_max_frm_size *)desc.data; + req = (struct hclge_config_max_frm_size_cmd *)desc.data; req->max_frm_size = cpu_to_le16(new_mtu); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -3808,13 +3809,13 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, bool enable) { - struct hclge_reset_tqp_queue *req; + struct hclge_reset_tqp_queue_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, false); - req = (struct hclge_reset_tqp_queue *)desc.data; + req = (struct hclge_reset_tqp_queue_cmd *)desc.data; req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK); hnae_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable); @@ -3830,13 +3831,13 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id) { - struct hclge_reset_tqp_queue *req; + struct hclge_reset_tqp_queue_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, true); - req = (struct hclge_reset_tqp_queue *)desc.data; + req = (struct hclge_reset_tqp_queue_cmd *)desc.data; req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK); ret = hclge_cmd_send(&hdev->hw, &desc, 1); From a90bb9a5ea1db7f10096e7573b7d45295ad4eada Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 9 Oct 2017 15:44:00 +0800 Subject: [PATCH 0712/2177] net: hns3: Cleanup for endian issue in hns3 driver This patch fixes a lot of endian issues 
detected by sparse. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.c | 8 +- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 10 +- .../hisilicon/hns3/hns3pf/hclge_main.c | 212 ++++++++++-------- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 26 ++- .../hisilicon/hns3/hns3pf/hns3_enet.c | 4 +- 5 files changed, 148 insertions(+), 112 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 648b64bc363a..05985d81dda0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -208,7 +208,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) * which will be use for hardware to write back */ ntc = hw->cmq.csq.next_to_use; - opcode = desc[0].opcode; + opcode = le16_to_cpu(desc[0].opcode); while (handle < num) { desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use]; *desc_to_use = desc[handle]; @@ -225,7 +225,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) * If the command is sync, wait for the firmware to write back, * if multi descriptors to be sent, use the first one to check */ - if (HCLGE_SEND_SYNC(desc->flag)) { + if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) { do { if (hclge_cmd_csq_done(hw)) break; @@ -244,9 +244,9 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) pr_debug("Get cmd desc:\n"); if (likely(!hclge_is_special_opcode(opcode))) - desc_ret = desc[handle].retval; + desc_ret = le16_to_cpu(desc[handle].retval); else - desc_ret = desc[0].retval; + desc_ret = le16_to_cpu(desc[0].retval); if ((enum hclge_cmd_return_status)desc_ret == HCLGE_CMD_EXEC_SUCCESS) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index d2ff0e37f9f5..8f3ba02aea3c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -434,8 +434,8 @@ struct hclge_rss_input_tuple_cmd { #define HCLGE_RSS_CFG_TBL_SIZE 16 struct hclge_rss_indirection_table_cmd { - u16 start_table_index; - u16 rss_set_bitmap; + __le16 start_table_index; + __le16 rss_set_bitmap; u8 rsv[4]; u8 rss_result[HCLGE_RSS_CFG_TBL_SIZE]; }; @@ -446,7 +446,7 @@ struct hclge_rss_indirection_table_cmd { #define HCLGE_RSS_TC_SIZE_M GENMASK(14, 12) #define HCLGE_RSS_TC_VALID_B 15 struct hclge_rss_tc_mode_cmd { - u16 rss_tc_mode[HCLGE_MAX_TC_NUM]; + __le16 rss_tc_mode[HCLGE_MAX_TC_NUM]; u8 rsv[8]; }; @@ -601,7 +601,7 @@ struct hclge_cfg_func_mta_filter_cmd { #define HCLGE_CFG_MTA_ITEM_IDX_S 0x0 #define HCLGE_CFG_MTA_ITEM_IDX_M GENMASK(11, 0) struct hclge_cfg_func_mta_item_cmd { - u16 item_idx; /* Only used lowest 12 bit */ + __le16 item_idx; /* Only used lowest 12 bit */ u8 accept; /* Only used lowest 1 bit */ u8 rsv[21]; }; @@ -645,7 +645,7 @@ struct hclge_vlan_filter_pf_cfg_cmd { }; struct hclge_vlan_filter_vf_cfg_cmd { - u16 vlan_id; + __le16 vlan_id; u8 resp_code; u8 rsv; u8 vlan_cfg; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 5103f234d1c3..6115c2f730ce 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -362,7 +362,7 @@ static int hclge_64_bit_update_stats(struct hclge_dev *hdev) #define HCLGE_64_BIT_RTN_DATANUM 4 u64 *data = (u64 
*)(&hdev->hw_stats.all_64_bit_stats); struct hclge_desc desc[HCLGE_64_BIT_CMD_NUM]; - u64 *desc_data; + __le64 *desc_data; int i, k, n; int ret; @@ -376,14 +376,14 @@ static int hclge_64_bit_update_stats(struct hclge_dev *hdev) for (i = 0; i < HCLGE_64_BIT_CMD_NUM; i++) { if (unlikely(i == 0)) { - desc_data = (u64 *)(&desc[i].data[0]); + desc_data = (__le64 *)(&desc[i].data[0]); n = HCLGE_64_BIT_RTN_DATANUM - 1; } else { - desc_data = (u64 *)(&desc[i]); + desc_data = (__le64 *)(&desc[i]); n = HCLGE_64_BIT_RTN_DATANUM; } for (k = 0; k < n; k++) { - *data++ += cpu_to_le64(*desc_data); + *data++ += le64_to_cpu(*desc_data); desc_data++; } } @@ -411,7 +411,7 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev) struct hclge_desc desc[HCLGE_32_BIT_CMD_NUM]; struct hclge_32_bit_stats *all_32_bit_stats; - u32 *desc_data; + __le32 *desc_data; int i, k, n; u64 *data; int ret; @@ -431,21 +431,27 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev) hclge_reset_partial_32bit_counter(all_32_bit_stats); for (i = 0; i < HCLGE_32_BIT_CMD_NUM; i++) { if (unlikely(i == 0)) { - all_32_bit_stats->igu_rx_err_pkt += - cpu_to_le32(desc[i].data[0]); - all_32_bit_stats->igu_rx_no_eof_pkt += - cpu_to_le32(desc[i].data[1] & 0xffff); - all_32_bit_stats->igu_rx_no_sof_pkt += - cpu_to_le32((desc[i].data[1] >> 16) & 0xffff); + __le16 *desc_data_16bit; - desc_data = (u32 *)(&desc[i].data[2]); + all_32_bit_stats->igu_rx_err_pkt += + le32_to_cpu(desc[i].data[0]); + + desc_data_16bit = (__le16 *)&desc[i].data[1]; + all_32_bit_stats->igu_rx_no_eof_pkt += + le16_to_cpu(*desc_data_16bit); + + desc_data_16bit++; + all_32_bit_stats->igu_rx_no_sof_pkt += + le16_to_cpu(*desc_data_16bit); + + desc_data = &desc[i].data[2]; n = HCLGE_32_BIT_RTN_DATANUM - 4; } else { - desc_data = (u32 *)(&desc[i]); + desc_data = (__le32 *)&desc[i]; n = HCLGE_32_BIT_RTN_DATANUM; } for (k = 0; k < n; k++) { - *data++ += cpu_to_le32(*desc_data); + *data++ += le32_to_cpu(*desc_data); desc_data++; } } @@ -460,7 +466,7 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev) u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); struct hclge_desc desc[HCLGE_MAC_CMD_NUM]; - u64 *desc_data; + __le64 *desc_data; int i, k, n; int ret; @@ -475,14 +481,14 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev) for (i = 0; i < HCLGE_MAC_CMD_NUM; i++) { if (unlikely(i == 0)) { - desc_data = (u64 *)(&desc[i].data[0]); + desc_data = (__le64 *)(&desc[i].data[0]); n = HCLGE_RTN_DATA_NUM - 2; } else { - desc_data = (u64 *)(&desc[i]); + desc_data = (__le64 *)(&desc[i]); n = HCLGE_RTN_DATA_NUM; } for (k = 0; k < n; k++) { - *data++ += cpu_to_le64(*desc_data); + *data++ += le64_to_cpu(*desc_data); desc_data++; } } @@ -508,7 +514,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) HCLGE_OPC_QUERY_RX_STATUS, true); - desc[0].data[0] = (tqp->index & 0x1ff); + desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff)); ret = hclge_cmd_send(&hdev->hw, desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -517,7 +523,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) return ret; } tqp->tqp_stats.rcb_rx_ring_pktnum_rcd += - cpu_to_le32(desc[0].data[4]); + le32_to_cpu(desc[0].data[4]); } for (i = 0; i < kinfo->num_tqps; i++) { @@ -528,7 +534,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) HCLGE_OPC_QUERY_TX_STATUS, true); - desc[0].data[0] = (tqp->index & 0x1ff); + desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff)); ret = hclge_cmd_send(&hdev->hw, desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -537,7 +543,7 @@ 
static int hclge_tqps_update_stats(struct hnae3_handle *handle) return ret; } tqp->tqp_stats.rcb_tx_ring_pktnum_rcd += - cpu_to_le32(desc[0].data[4]); + le32_to_cpu(desc[0].data[4]); } return 0; @@ -552,12 +558,12 @@ static u64 *hclge_tqps_get_stats(struct hnae3_handle *handle, u64 *data) for (i = 0; i < kinfo->num_tqps; i++) { tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q); - *buff++ = cpu_to_le64(tqp->tqp_stats.rcb_tx_ring_pktnum_rcd); + *buff++ = tqp->tqp_stats.rcb_tx_ring_pktnum_rcd; } for (i = 0; i < kinfo->num_tqps; i++) { tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q); - *buff++ = cpu_to_le64(tqp->tqp_stats.rcb_rx_ring_pktnum_rcd); + *buff++ = tqp->tqp_stats.rcb_rx_ring_pktnum_rcd; } return buff; @@ -993,16 +999,17 @@ static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg) int i, ret; for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) { + u32 offset = 0; + req = (struct hclge_cfg_param_cmd *)desc[i].data; hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_CFG_PARAM, true); - hnae_set_field(req->offset, HCLGE_CFG_OFFSET_M, + hnae_set_field(offset, HCLGE_CFG_OFFSET_M, HCLGE_CFG_OFFSET_S, i * HCLGE_CFG_RD_LEN_BYTES); /* Len should be united by 4 bytes when send to hardware */ - hnae_set_field(req->offset, HCLGE_CFG_RD_LEN_M, - HCLGE_CFG_RD_LEN_S, + hnae_set_field(offset, HCLGE_CFG_RD_LEN_M, HCLGE_CFG_RD_LEN_S, HCLGE_CFG_RD_LEN_BYTES / HCLGE_CFG_RD_LEN_UNIT); - req->offset = cpu_to_le32(req->offset); + req->offset = cpu_to_le32(offset); } ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PF_CFG_DESC_NUM); @@ -1101,14 +1108,21 @@ static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min, { struct hclge_cfg_tso_status_cmd *req; struct hclge_desc desc; + u16 tso_mss; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false); req = (struct hclge_cfg_tso_status_cmd *)desc.data; - hnae_set_field(req->tso_mss_min, HCLGE_TSO_MSS_MIN_M, + + tso_mss = 0; + hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M, HCLGE_TSO_MSS_MIN_S, tso_mss_min); - hnae_set_field(req->tso_mss_max, HCLGE_TSO_MSS_MIN_M, + req->tso_mss_min = cpu_to_le16(tso_mss); + + tso_mss = 0; + hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M, HCLGE_TSO_MSS_MIN_S, tso_mss_max); + req->tso_mss_max = cpu_to_le16(tso_mss); return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -1152,7 +1166,7 @@ static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id, req = (struct hclge_tqp_map_cmd *)desc.data; req->tqp_id = cpu_to_le16(tqp_pid); - req->tqp_vf = cpu_to_le16(func_id); + req->tqp_vf = func_id; req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B | 1 << HCLGE_TQP_MAP_EN_B; req->tqp_vid = cpu_to_le16(tqp_vid); @@ -2131,12 +2145,14 @@ static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) { struct hclge_config_auto_neg_cmd *req; struct hclge_desc desc; + u32 flag = 0; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false); req = (struct hclge_config_auto_neg_cmd *)desc.data; - hnae_set_bit(req->cfg_an_cmd_flag, HCLGE_MAC_CFG_AN_EN_B, !!enable); + hnae_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable); + req->cfg_an_cmd_flag = cpu_to_le32(flag); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -2525,8 +2541,9 @@ static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir) hclge_cmd_setup_basic_desc (&desc, HCLGE_OPC_RSS_INDIR_TABLE, false); - req->start_table_index = i * HCLGE_RSS_CFG_TBL_SIZE; - req->rss_set_bitmap = HCLGE_RSS_SET_BITMAP_MSK; + req->start_table_index = + cpu_to_le16(i * HCLGE_RSS_CFG_TBL_SIZE); + req->rss_set_bitmap = 
cpu_to_le16(HCLGE_RSS_SET_BITMAP_MSK); for (j = 0; j < HCLGE_RSS_CFG_TBL_SIZE; j++) req->rss_result[j] = @@ -2555,12 +2572,15 @@ static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid, req = (struct hclge_rss_tc_mode_cmd *)desc.data; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - hnae_set_bit(req->rss_tc_mode[i], HCLGE_RSS_TC_VALID_B, - (tc_valid[i] & 0x1)); - hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_SIZE_M, + u16 mode = 0; + + hnae_set_bit(mode, HCLGE_RSS_TC_VALID_B, (tc_valid[i] & 0x1)); + hnae_set_field(mode, HCLGE_RSS_TC_SIZE_M, HCLGE_RSS_TC_SIZE_S, tc_size[i]); - hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_OFFSET_M, + hnae_set_field(mode, HCLGE_RSS_TC_OFFSET_M, HCLGE_RSS_TC_OFFSET_S, tc_offset[i]); + + req->rss_tc_mode[i] = cpu_to_le16(mode); } ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2763,15 +2783,16 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id, i = 0; for (node = ring_chain; node; node = node->next) { - hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M, - HCLGE_INT_TYPE_S, + u16 type_and_id = 0; + + hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); - hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M, - HCLGE_TQP_ID_S, node->tqp_index); - hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M, + hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S, + node->tqp_index); + hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M, HCLGE_INT_GL_IDX_S, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); - req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]); + req->tqp_type_and_id[i] = cpu_to_le16(type_and_id); req->vfid = vport->vport_id; if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) { @@ -2851,16 +2872,17 @@ static int hclge_unmap_ring_from_vector( i = 0; for (node = ring_chain; node; node = node->next) { - hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M, - HCLGE_INT_TYPE_S, + u16 type_and_id = 0; + + hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); - hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M, - HCLGE_TQP_ID_S, node->tqp_index); - hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M, + hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S, + node->tqp_index); + hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M, HCLGE_INT_GL_IDX_S, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); - req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]); + req->tqp_type_and_id[i] = cpu_to_le16(type_and_id); req->vfid = vport->vport_id; if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) { @@ -2948,27 +2970,25 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) struct hclge_desc desc; struct hclge_config_mac_mode_cmd *req = (struct hclge_config_mac_mode_cmd *)desc.data; + u32 loop_en = 0; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_TX_EN_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_EN_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_TX_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_RX_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_TX_B, 0); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_RX_B, 0); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_APP_LP_B, 0); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_LINE_LP_B, 0); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_FCS_TX_B, 
enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_FCS_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, - HCLGE_MAC_RX_FCS_STRIP_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, - HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, - HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, - HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0); + hnae_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0); + hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0); + hnae_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0); + hnae_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable); + req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) @@ -3145,16 +3165,16 @@ static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr) word_num = vfid / 32; bit_num = vfid % 32; if (clr) - desc[1].data[word_num] &= ~(1 << bit_num); + desc[1].data[word_num] &= cpu_to_le32(~(1 << bit_num)); else - desc[1].data[word_num] |= (1 << bit_num); + desc[1].data[word_num] |= cpu_to_le32(1 << bit_num); } else { word_num = (vfid - 192) / 32; bit_num = vfid % 32; if (clr) - desc[2].data[word_num] &= ~(1 << bit_num); + desc[2].data[word_num] &= cpu_to_le32(~(1 << bit_num)); else - desc[2].data[word_num] |= (1 << bit_num); + desc[2].data[word_num] |= cpu_to_le32(1 << bit_num); } return 0; @@ -3257,15 +3277,16 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport, struct hclge_dev *hdev = vport->back; struct hclge_cfg_func_mta_item_cmd *req; struct hclge_desc desc; + u16 item_idx = 0; int ret; req = (struct hclge_cfg_func_mta_item_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false); hnae_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable); - hnae_set_field(req->item_idx, HCLGE_CFG_MTA_ITEM_IDX_M, + hnae_set_field(item_idx, HCLGE_CFG_MTA_ITEM_IDX_M, HCLGE_CFG_MTA_ITEM_IDX_S, idx); - req->item_idx = cpu_to_le16(req->item_idx); + req->item_idx = cpu_to_le16(item_idx); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -3284,6 +3305,7 @@ static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport, struct hclge_dev *hdev = vport->back; struct hclge_desc desc; u8 resp_code; + u16 retval; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_REMOVE, false); @@ -3297,9 +3319,10 @@ static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport, ret); return ret; } - resp_code = (desc.data[0] >> 8) & 0xff; + resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff; + retval = le16_to_cpu(desc.retval); - return hclge_get_mac_vlan_cmd_status(vport, desc.retval, resp_code, + return hclge_get_mac_vlan_cmd_status(vport, retval, resp_code, HCLGE_MAC_VLAN_REMOVE); } @@ -3310,6 +3333,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, { struct hclge_dev *hdev = vport->back; u8 resp_code; + u16 retval; int ret; hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_MAC_VLAN_ADD, true); @@ -3338,9 +3362,10 @@ static int 
hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, ret); return ret; } - resp_code = (desc[0].data[0] >> 8) & 0xff; + resp_code = (le32_to_cpu(desc[0].data[0]) >> 8) & 0xff; + retval = le16_to_cpu(desc[0].retval); - return hclge_get_mac_vlan_cmd_status(vport, desc[0].retval, resp_code, + return hclge_get_mac_vlan_cmd_status(vport, retval, resp_code, HCLGE_MAC_VLAN_LKUP); } @@ -3351,6 +3376,7 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, struct hclge_dev *hdev = vport->back; int cfg_status; u8 resp_code; + u16 retval; int ret; if (!mc_desc) { @@ -3362,8 +3388,10 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, &desc, 1); - resp_code = (desc.data[0] >> 8) & 0xff; - cfg_status = hclge_get_mac_vlan_cmd_status(vport, desc.retval, + resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff; + retval = le16_to_cpu(desc.retval); + + cfg_status = hclge_get_mac_vlan_cmd_status(vport, retval, resp_code, HCLGE_MAC_VLAN_ADD); } else { @@ -3376,9 +3404,10 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, memcpy(mc_desc[0].data, req, sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, mc_desc, 3); - resp_code = (mc_desc[0].data[0] >> 8) & 0xff; - cfg_status = hclge_get_mac_vlan_cmd_status(vport, - mc_desc[0].retval, + resp_code = (le32_to_cpu(mc_desc[0].data[0]) >> 8) & 0xff; + retval = le16_to_cpu(mc_desc[0].retval); + + cfg_status = hclge_get_mac_vlan_cmd_status(vport, retval, resp_code, HCLGE_MAC_VLAN_ADD); } @@ -3407,6 +3436,7 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, struct hclge_dev *hdev = vport->back; struct hclge_mac_vlan_tbl_entry_cmd req; enum hclge_cmd_status status; + u16 egress_port = 0; /* mac addr check */ if (is_zero_ether_addr(addr) || @@ -3426,15 +3456,15 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0); hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 0); hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0); - hnae_set_bit(req.egress_port, - HCLGE_MAC_EPORT_SW_EN_B, 0); - hnae_set_bit(req.egress_port, - HCLGE_MAC_EPORT_TYPE_B, 0); - hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_VFID_M, + + hnae_set_bit(egress_port, HCLGE_MAC_EPORT_SW_EN_B, 0); + hnae_set_bit(egress_port, HCLGE_MAC_EPORT_TYPE_B, 0); + hnae_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M, HCLGE_MAC_EPORT_VFID_S, vport->vport_id); - hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_PFID_M, + hnae_set_field(egress_port, HCLGE_MAC_EPORT_PFID_M, HCLGE_MAC_EPORT_PFID_S, 0); - req.egress_port = cpu_to_le16(req.egress_port); + + req.egress_port = cpu_to_le16(egress_port); hclge_prepare_mac_addr(&req, addr); @@ -3667,7 +3697,7 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, req0 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[0].data; req1 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[1].data; - req0->vlan_id = vlan; + req0->vlan_id = cpu_to_le16(vlan); req0->vlan_cfg = is_kill; if (vf_byte_off < HCLGE_MAX_VF_BYTES) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 359ee670d1e1..1ae6eae82eb3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -283,6 +283,7 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, struct hclge_pg_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; struct 
hclge_desc desc; + u32 shapping_para = 0; opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING : HCLGE_OPC_TM_PG_C_SHAPPING; @@ -292,11 +293,13 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pg_id = pg_id; - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b); - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u); - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s); - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b); - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s); + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, bs_b); + hclge_tm_set_field(shapping_para, BS_S, bs_s); + + shap_cfg_cmd->pg_shapping_para = cpu_to_le32(shapping_para); return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -337,6 +340,7 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, struct hclge_pri_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; struct hclge_desc desc; + u32 shapping_para = 0; opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING : HCLGE_OPC_TM_PRI_C_SHAPPING; @@ -347,11 +351,13 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pri_id = pri_id; - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b); - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u); - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s); - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b); - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s); + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, bs_b); + hclge_tm_set_field(shapping_para, BS_S, bs_s); + + shap_cfg_cmd->pri_shapping_para = cpu_to_le32(shapping_para); return hclge_cmd_send(&hdev->hw, &desc, 1); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index c2a0537c649f..aa73855366b3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -761,7 +761,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, if (type == DESC_TYPE_SKB) { skb = (struct sk_buff *)priv; - paylen = cpu_to_le16(skb->len); + paylen = skb->len; if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_reset_mac_len(skb); @@ -795,7 +795,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, cpu_to_le32(ol_type_vlan_len_msec); desc->tx.type_cs_vlan_tso_len = cpu_to_le32(type_cs_vlan_tso); - desc->tx.paylen = cpu_to_le16(paylen); + desc->tx.paylen = cpu_to_le32(paylen); desc->tx.mss = cpu_to_le16(mss); } From 1db9b1bf82b98cb265ed227dec3732aafae0eb0c Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 9 Oct 2017 15:44:01 +0800 Subject: [PATCH 0713/2177] net: hns3: Cleanup for non-static function in hns3 driver This patch fixes the following warning from sparse: warning: symbol 'hns3_set_multicast_list' was not declared. Should it be static. hns3_set_multicast_list turns out to be not used, so delete it. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
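The sparse warning quoted in this log fires for any function that has external linkage but no declaration visible in a header; the fix is either to add a prototype or, when the symbol is only used in its own file, to give it internal linkage (and to delete it outright if it is unused). A minimal illustration of the pattern with a hypothetical helper, not one of the driver's own symbols, checked by building with `make C=1` so sparse runs alongside gcc:

#include <linux/types.h>

/* External linkage with no declaration in any header makes sparse warn:
 *   warning: symbol 'hyp_sum_u32' was not declared. Should it be static?
 * Internal linkage for the file-local helper silences the warning and
 * keeps the symbol out of the global namespace.
 */
static u32 hyp_sum_u32(const u32 *vals, unsigned int n)
{
        u32 sum = 0;

        while (n--)
                sum += *vals++;
        return sum;
}
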
Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.c | 4 ++-- .../hisilicon/hns3/hns3pf/hclge_main.c | 14 ++++++------- .../hisilicon/hns3/hns3pf/hns3_enet.c | 20 ++++--------------- .../hisilicon/hns3/hns3pf/hns3_ethtool.c | 4 ++-- 4 files changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 05985d81dda0..8ecd80744767 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -276,8 +276,8 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) return retval; } -enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw, - u32 *version) +static enum hclge_cmd_status hclge_cmd_query_firmware_version( + struct hclge_hw *hw, u32 *version) { struct hclge_query_version_cmd *resp; struct hclge_desc desc; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 6115c2f730ce..c91c779aeeed 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1550,8 +1550,8 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev, * @buf_alloc: pointer to buffer calculation data * @return: 0: calculate sucessful, negative: fail */ -int hclge_rx_buffer_calc(struct hclge_dev *hdev, - struct hclge_pkt_buf_alloc *buf_alloc) +static int hclge_rx_buffer_calc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { u32 rx_all = hdev->pkt_buf_size; int no_pfc_priv_num, pfc_priv_num; @@ -2828,9 +2828,9 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id, return 0; } -int hclge_map_handle_ring_to_vector(struct hnae3_handle *handle, - int vector, - struct hnae3_ring_chain_node *ring_chain) +static int hclge_map_handle_ring_to_vector( + struct hnae3_handle *handle, int vector, + struct hnae3_ring_chain_node *ring_chain) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; @@ -3206,8 +3206,8 @@ static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry_cmd *new_req, new_req->mac_addr_lo16 = cpu_to_le16(low_val & 0xffff); } -u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport, - const u8 *addr) +static u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport, + const u8 *addr) { u16 high_val = addr[1] | (addr[0] << 8); struct hclge_dev *hdev = vport->back; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index aa73855366b3..26bbc91add65 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -24,7 +24,7 @@ #include "hnae3.h" #include "hns3_enet.h" -const char hns3_driver_name[] = "hns3"; +static const char hns3_driver_name[] = "hns3"; const char hns3_driver_version[] = VERMAGIC_STRING; static const char hns3_driver_string[] = "Hisilicon Ethernet Network Driver for Hip08 Family"; @@ -304,18 +304,6 @@ static int hns3_nic_net_stop(struct net_device *netdev) return 0; } -void hns3_set_multicast_list(struct net_device *netdev) -{ - struct hnae3_handle *h = hns3_get_handle(netdev); - struct netdev_hw_addr *ha = NULL; - - if (h->ae_algo->ops->set_mc_addr) { - netdev_for_each_mc_addr(ha, netdev) - if (h->ae_algo->ops->set_mc_addr(h, ha->addr)) - netdev_err(netdev, "set multicast fail\n"); - } -} - static int 
hns3_nic_uc_sync(struct net_device *netdev, const unsigned char *addr) { @@ -360,7 +348,7 @@ static int hns3_nic_mc_unsync(struct net_device *netdev, return 0; } -void hns3_nic_set_rx_mode(struct net_device *netdev) +static void hns3_nic_set_rx_mode(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); @@ -2596,7 +2584,7 @@ static void hns3_fini_ring(struct hns3_enet_ring *ring) ring->next_to_use = 0; } -int hns3_buf_size2type(u32 buf_size) +static int hns3_buf_size2type(u32 buf_size) { int bd_size_type; @@ -2908,7 +2896,7 @@ err_out: return ret; } -const struct hnae3_client_ops client_ops = { +static const struct hnae3_client_ops client_ops = { .init_instance = hns3_client_init, .uninit_instance = hns3_client_uninit, .link_status_change = hns3_link_status_change, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index a892a157f346..060bacebf86a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -215,8 +215,8 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data) * @stats: statistics info. * @data: statistics data. */ -void hns3_get_stats(struct net_device *netdev, struct ethtool_stats *stats, - u64 *data) +static void hns3_get_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) { struct hnae3_handle *h = hns3_get_handle(netdev); u64 *p = data; From 3a4b0129bf33caca5743891906393f17a2224d44 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:03 -0700 Subject: [PATCH 0714/2177] nfp: output control messages to trace_devlink_hwmsg() Use standard devlink trace point to allow tracing of control messages. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_app.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index af640b5c2108..857bb33020ba 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -36,6 +36,8 @@ #include +#include + #include "nfp_net_repr.h" struct bpf_prog; @@ -271,11 +273,17 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) { + trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, + skb->data, skb->len); + return nfp_ctrl_tx(app->ctrl, skb); } static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb) { + trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, + skb->data, skb->len); + app->type->ctrl_msg_rx(app, skb); } From a52b35c39ec6f33592df634ef2d1afae23401fdd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:04 -0700 Subject: [PATCH 0715/2177] nfp: bpf: lift the single-port limitation Limiting the eBPF offload to a single port was a workaround required for the PoC application FW which has not been released externally. It's not necessary any more. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index be2cf10a2cd7..074726980994 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -89,14 +89,6 @@ nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) struct nfp_net_bpf_priv *priv; int ret; - /* Limit to single port, otherwise it's just a NIC */ - if (id > 0) { - nfp_warn(app->cpp, - "BPF NIC doesn't support more than one port right now\n"); - nn->port = nfp_port_alloc(app, NFP_PORT_INVALID, nn->dp.netdev); - return PTR_ERR_OR_ZERO(nn->port); - } - priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; From b3f868df3c8904e964d7b257b47d7d90d93375e0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:05 -0700 Subject: [PATCH 0716/2177] nfp: bpf: use the power of sparse to check we encode registers right Define a new __bitwise type for software representation of registers. This will allow us to catch incorrect parameter types using sparse. Accessors we define also allow us to return correct enum type and therefore ensure all switches handle all register types. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 99 ++++++++++--------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 24 +---- drivers/net/ethernet/netronome/nfp/nfp_asm.h | 45 +++++++++ 3 files changed, 99 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 239dfbe8a0a1..7e8cdfb39607 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -128,11 +128,11 @@ struct nfp_insn_re_regs { bool i8; }; -static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst) +static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) { - u16 val = FIELD_GET(NN_REG_VAL, swreg); + u16 val = swreg_value(reg); - switch (FIELD_GET(NN_REG_TYPE, swreg)) { + switch (swreg_type(reg)) { case NN_REG_GPR_A: case NN_REG_GPR_B: case NN_REG_GPR_BOTH: @@ -149,33 +149,34 @@ static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst) return UR_REG_IMM_encode(val); case NN_REG_NONE: return is_dst ? 
UR_REG_NO_DST : REG_NONE; - default: - pr_err("unrecognized reg encoding %08x\n", swreg); - return 0; } + + pr_err("unrecognized reg encoding %08x\n", reg); + return 0; } static int -swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg) +swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_ur_regs *reg) { memset(reg, 0, sizeof(*reg)); /* Decode destination */ - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + if (swreg_type(dst) == NN_REG_IMM) return -EFAULT; - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + if (swreg_type(dst) == NN_REG_GPR_B) reg->dst_ab = ALU_DST_B; - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + if (swreg_type(dst) == NN_REG_GPR_BOTH) reg->wr_both = true; reg->dst = nfp_swreg_to_unreg(dst, true); /* Decode source operands */ - if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + if (swreg_type(lreg) == swreg_type(rreg)) return -EFAULT; - if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || - FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + if (swreg_type(lreg) == NN_REG_GPR_B || + swreg_type(rreg) == NN_REG_GPR_A) { reg->areg = nfp_swreg_to_unreg(rreg, false); reg->breg = nfp_swreg_to_unreg(lreg, false); reg->swap = true; @@ -187,11 +188,11 @@ swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg) return 0; } -static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8) +static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8) { - u16 val = FIELD_GET(NN_REG_VAL, swreg); + u16 val = swreg_value(reg); - switch (FIELD_GET(NN_REG_TYPE, swreg)) { + switch (swreg_type(reg)) { case NN_REG_GPR_A: case NN_REG_GPR_B: case NN_REG_GPR_BOTH: @@ -207,34 +208,37 @@ static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8) return RE_REG_IMM_encode(val & 0x7f); case NN_REG_NONE: return is_dst ? 
RE_REG_NO_DST : REG_NONE; - default: - pr_err("unrecognized reg encoding\n"); + case NN_REG_NNR: + pr_err("NNRs used with restricted encoding\n"); return 0; } + + pr_err("unrecognized reg encoding\n"); + return 0; } static int -swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg, - bool has_imm8) +swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_re_regs *reg, bool has_imm8) { memset(reg, 0, sizeof(*reg)); /* Decode destination */ - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + if (swreg_type(dst) == NN_REG_IMM) return -EFAULT; - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + if (swreg_type(dst) == NN_REG_GPR_B) reg->dst_ab = ALU_DST_B; - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + if (swreg_type(dst) == NN_REG_GPR_BOTH) reg->wr_both = true; reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); /* Decode source operands */ - if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + if (swreg_type(lreg) == swreg_type(rreg)) return -EFAULT; - if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || - FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + if (swreg_type(lreg) == NN_REG_GPR_B || + swreg_type(rreg) == NN_REG_GPR_A) { reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); reg->swap = true; @@ -281,7 +285,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, static void emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, - u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync) + u8 mode, u8 xfer, swreg lreg, swreg rreg, u8 size, bool sync) { struct nfp_insn_re_regs reg; int err; @@ -364,7 +368,7 @@ __emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, static void emit_br_byte_neq(struct nfp_prog *nfp_prog, - u32 dst, u8 imm, u8 byte, u16 addr, u8 defer) + swreg dst, u8 imm, u8 byte, u16 addr, u8 defer) { struct nfp_insn_re_regs reg; int err; @@ -399,13 +403,13 @@ __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, } static void -emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm, +emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm, enum immed_width width, bool invert, enum immed_shift shift) { struct nfp_insn_ur_regs reg; int err; - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) { + if (swreg_type(dst) == NN_REG_IMM) { nfp_prog->error = -EFAULT; return; } @@ -451,8 +455,8 @@ __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, } static void -emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg, - enum shf_sc sc, u8 shift) +emit_shf(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift) { struct nfp_insn_re_regs reg; int err; @@ -486,7 +490,8 @@ __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, } static void -emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg) +emit_alu(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum alu_op op, swreg rreg) { struct nfp_insn_ur_regs reg; int err; @@ -524,7 +529,7 @@ __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, static void emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, - u32 dst, u8 bmask, u32 src, bool zero) + swreg dst, u8 bmask, swreg src, bool zero) { struct nfp_insn_re_regs reg; int err; @@ -540,7 +545,7 @@ emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, } static void -emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 
src, +emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, enum shf_sc sc, u8 shift) { emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); @@ -565,7 +570,7 @@ static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) return true; } -static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) +static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) { enum immed_shift shift; u16 val; @@ -586,7 +591,7 @@ static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) * If the @imm is small enough encode it directly in operand and return * otherwise load @imm to a spare register and return its encoding. */ -static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) { if (FIELD_FIT(UR_REG_IMM_MAX, imm)) return reg_imm(imm); @@ -599,7 +604,7 @@ static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) * If the @imm is small enough encode it directly in operand and return * otherwise load @imm to a spare register and return its encoding. */ -static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) { if (FIELD_FIT(RE_REG_IMM_MAX, imm)) return reg_imm(imm); @@ -629,7 +634,7 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, { unsigned int i; u16 shift, sz; - u32 tmp_reg; + swreg tmp_reg; /* We load the value from the address indicated in @offset and then * shift out the data we don't need. Note: this is big endian! @@ -697,7 +702,7 @@ static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { - u32 tmp_reg; + swreg tmp_reg; if (alu_op == ALU_OP_AND) { if (!imm) @@ -815,7 +820,7 @@ wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ u8 reg = insn->dst_reg * 2; - u32 tmp_reg; + swreg tmp_reg; if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -1139,7 +1144,7 @@ static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - u32 dst = reg_both(meta->insn.dst_reg * 2); + swreg dst = reg_both(meta->insn.dst_reg * 2); if (meta->insn.off != offsetof(struct xdp_md, data) && meta->insn.off != offsetof(struct xdp_md, data_end)) @@ -1202,8 +1207,10 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ - u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1); - u32 tmp_reg; + swreg or1, or2, tmp_reg; + + or1 = reg_a(insn->dst_reg * 2); + or2 = reg_b(insn->dst_reg * 2 + 1); if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -1252,7 +1259,7 @@ static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ - u32 tmp_reg; + swreg tmp_reg; if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -1283,7 +1290,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ - u32 tmp_reg; + swreg tmp_reg; if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h 
b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 4051e943f363..ccc3dbea25f6 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -39,6 +39,7 @@ #include #include +#include "../nfp_asm.h" #include "../nfp_net.h" /* For branch fixup logic use up-most byte of branch instruction as scratch @@ -65,29 +66,6 @@ enum nfp_bpf_action_type { NN_ACT_XDP, }; -/* Software register representation, hardware encoding in asm.h */ -#define NN_REG_TYPE GENMASK(31, 24) -#define NN_REG_VAL GENMASK(7, 0) - -enum nfp_bpf_reg_type { - NN_REG_GPR_A = BIT(0), - NN_REG_GPR_B = BIT(1), - NN_REG_NNR = BIT(2), - NN_REG_XFER = BIT(3), - NN_REG_IMM = BIT(4), - NN_REG_NONE = BIT(5), -}; - -#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B) - -#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH)) -#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A)) -#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B)) -#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR)) -#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER)) -#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM)) -#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE)) - #define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT) #define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM) #define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index d2b535739d2b..9b9d5d18ee20 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -34,6 +34,7 @@ #ifndef __NFP_ASM_H__ #define __NFP_ASM_H__ 1 +#include #include #define REG_NONE 0 @@ -230,4 +231,48 @@ enum lcsr_wr_src { #define OP_CARB_BASE 0x0e000000000ULL #define OP_CARB_OR 0x00000010000ULL +/* Software register representation, independent of operand type */ +#define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_VAL GENMASK(7, 0) + +enum nfp_bpf_reg_type { + NN_REG_GPR_A = BIT(0), + NN_REG_GPR_B = BIT(1), + NN_REG_GPR_BOTH = NN_REG_GPR_A | NN_REG_GPR_B, + NN_REG_NNR = BIT(2), + NN_REG_XFER = BIT(3), + NN_REG_IMM = BIT(4), + NN_REG_NONE = BIT(5), +}; + +#define reg_both(x) __enc_swreg((x), NN_REG_GPR_BOTH) +#define reg_a(x) __enc_swreg((x), NN_REG_GPR_A) +#define reg_b(x) __enc_swreg((x), NN_REG_GPR_B) +#define reg_nnr(x) __enc_swreg((x), NN_REG_NNR) +#define reg_xfer(x) __enc_swreg((x), NN_REG_XFER) +#define reg_imm(x) __enc_swreg((x), NN_REG_IMM) +#define reg_none() __enc_swreg(0, NN_REG_NONE) + +typedef __u32 __bitwise swreg; + +static inline swreg __enc_swreg(u16 id, u8 type) +{ + return (__force swreg)(id | FIELD_PREP(NN_REG_TYPE, type)); +} + +static inline u32 swreg_raw(swreg reg) +{ + return (__force u32)reg; +} + +static inline enum nfp_bpf_reg_type swreg_type(swreg reg) +{ + return FIELD_GET(NN_REG_TYPE, swreg_raw(reg)); +} + +static inline u16 swreg_value(swreg reg) +{ + return FIELD_GET(NN_REG_VAL, swreg_raw(reg)); +} + #endif From 2a15bb1aba2bfca0a69cdbb113def57afd5666ab Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:06 -0700 Subject: [PATCH 0717/2177] nfp: bpf: move software reg helpers and cmd table out of translator Move the software reg helpers and some static data to nfp_asm.c. They are related to the previous patch, but move is done in a separate commit for ease of review. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
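A stripped-down sketch of the __bitwise pattern that the new swreg type (introduced in the previous patch and moved around here) relies on. The names below are illustrative and only the mechanism mirrors the real code: gcc still compiles the typedef as a plain u32, but sparse treats it as a distinct base type, so handing a raw integer to a function that expects the opaque handle is flagged at check time.

#include <linux/types.h>
#include <linux/bits.h>
#include <linux/bitfield.h>

#define MYREG_TYPE      GENMASK(31, 24)
#define MYREG_VAL       GENMASK(7, 0)

typedef __u32 __bitwise myreg;          /* opaque handle, like swreg */

static inline myreg enc_myreg(u16 val, u8 type)
{
        /* __force marks the one place where the raw bits are produced */
        return (__force myreg)(val | FIELD_PREP(MYREG_TYPE, type));
}

static inline u8 myreg_type(myreg reg)
{
        return FIELD_GET(MYREG_TYPE, (__force u32)reg);
}

static inline u16 myreg_value(myreg reg)
{
        return FIELD_GET(MYREG_VAL, (__force u32)reg);
}

static void emit_op(myreg dst)
{
        /* consumers only ever see the accessors, never the raw bits */
}

static void example(void)
{
        emit_op(enc_myreg(3, 1));       /* fine */
        emit_op(3);                     /* sparse: incorrect type in argument 1 */
}
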
Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 1 + drivers/net/ethernet/netronome/nfp/bpf/jit.c | 147 ---------------- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 167 +++++++++++++++++++ drivers/net/ethernet/netronome/nfp/nfp_asm.h | 24 +++ 4 files changed, 192 insertions(+), 147 deletions(-) create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_asm.c diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index becaacf1554d..bd3b2bd408bc 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -14,6 +14,7 @@ nfp-objs := \ nfpcore/nfp_resource.o \ nfpcore/nfp_rtsym.o \ nfpcore/nfp_target.o \ + nfp_asm.o \ nfp_app.o \ nfp_app_nic.o \ nfp_devlink.o \ diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 7e8cdfb39607..7bcff007075c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -110,154 +110,7 @@ nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) return offset - nfp_prog->start_off; } -/* --- SW reg --- */ -struct nfp_insn_ur_regs { - enum alu_dst_ab dst_ab; - u16 dst; - u16 areg, breg; - bool swap; - bool wr_both; -}; - -struct nfp_insn_re_regs { - enum alu_dst_ab dst_ab; - u8 dst; - u8 areg, breg; - bool swap; - bool wr_both; - bool i8; -}; - -static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) -{ - u16 val = swreg_value(reg); - - switch (swreg_type(reg)) { - case NN_REG_GPR_A: - case NN_REG_GPR_B: - case NN_REG_GPR_BOTH: - return val; - case NN_REG_NNR: - return UR_REG_NN | val; - case NN_REG_XFER: - return UR_REG_XFR | val; - case NN_REG_IMM: - if (val & ~0xff) { - pr_err("immediate too large\n"); - return 0; - } - return UR_REG_IMM_encode(val); - case NN_REG_NONE: - return is_dst ? UR_REG_NO_DST : REG_NONE; - } - - pr_err("unrecognized reg encoding %08x\n", reg); - return 0; -} - -static int -swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, - struct nfp_insn_ur_regs *reg) -{ - memset(reg, 0, sizeof(*reg)); - - /* Decode destination */ - if (swreg_type(dst) == NN_REG_IMM) - return -EFAULT; - - if (swreg_type(dst) == NN_REG_GPR_B) - reg->dst_ab = ALU_DST_B; - if (swreg_type(dst) == NN_REG_GPR_BOTH) - reg->wr_both = true; - reg->dst = nfp_swreg_to_unreg(dst, true); - - /* Decode source operands */ - if (swreg_type(lreg) == swreg_type(rreg)) - return -EFAULT; - - if (swreg_type(lreg) == NN_REG_GPR_B || - swreg_type(rreg) == NN_REG_GPR_A) { - reg->areg = nfp_swreg_to_unreg(rreg, false); - reg->breg = nfp_swreg_to_unreg(lreg, false); - reg->swap = true; - } else { - reg->areg = nfp_swreg_to_unreg(lreg, false); - reg->breg = nfp_swreg_to_unreg(rreg, false); - } - - return 0; -} - -static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8) -{ - u16 val = swreg_value(reg); - - switch (swreg_type(reg)) { - case NN_REG_GPR_A: - case NN_REG_GPR_B: - case NN_REG_GPR_BOTH: - return val; - case NN_REG_XFER: - return RE_REG_XFR | val; - case NN_REG_IMM: - if (val & ~(0x7f | has_imm8 << 7)) { - pr_err("immediate too large\n"); - return 0; - } - *i8 = val & 0x80; - return RE_REG_IMM_encode(val & 0x7f); - case NN_REG_NONE: - return is_dst ? 
RE_REG_NO_DST : REG_NONE; - case NN_REG_NNR: - pr_err("NNRs used with restricted encoding\n"); - return 0; - } - - pr_err("unrecognized reg encoding\n"); - return 0; -} - -static int -swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, - struct nfp_insn_re_regs *reg, bool has_imm8) -{ - memset(reg, 0, sizeof(*reg)); - - /* Decode destination */ - if (swreg_type(dst) == NN_REG_IMM) - return -EFAULT; - - if (swreg_type(dst) == NN_REG_GPR_B) - reg->dst_ab = ALU_DST_B; - if (swreg_type(dst) == NN_REG_GPR_BOTH) - reg->wr_both = true; - reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); - - /* Decode source operands */ - if (swreg_type(lreg) == swreg_type(rreg)) - return -EFAULT; - - if (swreg_type(lreg) == NN_REG_GPR_B || - swreg_type(rreg) == NN_REG_GPR_A) { - reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); - reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); - reg->swap = true; - } else { - reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); - reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); - } - - return 0; -} - /* --- Emitters --- */ -static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { - [CMD_TGT_WRITE8] = { 0x00, 0x42 }, - [CMD_TGT_READ8] = { 0x01, 0x43 }, - [CMD_TGT_READ_LE] = { 0x01, 0x40 }, - [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, -}; - static void __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c new file mode 100644 index 000000000000..4c9201bf9331 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2016-2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "nfp_asm.h" + +const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { + [CMD_TGT_WRITE8] = { 0x00, 0x42 }, + [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ_LE] = { 0x01, 0x40 }, + [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, +}; + +static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) +{ + u16 val = swreg_value(reg); + + switch (swreg_type(reg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_NNR: + return UR_REG_NN | val; + case NN_REG_XFER: + return UR_REG_XFR | val; + case NN_REG_IMM: + if (val & ~0xff) { + pr_err("immediate too large\n"); + return 0; + } + return UR_REG_IMM_encode(val); + case NN_REG_NONE: + return is_dst ? UR_REG_NO_DST : REG_NONE; + } + + pr_err("unrecognized reg encoding %08x\n", reg); + return 0; +} + +int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_ur_regs *reg) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (swreg_type(dst) == NN_REG_IMM) + return -EFAULT; + + if (swreg_type(dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (swreg_type(dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_unreg(dst, true); + + /* Decode source operands */ + if (swreg_type(lreg) == swreg_type(rreg)) + return -EFAULT; + + if (swreg_type(lreg) == NN_REG_GPR_B || + swreg_type(rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_unreg(rreg, false); + reg->breg = nfp_swreg_to_unreg(lreg, false); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_unreg(lreg, false); + reg->breg = nfp_swreg_to_unreg(rreg, false); + } + + return 0; +} + +static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8) +{ + u16 val = swreg_value(reg); + + switch (swreg_type(reg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_XFER: + return RE_REG_XFR | val; + case NN_REG_IMM: + if (val & ~(0x7f | has_imm8 << 7)) { + pr_err("immediate too large\n"); + return 0; + } + *i8 = val & 0x80; + return RE_REG_IMM_encode(val & 0x7f); + case NN_REG_NONE: + return is_dst ? 
RE_REG_NO_DST : REG_NONE; + case NN_REG_NNR: + pr_err("NNRs used with restricted encoding\n"); + return 0; + } + + pr_err("unrecognized reg encoding\n"); + return 0; +} + +int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_re_regs *reg, bool has_imm8) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (swreg_type(dst) == NN_REG_IMM) + return -EFAULT; + + if (swreg_type(dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (swreg_type(dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); + + /* Decode source operands */ + if (swreg_type(lreg) == swreg_type(rreg)) + return -EFAULT; + + if (swreg_type(lreg) == NN_REG_GPR_B || + swreg_type(rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 9b9d5d18ee20..8e87c0676c30 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -205,6 +205,8 @@ enum cmd_tgt_map { __CMD_TGT_MAP_SIZE, }; +extern const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE]; + enum cmd_mode { CMD_MODE_40b_AB = 0, CMD_MODE_40b_BA = 1, @@ -275,4 +277,26 @@ static inline u16 swreg_value(swreg reg) return FIELD_GET(NN_REG_VAL, swreg_raw(reg)); } +struct nfp_insn_ur_regs { + enum alu_dst_ab dst_ab; + u16 dst; + u16 areg, breg; + bool swap; + bool wr_both; +}; + +struct nfp_insn_re_regs { + enum alu_dst_ab dst_ab; + u8 dst; + u8 areg, breg; + bool swap; + bool wr_both; + bool i8; +}; + +int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_ur_regs *reg); +int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_re_regs *reg, bool has_imm8); + #endif From 3cae13193381fd4cb87791174d4c9fdf5b7025ff Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:07 -0700 Subject: [PATCH 0718/2177] nfp: bpf: encode all 64bit shifts Add encodings of all 64bit shift operations. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
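The three branches added to shl_imm64()/shr_imm64() below fall out of how a 64-bit BPF register is split across a pair of 32-bit hardware registers. A plain-C model of the same decomposition, with lo/hi standing in for reg(dst) and reg(dst + 1), may make the emitted dual-shift sequences easier to read; it is host arithmetic only, not driver code:

#include <stdint.h>

/* Constant 64-bit left shift over two 32-bit words, mirroring the cases
 * the JIT now emits: n < 32 needs a dual shift so the top bits of the
 * low word spill into the high word, n == 32 is a plain word move, and
 * n > 32 shifts the low word straight into the high word.
 */
static void shl64_model(uint32_t *lo, uint32_t *hi, unsigned int n)
{
        if (n == 0)
                return;                         /* nothing to do */
        if (n < 32) {
                *hi = (*hi << n) | (*lo >> (32 - n));
                *lo <<= n;
        } else if (n == 32) {
                *hi = *lo;
                *lo = 0;
        } else {                                /* 32 < n < 64 */
                *hi = *lo << (n - 32);
                *lo = 0;
        }
}

The right-shift case is the mirror image, with the low bits of the high word spilling into the low word before the high word itself is shifted.
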
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 44 +++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 7bcff007075c..095cf50e8450 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -825,12 +825,24 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; - if (insn->imm != 32) - return 1; /* TODO */ - - wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0); + if (insn->imm < 32) { + emit_shf(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), + SHF_SC_R_DSHF, 32 - insn->imm); + emit_shf(nfp_prog, reg_both(dst), + reg_none(), SHF_OP_NONE, reg_b(dst), + SHF_SC_L_SHF, insn->imm); + } else if (insn->imm == 32) { + wrp_reg_mov(nfp_prog, dst + 1, dst); + wrp_immed(nfp_prog, reg_both(dst), 0); + } else if (insn->imm > 32) { + emit_shf(nfp_prog, reg_both(dst + 1), + reg_none(), SHF_OP_NONE, reg_b(dst), + SHF_SC_L_SHF, insn->imm - 32); + wrp_immed(nfp_prog, reg_both(dst), 0); + } return 0; } @@ -838,12 +850,24 @@ static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; - if (insn->imm != 32) - return 1; /* TODO */ - - wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + if (insn->imm < 32) { + emit_shf(nfp_prog, reg_both(dst), + reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), + SHF_SC_R_DSHF, insn->imm); + emit_shf(nfp_prog, reg_both(dst + 1), + reg_none(), SHF_OP_NONE, reg_b(dst + 1), + SHF_SC_R_SHF, insn->imm); + } else if (insn->imm == 32) { + wrp_reg_mov(nfp_prog, dst, dst + 1); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else if (insn->imm > 32) { + emit_shf(nfp_prog, reg_both(dst), + reg_none(), SHF_OP_NONE, reg_b(dst + 1), + SHF_SC_R_SHF, insn->imm - 32); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } return 0; } From 226e0e94ce3575bd9ca85f90957516ac1dff5bf3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:08 -0700 Subject: [PATCH 0719/2177] nfp: bpf: remove register rename Remove the register renumbering optimization. To implement calling map and other helpers we need more strict register layout. We can't freely reassign register numbers. This will have the effect of running in 4 context/thread mode, which should be OK since we are moving towards integrating the BPF closer with FW app datapath anyway, and the target datapath itself runs in 4 context mode. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
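In outline, the pass removed below scanned the program for the BPF registers it actually used and remapped them onto a contiguous low range, so that programs touching seven or fewer registers could run with 16 registers per thread in the dense mode reported to the firmware. A simplified, hypothetical C schematic of that idea, kept only to make the deleted nfp_bpf_opt_reg_rename() easier to follow:

#include <stdbool.h>
#include <stdint.h>

#define MAX_BPF_REG     11              /* r0..r10 */

struct toy_insn {
        uint8_t src_reg;
        uint8_t dst_reg;
};

/* Map every register the program touches onto 0..n-1, rewrite the
 * instructions accordingly, and return how many registers remain.
 */
static unsigned int renumber_regs(struct toy_insn *prog, unsigned int cnt)
{
        bool used[MAX_BPF_REG] = { false };
        uint8_t tgt[MAX_BPF_REG] = { 0 };
        unsigned int i, next = 0;

        for (i = 0; i < cnt; i++) {
                used[prog[i].src_reg] = true;
                used[prog[i].dst_reg] = true;
        }
        for (i = 0; i < MAX_BPF_REG; i++)
                if (used[i])
                        tgt[i] = next++;
        for (i = 0; i < cnt; i++) {
                prog[i].src_reg = tgt[prog[i].src_reg];
                prog[i].dst_reg = tgt[prog[i].dst_reg];
        }
        return next;    /* e.g. a program using only r0, r2 and r10 -> 3 */
}

With helper calls on the way, register numbers must keep their fixed BPF meaning (r1-r5 as arguments, r0 as the return value), so the pass is dropped rather than taught about calls.
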
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 46 ++------------------ 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 095cf50e8450..469dc8a055f2 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1621,38 +1621,6 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) } } -/* Try to rename registers so that program uses only low ones */ -static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog) -{ - bool reg_used[MAX_BPF_REG] = {}; - u8 tgt_reg[MAX_BPF_REG] = {}; - struct nfp_insn_meta *meta; - unsigned int i, j; - - list_for_each_entry(meta, &nfp_prog->insns, l) { - if (meta->skip) - continue; - - reg_used[meta->insn.src_reg] = true; - reg_used[meta->insn.dst_reg] = true; - } - - for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) { - if (!reg_used[i]) - continue; - - tgt_reg[i] = j++; - } - nfp_prog->num_regs = j; - - list_for_each_entry(meta, &nfp_prog->insns, l) { - meta->insn.src_reg = tgt_reg[meta->insn.src_reg]; - meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg]; - } - - return 0; -} - /* Remove masking after load since our load guarantees this is not needed */ static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) { @@ -1729,14 +1697,8 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) { - int ret; - nfp_bpf_opt_reg_init(nfp_prog); - ret = nfp_bpf_opt_reg_rename(nfp_prog); - if (ret) - return ret; - nfp_bpf_opt_ld_mask(nfp_prog); nfp_bpf_opt_ld_shift(nfp_prog); @@ -1783,10 +1745,8 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, if (ret) goto out; - if (nfp_prog->num_regs <= 7) - nfp_prog->regs_per_thread = 16; - else - nfp_prog->regs_per_thread = 32; + nfp_prog->num_regs = MAX_BPF_REG; + nfp_prog->regs_per_thread = 32; nfp_prog->prog = prog_mem; nfp_prog->__prog_alloc_len = prog_sz; @@ -1799,7 +1759,7 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, } res->n_instr = nfp_prog->prog_len; - res->dense_mode = nfp_prog->num_regs <= 7; + res->dense_mode = false; out: nfp_prog_free(nfp_prog); From 509144e25049831ffe94160b1f03cf1b900aaa3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:09 -0700 Subject: [PATCH 0720/2177] nfp: bpf: remove packet marking support Temporarily drop support for skb->mark. We are primarily focusing on XDP offload, and implementing skb->mark on the new datapath has lower priority. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 13 ------------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 3 +-- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 469dc8a055f2..4fa220f710d2 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -542,16 +542,6 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) return construct_data_ind_ld(nfp_prog, offset, 0, false, size); } -static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) -{ - emit_alu(nfp_prog, NFP_BPF_ABI_MARK, - reg_none(), ALU_OP_NONE, reg_b(src)); - emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS, - NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK)); - - return 0; -} - static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { @@ -1053,9 +1043,6 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - if (meta->insn.off == offsetof(struct sk_buff, mark)) - return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); - return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index ccc3dbea25f6..7d959757a51a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -71,9 +71,8 @@ enum nfp_bpf_action_type { #define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) #define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) -#define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_FLAGS reg_imm(0) #define NFP_BPF_ABI_FLAG_MARK 1 -#define NFP_BPF_ABI_MARK reg_nnr(1) #define NFP_BPF_ABI_PKT reg_nnr(2) #define NFP_BPF_ABI_LEN reg_nnr(3) From 8afd9c961e95b1529cbc2b2b9c063a488659b337 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:10 -0700 Subject: [PATCH 0721/2177] nfp: add more white space to the instruction defines We need to add longer OP_* defines, move the values away. Purely whitespace commit. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_asm.h | 150 +++++++++---------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 8e87c0676c30..63cfd07da34e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -53,14 +53,14 @@ #define UR_REG_IMM_encode(x) (UR_REG_IMM | (x)) #define UR_REG_IMM_MAX 0x0ffULL -#define OP_BR_BASE 0x0d800000020ULL -#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL -#define OP_BR_MASK 0x0000000001fULL -#define OP_BR_EV_PIP 0x00000000300ULL -#define OP_BR_CSS 0x0000003c000ULL -#define OP_BR_DEFBR 0x00000300000ULL -#define OP_BR_ADDR_LO 0x007ffc00000ULL -#define OP_BR_ADDR_HI 0x10000000000ULL +#define OP_BR_BASE 0x0d800000020ULL +#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL +#define OP_BR_MASK 0x0000000001fULL +#define OP_BR_EV_PIP 0x00000000300ULL +#define OP_BR_CSS 0x0000003c000ULL +#define OP_BR_DEFBR 0x00000300000ULL +#define OP_BR_ADDR_LO 0x007ffc00000ULL +#define OP_BR_ADDR_HI 0x10000000000ULL #define nfp_is_br(_insn) \ (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) @@ -83,30 +83,30 @@ enum br_ctx_signal_state { BR_CSS_NONE = 2, }; -#define OP_BBYTE_BASE 0x0c800000000ULL -#define OP_BB_A_SRC 0x000000000ffULL -#define OP_BB_BYTE 0x00000000300ULL -#define OP_BB_B_SRC 0x0000003fc00ULL -#define OP_BB_I8 0x00000040000ULL -#define OP_BB_EQ 0x00000080000ULL -#define OP_BB_DEFBR 0x00000300000ULL -#define OP_BB_ADDR_LO 0x007ffc00000ULL -#define OP_BB_ADDR_HI 0x10000000000ULL +#define OP_BBYTE_BASE 0x0c800000000ULL +#define OP_BB_A_SRC 0x000000000ffULL +#define OP_BB_BYTE 0x00000000300ULL +#define OP_BB_B_SRC 0x0000003fc00ULL +#define OP_BB_I8 0x00000040000ULL +#define OP_BB_EQ 0x00000080000ULL +#define OP_BB_DEFBR 0x00000300000ULL +#define OP_BB_ADDR_LO 0x007ffc00000ULL +#define OP_BB_ADDR_HI 0x10000000000ULL -#define OP_BALU_BASE 0x0e800000000ULL -#define OP_BA_A_SRC 0x000000003ffULL -#define OP_BA_B_SRC 0x000000ffc00ULL -#define OP_BA_DEFBR 0x00000300000ULL -#define OP_BA_ADDR_HI 0x0007fc00000ULL +#define OP_BALU_BASE 0x0e800000000ULL +#define OP_BA_A_SRC 0x000000003ffULL +#define OP_BA_B_SRC 0x000000ffc00ULL +#define OP_BA_DEFBR 0x00000300000ULL +#define OP_BA_ADDR_HI 0x0007fc00000ULL -#define OP_IMMED_A_SRC 0x000000003ffULL -#define OP_IMMED_B_SRC 0x000000ffc00ULL -#define OP_IMMED_IMM 0x0000ff00000ULL -#define OP_IMMED_WIDTH 0x00060000000ULL -#define OP_IMMED_INV 0x00080000000ULL -#define OP_IMMED_SHIFT 0x00600000000ULL -#define OP_IMMED_BASE 0x0f000000000ULL -#define OP_IMMED_WR_AB 0x20000000000ULL +#define OP_IMMED_A_SRC 0x000000003ffULL +#define OP_IMMED_B_SRC 0x000000ffc00ULL +#define OP_IMMED_IMM 0x0000ff00000ULL +#define OP_IMMED_WIDTH 0x00060000000ULL +#define OP_IMMED_INV 0x00080000000ULL +#define OP_IMMED_SHIFT 0x00600000000ULL +#define OP_IMMED_BASE 0x0f000000000ULL +#define OP_IMMED_WR_AB 0x20000000000ULL enum immed_width { IMMED_WIDTH_ALL = 0, @@ -120,17 +120,17 @@ enum immed_shift { IMMED_SHIFT_2B = 2, }; -#define OP_SHF_BASE 0x08000000000ULL -#define OP_SHF_A_SRC 0x000000000ffULL -#define OP_SHF_SC 0x00000000300ULL -#define OP_SHF_B_SRC 0x0000003fc00ULL -#define OP_SHF_I8 0x00000040000ULL -#define OP_SHF_SW 0x00000080000ULL -#define OP_SHF_DST 0x0000ff00000ULL -#define OP_SHF_SHIFT 0x001f0000000ULL -#define OP_SHF_OP 0x00e00000000ULL -#define OP_SHF_DST_AB 0x01000000000ULL -#define OP_SHF_WR_AB 0x20000000000ULL +#define OP_SHF_BASE 0x08000000000ULL +#define OP_SHF_A_SRC 0x000000000ffULL +#define 
OP_SHF_SC 0x00000000300ULL +#define OP_SHF_B_SRC 0x0000003fc00ULL +#define OP_SHF_I8 0x00000040000ULL +#define OP_SHF_SW 0x00000080000ULL +#define OP_SHF_DST 0x0000ff00000ULL +#define OP_SHF_SHIFT 0x001f0000000ULL +#define OP_SHF_OP 0x00e00000000ULL +#define OP_SHF_DST_AB 0x01000000000ULL +#define OP_SHF_WR_AB 0x20000000000ULL enum shf_op { SHF_OP_NONE = 0, @@ -145,14 +145,14 @@ enum shf_sc { SHF_SC_R_DSHF = 3, }; -#define OP_ALU_A_SRC 0x000000003ffULL -#define OP_ALU_B_SRC 0x000000ffc00ULL -#define OP_ALU_DST 0x0003ff00000ULL -#define OP_ALU_SW 0x00040000000ULL -#define OP_ALU_OP 0x00f80000000ULL -#define OP_ALU_DST_AB 0x01000000000ULL -#define OP_ALU_BASE 0x0a000000000ULL -#define OP_ALU_WR_AB 0x20000000000ULL +#define OP_ALU_A_SRC 0x000000003ffULL +#define OP_ALU_B_SRC 0x000000ffc00ULL +#define OP_ALU_DST 0x0003ff00000ULL +#define OP_ALU_SW 0x00040000000ULL +#define OP_ALU_OP 0x00f80000000ULL +#define OP_ALU_DST_AB 0x01000000000ULL +#define OP_ALU_BASE 0x0a000000000ULL +#define OP_ALU_WR_AB 0x20000000000ULL enum alu_op { ALU_OP_NONE = 0x00, @@ -171,26 +171,26 @@ enum alu_dst_ab { ALU_DST_B = 1, }; -#define OP_LDF_BASE 0x0c000000000ULL -#define OP_LDF_A_SRC 0x000000000ffULL -#define OP_LDF_SC 0x00000000300ULL -#define OP_LDF_B_SRC 0x0000003fc00ULL -#define OP_LDF_I8 0x00000040000ULL -#define OP_LDF_SW 0x00000080000ULL -#define OP_LDF_ZF 0x00000100000ULL -#define OP_LDF_BMASK 0x0000f000000ULL -#define OP_LDF_SHF 0x001f0000000ULL -#define OP_LDF_WR_AB 0x20000000000ULL +#define OP_LDF_BASE 0x0c000000000ULL +#define OP_LDF_A_SRC 0x000000000ffULL +#define OP_LDF_SC 0x00000000300ULL +#define OP_LDF_B_SRC 0x0000003fc00ULL +#define OP_LDF_I8 0x00000040000ULL +#define OP_LDF_SW 0x00000080000ULL +#define OP_LDF_ZF 0x00000100000ULL +#define OP_LDF_BMASK 0x0000f000000ULL +#define OP_LDF_SHF 0x001f0000000ULL +#define OP_LDF_WR_AB 0x20000000000ULL -#define OP_CMD_A_SRC 0x000000000ffULL -#define OP_CMD_CTX 0x00000000300ULL -#define OP_CMD_B_SRC 0x0000003fc00ULL -#define OP_CMD_TOKEN 0x000000c0000ULL -#define OP_CMD_XFER 0x00001f00000ULL -#define OP_CMD_CNT 0x0000e000000ULL -#define OP_CMD_SIG 0x000f0000000ULL -#define OP_CMD_TGT_CMD 0x07f00000000ULL -#define OP_CMD_MODE 0x1c0000000000ULL +#define OP_CMD_A_SRC 0x000000000ffULL +#define OP_CMD_CTX 0x00000000300ULL +#define OP_CMD_B_SRC 0x0000003fc00ULL +#define OP_CMD_TOKEN 0x000000c0000ULL +#define OP_CMD_XFER 0x00001f00000ULL +#define OP_CMD_CNT 0x0000e000000ULL +#define OP_CMD_SIG 0x000f0000000ULL +#define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_MODE 0x1c0000000000ULL struct cmd_tgt_act { u8 token; @@ -218,11 +218,11 @@ enum cmd_ctx_swap { CMD_CTX_NO_SWAP = 3, }; -#define OP_LCSR_BASE 0x0fc00000000ULL -#define OP_LCSR_A_SRC 0x000000003ffULL -#define OP_LCSR_B_SRC 0x000000ffc00ULL -#define OP_LCSR_WRITE 0x00000200000ULL -#define OP_LCSR_ADDR 0x001ffc00000ULL +#define OP_LCSR_BASE 0x0fc00000000ULL +#define OP_LCSR_A_SRC 0x000000003ffULL +#define OP_LCSR_B_SRC 0x000000ffc00ULL +#define OP_LCSR_WRITE 0x00000200000ULL +#define OP_LCSR_ADDR 0x001ffc00000ULL enum lcsr_wr_src { LCSR_WR_AREG, @@ -230,8 +230,8 @@ enum lcsr_wr_src { LCSR_WR_IMM, }; -#define OP_CARB_BASE 0x0e000000000ULL -#define OP_CARB_OR 0x00000010000ULL +#define OP_CARB_BASE 0x0e000000000ULL +#define OP_CARB_OR 0x00000010000ULL /* Software register representation, independent of operand type */ #define NN_REG_TYPE GENMASK(31, 24) From 9f15d0f438372986b0f9de36f805fe2dd83f9c27 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:11 -0700 Subject: [PATCH 0722/2177] 
nfp: bpf: encode LMEM accesses NFP LMEM is a large, indirectly accessed register file. There are two basic indirect access registers. Each access operation may either use offset (up to 8 or 16 words) or perform post decrement/increment. Add encodings of LMEM indexes as instruction operands. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 44 ++++++++++++++++++++ drivers/net/ethernet/netronome/nfp/nfp_asm.h | 41 ++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index 4c9201bf9331..4bcab43da16d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -48,6 +48,7 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) { + bool lm_id, lm_dec = false; u16 val = swreg_value(reg); switch (swreg_type(reg)) { @@ -59,6 +60,33 @@ static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) return UR_REG_NN | val; case NN_REG_XFER: return UR_REG_XFR | val; + case NN_REG_LMEM: + lm_id = swreg_lm_idx(reg); + + switch (swreg_lm_mode(reg)) { + case NN_LM_MOD_NONE: + if (val & ~UR_REG_LM_IDX_MAX) { + pr_err("LM offset too large\n"); + return 0; + } + return UR_REG_LM | FIELD_PREP(UR_REG_LM_IDX, lm_id) | + val; + case NN_LM_MOD_DEC: + lm_dec = true; + /* fall through */ + case NN_LM_MOD_INC: + if (val) { + pr_err("LM offset in inc/dev mode\n"); + return 0; + } + return UR_REG_LM | UR_REG_LM_POST_MOD | + FIELD_PREP(UR_REG_LM_IDX, lm_id) | + FIELD_PREP(UR_REG_LM_POST_MOD_DEC, lm_dec); + default: + pr_err("bad LM mode for unrestricted operands %d\n", + swreg_lm_mode(reg)); + return 0; + } case NN_REG_IMM: if (val & ~0xff) { pr_err("immediate too large\n"); @@ -108,6 +136,7 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8) { u16 val = swreg_value(reg); + bool lm_id; switch (swreg_type(reg)) { case NN_REG_GPR_A: @@ -116,6 +145,21 @@ static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8) return val; case NN_REG_XFER: return RE_REG_XFR | val; + case NN_REG_LMEM: + lm_id = swreg_lm_idx(reg); + + if (swreg_lm_mode(reg) != NN_LM_MOD_NONE) { + pr_err("bad LM mode for restricted operands %d\n", + swreg_lm_mode(reg)); + return 0; + } + + if (val & ~RE_REG_LM_IDX_MAX) { + pr_err("LM offset too large\n"); + return 0; + } + + return RE_REG_LM | FIELD_PREP(RE_REG_LM_IDX, lm_id) | val; case NN_REG_IMM: if (val & ~(0x7f | has_imm8 << 7)) { pr_err("immediate too large\n"); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 63cfd07da34e..d722f6878bd8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -44,9 +44,17 @@ #define RE_REG_IMM_encode(x) \ (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1)) #define RE_REG_IMM_MAX 0x07fULL +#define RE_REG_LM 0x050 +#define RE_REG_LM_IDX 0x008 +#define RE_REG_LM_IDX_MAX 0x7 #define RE_REG_XFR 0x080 #define UR_REG_XFR 0x180 +#define UR_REG_LM 0x200 +#define UR_REG_LM_IDX 0x020 +#define UR_REG_LM_POST_MOD 0x010 +#define UR_REG_LM_POST_MOD_DEC 0x001 +#define UR_REG_LM_IDX_MAX 0xf #define UR_REG_NN 0x280 #define UR_REG_NO_DST 0x300 #define UR_REG_IMM UR_REG_NO_DST @@ -235,6 +243,8 @@ enum lcsr_wr_src { /* Software register representation, independent of operand 
type */ #define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_LM_IDX BIT(22) +#define NN_REG_LM_MOD GENMASK(21, 20) #define NN_REG_VAL GENMASK(7, 0) enum nfp_bpf_reg_type { @@ -245,6 +255,13 @@ enum nfp_bpf_reg_type { NN_REG_XFER = BIT(3), NN_REG_IMM = BIT(4), NN_REG_NONE = BIT(5), + NN_REG_LMEM = BIT(6), +}; + +enum nfp_bpf_lm_mode { + NN_LM_MOD_NONE = 0, + NN_LM_MOD_INC, + NN_LM_MOD_DEC, }; #define reg_both(x) __enc_swreg((x), NN_REG_GPR_BOTH) @@ -254,6 +271,10 @@ enum nfp_bpf_reg_type { #define reg_xfer(x) __enc_swreg((x), NN_REG_XFER) #define reg_imm(x) __enc_swreg((x), NN_REG_IMM) #define reg_none() __enc_swreg(0, NN_REG_NONE) +#define reg_lm(x, off) __enc_swreg_lm((x), NN_LM_MOD_NONE, (off)) +#define reg_lm_inc(x) __enc_swreg_lm((x), NN_LM_MOD_INC, 0) +#define reg_lm_dec(x) __enc_swreg_lm((x), NN_LM_MOD_DEC, 0) +#define __reg_lm(x, mod, off) __enc_swreg_lm((x), (mod), (off)) typedef __u32 __bitwise swreg; @@ -262,6 +283,16 @@ static inline swreg __enc_swreg(u16 id, u8 type) return (__force swreg)(id | FIELD_PREP(NN_REG_TYPE, type)); } +static inline swreg __enc_swreg_lm(u8 id, enum nfp_bpf_lm_mode mode, u8 off) +{ + WARN_ON(id > 1 || (off && mode != NN_LM_MOD_NONE)); + + return (__force swreg)(FIELD_PREP(NN_REG_TYPE, NN_REG_LMEM) | + FIELD_PREP(NN_REG_LM_IDX, id) | + FIELD_PREP(NN_REG_LM_MOD, mode) | + off); +} + static inline u32 swreg_raw(swreg reg) { return (__force u32)reg; @@ -277,6 +308,16 @@ static inline u16 swreg_value(swreg reg) return FIELD_GET(NN_REG_VAL, swreg_raw(reg)); } +static inline bool swreg_lm_idx(swreg reg) +{ + return FIELD_GET(NN_REG_LM_IDX, swreg_raw(reg)); +} + +static inline enum nfp_bpf_lm_mode swreg_lm_mode(swreg reg) +{ + return FIELD_GET(NN_REG_LM_MOD, swreg_raw(reg)); +} + struct nfp_insn_ur_regs { enum alu_dst_ab dst_ab; u16 dst; From 995e101ffa71eff6ae5f5d5bf1ca8ec757b4ed21 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:12 -0700 Subject: [PATCH 0723/2177] nfp: bpf: encode extended LM pointer operands Most instructions have special fields which allow switching between base and extended Local Memory pointers. Introduce those to register encoding, we will use the extra LM pointers to access high addresses of the stack. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 56 ++++++++++++++------ drivers/net/ethernet/netronome/nfp/nfp_asm.c | 6 +++ drivers/net/ethernet/netronome/nfp/nfp_asm.h | 28 ++++++++-- 3 files changed, 70 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 4fa220f710d2..d7dc19feba8d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -153,6 +153,11 @@ emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, nfp_prog->error = -EFAULT; return; } + if (reg.dst_lmextn || reg.src_lmextn) { + pr_err("cmd can't use LMextn\n"); + nfp_prog->error = -EFAULT; + return; + } __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync); } @@ -198,7 +203,7 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) static void __emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, - u8 byte, bool equal, u16 addr, u8 defer) + u8 byte, bool equal, u16 addr, u8 defer, bool src_lmextn) { u16 addr_lo, addr_hi; u64 insn; @@ -214,32 +219,34 @@ __emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, FIELD_PREP(OP_BB_EQ, equal) | FIELD_PREP(OP_BB_DEFBR, defer) | FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | - FIELD_PREP(OP_BB_ADDR_HI, addr_hi); + FIELD_PREP(OP_BB_ADDR_HI, addr_hi) | + FIELD_PREP(OP_BB_SRC_LMEXTN, src_lmextn); nfp_prog_push(nfp_prog, insn); } static void emit_br_byte_neq(struct nfp_prog *nfp_prog, - swreg dst, u8 imm, u8 byte, u16 addr, u8 defer) + swreg src, u8 imm, u8 byte, u16 addr, u8 defer) { struct nfp_insn_re_regs reg; int err; - err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), ®, true); + err = swreg_to_restricted(reg_none(), src, reg_imm(imm), ®, true); if (err) { nfp_prog->error = err; return; } __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, - defer); + defer, reg.src_lmextn); } static void __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, enum immed_width width, bool invert, - enum immed_shift shift, bool wr_both) + enum immed_shift shift, bool wr_both, + bool dst_lmextn, bool src_lmextn) { u64 insn; @@ -250,7 +257,9 @@ __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, FIELD_PREP(OP_IMMED_WIDTH, width) | FIELD_PREP(OP_IMMED_INV, invert) | FIELD_PREP(OP_IMMED_SHIFT, shift) | - FIELD_PREP(OP_IMMED_WR_AB, wr_both); + FIELD_PREP(OP_IMMED_WR_AB, wr_both) | + FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn); nfp_prog_push(nfp_prog, insn); } @@ -274,13 +283,15 @@ emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm, } __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width, - invert, shift, reg.wr_both); + invert, shift, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); } static void __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, enum shf_sc sc, u8 shift, - u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both) + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both, + bool dst_lmextn, bool src_lmextn) { u64 insn; @@ -302,7 +313,9 @@ __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, FIELD_PREP(OP_SHF_SHIFT, shift) | FIELD_PREP(OP_SHF_OP, op) | FIELD_PREP(OP_SHF_DST_AB, dst_ab) | - FIELD_PREP(OP_SHF_WR_AB, wr_both); + FIELD_PREP(OP_SHF_WR_AB, wr_both) | + FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn); nfp_prog_push(nfp_prog, insn); } @@ -321,12 +334,14 @@ 
emit_shf(struct nfp_prog *nfp_prog, swreg dst, } __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, - reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both); + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); } static void __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, - u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both) + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, + bool dst_lmextn, bool src_lmextn) { u64 insn; @@ -337,7 +352,9 @@ __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, FIELD_PREP(OP_ALU_SW, swap) | FIELD_PREP(OP_ALU_OP, op) | FIELD_PREP(OP_ALU_DST_AB, dst_ab) | - FIELD_PREP(OP_ALU_WR_AB, wr_both); + FIELD_PREP(OP_ALU_WR_AB, wr_both) | + FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn); nfp_prog_push(nfp_prog, insn); } @@ -356,13 +373,15 @@ emit_alu(struct nfp_prog *nfp_prog, swreg dst, } __emit_alu(nfp_prog, reg.dst, reg.dst_ab, - reg.areg, op, reg.breg, reg.swap, reg.wr_both); + reg.areg, op, reg.breg, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); } static void __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, - bool zero, bool swap, bool wr_both) + bool zero, bool swap, bool wr_both, + bool dst_lmextn, bool src_lmextn) { u64 insn; @@ -375,7 +394,9 @@ __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, FIELD_PREP(OP_LDF_ZF, zero) | FIELD_PREP(OP_LDF_BMASK, bmask) | FIELD_PREP(OP_LDF_SHF, shift) | - FIELD_PREP(OP_LDF_WR_AB, wr_both); + FIELD_PREP(OP_LDF_WR_AB, wr_both) | + FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn); nfp_prog_push(nfp_prog, insn); } @@ -394,7 +415,8 @@ emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, } __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, - reg.i8, zero, reg.swap, reg.wr_both); + reg.i8, zero, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); } static void diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index 4bcab43da16d..1decc638ea6f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -130,6 +130,9 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, reg->breg = nfp_swreg_to_unreg(rreg, false); } + reg->dst_lmextn = swreg_lmextn(dst); + reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); + return 0; } @@ -207,5 +210,8 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); } + reg->dst_lmextn = swreg_lmextn(dst); + reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index d722f6878bd8..40a51a45afd7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -100,6 +100,7 @@ enum br_ctx_signal_state { #define OP_BB_DEFBR 0x00000300000ULL #define OP_BB_ADDR_LO 0x007ffc00000ULL #define OP_BB_ADDR_HI 0x10000000000ULL +#define OP_BB_SRC_LMEXTN 0x40000000000ULL #define OP_BALU_BASE 0x0e800000000ULL #define OP_BA_A_SRC 0x000000003ffULL @@ -115,6 +116,8 @@ enum br_ctx_signal_state { #define OP_IMMED_SHIFT 0x00600000000ULL #define OP_IMMED_BASE 0x0f000000000ULL #define OP_IMMED_WR_AB 0x20000000000ULL +#define OP_IMMED_SRC_LMEXTN 0x40000000000ULL +#define 
OP_IMMED_DST_LMEXTN 0x80000000000ULL enum immed_width { IMMED_WIDTH_ALL = 0, @@ -139,6 +142,8 @@ enum immed_shift { #define OP_SHF_OP 0x00e00000000ULL #define OP_SHF_DST_AB 0x01000000000ULL #define OP_SHF_WR_AB 0x20000000000ULL +#define OP_SHF_SRC_LMEXTN 0x40000000000ULL +#define OP_SHF_DST_LMEXTN 0x80000000000ULL enum shf_op { SHF_OP_NONE = 0, @@ -161,6 +166,8 @@ enum shf_sc { #define OP_ALU_DST_AB 0x01000000000ULL #define OP_ALU_BASE 0x0a000000000ULL #define OP_ALU_WR_AB 0x20000000000ULL +#define OP_ALU_SRC_LMEXTN 0x40000000000ULL +#define OP_ALU_DST_LMEXTN 0x80000000000ULL enum alu_op { ALU_OP_NONE = 0x00, @@ -189,6 +196,8 @@ enum alu_dst_ab { #define OP_LDF_BMASK 0x0000f000000ULL #define OP_LDF_SHF 0x001f0000000ULL #define OP_LDF_WR_AB 0x20000000000ULL +#define OP_LDF_SRC_LMEXTN 0x40000000000ULL +#define OP_LDF_DST_LMEXTN 0x80000000000ULL #define OP_CMD_A_SRC 0x000000000ffULL #define OP_CMD_CTX 0x00000000300ULL @@ -231,6 +240,8 @@ enum cmd_ctx_swap { #define OP_LCSR_B_SRC 0x000000ffc00ULL #define OP_LCSR_WRITE 0x00000200000ULL #define OP_LCSR_ADDR 0x001ffc00000ULL +#define OP_LCSR_SRC_LMEXTN 0x40000000000ULL +#define OP_LCSR_DST_LMEXTN 0x80000000000ULL enum lcsr_wr_src { LCSR_WR_AREG, @@ -243,7 +254,9 @@ enum lcsr_wr_src { /* Software register representation, independent of operand type */ #define NN_REG_TYPE GENMASK(31, 24) -#define NN_REG_LM_IDX BIT(22) +#define NN_REG_LM_IDX GENMASK(23, 22) +#define NN_REG_LM_IDX_HI BIT(23) +#define NN_REG_LM_IDX_LO BIT(22) #define NN_REG_LM_MOD GENMASK(21, 20) #define NN_REG_VAL GENMASK(7, 0) @@ -285,7 +298,7 @@ static inline swreg __enc_swreg(u16 id, u8 type) static inline swreg __enc_swreg_lm(u8 id, enum nfp_bpf_lm_mode mode, u8 off) { - WARN_ON(id > 1 || (off && mode != NN_LM_MOD_NONE)); + WARN_ON(id > 3 || (off && mode != NN_LM_MOD_NONE)); return (__force swreg)(FIELD_PREP(NN_REG_TYPE, NN_REG_LMEM) | FIELD_PREP(NN_REG_LM_IDX, id) | @@ -310,7 +323,12 @@ static inline u16 swreg_value(swreg reg) static inline bool swreg_lm_idx(swreg reg) { - return FIELD_GET(NN_REG_LM_IDX, swreg_raw(reg)); + return FIELD_GET(NN_REG_LM_IDX_LO, swreg_raw(reg)); +} + +static inline bool swreg_lmextn(swreg reg) +{ + return FIELD_GET(NN_REG_LM_IDX_HI, swreg_raw(reg)); } static inline enum nfp_bpf_lm_mode swreg_lm_mode(swreg reg) @@ -324,6 +342,8 @@ struct nfp_insn_ur_regs { u16 areg, breg; bool swap; bool wr_both; + bool dst_lmextn; + bool src_lmextn; }; struct nfp_insn_re_regs { @@ -333,6 +353,8 @@ struct nfp_insn_re_regs { bool swap; bool wr_both; bool i8; + bool dst_lmextn; + bool src_lmextn; }; int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, From 18e53b6cb9ac157f4b2c7db698d4adc064df2fa0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:13 -0700 Subject: [PATCH 0724/2177] nfp: bpf: move to datapath ABI version 2 Datapath ABI version 2 stores the packet information in LMEM instead of NNRs. We also have strict restrictions on which GPRs we can use. Only GPRs 0-23 are reserved for BPF. Adjust the static register locations and "ABI" registers. Note that packet length is packed with other info so we have to extract it into one of the scratch registers, OTOH since LMEM can be used in restricted operands we don't have to extract packet pointer. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 19 ++++++++------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 24 ++++++++++++------- .../net/ethernet/netronome/nfp/nfp_net_ctrl.h | 2 +- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index d7dc19feba8d..9b6c98ccebfe 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -526,22 +526,22 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, emit_alu(nfp_prog, imm_a(nfp_prog), imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); emit_alu(nfp_prog, reg_none(), - NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog)); + plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); /* Load data */ emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, - pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); + pptr_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); } else { /* Check packet length */ tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); emit_alu(nfp_prog, reg_none(), - NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg); + plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); /* Load data */ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, - pkt_reg(nfp_prog), tmp_reg, sz - 1, true); + pptr_reg(nfp_prog), tmp_reg, sz - 1, true); } i = 0; @@ -1024,7 +1024,7 @@ static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off == offsetof(struct sk_buff, len)) emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN); + reg_none(), ALU_OP_NONE, plen_reg(nfp_prog)); else return -EOPNOTSUPP; @@ -1039,12 +1039,12 @@ static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) meta->insn.off != offsetof(struct xdp_md, data_end)) return -EOPNOTSUPP; - emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); + emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, pptr_reg(nfp_prog)); if (meta->insn.off == offsetof(struct xdp_md, data)) return 0; - emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, NFP_BPF_ABI_LEN); + emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, plen_reg(nfp_prog)); return 0; } @@ -1403,8 +1403,9 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) static void nfp_intro(struct nfp_prog *nfp_prog) { - emit_alu(nfp_prog, pkt_reg(nfp_prog), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); + wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); } static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 7d959757a51a..b7a112acbdb7 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -54,9 +54,13 @@ enum br_special { }; enum static_regs { - STATIC_REG_PKT = 1, -#define REG_PKT_BANK ALU_DST_A - STATIC_REG_IMM = 2, /* Bank AB */ + STATIC_REG_IMM = 21, /* Bank AB */ + STATIC_REG_PKT_LEN = 22, /* Bank B */ +}; + +enum pkt_vec { + PKT_VEC_PKT_LEN = 0, + PKT_VEC_PKT_PTR = 2, }; enum nfp_bpf_action_type { @@ -66,15 +70,17 @@ enum nfp_bpf_action_type { NN_ACT_XDP, }; -#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT) -#define imm_a(np) reg_a((np)->regs_per_thread - 
STATIC_REG_IMM) -#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) -#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) +#define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN) +#define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR) + +#define plen_reg(np) reg_b(STATIC_REG_PKT_LEN) +#define pptr_reg(np) pv_ctm_ptr(np) +#define imm_a(np) reg_a(STATIC_REG_IMM) +#define imm_b(np) reg_b(STATIC_REG_IMM) +#define imm_both(np) reg_both(STATIC_REG_IMM) #define NFP_BPF_ABI_FLAGS reg_imm(0) #define NFP_BPF_ABI_FLAG_MARK 1 -#define NFP_BPF_ABI_PKT reg_nnr(2) -#define NFP_BPF_ABI_LEN reg_nnr(3) struct nfp_prog; struct nfp_insn_meta; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index b0a452ba9039..782d452e0fc2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -255,7 +255,7 @@ * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ #define NFP_NET_CFG_BPF_ABI 0x0080 -#define NFP_NET_BPF_ABI 1 +#define NFP_NET_BPF_ABI 2 #define NFP_NET_CFG_BPF_CAP 0x0081 #define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ #define NFP_NET_CFG_BPF_MAX_LEN 0x0082 From fd068ddc888355dccd90ad610104e4addf23b7a3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:14 -0700 Subject: [PATCH 0725/2177] nfp: bpf: calculate code store ECC In the initial PoC firmware I simply disabled ECC on the instruction store. Do the ECC calculation for generated instructions in the driver. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 20 +++++++++++ drivers/net/ethernet/netronome/nfp/nfp_asm.c | 37 ++++++++++++++++++++ drivers/net/ethernet/netronome/nfp/nfp_asm.h | 3 ++ 3 files changed, 60 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 9b6c98ccebfe..f4aedc89bfc8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1715,6 +1715,23 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) return 0; } +static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog) +{ + int i; + + for (i = 0; i < nfp_prog->prog_len; i++) { + int err; + + err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]); + if (err) + return err; + + nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]); + } + + return 0; +} + /** * nfp_bpf_jit() - translate BPF code into NFP assembly * @filter: kernel BPF filter struct @@ -1766,8 +1783,11 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, pr_err("Translation failed with error %d (translated: %u)\n", ret, nfp_prog->n_translated); ret = -EINVAL; + goto out; } + ret = nfp_bpf_ustore_calc(nfp_prog); + res->n_instr = nfp_prog->prog_len; res->dense_mode = false; out: diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index 1decc638ea6f..de76e7444fc2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -215,3 +215,40 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, return 0; } + +#define NFP_USTORE_ECC_POLY_WORDS 7 +#define NFP_USTORE_OP_BITS 45 + +static const u64 nfp_ustore_ecc_polynomials[NFP_USTORE_ECC_POLY_WORDS] = { + 0x0ff800007fffULL, + 0x11f801ff801fULL, + 0x1e387e0781e1ULL, + 0x17cb8e388e22ULL, + 0x1af5b2c93244ULL, + 0x1f56d5525488ULL, + 0x0daf69a46910ULL, +}; + 
+static bool parity(u64 value) +{ + return hweight64(value) & 1; +} + +int nfp_ustore_check_valid_no_ecc(u64 insn) +{ + if (insn & ~GENMASK_ULL(NFP_USTORE_OP_BITS, 0)) + return -EINVAL; + + return 0; +} + +u64 nfp_ustore_calc_ecc_insn(u64 insn) +{ + u8 ecc = 0; + int i; + + for (i = 0; i < NFP_USTORE_ECC_POLY_WORDS; i++) + ecc |= parity(nfp_ustore_ecc_polynomials[i] & insn) << i; + + return insn | (u64)ecc << NFP_USTORE_OP_BITS; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 40a51a45afd7..d95087e5fb73 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -362,4 +362,7 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, struct nfp_insn_re_regs *reg, bool has_imm8); +int nfp_ustore_check_valid_no_ecc(u64 insn); +u64 nfp_ustore_calc_ecc_insn(u64 insn); + #endif From 1c03e03f9b5278701d4a0e3444b2de3b9ddc244b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:15 -0700 Subject: [PATCH 0726/2177] nfp: bpf: pad code with valid nops We need to append up to 8 nops after last instruction to make sure the CPU will not fetch garbage instructions with invalid ECC if the code store was not initialized. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 12 +++++++++++- drivers/net/ethernet/netronome/nfp/nfp_asm.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index f4aedc89bfc8..e0600d037773 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -426,6 +426,11 @@ emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); } +static void emit_nop(struct nfp_prog *nfp_prog) +{ + __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); +} + /* --- Wrappers --- */ static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) { @@ -1550,7 +1555,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog) static int nfp_translate(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta; - int err; + int i, err; nfp_intro(nfp_prog); if (nfp_prog->error) @@ -1582,6 +1587,11 @@ static int nfp_translate(struct nfp_prog *nfp_prog) if (nfp_prog->error) return nfp_prog->error; + for (i = 0; i < NFP_USTORE_PREFETCH_WINDOW; i++) + emit_nop(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + return nfp_fixup_branches(nfp_prog); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index d95087e5fb73..c4c18dd5630a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -362,6 +362,8 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, struct nfp_insn_re_regs *reg, bool has_imm8); +#define NFP_USTORE_PREFETCH_WINDOW 8 + int nfp_ustore_check_valid_no_ecc(u64 insn); u64 nfp_ustore_calc_ecc_insn(u64 insn); From 2e85d3884f25a0419a941676d1a7c25779884be2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:16 -0700 Subject: [PATCH 0727/2177] nfp: bpf: byte swap the instructions Device expects the instructions in little endian. Make sure we byte swap on big endian hosts. 
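For illustration only, an equivalent userspace sketch of that conversion: copy_insns_le() is a made-up helper, and htole64() from <endian.h> is assumed available (a glibc/musl extension), whereas the driver itself relies on the kernel's cpu_to_le64().

#include <endian.h>   /* htole64(); assumed available on this libc */
#include <stdint.h>
#include <string.h>

static void copy_insns_le(uint64_t *dst_le, const uint64_t *src, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++) {
		uint64_t le = htole64(src[i]);  /* no-op on LE hosts, byte swap on BE */

		memcpy(&dst_le[i], &le, sizeof(le));
	}
}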
Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index e0600d037773..f68052367db7 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1725,7 +1725,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) return 0; } -static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog) +static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) { int i; @@ -1737,6 +1737,8 @@ static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog) return err; nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]); + + ustore[i] = cpu_to_le64(nfp_prog->prog[i]); } return 0; @@ -1796,7 +1798,7 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, goto out; } - ret = nfp_bpf_ustore_calc(nfp_prog); + ret = nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)prog_mem); res->n_instr = nfp_prog->prog_len; res->dense_mode = false; From 2de1be1db25d3285f514920230790be20db92887 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 8 Oct 2017 21:04:17 -0700 Subject: [PATCH 0728/2177] nfp: bpf: pass dst register to ld_field instruction ld_field instruction is a bit special because the encoding uses two source registers and one of them becomes the output. We do need to pass the dst register to our encoding helpers though, otherwise the "write both banks" flag will not be observed. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index f68052367db7..13148f30fc4c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -408,7 +408,8 @@ emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, struct nfp_insn_re_regs reg; int err; - err = swreg_to_restricted(reg_none(), dst, src, ®, true); + /* Note: ld_field is special as it uses one of the src regs as dst */ + err = swreg_to_restricted(dst, dst, src, ®, true); if (err) { nfp_prog->error = err; return; From 86e23494222f358138e3d2c337f57577b0893797 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 4 Sep 2017 20:40:22 +0200 Subject: [PATCH 0729/2177] ixgbe: add counter for times Rx pages gets allocated, not recycled The ixgbe driver have page recycle scheme based around the RX-ring queue, where a RX page is shared between two packets. Based on the refcnt, the driver can determine if the RX-page is currently only used by a single packet, if so it can then directly refill/recycle the RX-slot by with the opposite "side" of the page. While this is a clever trick, it is hard to determine when this recycling is successful and when it fails. Adding a counter, which is available via ethtool --statistics as 'alloc_rx_page'. Which counts the number of times the recycle fails and the real page allocator is invoked. When interpreting the stats, do remember that every alloc will serve two packets. The counter is collected per rx_ring, but is summed and ethtool exported as 'alloc_rx_page'. It would be relevant to know what rx_ring that cannot keep up, but that can be exported later if someone experience a need for this. 
Signed-off-by: Jesper Dangaard Brouer Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 ++ drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index dd5578756ae0..008d0085e01f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -275,6 +275,7 @@ struct ixgbe_rx_queue_stats { u64 rsc_count; u64 rsc_flush; u64 non_eop_descs; + u64 alloc_rx_page; u64 alloc_rx_page_failed; u64 alloc_rx_buff_failed; u64 csum_err; @@ -655,6 +656,7 @@ struct ixgbe_adapter { u64 rsc_total_count; u64 rsc_total_flush; u64 non_eop_descs; + u32 alloc_rx_page; u32 alloc_rx_page_failed; u32 alloc_rx_buff_failed; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 6d89f28cae06..de5704c7dd1b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -104,6 +104,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = { {"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)}, {"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)}, {"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)}, + {"alloc_rx_page", IXGBE_STAT(alloc_rx_page)}, {"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)}, {"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)}, {"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)}, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d83cc9d34de3..211074934d5b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1620,6 +1620,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, bi->page = page; bi->page_offset = ixgbe_rx_offset(rx_ring); bi->pagecnt_bias = 1; + rx_ring->rx_stats.alloc_rx_page++; return true; } @@ -6794,6 +6795,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot; u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0; u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0; + u64 alloc_rx_page = 0; u64 bytes = 0, packets = 0, hw_csum_rx_error = 0; if (test_bit(__IXGBE_DOWN, &adapter->state) || @@ -6814,6 +6816,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *rx_ring = adapter->rx_ring[i]; non_eop_descs += rx_ring->rx_stats.non_eop_descs; + alloc_rx_page += rx_ring->rx_stats.alloc_rx_page; alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed; alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed; hw_csum_rx_error += rx_ring->rx_stats.csum_err; @@ -6821,6 +6824,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) packets += rx_ring->stats.packets; } adapter->non_eop_descs = non_eop_descs; + adapter->alloc_rx_page = alloc_rx_page; adapter->alloc_rx_page_failed = alloc_rx_page_failed; adapter->alloc_rx_buff_failed = alloc_rx_buff_failed; adapter->hw_csum_rx_error = hw_csum_rx_error; From dcfd6b839c998bc9838e2a47f44f37afbdf3099c Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 11 Sep 2017 14:21:31 -0700 Subject: [PATCH 0730/2177] ixgbe: fix use of uninitialized padding This patch is resolving Coverity hits where padding in a structure could be used uninitialized. 
- Initialize fwd_cmd.pad/2 before ixgbe_calculate_checksum() - Initialize buffer.pad2/3 before ixgbe_hic_unlocked() Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 2c19070d2a0b..041940c4bb2b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -3800,10 +3800,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, fw_cmd.ver_build = build; fw_cmd.ver_sub = sub; fw_cmd.hdr.checksum = 0; - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); fw_cmd.pad = 0; fw_cmd.pad2 = 0; + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { ret_val = ixgbe_host_interface_command(hw, &fw_cmd, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 8cea53b62e1b..cb7da5f9c4da 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -900,6 +900,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, /* convert offset from words to bytes */ buffer.address = cpu_to_be32((offset + current_word) * 2); buffer.length = cpu_to_be16(words_to_read * 2); + buffer.pad2 = 0; + buffer.pad3 = 0; status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer), IXGBE_HI_COMMAND_TIMEOUT); From c3aec05dfe2cb1bc1fdf3593da82bc987211338a Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 11 Sep 2017 14:21:36 -0700 Subject: [PATCH 0731/2177] ixgbe: fix the FWSM.PT check in ixgbe_mng_present() Bits other than FWSM.PT can be set in IXGBE_SWFW_MODE_MASK making the previous check invalid. Change the check for MNG present to be only based on FWSM.PT bit. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 041940c4bb2b..4e5c92dea869 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -4100,8 +4100,8 @@ bool ixgbe_mng_present(struct ixgbe_hw *hw) return false; fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw)); - fwsm &= IXGBE_FWSM_MODE_MASK; - return fwsm == IXGBE_FWSM_FW_MODE_PT; + + return !!(fwsm & IXGBE_FWSM_FW_MODE_PT); } /** From b4ded8327fea82b53fcec39e0845011246d020f4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 25 Sep 2017 14:55:36 -0700 Subject: [PATCH 0732/2177] ixgbe: Update adaptive ITR algorithm The following change is meant to update the adaptive ITR algorithm to better support the needs of the network. Specifically with this change what I have done is make it so that our ITR algorithm will try to prevent either starving a socket buffer for memory in the case of Tx, or overrunning an Rx socket buffer on receive. In addition a side effect of the calculations used is that we should function better with new features such as XDP which can handle small packets at high rates without needing to lock us into NAPI polling mode. 
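To make the size-based fallback concrete, here is a small standalone sketch of the bulk-mode, 10G-link branch of the new calculation; the piecewise constants and the 256x scaling are lifted from the diff below, while approx_itr_usecs() and the sample frame sizes are only illustrative (the low-latency path additionally halves the scaled value first).

#include <stdio.h>

#define ITR_ADAPTIVE_MIN_INC 2

static unsigned int approx_itr_usecs(unsigned int avg_wire_size)
{
	unsigned int scaled;

	/* piecewise-linear stand-in for (170 * (size + 24)) / (size + 640) */
	if (avg_wire_size <= 60)
		scaled = 5120;				/* ~50K ints/sec */
	else if (avg_wire_size <= 316)
		scaled = avg_wire_size * 40 + 2720;	/* 50K..16K ints/sec */
	else if (avg_wire_size <= 1084)
		scaled = avg_wire_size * 15 + 11452;	/* 16K..9.2K ints/sec */
	else if (avg_wire_size <= 1980)
		scaled = avg_wire_size * 5 + 22420;	/* 9.2K..8K ints/sec */
	else
		scaled = 32256;				/* plateau at ~8K ints/sec */

	/* result is 256x too large; round up to a multiple of MIN_INC (10G case) */
	return (scaled + ITR_ADAPTIVE_MIN_INC * 256 - 1) /
	       (ITR_ADAPTIVE_MIN_INC * 256) * ITR_ADAPTIVE_MIN_INC;
}

int main(void)
{
	unsigned int sizes[] = { 64, 512, 1514 };
	unsigned int i;

	for (i = 0; i < 3; i++)
		printf("avg %u-byte frames -> ~%u usec between interrupts\n",
		       sizes[i], approx_itr_usecs(sizes[i]));
	return 0;
}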
Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 7 + drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 11 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 217 +++++++++++++----- 3 files changed, 179 insertions(+), 56 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 008d0085e01f..468c3555a629 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -435,8 +435,15 @@ static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) } #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) +#define IXGBE_ITR_ADAPTIVE_MIN_INC 2 +#define IXGBE_ITR_ADAPTIVE_MIN_USECS 10 +#define IXGBE_ITR_ADAPTIVE_MAX_USECS 126 +#define IXGBE_ITR_ADAPTIVE_LATENCY 0x80 +#define IXGBE_ITR_ADAPTIVE_BULK 0x00 + struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ + unsigned long next_update; /* jiffies value of last update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ u16 work_limit; /* total work allowed per interrupt */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index f1bfae0c41d0..8e2a957aca18 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -806,6 +806,7 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring, ring->next = head->ring; head->ring = ring; head->count++; + head->next_update = jiffies + 1; } /** @@ -879,8 +880,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* initialize work limits */ q_vector->tx.work_limit = adapter->tx_work_limit; - /* initialize pointer to rings */ - ring = q_vector->ring; + /* Initialize setting for adaptive ITR */ + q_vector->tx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS | + IXGBE_ITR_ADAPTIVE_LATENCY; + q_vector->rx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS | + IXGBE_ITR_ADAPTIVE_LATENCY; /* intialize ITR */ if (txr_count && !rxr_count) { @@ -897,6 +901,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, q_vector->itr = adapter->rx_itr_setting; } + /* initialize pointer to rings */ + ring = q_vector->ring; + while (txr_count) { /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 211074934d5b..5e2686d106db 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2540,50 +2540,174 @@ enum latency_range { static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, struct ixgbe_ring_container *ring_container) { - int bytes = ring_container->total_bytes; - int packets = ring_container->total_packets; - u32 timepassed_us; - u64 bytes_perint; - u8 itr_setting = ring_container->itr; + unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS | + IXGBE_ITR_ADAPTIVE_LATENCY; + unsigned int avg_wire_size, packets, bytes; + unsigned long next_update = jiffies; - if (packets == 0) - return; - - /* simple throttlerate management - * 0-10MB/s lowest (100000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (12000 ints/s) + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. */ - /* what was last interrupt timeslice? 
*/ - timepassed_us = q_vector->itr >> 2; - if (timepassed_us == 0) + if (!ring_container->ring) return; - bytes_perint = bytes / timepassed_us; /* bytes/usec */ + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, ring_container->next_update)) + goto clear_counts; - switch (itr_setting) { - case lowest_latency: - if (bytes_perint > 10) - itr_setting = low_latency; + packets = ring_container->total_packets; + + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * When this occurs just tick up our delay by the minimum value + * and hope that this extra delay will prevent us from being called + * without any work on our queue. + */ + if (!packets) { + itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC; + if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS) + itr = IXGBE_ITR_ADAPTIVE_MAX_USECS; + itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY; + goto clear_counts; + } + + bytes = ring_container->total_bytes; + + /* If packets are less than 4 or bytes are less than 9000 assume + * insufficient data to use bulk rate limiting approach. We are + * likely latency driven. + */ + if (packets < 4 && bytes < 9000) { + itr = IXGBE_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + + /* Between 4 and 48 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. + */ + if (packets < 48) { + itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC; + if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS) + itr = IXGBE_ITR_ADAPTIVE_MAX_USECS; + goto clear_counts; + } + + /* Between 48 and 96 is our "goldilocks" zone where we are working + * out "just right". Just report that our current ITR is good for us. + */ + if (packets < 96) { + itr = q_vector->itr >> 2; + goto clear_counts; + } + + /* If packet count is 96 or greater we are likely looking at a slight + * overrun of the delay we want. Try halving our delay to see if that + * will cut the number of packets in half per interrupt. + */ + if (packets < 256) { + itr = q_vector->itr >> 3; + if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS) + itr = IXGBE_ITR_ADAPTIVE_MIN_USECS; + goto clear_counts; + } + + /* The paths below assume we are dealing with a bulk ITR since number + * of packets is 256 or greater. We are just going to have to compute + * a value and try to bring the count under control, though for smaller + * packet sizes there isn't much we can do as NAPI polling will likely + * be kicking in sooner rather than later. + */ + itr = IXGBE_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. 
+ */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. + */ + if (avg_wire_size <= 60) { + /* Start at 50k ints/sec */ + avg_wire_size = 5120; + } else if (avg_wire_size <= 316) { + /* 50K ints/sec to 16K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 2720; + } else if (avg_wire_size <= 1084) { + /* 16K ints/sec to 9.2K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 9.2K ints/sec to 8K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 8K ints/sec */ + avg_wire_size = 32256; + } + + /* If we are in low latency mode half our delay which doubles the rate + * to somewhere between 100K to 16K ints/sec + */ + if (itr & IXGBE_ITR_ADAPTIVE_LATENCY) + avg_wire_size >>= 1; + + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + switch (q_vector->adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + case IXGBE_LINK_SPEED_100_FULL: + default: + itr += DIV_ROUND_UP(avg_wire_size, + IXGBE_ITR_ADAPTIVE_MIN_INC * 256) * + IXGBE_ITR_ADAPTIVE_MIN_INC; break; - case low_latency: - if (bytes_perint > 20) - itr_setting = bulk_latency; - else if (bytes_perint <= 10) - itr_setting = lowest_latency; - break; - case bulk_latency: - if (bytes_perint <= 20) - itr_setting = low_latency; + case IXGBE_LINK_SPEED_2_5GB_FULL: + case IXGBE_LINK_SPEED_1GB_FULL: + case IXGBE_LINK_SPEED_10_FULL: + itr += DIV_ROUND_UP(avg_wire_size, + IXGBE_ITR_ADAPTIVE_MIN_INC * 64) * + IXGBE_ITR_ADAPTIVE_MIN_INC; break; } - /* clear work counters since we have the values we need */ +clear_counts: + /* write back value */ + ring_container->itr = itr; + + /* next update should occur within next jiffy */ + ring_container->next_update = next_update + 1; + ring_container->total_bytes = 0; ring_container->total_packets = 0; - - /* write updated itr to ring container */ - ring_container->itr = itr_setting; } /** @@ -2625,34 +2749,19 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector) static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector) { - u32 new_itr = q_vector->itr; - u8 current_itr; + u32 new_itr; ixgbe_update_itr(q_vector, &q_vector->tx); ixgbe_update_itr(q_vector, &q_vector->rx); - current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + /* use the smallest value of new ITR delay calculations */ + new_itr = min(q_vector->rx.itr, q_vector->tx.itr); - switch (current_itr) { - /* counts and packets in update_itr are dependent on these numbers */ - case lowest_latency: - new_itr = IXGBE_100K_ITR; - break; - case low_latency: - new_itr = IXGBE_20K_ITR; - break; - case bulk_latency: - new_itr = IXGBE_12K_ITR; - break; - default: - break; - } + /* Clear latency flag 
if set, shift into correct position */ + new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY; + new_itr <<= 2; if (new_itr != q_vector->itr) { - /* do an exponential smoothing */ - new_itr = (10 * new_itr * q_vector->itr) / - ((9 * new_itr) + q_vector->itr); - /* save the algorithm value here */ q_vector->itr = new_itr; From b64666ae00327efe53613fc180df6ffbeee1d8d1 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 29 Sep 2017 10:55:42 -0700 Subject: [PATCH 0733/2177] ixgbe: fix crash when injecting AER after failed reset In case where AER recovery fails the device is left in a down state. Consecutive AER error injection can lead to a double IRQ free. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5e2686d106db..c6f9da7990c7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10861,6 +10861,9 @@ skip_bad_vf_detection: if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state)) return PCI_ERS_RESULT_DISCONNECT; + if (!netif_device_present(netdev)) + return PCI_ERS_RESULT_DISCONNECT; + rtnl_lock(); netif_device_detach(netdev); From 77041420751fe6d4acf2103b245dcc2b4b7b8360 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 9 Oct 2017 11:15:31 +0200 Subject: [PATCH 0734/2177] net: bridge: Notify on bridge device mrouter state changes Add the SWITCHDEV_ATTR_ID_BRIDGE_MROUTER switchdev notification type, used to indicate whether the bridge is or isn't mrouter. Notify when the bridge changes its state, similarly to the already existing bridged port mrouter notifications. The notification uses the switchdev_attr.u.mrouter boolean flag to indicate the current bridge mrouter status. Thus, it only indicates whether the bridge is currently used as an mrouter or not, and does not indicate the exact mrouter state of the bridge (learning, permanent, etc.). Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 1 + net/bridge/br_multicast.c | 38 +++++++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d767b7991887..d756fbe46625 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -51,6 +51,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, + SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, }; struct switchdev_attr { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8dc5c8d69bcd..bd50550dd4ca 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -859,8 +859,32 @@ out: spin_unlock(&br->multicast_lock); } +static void br_mc_router_state_change(struct net_bridge *p, + bool is_mc_router) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, + .flags = SWITCHDEV_F_DEFER, + .u.mrouter = is_mc_router, + }; + + switchdev_port_attr_set(p->dev, &attr); +} + static void br_multicast_local_router_expired(unsigned long data) { + struct net_bridge *br = (struct net_bridge *)data; + + spin_lock(&br->multicast_lock); + if (br->multicast_router == MDB_RTR_TYPE_DISABLED || + br->multicast_router == MDB_RTR_TYPE_PERM || + timer_pending(&br->multicast_router_timer)) + goto out; + + br_mc_router_state_change(br, false); +out: + spin_unlock(&br->multicast_lock); } static void br_multicast_querier_expired(struct net_bridge *br, @@ -1364,9 +1388,12 @@ static void br_multicast_mark_router(struct net_bridge *br, unsigned long now = jiffies; if (!port) { - if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) + if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) { + if (!timer_pending(&br->multicast_router_timer)) + br_mc_router_state_change(br, true); mod_timer(&br->multicast_router_timer, now + br->multicast_querier_interval); + } return; } @@ -1952,7 +1979,7 @@ void br_multicast_init(struct net_bridge *br) spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, - br_multicast_local_router_expired, 0); + br_multicast_local_router_expired, (unsigned long)br); setup_timer(&br->ip4_other_query.timer, br_ip4_multicast_querier_expired, (unsigned long)br); setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired, @@ -2042,9 +2069,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val) switch (val) { case MDB_RTR_TYPE_DISABLED: case MDB_RTR_TYPE_PERM: + br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM); del_timer(&br->multicast_router_timer); - /* fall through */ + br->multicast_router = val; + err = 0; + break; case MDB_RTR_TYPE_TEMP_QUERY: + if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) + br_mc_router_state_change(br, false); br->multicast_router = val; err = 0; break; From 0912bda436388a02c72164b4b490b578e64c012e Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 9 Oct 2017 11:15:32 +0200 Subject: [PATCH 0735/2177] net: bridge: Export bridge multicast router state Add an access function that, given a bridge netdevice, returns whether the bridge device is currently an mrouter or not. The function uses the already existing br_multicast_is_router function to check that. This function is needed in order to allow ports that join an already existing bridge to know the current mrouter state of the bridge device. 
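As a rough illustration of the intended caller (not part of this patch; the structure and function names below are hypothetical stand-ins, the real consumer is the mlxsw bridge code added later in this series), a switchdev driver that builds a private bridge representation when the first port is enslaved could seed its cached state from the new helper:

#include <linux/if_bridge.h>
#include <linux/netdevice.h>

/* Hypothetical driver-private bridge representation. */
struct mydrv_bridge {
	struct net_device *br_dev;
	bool mrouter;			/* cached bridge mrouter state */
};

/* Called when the first port of @br_dev is enslaved: seed the cached
 * mrouter state so MDB entries can include the router port right away.
 */
static void mydrv_bridge_create(struct mydrv_bridge *bridge,
				struct net_device *br_dev)
{
	bridge->br_dev = br_dev;
	bridge->mrouter = br_multicast_router(br_dev);
}
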
Together with the bridge device mrouter ports switchdev notifications, it is possible to have full offloading of the semantics of the bridge device mcast router state. Due to the fact that the bridge multicast router status can change in packet RX path, take the multicast_router bridge spinlock to protect the read. Signed-off-by: Yotam Gigi Reviewed-by: Nogah Frankel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 5 +++++ net/bridge/br_multicast.c | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 316ee113a220..02639ebea2f0 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -64,6 +64,7 @@ int br_multicast_list_adjacent(struct net_device *dev, bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto); bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); bool br_multicast_enabled(const struct net_device *dev); +bool br_multicast_router(const struct net_device *dev); #else static inline int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list) @@ -84,6 +85,10 @@ static inline bool br_multicast_enabled(const struct net_device *dev) { return false; } +static inline bool br_multicast_router(const struct net_device *dev) +{ + return false; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index bd50550dd4ca..7947e0436e18 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2216,6 +2216,18 @@ bool br_multicast_enabled(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_multicast_enabled); +bool br_multicast_router(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + bool is_router; + + spin_lock_bh(&br->multicast_lock); + is_router = br_multicast_is_router(br); + spin_unlock_bh(&br->multicast_lock); + return is_router; +} +EXPORT_SYMBOL_GPL(br_multicast_router); + int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { unsigned long max_delay; From b35750f19102271485af9092b800863164dd4be0 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 9 Oct 2017 11:15:33 +0200 Subject: [PATCH 0736/2177] mlxsw: spectrum: router: Export the mlxsw_sp_router_port function In Spectrum hardware, the router port is a virtual port that is the gateway to the routing mechanism. Hence, in order for a packet to be L3 forwarded, it must first be L2 forwarded to the router port inside the hardware. Further patches in this patchset are going to introduce support in bridge device used as an mrouter port. In this case, the router port index will be needed in order to update the MDB entries to include the router port. Thus, export the mlxsw_sp_router_port function, which returns the index of the Spectrum router port. Signed-off-by: Yotam Gigi Reviewed-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e0f8ea4ed7af..a072903f2c4e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5957,7 +5957,7 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } -static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) +u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) { return mlxsw_core_max_ports(mlxsw_sp->core) + 1; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 3d449180b035..3f2d840cb285 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -70,6 +70,7 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); +u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, From c4db953f00f09003519ac1fb078f3b5f57b32e3c Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 9 Oct 2017 11:15:34 +0200 Subject: [PATCH 0737/2177] mlxsw: spectrum_switchdev: Add support for router port in SMID entries In Spectrum, MDB entries point to MID entries, that indicate which ports a packet should be forwarded to. Add the support in creating MID entries that forward the packet to the Spectrum router port. This will be later used to handle the bridge mrouter port switchdev notifications. Signed-off-by: Yotam Gigi Reviewed-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 2cfdf22a145f..4b4584f8522a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -49,6 +49,7 @@ #include #include +#include "spectrum_router.h" #include "spectrum.h" #include "core.h" #include "reg.h" @@ -1243,7 +1244,8 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, } static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx, - long *ports_bitmap) + long *ports_bitmap, + bool set_router_port) { char *smid_pl; int err, i; @@ -1258,9 +1260,15 @@ static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx, mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } + mlxsw_reg_smid_port_mask_set(smid_pl, + mlxsw_sp_router_port(mlxsw_sp), 1); + for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core)) mlxsw_reg_smid_port_set(smid_pl, i, 1); + mlxsw_reg_smid_port_set(smid_pl, mlxsw_sp_router_port(mlxsw_sp), + set_router_port); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); kfree(smid_pl); return err; @@ -1364,7 +1372,8 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp); mid->mid = mid_idx; - err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap); + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap, + false); kfree(flood_bitmap); if (err) return false; From 593bc28ae211b864e7e0720ffc65a14cc5cff101 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 9 Oct 2017 11:15:35 +0200 Subject: [PATCH 0738/2177] mlxsw: spectrum_switchdev: Support bridge mrouter notifications Support the SWITCHDEV_ATTR_ID_BRIDGE_MROUTER port attribute switchdev notification. To do that, add the mrouter flag to struct mlxsw_sp_bridge_device, which indicates whether the bridge device was set to be mrouter port. This field is set when: - A new bridge is created, where the value is taken from the kernel bridge value. - A switchdev SWITCHDEV_ATTR_ID_BRIDGE_MROUTER notification is sent. In addition, change the bridge MID entries to include the router port when the bridge device is configured to be mrouter port. The MID entries are updated in the following cases: - When a new MID entry is created, update the router port according to the bridge mrouter state. - When a SWITCHDEV_ATTR_ID_BRIDGE_MROUTER notification is sent, update all the bridge's MID entries. This is aligned with the case where a bridge slave is configured to be mrouter port. Signed-off-by: Yotam Gigi Reviewed-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 65 ++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 4b4584f8522a..7b8548e25ae7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -80,7 +80,8 @@ struct mlxsw_sp_bridge_device { struct list_head ports_list; struct list_head mids_list; u8 vlan_enabled:1, - multicast_enabled:1; + multicast_enabled:1, + mrouter:1; const struct mlxsw_sp_bridge_ops *ops; }; @@ -171,6 +172,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, bridge_device->dev = br_dev; bridge_device->vlan_enabled = vlan_enabled; bridge_device->multicast_enabled = br_multicast_enabled(br_dev); + bridge_device->mrouter = br_multicast_router(br_dev); INIT_LIST_HEAD(&bridge_device->ports_list); if (vlan_enabled) { bridge->vlan_enabled_exists = true; @@ -813,6 +815,60 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp, + u16 mid_idx, bool add) +{ + char *smid_pl; + int err; + + smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL); + if (!smid_pl) + return -ENOMEM; + + mlxsw_reg_smid_pack(smid_pl, mid_idx, + mlxsw_sp_router_port(mlxsw_sp), add); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); + kfree(smid_pl); + return err; +} + +static void +mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + bool add) +{ + struct mlxsw_sp_mid *mid; + + list_for_each_entry(mid, &bridge_device->mids_list, list) + mlxsw_sp_smid_router_port_set(mlxsw_sp, mid->mid, add); +} + +static int +mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + bool is_mrouter) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + /* It's possible we failed to enslave the port, yet this + * operation is executed due to it being deferred. + */ + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_device) + return 0; + + if (bridge_device->mrouter != is_mrouter) + mlxsw_sp_bridge_mrouter_update_mdb(mlxsw_sp, bridge_device, + is_mrouter); + bridge_device->mrouter = is_mrouter; + return 0; +} + static int mlxsw_sp_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -850,6 +906,11 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, attr->orig_dev, attr->u.mc_disabled); break; + case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: + err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, trans, + attr->orig_dev, + attr->u.mrouter); + break; default: err = -EOPNOTSUPP; break; @@ -1373,7 +1434,7 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, mid->mid = mid_idx; err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap, - false); + bridge_device->mrouter); kfree(flood_bitmap); if (err) return false; From f5823fe6897c444265ef3919d8684b647eef904f Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:43 +0300 Subject: [PATCH 0739/2177] qed: Add ll2 option to limit the number of bds per packet iWARP uses 3 ll2 connections, the maximum number of bds is known during connection setup. 
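Because the maximum is known up front, each connection can size its tx descriptors exactly instead of for the worst case. The standalone sketch below (user-space code with simplified stand-in structures, not the driver's types) shows the sizing rule and the byte-offset indexing that the change described next relies on:

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for the driver structures; layout is illustrative. */
struct bd_entry {
	void		*txq_bd;
	unsigned long	 tx_frag;
	unsigned short	 frag_len;
};

struct tx_packet {
	unsigned short	 bd_used;
	void		*cookie;
	/* First element is part of the packet, the rest are added flexibly. */
	struct bd_entry	 bds_set[1];
};

int main(void)
{
	unsigned int tx_max_bds_per_packet = 3;	/* known at acquire time */
	unsigned int capacity = 8;		/* tx chain capacity */
	unsigned char *descq_mem;
	size_t desc_size;
	unsigned int i;

	/* One bds_set entry is already inside the struct, the remaining
	 * (max - 1) entries are appended after it.
	 */
	desc_size = sizeof(struct tx_packet) +
		    (tx_max_bds_per_packet - 1) * sizeof(struct bd_entry);

	descq_mem = calloc(capacity, desc_size);
	if (!descq_mem)
		return 1;

	/* Descriptors are now addressed by byte offset, not array index. */
	for (i = 0; i < capacity; i++) {
		struct tx_packet *pkt =
			(struct tx_packet *)(descq_mem + desc_size * i);

		pkt->bd_used = 0;
	}

	printf("per-descriptor size with %u BDs: %zu bytes\n",
	       tx_max_bds_per_packet, desc_size);
	free(descq_mem);
	return 0;
}
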
This patch modifies the static array in the ll2_tx_packet descriptor to be a flexible array and significantlly reduces memory size. In addition, some redundant fields in the ll2_tx_packet were removed, which also contributed to decreasing the descriptor size. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 29 ++++++++++++++++------- drivers/net/ethernet/qlogic/qed/qed_ll2.h | 9 +++---- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 250afa5486cf..75af40a7690a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1105,6 +1105,7 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_info) { struct qed_ll2_tx_packet *p_descq; + u32 desc_size; u32 capacity; int rc = 0; @@ -1122,13 +1123,17 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, goto out; capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain); - p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet), - GFP_KERNEL); + /* First element is part of the packet, rest are flexibly added */ + desc_size = (sizeof(*p_descq) + + (p_ll2_info->input.tx_max_bds_per_packet - 1) * + sizeof(p_descq->bds_set)); + + p_descq = kcalloc(capacity, desc_size, GFP_KERNEL); if (!p_descq) { rc = -ENOMEM; goto out; } - p_ll2_info->tx_queue.descq_array = p_descq; + p_ll2_info->tx_queue.descq_mem = p_descq; DP_VERBOSE(p_hwfn, QED_MSG_LL2, "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", @@ -1359,11 +1364,13 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) { struct qed_hwfn *p_hwfn = cxt; struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_tx_packet *p_pkt; struct qed_ll2_rx_queue *p_rx; struct qed_ll2_tx_queue *p_tx; struct qed_ptt *p_ptt; int rc = -EINVAL; u32 i, capacity; + u32 desc_size; u8 qid; p_ptt = qed_ptt_acquire(p_hwfn); @@ -1397,9 +1404,15 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) INIT_LIST_HEAD(&p_tx->sending_descq); spin_lock_init(&p_tx->lock); capacity = qed_chain_get_capacity(&p_tx->txq_chain); - for (i = 0; i < capacity; i++) - list_add_tail(&p_tx->descq_array[i].list_entry, - &p_tx->free_descq); + /* First element is part of the packet, rest are flexibly added */ + desc_size = (sizeof(*p_pkt) + + (p_ll2_conn->input.tx_max_bds_per_packet - 1) * + sizeof(p_pkt->bds_set)); + + for (i = 0; i < capacity; i++) { + p_pkt = p_tx->descq_mem + desc_size * i; + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + } p_tx->cur_completing_bd_idx = 0; p_tx->bds_idx = 0; p_tx->b_completing_packet = false; @@ -1698,7 +1711,7 @@ int qed_ll2_prepare_tx_packet(void *cxt, p_tx = &p_ll2_conn->tx_queue; p_tx_chain = &p_tx->txq_chain; - if (pkt->num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET) + if (pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet) return -EIO; spin_lock_irqsave(&p_tx->lock, flags); @@ -1858,7 +1871,7 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle) qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); } - kfree(p_ll2_conn->tx_queue.descq_array); + kfree(p_ll2_conn->tx_queue.descq_mem); qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain); kfree(p_ll2_conn->rx_queue.descq_array); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index a822528e9c63..9bdd08f15c79 100644 --- 
a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -63,17 +63,14 @@ struct qed_ll2_rx_packet { struct qed_ll2_tx_packet { struct list_head list_entry; u16 bd_used; - u16 vlan; - u16 l4_hdr_offset_w; - u8 bd_flags; bool notify_fw; void *cookie; - + /* Flexible Array of bds_set determined by max_bds_per_packet */ struct { struct core_tx_bd *txq_bd; dma_addr_t tx_frag; u16 frag_len; - } bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET]; + } bds_set[1]; }; struct qed_ll2_rx_queue { @@ -101,7 +98,7 @@ struct qed_ll2_tx_queue { struct list_head active_descq; struct list_head free_descq; struct list_head sending_descq; - struct qed_ll2_tx_packet *descq_array; + void *descq_mem; /* memory for variable sized qed_ll2_tx_packet*/ struct qed_ll2_tx_packet *cur_send_packet; struct qed_ll2_tx_packet cur_completing_packet; u16 cur_completing_bd_idx; From ed468ebee04ffba0231a8f50616bdb250752a891 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:44 +0300 Subject: [PATCH 0740/2177] qed: Add ll2 ability of opening a secondary queue When more than one ll2 queue is opened ( that is not an OOO queue ) ll2 code does not have enough information to determine whether the queue is the main one or not, so a new field is added to the acquire input data to expose the control of determining whether the queue is the main queue or a secondary queue. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 7 ++++++- drivers/net/ethernet/qlogic/qed/qed_ll2.h | 1 + include/linux/qed/qed_ll2_if.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 75af40a7690a..3c695da890df 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -894,7 +894,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg; p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en; p_ramrod->queue_id = p_ll2_conn->queue_id; - p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1; + p_ramrod->main_func_queue = p_ll2_conn->main_func_queue ? 1 : 0; if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) && p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) && @@ -1265,6 +1265,11 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data) p_ll2_info->tx_dest = (data->input.tx_dest == QED_LL2_TX_DEST_NW) ? 
CORE_TX_DEST_NW : CORE_TX_DEST_LB; + if (data->input.conn_type == QED_LL2_TYPE_OOO || + data->input.secondary_queue) + p_ll2_info->main_func_queue = false; + else + p_ll2_info->main_func_queue = true; /* Correct maximum number of Tx BDs */ p_tx_max = &p_ll2_info->input.tx_max_bds_per_packet; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index 9bdd08f15c79..f65817012e97 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -121,6 +121,7 @@ struct qed_ll2_info { bool b_active; enum core_tx_dest tx_dest; u8 tx_stats_en; + bool main_func_queue; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; struct qed_ll2_cbs cbs; diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 89fa0bbd54f3..d7cca590b743 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -171,6 +171,7 @@ struct qed_ll2_acquire_data_inputs { enum qed_ll2_tx_dest tx_dest; enum qed_ll2_error_handle ai_err_packet_too_big; enum qed_ll2_error_handle ai_err_no_buf; + bool secondary_queue; u8 gsi_enable; }; From 77caa792f5d8e4ecc88eb1cf4b9c478c07e0ec57 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:45 +0300 Subject: [PATCH 0741/2177] qed: Add ll2 option for dropping a tx packet The option of sending a packet on the ll2 and dropping it exists in hardware and was not used until now, thus not exposed. The iWARP unaligned MPA flow requires this functionality for flushing the tx queue. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 16 ++++++++++++++-- include/linux/qed/qed_ll2_if.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 3c695da890df..ad67d36956e8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1597,8 +1597,20 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, roce_flavor = (pkt->qed_roce_flavor == QED_LL2_ROCE) ? CORE_ROCE : CORE_RROCE; - tx_dest = (pkt->tx_dest == QED_LL2_TX_DEST_NW) ? CORE_TX_DEST_NW - : CORE_TX_DEST_LB; + switch (pkt->tx_dest) { + case QED_LL2_TX_DEST_NW: + tx_dest = CORE_TX_DEST_NW; + break; + case QED_LL2_TX_DEST_LB: + tx_dest = CORE_TX_DEST_LB; + break; + case QED_LL2_TX_DEST_DROP: + tx_dest = CORE_TX_DEST_DROP; + break; + default: + tx_dest = CORE_TX_DEST_LB; + break; + } start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan); diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index d7cca590b743..95fdf02a3bbe 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -64,6 +64,7 @@ enum qed_ll2_roce_flavor_type { enum qed_ll2_tx_dest { QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */ QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */ + QED_LL2_TX_DEST_DROP, /* Light L2 Drop the TX packet */ QED_LL2_TX_DEST_MAX }; From 6df60fe703c348a507b0030b92c2947e68e1c589 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:46 +0300 Subject: [PATCH 0742/2177] qed: Fix initialization of ll2 offload feature enable_ip_cksum, enable_l4_cksum, calc_ip_len were added in commit stated below but not passed through to FW. 
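The fix below simply programs these flags into the tx BD bitfield. As a minimal standalone illustration of the SET_FIELD-style masking and of why the flags are normalized with !! (the mask/shift values here are invented for the example and are not the firmware definitions):

#include <stdio.h>

/* Simplified stand-in for the driver's SET_FIELD macro and field defines. */
#define FIELD_IP_CSUM_MASK	0x1
#define FIELD_IP_CSUM_SHIFT	4
#define FIELD_L4_CSUM_MASK	0x1
#define FIELD_L4_CSUM_SHIFT	5

#define SET_FIELD(value, name, flag) \
	((value) |= (((unsigned int)(flag) & name##_MASK) << name##_SHIFT))

int main(void)
{
	unsigned short bd_data = 0;
	int enable_ip_cksum = 4;	/* any non-zero value means "enabled" */
	int enable_l4_cksum = 0;

	/* !! collapses a non-zero flag to exactly 1 before it is masked and
	 * shifted into the single-bit field.
	 */
	SET_FIELD(bd_data, FIELD_IP_CSUM, !!enable_ip_cksum);
	SET_FIELD(bd_data, FIELD_L4_CSUM, !!enable_l4_cksum);

	printf("bd_data = 0x%04x\n", bd_data);
	return 0;
}
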
This was OK until now as it wasn't used, but is required for the iWARP unaligned flow Fixes:7c7973b2ae27 ("qed: LL2 to use packed information for tx") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index ad67d36956e8..6d144747111a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1621,6 +1621,9 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1); SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds); SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor); + SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, !!(pkt->enable_ip_cksum)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, !!(pkt->enable_l4_cksum)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_LEN, !!(pkt->calc_ip_len)); start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data); DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag); start_bd->nbytes = cpu_to_le16(pkt->first_frag_len); From 89d65113097072de7936a2aea2f819818a7c987a Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:47 +0300 Subject: [PATCH 0743/2177] qed: Add the source of a packet sent on an iWARP ll2 connection When a packet is sent back to iWARP FW via the tx ll2 connection the FW needs to know the source of the packet. Whether it is OOO or unaligned MPA related. Since OOO is implemented entirely inside the ll2 code (and shared with iSCSI), packets are marked as IN_ORDER inside the ll2 code. For unaligned mpa the value will be determined in the iWARP code and sent on the pkt->vlan field. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 6d144747111a..8eb9645c880d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1613,7 +1613,12 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, } start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); - start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan); + if (QED_IS_IWARP_PERSONALITY(p_hwfn) && + p_ll2->input.conn_type == QED_LL2_TYPE_OOO) + start_bd->nw_vlan_or_lb_echo = + cpu_to_le16(IWARP_LL2_IN_ORDER_TX_QUEUE); + else + start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan); SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W, cpu_to_le16(pkt->l4_hdr_offset_w)); SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest); From 6f34a284f36399501fcc034dc4522a2d8d9fa6c9 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:48 +0300 Subject: [PATCH 0744/2177] qed: Add LL2 slowpath handling For iWARP unaligned MPA flow, a slowpath event of flushing an MPA connection that entered an unaligned state is required. The flush ramrod is received on the ll2 queue, and a pre-registered callback function is called to handle the flush event. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 40 +++++++++++++++++++++-- include/linux/qed/qed_ll2_if.h | 5 +++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 8eb9645c880d..047f556ca62e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -422,6 +422,41 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn, data->u.placement_offset = p_cqe->rx_cqe_fp.placement_offset; } +static int +qed_ll2_handle_slowpath(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long *p_lock_flags) +{ + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_slow_path_cqe *sp_cqe; + + sp_cqe = &p_cqe->rx_cqe_sp; + if (sp_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) { + DP_NOTICE(p_hwfn, + "LL2 - unexpected Rx CQE slowpath ramrod_cmd_id:%d\n", + sp_cqe->ramrod_cmd_id); + return -EINVAL; + } + + if (!p_ll2_conn->cbs.slowpath_cb) { + DP_NOTICE(p_hwfn, + "LL2 - received RX_QUEUE_FLUSH but no callback was provided\n"); + return -EINVAL; + } + + spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags); + + p_ll2_conn->cbs.slowpath_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + le32_to_cpu(sp_cqe->opaque_data.data[0]), + le32_to_cpu(sp_cqe->opaque_data.data[1])); + + spin_lock_irqsave(&p_rx->lock, *p_lock_flags); + + return 0; +} + static int qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, @@ -495,8 +530,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) switch (cqe->rx_cqe_sp.type) { case CORE_RX_CQE_TYPE_SLOW_PATH: - DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n"); - rc = -EINVAL; + rc = qed_ll2_handle_slowpath(p_hwfn, p_ll2_conn, + cqe, &flags); break; case CORE_RX_CQE_TYPE_GSI_OFFLOAD: case CORE_RX_CQE_TYPE_REGULAR: @@ -1214,6 +1249,7 @@ qed_ll2_set_cbs(struct qed_ll2_info *p_ll2_info, const struct qed_ll2_cbs *cbs) p_ll2_info->cbs.rx_release_cb = cbs->rx_release_cb; p_ll2_info->cbs.tx_comp_cb = cbs->tx_comp_cb; p_ll2_info->cbs.tx_release_cb = cbs->tx_release_cb; + p_ll2_info->cbs.slowpath_cb = cbs->slowpath_cb; p_ll2_info->cbs.cookie = cbs->cookie; return 0; diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 95fdf02a3bbe..e755954d85fd 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -151,11 +151,16 @@ void (*qed_ll2_release_tx_packet_cb)(void *cxt, dma_addr_t first_frag_addr, bool b_last_fragment, bool b_last_packet); +typedef +void (*qed_ll2_slowpath_cb)(void *cxt, u8 connection_handle, + u32 opaque_data_0, u32 opaque_data_1); + struct qed_ll2_cbs { qed_ll2_complete_rx_packet_cb rx_comp_cb; qed_ll2_release_rx_packet_cb rx_release_cb; qed_ll2_complete_tx_packet_cb tx_comp_cb; qed_ll2_release_tx_packet_cb tx_release_cb; + qed_ll2_slowpath_cb slowpath_cb; void *cookie; }; From ae3488ff37dc4f21985111f442d26a8805e56d45 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:49 +0300 Subject: [PATCH 0745/2177] qed: Add ll2 connection for processing unaligned MPA packets This patch adds only the establishment and termination of the ll2 connection that handles unaligned MPA packets. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 65 +++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 1 + 2 files changed, 66 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 8fc9c811f6e3..f413621a67b0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1713,6 +1713,19 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, return 0; } +/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */ +#define QED_IWARP_MAX_BDS_PER_FPDU 3 +static void +qed_iwarp_ll2_comp_mpa_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) +{ + struct qed_iwarp_info *iwarp_info; + struct qed_hwfn *p_hwfn = cxt; + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + qed_iwarp_ll2_post_rx(p_hwfn, data->cookie, + iwarp_info->ll2_mpa_handle); +} + static void qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) { @@ -1877,6 +1890,13 @@ static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle, kfree(buffer); } +void +qed_iwarp_ll2_slowpath(void *cxt, + u8 connection_handle, + u32 opaque_data_0, u32 opaque_data_1) +{ +} + static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; @@ -1902,6 +1922,16 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL; } + if (iwarp_info->ll2_mpa_handle != QED_IWARP_HANDLE_INVAL) { + rc = qed_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_mpa_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate mpa connection\n"); + + qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_mpa_handle); + iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL; + } + qed_llh_remove_mac_filter(p_hwfn, p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr); return rc; @@ -1953,12 +1983,14 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, struct qed_iwarp_info *iwarp_info; struct qed_ll2_acquire_data data; struct qed_ll2_cbs cbs; + u32 mpa_buff_size; u16 n_ooo_bufs; int rc = 0; iwarp_info = &p_hwfn->p_rdma_info->iwarp; iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL; + iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL; iwarp_info->max_mtu = params->max_mtu; @@ -2029,6 +2061,39 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, if (rc) goto err; + /* Start Unaligned MPA connection */ + cbs.rx_comp_cb = qed_iwarp_ll2_comp_mpa_pkt; + cbs.slowpath_cb = qed_iwarp_ll2_slowpath; + + memset(&data, 0, sizeof(data)); + data.input.conn_type = QED_LL2_TYPE_IWARP; + data.input.mtu = params->max_mtu; + /* FW requires that once a packet arrives OOO, it must have at + * least 2 rx buffers available on the unaligned connection + * for handling the case that it is a partial fpdu. 
+ */ + data.input.rx_num_desc = n_ooo_bufs * 2; + data.input.tx_num_desc = data.input.rx_num_desc; + data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU; + data.p_connection_handle = &iwarp_info->ll2_mpa_handle; + data.input.secondary_queue = true; + data.cbs = &cbs; + + rc = qed_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_mpa_handle); + if (rc) + goto err; + + mpa_buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu); + rc = qed_iwarp_ll2_alloc_buffers(p_hwfn, + data.input.rx_num_desc, + mpa_buff_size, + iwarp_info->ll2_mpa_handle); + if (rc) + goto err; return rc; err: qed_iwarp_ll2_stop(p_hwfn, p_ptt); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 9e2bfde894df..9d33a1fa1758 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -73,6 +73,7 @@ struct qed_iwarp_info { u8 tcp_flags; u8 ll2_syn_handle; u8 ll2_ooo_handle; + u8 ll2_mpa_handle; u8 peer2peer; enum mpa_negotiation_mode mpa_rev; enum mpa_rtr_type rtr_type; From fcb39f6c10b24d2d16d4c2bdb4c256bc21b8a131 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:50 +0300 Subject: [PATCH 0746/2177] qed: Add mpa buffer descriptors for storing and processing mpa fpdus The mpa buff is a descriptor for iwarp ll2 buffers that contains additional information required for aligining fpdu's. In some cases, an additional packet will arrive which will complete the alignment of a fpdu, but we won't be able to post the fpdu due to insufficient place on the tx ring. In this case we can't loose the data and require storing it for later. Processing is therefore done in two places, during rx completion, where we initialize a mpa buffer descriptor and add it to the pending list, and during tx-completion, since we free up an entry in the tx chain we can process any pending mpa packets. The mpa buff descriptors are pre-allocated since we have to ensure that we won't reach a state where we can't store an incoming unaligned packet. All packets received on the ll2 MUST be processed by the driver at some stage. Since they are preallocated, we hold a free list. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 116 ++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 11 ++ 2 files changed, 127 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index f413621a67b0..efd4861c72e2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1415,7 +1415,10 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn) void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn) { + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1); + kfree(iwarp_info->mpa_bufs); } int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams) @@ -1715,13 +1718,104 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, /* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */ #define QED_IWARP_MAX_BDS_PER_FPDU 3 +static void +qed_iwarp_mpa_get_data(struct qed_hwfn *p_hwfn, + struct unaligned_opaque_data *curr_pkt, + u32 opaque_data0, u32 opaque_data1) +{ + u64 opaque_data; + + opaque_data = HILO_64(opaque_data1, opaque_data0); + *curr_pkt = *((struct unaligned_opaque_data *)&opaque_data); + + curr_pkt->first_mpa_offset = curr_pkt->tcp_payload_offset + + le16_to_cpu(curr_pkt->first_mpa_offset); + curr_pkt->cid = le32_to_cpu(curr_pkt->cid); +} + +/* This function is called when an unaligned or incomplete MPA packet arrives + * driver needs to align the packet, perhaps using previous data and send + * it down to FW once it is aligned. + */ +static int +qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ll2_mpa_buf *mpa_buf) +{ + struct qed_iwarp_ll2_buff *buf = mpa_buf->ll2_buf; + int rc = -EINVAL; + + qed_iwarp_ll2_post_rx(p_hwfn, + buf, + p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle); + return rc; +} + +static void qed_iwarp_process_pending_pkts(struct qed_hwfn *p_hwfn) +{ + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct qed_iwarp_ll2_mpa_buf *mpa_buf = NULL; + int rc; + + while (!list_empty(&iwarp_info->mpa_buf_pending_list)) { + mpa_buf = list_first_entry(&iwarp_info->mpa_buf_pending_list, + struct qed_iwarp_ll2_mpa_buf, + list_entry); + + rc = qed_iwarp_process_mpa_pkt(p_hwfn, mpa_buf); + + /* busy means break and continue processing later, don't + * remove the buf from the pending list. 
+ */ + if (rc == -EBUSY) + break; + + list_del(&mpa_buf->list_entry); + list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_list); + + if (rc) { /* different error, don't continue */ + DP_NOTICE(p_hwfn, "process pkts failed rc=%d\n", rc); + break; + } + } +} + static void qed_iwarp_ll2_comp_mpa_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) { + struct qed_iwarp_ll2_mpa_buf *mpa_buf; struct qed_iwarp_info *iwarp_info; struct qed_hwfn *p_hwfn = cxt; iwarp_info = &p_hwfn->p_rdma_info->iwarp; + mpa_buf = list_first_entry(&iwarp_info->mpa_buf_list, + struct qed_iwarp_ll2_mpa_buf, list_entry); + if (!mpa_buf) { + DP_ERR(p_hwfn, "No free mpa buf\n"); + goto err; + } + + list_del(&mpa_buf->list_entry); + qed_iwarp_mpa_get_data(p_hwfn, &mpa_buf->data, + data->opaque_data_0, data->opaque_data_1); + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "LL2 MPA CompRx payload_len:0x%x\tfirst_mpa_offset:0x%x\ttcp_payload_offset:0x%x\tflags:0x%x\tcid:0x%x\n", + data->length.packet_length, mpa_buf->data.first_mpa_offset, + mpa_buf->data.tcp_payload_offset, mpa_buf->data.flags, + mpa_buf->data.cid); + + mpa_buf->ll2_buf = data->cookie; + mpa_buf->tcp_payload_len = data->length.packet_length - + mpa_buf->data.first_mpa_offset; + mpa_buf->data.first_mpa_offset += data->u.placement_offset; + mpa_buf->placement_offset = data->u.placement_offset; + + list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_pending_list); + + qed_iwarp_process_pending_pkts(p_hwfn); + return; +err: qed_iwarp_ll2_post_rx(p_hwfn, data->cookie, iwarp_info->ll2_mpa_handle); } @@ -1872,6 +1966,11 @@ static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle, /* this was originally an rx packet, post it back */ qed_iwarp_ll2_post_rx(p_hwfn, buffer, connection_handle); + + if (connection_handle == p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle) + qed_iwarp_process_pending_pkts(p_hwfn); + + return; } static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle, @@ -1986,6 +2085,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, u32 mpa_buff_size; u16 n_ooo_bufs; int rc = 0; + int i; iwarp_info = &p_hwfn->p_rdma_info->iwarp; iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; @@ -2094,6 +2194,22 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->ll2_mpa_handle); if (rc) goto err; + /* The mpa_bufs array serves for pending RX packets received on the + * mpa ll2 that don't have place on the tx ring and require later + * processing. 
We can't fail on allocation of such a struct therefore + * we allocate enough to take care of all rx packets + */ + iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc, + sizeof(*iwarp_info->mpa_bufs), + GFP_KERNEL); + if (!iwarp_info->mpa_bufs) + goto err; + + INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list); + INIT_LIST_HEAD(&iwarp_info->mpa_buf_list); + for (i = 0; i < data.input.rx_num_desc; i++) + list_add_tail(&iwarp_info->mpa_bufs[i].list_entry, + &iwarp_info->mpa_buf_list); return rc; err: qed_iwarp_ll2_stop(p_hwfn, p_ptt); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 9d33a1fa1758..2c53fe46345c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -60,10 +60,20 @@ struct qed_iwarp_ll2_buff { u32 buff_size; }; +struct qed_iwarp_ll2_mpa_buf { + struct list_head list_entry; + struct qed_iwarp_ll2_buff *ll2_buf; + struct unaligned_opaque_data data; + u16 tcp_payload_len; + u8 placement_offset; +}; + struct qed_iwarp_info { struct list_head listen_list; /* qed_iwarp_listener */ struct list_head ep_list; /* qed_iwarp_ep */ struct list_head ep_free_list; /* pre-allocated ep's */ + struct list_head mpa_buf_list; /* list of mpa_bufs */ + struct list_head mpa_buf_pending_list; spinlock_t iw_lock; /* for iwarp resources */ spinlock_t qp_lock; /* for teardown races */ u32 rcv_wnd_scale; @@ -77,6 +87,7 @@ struct qed_iwarp_info { u8 peer2peer; enum mpa_negotiation_mode mpa_rev; enum mpa_rtr_type rtr_type; + struct qed_iwarp_ll2_mpa_buf *mpa_bufs; }; enum qed_iwarp_ep_state { From 469981b17a4f8ddac91837bd74ebc98578f2ddbf Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:51 +0300 Subject: [PATCH 0747/2177] qed: Add unaligned and packed packet processing The fpdu data structure is preallocated per connection. Each connection stores the current status of the connection: either nothing pending, or there is a partial fpdu that is waiting for the rest of the fpdu (incomplete bytes != 0). The same structure is also used for splitting a packet when there are packed fpdus. The structure is initialized with all data required for sending the fpdu back to the FW. A fpdu will always be spanned across a maximum of 3 tx bds. One for the header, one for the partial fdpu received and one for the remainder (unaligned) packet. In case of packed fpdu's, two fragments are used, one for the header and one for the data. Corner cases are not handled in the patch for clarity, and will be added as a separate patch. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 257 ++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 13 + 2 files changed, 270 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index efd4861c72e2..83b147fdacde 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1419,6 +1419,7 @@ void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn) qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1); kfree(iwarp_info->mpa_bufs); + kfree(iwarp_info->partial_fpdus); } int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams) @@ -1716,8 +1717,170 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, return 0; } +static struct qed_iwarp_fpdu *qed_iwarp_get_curr_fpdu(struct qed_hwfn *p_hwfn, + u16 cid) +{ + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct qed_iwarp_fpdu *partial_fpdu; + u32 idx; + + idx = cid - qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_IWARP); + if (idx >= iwarp_info->max_num_partial_fpdus) { + DP_ERR(p_hwfn, "Invalid cid %x max_num_partial_fpdus=%x\n", cid, + iwarp_info->max_num_partial_fpdus); + return NULL; + } + + partial_fpdu = &iwarp_info->partial_fpdus[idx]; + + return partial_fpdu; +} + +enum qed_iwarp_mpa_pkt_type { + QED_IWARP_MPA_PKT_PACKED, + QED_IWARP_MPA_PKT_PARTIAL, + QED_IWARP_MPA_PKT_UNALIGNED +}; + +#define QED_IWARP_MPA_FPDU_LENGTH_SIZE (2) +#define QED_IWARP_MPA_CRC32_DIGEST_SIZE (4) + +/* Pad to multiple of 4 */ +#define QED_IWARP_PDU_DATA_LEN_WITH_PAD(data_len) ALIGN(data_len, 4) +#define QED_IWARP_FPDU_LEN_WITH_PAD(_mpa_len) \ + (QED_IWARP_PDU_DATA_LEN_WITH_PAD((_mpa_len) + \ + QED_IWARP_MPA_FPDU_LENGTH_SIZE) + \ + QED_IWARP_MPA_CRC32_DIGEST_SIZE) + /* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */ #define QED_IWARP_MAX_BDS_PER_FPDU 3 + +char *pkt_type_str[] = { + "QED_IWARP_MPA_PKT_PACKED", + "QED_IWARP_MPA_PKT_PARTIAL", + "QED_IWARP_MPA_PKT_UNALIGNED" +}; + +static enum qed_iwarp_mpa_pkt_type +qed_iwarp_mpa_classify(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + u16 tcp_payload_len, u8 *mpa_data) +{ + enum qed_iwarp_mpa_pkt_type pkt_type; + u16 mpa_len; + + if (fpdu->incomplete_bytes) { + pkt_type = QED_IWARP_MPA_PKT_UNALIGNED; + goto out; + } + + mpa_len = ntohs(*((u16 *)(mpa_data))); + fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len); + + if (fpdu->fpdu_length <= tcp_payload_len) + pkt_type = QED_IWARP_MPA_PKT_PACKED; + else + pkt_type = QED_IWARP_MPA_PKT_PARTIAL; + +out: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "MPA_ALIGN: %s: fpdu_length=0x%x tcp_payload_len:0x%x\n", + pkt_type_str[pkt_type], fpdu->fpdu_length, tcp_payload_len); + + return pkt_type; +} + +static void +qed_iwarp_init_fpdu(struct qed_iwarp_ll2_buff *buf, + struct qed_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *pkt_data, + u16 tcp_payload_size, u8 placement_offset) +{ + fpdu->mpa_buf = buf; + fpdu->pkt_hdr = buf->data_phys_addr + placement_offset; + fpdu->pkt_hdr_size = pkt_data->tcp_payload_offset; + fpdu->mpa_frag = buf->data_phys_addr + pkt_data->first_mpa_offset; + fpdu->mpa_frag_virt = (u8 *)(buf->data) + pkt_data->first_mpa_offset; + + if (tcp_payload_size < fpdu->fpdu_length) + fpdu->incomplete_bytes = fpdu->fpdu_length - tcp_payload_size; + else + fpdu->incomplete_bytes = 0; /* complete fpdu */ + + fpdu->mpa_frag_len = fpdu->fpdu_length - fpdu->incomplete_bytes; +} + +static int +qed_iwarp_send_fpdu(struct qed_hwfn *p_hwfn, + struct 
qed_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *curr_pkt, + struct qed_iwarp_ll2_buff *buf, + u16 tcp_payload_size, enum qed_iwarp_mpa_pkt_type pkt_type) +{ + struct qed_ll2_tx_pkt_info tx_pkt; + u8 ll2_handle; + int rc; + + memset(&tx_pkt, 0, sizeof(tx_pkt)); + + /* An unaligned packet means it's split over two tcp segments. So the + * complete packet requires 3 bds, one for the header, one for the + * part of the fpdu of the first tcp segment, and the last fragment + * will point to the remainder of the fpdu. A packed pdu, requires only + * two bds, one for the header and one for the data. + */ + tx_pkt.num_of_bds = (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED) ? 3 : 2; + tx_pkt.tx_dest = QED_LL2_TX_DEST_LB; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; /* offset in words */ + + /* Send the mpa_buf only with the last fpdu (in case of packed) */ + if (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED || + tcp_payload_size <= fpdu->fpdu_length) + tx_pkt.cookie = fpdu->mpa_buf; + + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + tx_pkt.enable_ip_cksum = true; + tx_pkt.enable_l4_cksum = true; + tx_pkt.calc_ip_len = true; + /* vlan overload with enum iwarp_ll2_tx_queues */ + tx_pkt.vlan = IWARP_LL2_ALIGNED_TX_QUEUE; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + /* Set first fragment to header */ + rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true); + if (rc) + goto out; + + /* Set second fragment to first part of packet */ + rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, ll2_handle, + fpdu->mpa_frag, + fpdu->mpa_frag_len); + if (rc) + goto out; + + if (!fpdu->incomplete_bytes) + goto out; + + /* Set third fragment to second part of the packet */ + rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, + ll2_handle, + buf->data_phys_addr + + curr_pkt->first_mpa_offset, + fpdu->incomplete_bytes); +out: + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_ALIGN: Sent FPDU num_bds=%d first_frag_len=%x, mpa_frag_len=0x%x, incomplete_bytes:0x%x rc=%d\n", + tx_pkt.num_of_bds, + tx_pkt.first_frag_len, + fpdu->mpa_frag_len, + fpdu->incomplete_bytes, rc); + + return rc; +} + static void qed_iwarp_mpa_get_data(struct qed_hwfn *p_hwfn, struct unaligned_opaque_data *curr_pkt, @@ -1741,9 +1904,79 @@ static int qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn, struct qed_iwarp_ll2_mpa_buf *mpa_buf) { + struct unaligned_opaque_data *curr_pkt = &mpa_buf->data; struct qed_iwarp_ll2_buff *buf = mpa_buf->ll2_buf; + enum qed_iwarp_mpa_pkt_type pkt_type; + struct qed_iwarp_fpdu *fpdu; int rc = -EINVAL; + u8 *mpa_data; + fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, curr_pkt->cid & 0xffff); + if (!fpdu) { /* something corrupt with cid, post rx back */ + DP_ERR(p_hwfn, "Invalid cid, drop and post back to rx cid=%x\n", + curr_pkt->cid); + goto err; + } + + do { + mpa_data = ((u8 *)(buf->data) + curr_pkt->first_mpa_offset); + + pkt_type = qed_iwarp_mpa_classify(p_hwfn, fpdu, + mpa_buf->tcp_payload_len, + mpa_data); + + switch (pkt_type) { + case QED_IWARP_MPA_PKT_PARTIAL: + qed_iwarp_init_fpdu(buf, fpdu, + curr_pkt, + mpa_buf->tcp_payload_len, + mpa_buf->placement_offset); + + mpa_buf->tcp_payload_len = 0; + break; + case QED_IWARP_MPA_PKT_PACKED: + qed_iwarp_init_fpdu(buf, fpdu, + curr_pkt, + mpa_buf->tcp_payload_len, + mpa_buf->placement_offset); + + rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, + mpa_buf->tcp_payload_len, + pkt_type); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't send FPDU:reset rc=%d\n", rc); + memset(fpdu, 0, sizeof(*fpdu)); + break; + 
} + + mpa_buf->tcp_payload_len -= fpdu->fpdu_length; + curr_pkt->first_mpa_offset += fpdu->fpdu_length; + break; + case QED_IWARP_MPA_PKT_UNALIGNED: + rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, + mpa_buf->tcp_payload_len, + pkt_type); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't send FPDU:delay rc=%d\n", rc); + /* don't reset fpdu -> we need it for next + * classify + */ + break; + } + + mpa_buf->tcp_payload_len -= fpdu->incomplete_bytes; + curr_pkt->first_mpa_offset += fpdu->incomplete_bytes; + /* The framed PDU was sent - no more incomplete bytes */ + fpdu->incomplete_bytes = 0; + break; + } + } while (mpa_buf->tcp_payload_len && !rc); + + return rc; + +err: qed_iwarp_ll2_post_rx(p_hwfn, buf, p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle); @@ -1989,11 +2222,27 @@ static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle, kfree(buffer); } +/* The only slowpath for iwarp ll2 is unalign flush. When this completion + * is received, need to reset the FPDU. + */ void qed_iwarp_ll2_slowpath(void *cxt, u8 connection_handle, u32 opaque_data_0, u32 opaque_data_1) { + struct unaligned_opaque_data unalign_data; + struct qed_hwfn *p_hwfn = cxt; + struct qed_iwarp_fpdu *fpdu; + + qed_iwarp_mpa_get_data(p_hwfn, &unalign_data, + opaque_data_0, opaque_data_1); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "(0x%x) Flush fpdu\n", + unalign_data.cid); + + fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, (u16)unalign_data.cid); + if (fpdu) + memset(fpdu, 0, sizeof(*fpdu)); } static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) @@ -2194,6 +2443,14 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->ll2_mpa_handle); if (rc) goto err; + + iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps, + sizeof(*iwarp_info->partial_fpdus), + GFP_KERNEL); + if (!iwarp_info->partial_fpdus) + goto err; + + iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; /* The mpa_bufs array serves for pending RX packets received on the * mpa ll2 that don't have place on the tx ring and require later * processing. We can't fail on allocation of such a struct therefore diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 2c53fe46345c..858755cafd2b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -68,6 +68,17 @@ struct qed_iwarp_ll2_mpa_buf { u8 placement_offset; }; +struct qed_iwarp_fpdu { + struct qed_iwarp_ll2_buff *mpa_buf; + void *mpa_frag_virt; + dma_addr_t mpa_frag; + dma_addr_t pkt_hdr; + u16 mpa_frag_len; + u16 fpdu_length; + u16 incomplete_bytes; + u8 pkt_hdr_size; +}; + struct qed_iwarp_info { struct list_head listen_list; /* qed_iwarp_listener */ struct list_head ep_list; /* qed_iwarp_ep */ @@ -87,7 +98,9 @@ struct qed_iwarp_info { u8 peer2peer; enum mpa_negotiation_mode mpa_rev; enum mpa_rtr_type rtr_type; + struct qed_iwarp_fpdu *partial_fpdus; struct qed_iwarp_ll2_mpa_buf *mpa_bufs; + u16 max_num_partial_fpdus; }; enum qed_iwarp_ep_state { From d531038eeb6dd25dbf88402f932bf0ea524de82e Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:52 +0300 Subject: [PATCH 0748/2177] qed: Add support for freeing two ll2 buffers for corner cases When posting a packet on the ll2 tx, we can provide a cookie that will be returned upon tx completion. This cookie is the ll2 iwarp buffer which is then reposted to the rx ring. Part of the unaligned mpa flow is determining when a buffer can be reposted. 
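The rest of this message walks through the corner cases; as a standalone sketch of the resulting tx-completion shape (user-space code with simplified types and names, not the driver code):

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for the iWARP ll2 buffer; only the chaining pointer
 * matters for this sketch.
 */
struct ll2_buff {
	struct ll2_buff *piggy_buf;	/* optional second buffer to repost */
	int id;
};

static void post_rx(struct ll2_buff *buf)
{
	printf("repost buffer %d to the rx ring\n", buf->id);
}

/* tx-completion callback: the cookie may be NULL, and it may carry a
 * piggy-backed second buffer that must be reposted as well.
 */
static void tx_comp(void *cookie)
{
	struct ll2_buff *buf = cookie;
	struct ll2_buff *piggy;

	if (!buf)
		return;

	piggy = buf->piggy_buf;
	if (piggy) {
		buf->piggy_buf = NULL;
		post_rx(piggy);
	}
	post_rx(buf);
}

int main(void)
{
	struct ll2_buff a = { .piggy_buf = NULL, .id = 1 };
	struct ll2_buff b = { .piggy_buf = &a,   .id = 2 };

	tx_comp(NULL);	/* packed case: nothing to repost on this completion */
	tx_comp(&b);	/* unaligned case: repost both buffers */
	return 0;
}
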
Each buffer needs to be sent only once as a cookie for on the tx ring. In packed fpdu case, only the last packet will be sent with the buffer, meaning we need to handle the case that a cookie can be NULL on tx complete. In addition, when a fpdu splits over two buffers, but there are no more fpdus on the second buffer, two buffers need to be provided as a cookie. To avoid changing the ll2 interface to provide two cookies, we introduce a piggy buf pointer, relevant for iWARP only, that holds a pointer to a second buffer that needs to be released during tx completion. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 25 +++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 1 + 2 files changed, 26 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 83b147fdacde..8b17369af9ef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1846,6 +1846,12 @@ qed_iwarp_send_fpdu(struct qed_hwfn *p_hwfn, /* vlan overload with enum iwarp_ll2_tx_queues */ tx_pkt.vlan = IWARP_LL2_ALIGNED_TX_QUEUE; + /* special case of unaligned packet and not packed, need to send + * both buffers as cookie to release. + */ + if (tcp_payload_size == fpdu->incomplete_bytes) + fpdu->mpa_buf->piggy_buf = buf; + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; /* Set first fragment to header */ @@ -2195,9 +2201,19 @@ static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle, bool b_last_fragment, bool b_last_packet) { struct qed_iwarp_ll2_buff *buffer = cookie; + struct qed_iwarp_ll2_buff *piggy; struct qed_hwfn *p_hwfn = cxt; + if (!buffer) /* can happen in packed mpa unaligned... */ + return; + /* this was originally an rx packet, post it back */ + piggy = buffer->piggy_buf; + if (piggy) { + buffer->piggy_buf = NULL; + qed_iwarp_ll2_post_rx(p_hwfn, piggy, connection_handle); + } + qed_iwarp_ll2_post_rx(p_hwfn, buffer, connection_handle); if (connection_handle == p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle) @@ -2216,6 +2232,15 @@ static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle, if (!buffer) return; + if (buffer->piggy_buf) { + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + buffer->piggy_buf->buff_size, + buffer->piggy_buf->data, + buffer->piggy_buf->data_phys_addr); + + kfree(buffer->piggy_buf); + } + dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size, buffer->data, buffer->data_phys_addr); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 858755cafd2b..58db51af26bd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -55,6 +55,7 @@ enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state); #define QED_IWARP_HANDLE_INVAL (0xff) struct qed_iwarp_ll2_buff { + struct qed_iwarp_ll2_buff *piggy_buf; void *data; dma_addr_t data_phys_addr; u32 buff_size; From c7d1d839999476aac0d7e16732722285a9c30cce Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:53 +0300 Subject: [PATCH 0749/2177] qed: Add support for MPA header being split over two tcp packets There is a special case where an MPA header is split over to tcp packets, in this case we need to wait for the next packet to get the fpdu length. 
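A minimal standalone sketch of the two-step length reassembly and of the pad/CRC arithmetic used elsewhere in this series (constants renamed for the example; not driver code):

#include <stdio.h>
#include <stdint.h>

#define MPA_LEN_FIELD_SIZE	2	/* 2-byte length prefix */
#define MPA_CRC_SIZE		4	/* trailing CRC32 digest */
#define ALIGN4(x)		(((x) + 3U) & ~3U)

/* Full on-the-wire FPDU size for a given MPA payload length: the length
 * field plus payload, padded to a multiple of 4, plus the CRC.
 */
static uint32_t fpdu_len_with_pad(uint16_t mpa_len)
{
	return ALIGN4((uint32_t)mpa_len + MPA_LEN_FIELD_SIZE) + MPA_CRC_SIZE;
}

int main(void)
{
	/* The 2-byte big-endian length field arrives split across segments. */
	uint8_t first_seg_byte = 0x01;	/* only the high byte was received */
	uint8_t second_seg_byte = 0x2a;	/* low byte arrives with next packet */
	uint16_t partial, mpa_len;

	/* Step 1: keep the high byte and mark the fpdu as incomplete. */
	partial = (uint16_t)(first_seg_byte << 8);

	/* Step 2: the next packet supplies the low byte. */
	mpa_len = (uint16_t)(partial | second_seg_byte);

	printf("mpa_len = %u, fpdu length with pad = %u\n",
	       (unsigned int)mpa_len, (unsigned int)fpdu_len_with_pad(mpa_len));
	return 0;
}
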
We use the incomplete_bytes to mark this fpdu as a "special" one which requires updating the length with the next packet Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 36 ++++++++++++++++++++- drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 6 ++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 8b17369af9ef..299494225f44 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1742,6 +1742,7 @@ enum qed_iwarp_mpa_pkt_type { QED_IWARP_MPA_PKT_UNALIGNED }; +#define QED_IWARP_INVALID_FPDU_LENGTH 0xffff #define QED_IWARP_MPA_FPDU_LENGTH_SIZE (2) #define QED_IWARP_MPA_CRC32_DIGEST_SIZE (4) @@ -1774,6 +1775,15 @@ qed_iwarp_mpa_classify(struct qed_hwfn *p_hwfn, goto out; } + /* special case of one byte remaining... + * lower byte will be read next packet + */ + if (tcp_payload_len == 1) { + fpdu->fpdu_length = *mpa_data << BITS_PER_BYTE; + pkt_type = QED_IWARP_MPA_PKT_PARTIAL; + goto out; + } + mpa_len = ntohs(*((u16 *)(mpa_data))); fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len); @@ -1802,7 +1812,9 @@ qed_iwarp_init_fpdu(struct qed_iwarp_ll2_buff *buf, fpdu->mpa_frag = buf->data_phys_addr + pkt_data->first_mpa_offset; fpdu->mpa_frag_virt = (u8 *)(buf->data) + pkt_data->first_mpa_offset; - if (tcp_payload_size < fpdu->fpdu_length) + if (tcp_payload_size == 1) + fpdu->incomplete_bytes = QED_IWARP_INVALID_FPDU_LENGTH; + else if (tcp_payload_size < fpdu->fpdu_length) fpdu->incomplete_bytes = fpdu->fpdu_length - tcp_payload_size; else fpdu->incomplete_bytes = 0; /* complete fpdu */ @@ -1810,6 +1822,27 @@ qed_iwarp_init_fpdu(struct qed_iwarp_ll2_buff *buf, fpdu->mpa_frag_len = fpdu->fpdu_length - fpdu->incomplete_bytes; } +static void +qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, u8 *mpa_data) +{ + u16 mpa_len; + + /* Update incomplete packets if needed */ + if (fpdu->incomplete_bytes == QED_IWARP_INVALID_FPDU_LENGTH) { + /* Missing lower byte is now available */ + mpa_len = fpdu->fpdu_length | *mpa_data; + fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len); + fpdu->mpa_frag_len = fpdu->fpdu_length; + /* one byte of hdr */ + fpdu->incomplete_bytes = fpdu->fpdu_length - 1; + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_ALIGN: Partial header mpa_len=%x fpdu_length=%x incomplete_bytes=%x\n", + mpa_len, fpdu->fpdu_length, fpdu->incomplete_bytes); + } +} + static int qed_iwarp_send_fpdu(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu, @@ -1960,6 +1993,7 @@ qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn, curr_pkt->first_mpa_offset += fpdu->fpdu_length; break; case QED_IWARP_MPA_PKT_UNALIGNED: + qed_iwarp_update_fpdu_length(p_hwfn, fpdu, mpa_data); rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, mpa_buf->tcp_payload_len, pkt_type); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 58db51af26bd..c58793a47774 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -69,6 +69,12 @@ struct qed_iwarp_ll2_mpa_buf { u8 placement_offset; }; +/* In some cases a fpdu will arrive with only one byte of the header, in this + * case the fpdu_length will be partial (contain only higher byte and + * incomplete bytes will contain the invalid value + */ +#define QED_IWARP_INVALID_INCOMPLETE_BYTES 
0xffff + struct qed_iwarp_fpdu { struct qed_iwarp_ll2_buff *mpa_buf; void *mpa_frag_virt; From 1e28eaad07ea1e2d6537586529e87cbc1d698ffd Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:54 +0300 Subject: [PATCH 0750/2177] qed: Add iWARP support for fpdu spanned over more than two tcp packets We continue to maintain a maximum of three buffers per fpdu, to ensure that there are enough buffers for additional unaligned mpa packets. To support this, if a fpdu is split over more than two tcp packets, we use an intermediate buffer to copy the data to the previous buffer, then we can release the data. We need an intermediate buffer as the initial buffer partial packet could be located at the end of the packet, not leaving room for additional data. This is a corner case, and will usually not be the case. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 193 ++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 1 + 2 files changed, 194 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 299494225f44..b2b1f87864ef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1420,6 +1420,7 @@ void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn) qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1); kfree(iwarp_info->mpa_bufs); kfree(iwarp_info->partial_fpdus); + kfree(iwarp_info->mpa_intermediate_buf); } int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams) @@ -1762,6 +1763,11 @@ char *pkt_type_str[] = { "QED_IWARP_MPA_PKT_UNALIGNED" }; +static int +qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + struct qed_iwarp_ll2_buff *buf); + static enum qed_iwarp_mpa_pkt_type qed_iwarp_mpa_classify(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu, @@ -1822,6 +1828,68 @@ qed_iwarp_init_fpdu(struct qed_iwarp_ll2_buff *buf, fpdu->mpa_frag_len = fpdu->fpdu_length - fpdu->incomplete_bytes; } +static int +qed_iwarp_cp_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *pkt_data, + struct qed_iwarp_ll2_buff *buf, u16 tcp_payload_size) +{ + u8 *tmp_buf = p_hwfn->p_rdma_info->iwarp.mpa_intermediate_buf; + int rc; + + /* need to copy the data from the partial packet stored in fpdu + * to the new buf, for this we also need to move the data currently + * placed on the buf. 
The assumption is that the buffer is big enough + * since fpdu_length <= mss, we use an intermediate buffer since + * we may need to copy the new data to an overlapping location + */ + if ((fpdu->mpa_frag_len + tcp_payload_size) > (u16)buf->buff_size) { + DP_ERR(p_hwfn, + "MPA ALIGN: Unexpected: buffer is not large enough for split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n", + buf->buff_size, fpdu->mpa_frag_len, + tcp_payload_size, fpdu->incomplete_bytes); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "MPA ALIGN Copying fpdu: [%p, %d] [%p, %d]\n", + fpdu->mpa_frag_virt, fpdu->mpa_frag_len, + (u8 *)(buf->data) + pkt_data->first_mpa_offset, + tcp_payload_size); + + memcpy(tmp_buf, fpdu->mpa_frag_virt, fpdu->mpa_frag_len); + memcpy(tmp_buf + fpdu->mpa_frag_len, + (u8 *)(buf->data) + pkt_data->first_mpa_offset, + tcp_payload_size); + + rc = qed_iwarp_recycle_pkt(p_hwfn, fpdu, fpdu->mpa_buf); + if (rc) + return rc; + + /* If we managed to post the buffer copy the data to the new buffer + * o/w this will occur in the next round... + */ + memcpy((u8 *)(buf->data), tmp_buf, + fpdu->mpa_frag_len + tcp_payload_size); + + fpdu->mpa_buf = buf; + /* fpdu->pkt_hdr remains as is */ + /* fpdu->mpa_frag is overridden with new buf */ + fpdu->mpa_frag = buf->data_phys_addr; + fpdu->mpa_frag_virt = buf->data; + fpdu->mpa_frag_len += tcp_payload_size; + + fpdu->incomplete_bytes -= tcp_payload_size; + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA ALIGN: split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n", + buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size, + fpdu->incomplete_bytes); + + return 0; +} + static void qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu, u8 *mpa_data) @@ -1843,6 +1911,90 @@ qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn, } } +#define QED_IWARP_IS_RIGHT_EDGE(_curr_pkt) \ + (GET_FIELD((_curr_pkt)->flags, \ + UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE)) + +/* This function is used to recycle a buffer using the ll2 drop option. It + * uses the mechanism to ensure that all buffers posted to tx before this one + * were completed. The buffer sent here will be sent as a cookie in the tx + * completion function and can then be reposted to rx chain when done. The flow + * that requires this is the flow where a FPDU splits over more than 3 tcp + * segments. In this case the driver needs to re-post a rx buffer instead of + * the one received, but driver can't simply repost a buffer it copied from + * as there is a case where the buffer was originally a packed FPDU, and is + * partially posted to FW. Driver needs to ensure FW is done with it. 
+ */ +static int +qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + struct qed_iwarp_ll2_buff *buf) +{ + struct qed_ll2_tx_pkt_info tx_pkt; + u8 ll2_handle; + int rc; + + memset(&tx_pkt, 0, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.tx_dest = QED_LL2_TX_DEST_DROP; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + buf->piggy_buf = NULL; + tx_pkt.cookie = buf; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true); + if (rc) + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't drop packet rc=%d\n", rc); + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_ALIGN: send drop tx packet [%lx, 0x%x], buf=%p, rc=%d\n", + (unsigned long int)tx_pkt.first_frag, + tx_pkt.first_frag_len, buf, rc); + + return rc; +} + +static int +qed_iwarp_win_right_edge(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu) +{ + struct qed_ll2_tx_pkt_info tx_pkt; + u8 ll2_handle; + int rc; + + memset(&tx_pkt, 0, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.tx_dest = QED_LL2_TX_DEST_LB; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + tx_pkt.enable_ip_cksum = true; + tx_pkt.enable_l4_cksum = true; + tx_pkt.calc_ip_len = true; + /* vlan overload with enum iwarp_ll2_tx_queues */ + tx_pkt.vlan = IWARP_LL2_ALIGNED_RIGHT_TRIMMED_TX_QUEUE; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true); + if (rc) + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't send right edge rc=%d\n", rc); + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_ALIGN: Sent right edge FPDU num_bds=%d [%lx, 0x%x], rc=%d\n", + tx_pkt.num_of_bds, + (unsigned long int)tx_pkt.first_frag, + tx_pkt.first_frag_len, rc); + + return rc; +} + static int qed_iwarp_send_fpdu(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu, @@ -1971,6 +2123,20 @@ qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn, mpa_buf->tcp_payload_len, mpa_buf->placement_offset); + if (!QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) { + mpa_buf->tcp_payload_len = 0; + break; + } + + rc = qed_iwarp_win_right_edge(p_hwfn, fpdu); + + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't send FPDU:reset rc=%d\n", rc); + memset(fpdu, 0, sizeof(*fpdu)); + break; + } + mpa_buf->tcp_payload_len = 0; break; case QED_IWARP_MPA_PKT_PACKED: @@ -1994,6 +2160,28 @@ qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn, break; case QED_IWARP_MPA_PKT_UNALIGNED: qed_iwarp_update_fpdu_length(p_hwfn, fpdu, mpa_data); + if (mpa_buf->tcp_payload_len < fpdu->incomplete_bytes) { + /* special handling of fpdu split over more + * than 2 segments + */ + if (QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) { + rc = qed_iwarp_win_right_edge(p_hwfn, + fpdu); + /* packet will be re-processed later */ + if (rc) + return rc; + } + + rc = qed_iwarp_cp_pkt(p_hwfn, fpdu, curr_pkt, + buf, + mpa_buf->tcp_payload_len); + if (rc) /* packet will be re-processed later */ + return rc; + + mpa_buf->tcp_payload_len = 0; + break; + } + rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, mpa_buf->tcp_payload_len, pkt_type); @@ -2510,6 +2698,11 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, goto err; iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; + + iwarp_info->mpa_intermediate_buf = kzalloc(mpa_buff_size, GFP_KERNEL); + if (!iwarp_info->mpa_intermediate_buf) + goto err; + /* The mpa_bufs array serves for 
pending RX packets received on the * mpa ll2 that don't have place on the tx ring and require later * processing. We can't fail on allocation of such a struct therefore diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index c58793a47774..c1ecd743305f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -107,6 +107,7 @@ struct qed_iwarp_info { enum mpa_rtr_type rtr_type; struct qed_iwarp_fpdu *partial_fpdus; struct qed_iwarp_ll2_mpa_buf *mpa_bufs; + u8 *mpa_intermediate_buf; u16 max_num_partial_fpdus; }; From bfd8e5a407133e58a92a38ccf3d0ba6db81f22d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Oct 2017 06:01:37 -0700 Subject: [PATCH 0751/2177] ipv6: avoid zeroing per cpu data again per cpu allocations are already zeroed, no need to clear them again. Fixes: d52d3997f843f ("ipv6: Create percpu rt6_info") Signed-off-by: Eric Dumazet Cc: Martin KaFai Lau Cc: Tejun Heo Acked-by: Tejun Heo Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 606e80325b21..3d7d4e09301e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -377,17 +377,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, if (rt) { rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); - if (rt->rt6i_pcpu) { - int cpu; - - for_each_possible_cpu(cpu) { - struct rt6_info **p; - - p = per_cpu_ptr(rt->rt6i_pcpu, cpu); - /* no one shares rt */ - *p = NULL; - } - } else { + if (!rt->rt6i_pcpu) { dst_release_immediate(&rt->dst); return NULL; } From acb40d8412572b2a79216a98acdbac10e620da1b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 9 Oct 2017 16:22:34 +0300 Subject: [PATCH 0752/2177] thunderbolt: Initialize Thunderbolt bus earlier The 0day kbuild robot reports following crash: BUG: unable to handle kernel NULL pointer dereference at 00000004 IP: tb_property_find+0xe/0x41 *pde = 00000000 Oops: 0000 [#1] CPU: 0 PID: 1 Comm: swapper Not tainted 4.14.0-rc1-00741-ge69b6c0 #412 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 task: 89c80000 task.stack: 89c7c000 EIP: tb_property_find+0xe/0x41 EFLAGS: 00210246 CPU: 0 EAX: 00000000 EBX: 7a368f47 ECX: 00000044 EDX: 7a368f47 ESI: 8851d340 EDI: 7a368f47 EBP: 89c7df0c ESP: 89c7defc DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 CR0: 80050033 CR2: 00000004 CR3: 027a2000 CR4: 00000690 Call Trace: tb_register_property_dir+0x49/0xb9 ? cdc_mbim_driver_init+0x1b/0x1b tbnet_init+0x77/0x9f ? cdc_mbim_driver_init+0x1b/0x1b do_one_initcall+0x7e/0x145 ? parse_args+0x10c/0x1b3 ? kernel_init_freeable+0xbe/0x159 kernel_init_freeable+0xd1/0x159 ? rest_init+0x110/0x110 kernel_init+0xd/0xd0 ret_from_fork+0x19/0x30 The reason is that both Thunderbolt bus and thunderbolt-net are build into the kernel image, and the latter is linked first because drivers/net comes before drivers/thunderbolt. Since both use module_init() thunderbolt-net ends up calling Thunderbolt bus functions too early triggering the above crash. Fix this by moving Thunderbolt bus initialization to happen earlier to make sure all the data structures are ready when Thunderbolt service drivers are initialized. To be on the safe side also add a check for properly initialized xdomain_property_dir to tb_register_property_dir(). Reported-by: kernel test robot Signed-off-by: Mika Westerberg Signed-off-by: David S. 
Miller --- drivers/thunderbolt/nhi.c | 2 +- drivers/thunderbolt/xdomain.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 0e79eebfcbb7..419a7a90bce0 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -1144,5 +1144,5 @@ static void __exit nhi_unload(void) tb_domain_exit(); } -module_init(nhi_init); +fs_initcall(nhi_init); module_exit(nhi_unload); diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index f2d06f6f7be9..138027537d29 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -1487,6 +1487,9 @@ int tb_register_property_dir(const char *key, struct tb_property_dir *dir) { int ret; + if (WARN_ON(!xdomain_property_dir)) + return -EAGAIN; + if (!key || strlen(key) > 8) return -EINVAL; From bb428a5c4df5f50acdce89449e476faa0b295e95 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 9 Oct 2017 16:59:48 +0300 Subject: [PATCH 0753/2177] net/mlx4: Fix endianness issue in qp context params Should take care of the endianness before assigning to params2 field. Fixes: 53f33ae295a5 ("net/mlx4_core: Port aggregation upper layer interface") Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_resources.c | 2 +- drivers/net/ethernet/mellanox/mlx4/qp.c | 2 +- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 5a47f9669621..6883ac75d37f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -53,7 +53,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, if (is_tx) { context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP) - context->params2 |= MLX4_QP_BIT_FPP; + context->params2 |= cpu_to_be32(MLX4_QP_BIT_FPP); } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 728a2fb1f5c0..203320923340 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -925,7 +925,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, context->flags &= cpu_to_be32(~(0xf << 28)); context->flags |= cpu_to_be32(states[i + 1] << 28); if (states[i + 1] != MLX4_QP_STATE_RTR) - context->params2 &= ~MLX4_QP_BIT_FPP; + context->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP); err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], context, 0, 0, qp); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index fabb53379727..04304dd894c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3185,7 +3185,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, optpar = be32_to_cpu(*(__be32 *) inbox->buf); if (slave != mlx4_master_func_num(dev)) { - qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + qp_ctx->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP); /* setting QP rate-limit is disallowed for VFs */ if (qp_ctx->rate_limit_params) return -EPERM; From b71322d9db924e1ae33b9fd00a2e37aee09df81f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 9 Oct 2017 16:59:49 +0300 Subject: [PATCH 0754/2177] 
net/mlx4_core: Fix cast warning in fw.c Fix the following SPARSE warning, in MLX4_GET() macro: drivers/net/ethernet/mellanox/mlx4/fw.c:233:9: warning: cast to restricted __be64 Fixes: 17d5ceb6e43e ("net/mlx4_core: Fix unaligned accesses") Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 16c09949afd5..634f603f941c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -57,12 +57,12 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ void *__p = (char *) (source) + (offset); \ - u64 val; \ - switch (sizeof(dest)) { \ + __be64 val; \ + switch (sizeof(dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ - case 8: val = get_unaligned((u64 *)__p); \ + case 8: val = get_unaligned((__be64 *)__p); \ (dest) = be64_to_cpu(val); break; \ default: __buggy_use_of_MLX4_GET(); \ } \ From 7ba5e7bd64a5715f624d39815f567842178afb72 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 9 Oct 2017 16:59:50 +0300 Subject: [PATCH 0755/2177] net/mlx4_en: Use __force to fix a sparse warning in TX datapath In TX data-path, we intentionally do not byte-swap, as documented in code and in the cited commit log. This fixes sparse warning: en_tx.c:720:23: warning: incorrect type in argument 1 (different base types) en_tx.c:720:23: expected unsigned int [unsigned] [usertype] en_tx.c:720:23: got restricted __be32 [usertype] doorbell_qpn Fixes: 492f5add4be8 ("net/mlx4_en: Doorbell is byteswapped in Little Endian archs") Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 8a32a8f7f9c0..2cc82dc07397 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -718,7 +718,7 @@ void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) #else iowrite32be( #endif - ring->doorbell_qpn, + (__force u32)ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL); } From 66c77ff3a049a1d182c9c78cc6286bb1ceee70db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Oct 2017 09:52:24 -0700 Subject: [PATCH 0756/2177] ipv6: addrlabel: rework ip6addrlbl_get() If we allocate skb before the lookup, we can use RCU without the need of ip6addrlbl_hold() This means that the following patch can get rid of refcounting. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrlabel.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index c6311d7108f6..f31489652c4a 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -546,38 +546,28 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, return -EINVAL; addr = nla_data(tb[IFAL_ADDRESS]); + skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + err = -ESRCH; + rcu_read_lock(); p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); - if (p && !ip6addrlbl_hold(p)) - p = NULL; lseq = net->ipv6.ip6addrlbl_table.seq; + if (p) + err = ip6addrlbl_fill(skb, p, lseq, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, + RTM_NEWADDRLABEL, 0); rcu_read_unlock(); - if (!p) { - err = -ESRCH; - goto out; - } - - skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); - if (!skb) { - ip6addrlbl_put(p); - return -ENOBUFS; - } - - err = ip6addrlbl_fill(skb, p, lseq, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - RTM_NEWADDRLABEL, 0); - - ip6addrlbl_put(p); - if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); - goto out; + } else { + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); } - - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); -out: return err; } From 2809c0957dbbd56d3a92e9ede27b19943fef7236 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Oct 2017 09:52:25 -0700 Subject: [PATCH 0757/2177] ipv6: addrlabel: remove refcounting After previous patch ("ipv6: addrlabel: rework ip6addrlbl_get()") we can remove the refcount from struct ip6addrlbl_entry, since it is no longer elevated in p6addrlbl_get() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f31489652c4a..2606d2fa3ac4 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -18,7 +18,6 @@ #include #include #include -#include #if 0 #define ADDRLABEL(x...) 
printk(x) @@ -36,7 +35,6 @@ struct ip6addrlbl_entry { int addrtype; u32 label; struct hlist_node list; - refcount_t refcnt; struct rcu_head rcu; }; @@ -111,28 +109,6 @@ static const __net_initconst struct ip6addrlbl_init_table } }; -/* Object management */ -static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) -{ - kfree(p); -} - -static void ip6addrlbl_free_rcu(struct rcu_head *h) -{ - ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); -} - -static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) -{ - return refcount_inc_not_zero(&p->refcnt); -} - -static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) -{ - if (refcount_dec_and_test(&p->refcnt)) - call_rcu(&p->rcu, ip6addrlbl_free_rcu); -} - /* Find label */ static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p, const struct in6_addr *addr, @@ -219,7 +195,6 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); - refcount_set(&newp->refcnt, 1); return newp; } @@ -243,7 +218,7 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, goto out; } hlist_replace_rcu(&p->list, &newp->list); - ip6addrlbl_put(p); + kfree_rcu(p, rcu); goto out; } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || (p->prefixlen < newp->prefixlen)) { @@ -281,7 +256,7 @@ static int ip6addrlbl_add(struct net *net, ret = __ip6addrlbl_add(net, newp, replace); spin_unlock(&net->ipv6.ip6addrlbl_table.lock); if (ret) - ip6addrlbl_free(newp); + kfree(newp); return ret; } @@ -302,7 +277,7 @@ static int __ip6addrlbl_del(struct net *net, p->ifindex == ifindex && ipv6_addr_equal(&p->prefix, prefix)) { hlist_del_rcu(&p->list); - ip6addrlbl_put(p); + kfree_rcu(p, rcu); ret = 0; break; } @@ -360,7 +335,7 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net) spin_lock(&net->ipv6.ip6addrlbl_table.lock); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { hlist_del_rcu(&p->list); - ip6addrlbl_put(p); + kfree_rcu(p, rcu); } spin_unlock(&net->ipv6.ip6addrlbl_table.lock); } From e8bfe868cf2cf364f3f52df8fa01aa6d28927aaf Mon Sep 17 00:00:00 2001 From: Ian W MORRISON Date: Sat, 7 Oct 2017 17:16:08 +1100 Subject: [PATCH 0758/2177] Bluetooth: hci_bcm: Correct context of IRQ polarity message As the overwriting of IRQ polarity to active low occurs during the driver probe using 'bt_dev_warn' to display the warning results in '(null)' being displayed for the device. This patch uses 'dev_warn' to correctly display the device in the warning instead. Signed-off-by: Ian W MORRISON Reviewed-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index ab1455e63b92..d9d0fc09acdb 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -832,7 +832,7 @@ static int bcm_acpi_probe(struct bcm_device *dev) dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table); if (dmi_id) { - bt_dev_warn(dev, "%s: Overwriting IRQ polarity to active low", + dev_warn(dev->dev, "%s: Overwriting IRQ polarity to active low", dmi_id->ident); dev->irq_active_low = true; } From b48be9978e4b21b28b7349f57574dae21378ddd5 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Sep 2017 15:19:12 -0700 Subject: [PATCH 0759/2177] i40e: fix flags declaration Since we don't yet have more than 32 flags, we'll use a u32 for both the hw_features and flag field. 
Should we gain more flags in the future, we may need to convert to a u64 or separate flags out into two fields. This was overlooked in the previous commit 2781de2134c4 ("i40e/i40evf: organize and re-number feature flags"), where the feature flag was not converted form u64 to u32. Signed-off-by: Jacob Keller Reviewed-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 18c453a3e728..7baf6d8a84dd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -424,7 +424,7 @@ struct i40e_pf { #define I40E_HW_PORT_ID_VALID BIT(17) #define I40E_HW_RESTART_AUTONEG BIT(18) - u64 flags; + u32 flags; #define I40E_FLAG_RX_CSUM_ENABLED BIT(0) #define I40E_FLAG_MSI_ENABLED BIT(1) #define I40E_FLAG_MSIX_ENABLED BIT(2) From 784548c40d6f43eff2297220ad7800dc04be03c6 Mon Sep 17 00:00:00 2001 From: Lihong Yang Date: Thu, 7 Sep 2017 08:05:46 -0400 Subject: [PATCH 0760/2177] i40e: use the safe hash table iterator when deleting mac filters This patch replaces hash_for_each function with hash_for_each_safe when calling __i40e_del_filter. The hash_for_each_safe function is the right one to use when iterating over a hash table to safely remove a hash entry. Otherwise, incorrect values may be read from freed memory. Detected by CoverityScan, CID 1402048 Read from pointer after free Signed-off-by: Lihong Yang Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 04568137e029..c062d74d21f3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2883,6 +2883,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) struct i40e_mac_filter *f; struct i40e_vf *vf; int ret = 0; + struct hlist_node *h; int bkt; /* validate the request */ @@ -2921,7 +2922,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) /* Delete all the filters for this VSI - we're going to kill it * anyway. */ - hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) __i40e_del_filter(vsi, f); spin_unlock_bh(&vsi->mac_filter_hash_lock); From c766b9af9abe897eb5480ef9ef3914fc07b815c5 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 7 Sep 2017 08:05:47 -0400 Subject: [PATCH 0761/2177] i40evf: fix mac filter removal timing issue Due to the asynchronous nature in which mac filters are added and deleted, there exists a bug in which filters are erroneously removed if removed then added again quickly. The events are as such: - filter marked for removal - same filter is re-added before watchdog that cleans up filters - we skip re-adding the filter because we have it already in the list - watchdog filter cleanup kicks off and filter is removed So when we were re-adding the same filter, it didn't actually get added because it already existed in the list, but was marked for removal and had yet to actually be removed. This patch fixes the issue by making sure that when adding a filter, if we find it already existing in our list, make sure it is not marked to be removed. 
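To illustrate the race described above, here is a minimal, self-contained C model (not driver code; the names filter, add_filter, del_filter and watchdog_cleanup are invented for the example). It shows why a re-add has to clear a pending "remove" mark when deletions are deferred to a watchdog-style cleanup pass, which is the essence of the one-line fix in the hunk below.

	#include <stdbool.h>
	#include <stddef.h>
	#include <string.h>

	struct filter {
		unsigned char mac[6];
		bool remove;		/* marked for deferred deletion */
		bool in_use;
	};

	static struct filter table[8];

	static struct filter *find_filter(const unsigned char *mac)
	{
		for (size_t i = 0; i < 8; i++)
			if (table[i].in_use && !memcmp(table[i].mac, mac, 6))
				return &table[i];
		return NULL;
	}

	/* deletion only marks the entry; a later watchdog pass drops it */
	static void del_filter(const unsigned char *mac)
	{
		struct filter *f = find_filter(mac);

		if (f)
			f->remove = true;
	}

	/* re-adding an existing entry must cancel a pending removal,
	 * otherwise the watchdog will delete a filter we still want
	 */
	static void add_filter(const unsigned char *mac)
	{
		struct filter *f = find_filter(mac);

		if (f) {
			f->remove = false;
			return;
		}
		for (size_t i = 0; i < 8; i++) {
			if (!table[i].in_use) {
				memcpy(table[i].mac, mac, 6);
				table[i].in_use = true;
				table[i].remove = false;
				return;
			}
		}
	}

	/* watchdog pass: apply the deferred deletions */
	static void watchdog_cleanup(void)
	{
		for (size_t i = 0; i < 8; i++)
			if (table[i].remove)
				table[i].in_use = false;
	}

	int main(void)
	{
		const unsigned char mac[6] = { 0x02, 0, 0, 0, 0, 1 };

		add_filter(mac);
		del_filter(mac);	/* queued for removal */
		add_filter(mac);	/* re-added before the watchdog runs */
		watchdog_cleanup();

		return find_filter(mac) ? 0 : 1;	/* 0: filter survived */
	}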
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 1d2fc898b664..f62d9565c7b5 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -880,6 +880,8 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER; + } else { + f->remove = false; } clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); From 427025592955d245997b12923111e85f07850d5f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Sep 2017 08:05:48 -0400 Subject: [PATCH 0762/2177] i40e/i40evf: fix incorrect default ITR values on driver load The ITR register expects to be programmed in units of 2 microseconds. Because of this, all of the drivers I40E_ITR_* constants are in terms of this 2 microsecond register. Unfortunately, the rx_itr_default value is expected to be programmed in microseconds. Effectively the driver defaults to an ITR value of half the expected value (in terms of minimum microseconds between interrupts). Fix this by changing the default values to be calculated using ITR_REG_TO_USEC macro which indicates that we're converting from the register units into microseconds. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 6 ++++-- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 6 ++++-- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 4 ++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 60b11fdeca2d..d4b0cc36afb1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8983,8 +8983,8 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_MSIX_ENABLED; /* Set default ITR */ - pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF; - pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF; + pf->rx_itr_default = I40E_ITR_RX_DEF; + pf->tx_itr_default = I40E_ITR_TX_DEF; /* Depending on PF configurations, it is possible that the RSS * maximum might end up larger than the available queues diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index a4e3e665a1a1..c3156aa3f709 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -38,8 +38,10 @@ #define I40E_ITR_8K 0x003E #define I40E_ITR_4K 0x007A #define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF I40E_ITR_20K -#define I40E_ITR_TX_DEF I40E_ITR_20K +#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ + I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ + I40E_ITR_DYNAMIC) #define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ #define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index d8ca802a71a9..8f9830d7649a 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ 
b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -38,8 +38,10 @@ #define I40E_ITR_8K 0x003E #define I40E_ITR_4K 0x007A #define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF I40E_ITR_20K -#define I40E_ITR_TX_DEF I40E_ITR_20K +#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ + I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ + I40E_ITR_DYNAMIC) #define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ #define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index f62d9565c7b5..5bcbd46e2f6c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1223,7 +1223,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; - tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); + tx_ring->tx_itr_setting = I40E_ITR_TX_DEF; if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1232,7 +1232,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); + rx_ring->rx_itr_setting = I40E_ITR_RX_DEF; } adapter->num_active_queues = num_active_queues; From dbadbbe235f82f13224c85d29e65cf859afaa18c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Sep 2017 08:05:49 -0400 Subject: [PATCH 0763/2177] i40e/i40evf: always set the CLEARPBA flag when re-enabling interrupts In the past we changed driver behavior to not clear the PBA when re-enabling interrupts. This change was motivated by the flawed belief that clearing the PBA would cause a lost interrupt if a receive interrupt occurred while interrupts were disabled. According to empirical testing this isn't the case. Additionally, the data sheet specifically says that we should set the CLEARPBA bit when re-enabling interrupts in a polling setup. 
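The hunks below simply OR the CLEARPBA flag back into the single re-arm write. As a self-contained picture of that idea (the bit positions here are placeholders for illustration only, not the real I40E_PFINT_DYN_CTLN_* register layout used by the driver): re-arming sets the enable bit and acknowledges any pending event in one write, rather than leaving the pending bit untouched.

	#include <stdint.h>
	#include <stdio.h>

	/* placeholder bits; the driver composes the same value from the
	 * I40E_PFINT_DYN_CTLN_* masks
	 */
	#define EX_INTENA	(1u << 0)	/* re-enable the vector */
	#define EX_CLEARPBA	(1u << 1)	/* ack pending events too */

	int main(void)
	{
		uint32_t val = EX_INTENA | EX_CLEARPBA;

		printf("re-arm write: 0x%x\n", val);
		return 0;
	}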
This reverts commit 40d72a509862 ("i40e/i40evf: don't lose interrupts") Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 5 +---- drivers/net/ethernet/intel/i40e/i40e_main.c | 11 +++++------ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 6 ++---- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 4 +--- 5 files changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 7baf6d8a84dd..8139b4ee1dc3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -949,9 +949,6 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) struct i40e_hw *hw = &pf->hw; u32 val; - /* definitely clear the PBA here, as this function is meant to - * clean out all previous interrupts AND enable the interrupt - */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); @@ -960,7 +957,7 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) } void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf); -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba); +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf); int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); int i40e_open(struct net_device *netdev); int i40e_close(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d4b0cc36afb1..00a83afb02e9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3403,15 +3403,14 @@ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf) /** * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0 * @pf: board private structure - * @clearpba: true when all pending interrupt events should be cleared **/ -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba) +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 val; val = I40E_PFINT_DYN_CTL0_INTENA_MASK | - (clearpba ? 
I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) | + I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, val); @@ -3597,7 +3596,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { - i40e_irq_dynamic_enable_icr0(pf, true); + i40e_irq_dynamic_enable_icr0(pf); } i40e_flush(&pf->hw); @@ -3746,7 +3745,7 @@ enable_intr: wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); if (!test_bit(__I40E_DOWN, pf->state)) { i40e_service_event_schedule(pf); - i40e_irq_dynamic_enable_icr0(pf, false); + i40e_irq_dynamic_enable_icr0(pf); } return ret; @@ -8455,7 +8454,7 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) i40e_flush(hw); - i40e_irq_dynamic_enable_icr0(pf, true); + i40e_irq_dynamic_enable_icr0(pf); return err; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 3bd176606c09..616abf79253e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2202,9 +2202,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - /* Don't clear PBA because that can cause lost interrupts that - * came in while we were cleaning/polling - */ + I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); @@ -2241,7 +2239,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { - i40e_irq_dynamic_enable_icr0(vsi->back, false); + i40e_irq_dynamic_enable_icr0(vsi->back); return; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index c062d74d21f3..10298956a81b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1358,7 +1358,7 @@ err_alloc: i40e_free_vfs(pf); err_iov: /* Re-enable interrupt 0. */ - i40e_irq_dynamic_enable_icr0(pf, false); + i40e_irq_dynamic_enable_icr0(pf); return ret; } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 37e1de886d48..fe817e2b6fef 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1409,9 +1409,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - /* Don't clear PBA because that can cause lost interrupts that - * came in while we were cleaning/polling - */ + I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); From 7362be9eeed01980bfa03cf49737703a0e43fe50 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Sep 2017 08:05:50 -0400 Subject: [PATCH 0764/2177] i40e: reduce lrxqthresh from 2 to 1 The lrxq thresh value tells hardware to immediately interrupt when there are fewer than N*64 packets left in the ring. Counter intuitively, empirical testing has shown that decreasing this value from 2 to 1, and thus changing from an immediate interrupt at fewer than 128 descriptors down to 64 descriptors causes a small increase in the maximum total packets per second we can receive. 
This increase occurs even when we're polling with interrupts masked, as the hardware must still handle interrupts internally even if we've disabled them in software. Also reduce the value for any VFs we allocate. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 00a83afb02e9..74875ddaeb33 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3030,7 +3030,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) if (hw->revision_id == 0) rx_ctx.lrxqthresh = 0; else - rx_ctx.lrxqthresh = 2; + rx_ctx.lrxqthresh = 1; rx_ctx.crcstrip = 1; rx_ctx.l2tsel = 1; /* this controls whether VLAN is stripped from inner headers */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 10298956a81b..83727906a386 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -639,7 +639,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, rx_ctx.dsize = 1; /* default values */ - rx_ctx.lrxqthresh = 2; + rx_ctx.lrxqthresh = 1; rx_ctx.crcstrip = 1; rx_ctx.prefena = 1; rx_ctx.l2tsel = 1; From 11f29003d6376fb123b7c3779dba49bb56fb0815 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Sep 2017 08:05:51 -0400 Subject: [PATCH 0765/2177] i40e/i40evf: bump tail only in multiples of 8 Hardware only fetches descriptors on cachelines of 8, essentially ignoring the lower 3 bits of the tail register. Thus, it is pointless to bump tail by an unaligned access as the hardware will ignore some of the new descriptors we allocated. Thus, it's ideal if we can ensure tail writes are always aligned to 8. At first, it seems like we'd already do this, since we allocate descriptors in batches which are a multiple of 8. Since we'd always increment by a multiple of 8, it seems like the value should always be aligned. However, this ignores allocation failures. If we fail to allocate a buffer, our tail register will become unaligned. Once it has become unaligned it will essentially be stuck unaligned until a buffer allocation happens to fail at the exact amount necessary to re-align it. We can do better, by simply rounding down the number of buffers we're about to allocate (cleaned_count) such that "next_to_clean + cleaned_count" is rounded to the nearest multiple of 8. We do this by calculating how far off that value is and subtracting it from the cleaned_count. This essentially defers allocation of buffers if they're going to be ignored by hardware anyways, and re-aligns our next_to_use and tail values after a failure to allocate a descriptor. This calculation ensures that we always align the tail writes in a way the hardware expects and don't unnecessarily allocate buffers which won't be fetched immediately. 
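A small standalone example of the arithmetic described above (plain C, independent of the driver; the helper name is invented): subtracting the low three bits of next_to_use + cleaned_count defers just enough buffers to keep the eventual tail write on an 8-descriptor boundary.

	#include <assert.h>

	/* keep ntu + cleaned_count a multiple of 8 */
	static unsigned int aligned_cleaned_count(unsigned int ntu,
						  unsigned int cleaned_count)
	{
		return cleaned_count - ((ntu + cleaned_count) & 0x7);
	}

	int main(void)
	{
		/* say an earlier allocation failure left next_to_use at 13 */
		unsigned int count = aligned_cleaned_count(13, 22);

		assert(count == 19);		/* 3 buffers are deferred */
		assert((13 + count) % 8 == 0);	/* 13 + 19 = 32, re-aligned */
		return 0;
	}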
Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 9 +++++++++ drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 616abf79253e..a23306f04e00 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1372,6 +1372,15 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + /* Hardware only fetches new descriptors in cache lines of 8, + * essentially ignoring the lower 3 bits of the tail register. We want + * to ensure our tail writes are aligned to avoid unnecessary work. We + * can't simply round down the cleaned count, since we might fail to + * allocate some buffers. What we really want is to ensure that + * next_to_used + cleaned_count produces an aligned value. + */ + cleaned_count -= (ntu + cleaned_count) & 0x7; + /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) return false; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index fe817e2b6fef..6806ada11490 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -711,6 +711,15 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + /* Hardware only fetches new descriptors in cache lines of 8, + * essentially ignoring the lower 3 bits of the tail register. We want + * to ensure our tail writes are aligned to avoid unnecessary work. We + * can't simply round down the cleaned count, since we might fail to + * allocate some buffers. What we really want is to ensure that + * next_to_used + cleaned_count produces an aligned value. + */ + cleaned_count -= (ntu + cleaned_count) & 0x7; + /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) return false; From 95bc2fb4c6c7d23db6dc54a3d49bdbadb13c392b Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Sep 2017 08:05:52 -0400 Subject: [PATCH 0766/2177] i40e/i40evf: bundle more descriptors when allocating buffers Double the number of descriptors we'll bundle into one tail bump when receiving. Empirical testing has shown that we reduce CPU utilization and don't appear to reduce throughput or packet rate. 32 seems to be the sweet spot, as it's half the default polling budget, so we'd essentially reduce from 4 tail writes when polling down to 2. Increasing this up to 64 appears to have negative impacts as it may become possible that we don't bump the tail each time we get polled, which could cause a long delay between returning descriptors to the hardware. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 +- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index c3156aa3f709..ff57ae451524 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -208,7 +208,7 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc, } /* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ -#define I40E_RX_BUFFER_WRITE 16 /* Must be power of 2 */ +#define I40E_RX_BUFFER_WRITE 32 /* Must be power of 2 */ #define I40E_RX_INCREMENT(r, i) \ do { \ (i)++; \ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 8f9830d7649a..8d26c85d12e1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -191,7 +191,7 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc, } /* How many Rx Buffers do we bundle into one write to the hardware ? */ -#define I40E_RX_BUFFER_WRITE 16 /* Must be power of 2 */ +#define I40E_RX_BUFFER_WRITE 32 /* Must be power of 2 */ #define I40E_RX_INCREMENT(r, i) \ do { \ (i)++; \ From 6f853d4f8e93eeace504b021e05dfdbeb4d3b40f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Sep 2017 08:05:53 -0400 Subject: [PATCH 0767/2177] i40e: allow XPS with QoS enabled Recently, the kernel gained support for enabling XPS and QoS at the same time. Thus, we no longer need to worry about the number of traffic classes when enabling XPS. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 74875ddaeb33..b26f615bed5a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2879,23 +2879,18 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi) **/ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) { - struct i40e_vsi *vsi = ring->vsi; int cpu; if (!ring->q_vector || !ring->netdev) return; - if ((vsi->tc_config.numtc <= 1) && - !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state)) { - cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); - netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), - ring->queue_index); - } + /* We only initialize XPS once, so as not to overwrite user settings */ + if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state)) + return; - /* schedule our worker thread which will take care of - * applying the new filter changes - */ - i40e_service_event_schedule(vsi->back); + cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); + netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), + ring->queue_index); } /** From b861fb762a26144593387b84cd078ef86d99cf6f Mon Sep 17 00:00:00 2001 From: Lihong Yang Date: Thu, 7 Sep 2017 08:05:54 -0400 Subject: [PATCH 0768/2177] i40e: add check for return from find_first_bit call The find_first_bit function will return the size passed to search if the first set bit is not found. This patch adds the check in case that happens as the return value would be used as the index in an array and that would have caused the out-of-bounds access. 
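To illustrate the convention being guarded against (a standalone sketch, not the kernel implementation; my_find_first_bit is a stand-in for the real helper): a bit scan that finds nothing returns the size it was given, so that value must be rejected before it is used as an array index.

	#include <stdio.h>

	#define MAP_BITS 16

	/* returns MAP_BITS when no bit is set, mirroring find_first_bit() */
	static unsigned int my_find_first_bit(unsigned long map)
	{
		for (unsigned int i = 0; i < MAP_BITS; i++)
			if (map & (1UL << i))
				return i;
		return MAP_BITS;
	}

	int main(void)
	{
		unsigned long linklistmap = 0;	/* no queues configured */
		unsigned int next_q = my_find_first_bit(linklistmap);

		if (next_q == MAP_BITS) {	/* the added bounds check */
			puts("no bit set, skip the linked-list setup");
			return 0;
		}
		printf("first set bit: %u\n", next_q);
		return 0;
	}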
Detected by CoverityScan, CID 1295969 Out-of-bounds access Signed-off-by: Lihong Yang Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 83727906a386..125dcd1d2233 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -306,6 +306,10 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, next_q = find_first_bit(&linklistmap, (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)); + if (unlikely(next_q == (I40E_MAX_VSI_QP * + I40E_VIRTCHNL_SUPPORTED_QTYPES))) + goto irq_list_done; + vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); From 4988410f8d3a6fa04381072e2406a1d3979ffb95 Mon Sep 17 00:00:00 2001 From: Jayaprakash Shanmugam Date: Thu, 7 Sep 2017 08:05:55 -0400 Subject: [PATCH 0769/2177] i40e: Retry AQC GetPhyAbilities to overcome I2CRead hangs - When the I2C is busy, the PHY reads are delayed. The firmware will return EGAIN in these cases with an expectation that the SW will trigger the reads again - This patch retries the operation for a maximum period of 500ms Signed-off-by: Jayaprakash Shanmugam Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_common.c | 42 +++++++++++++------ drivers/net/ethernet/intel/i40e/i40e_type.h | 3 ++ drivers/net/ethernet/intel/i40evf/i40e_type.h | 3 ++ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 60542beda7ad..53aad378d49c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1567,30 +1567,46 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw, struct i40e_aq_desc desc; i40e_status status; u16 abilities_size = sizeof(struct i40e_aq_get_phy_abilities_resp); + u16 max_delay = I40E_MAX_PHY_TIMEOUT, total_delay = 0; if (!abilities) return I40E_ERR_PARAM; - i40e_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_get_phy_abilities); + do { + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_get_phy_abilities); - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); - if (abilities_size > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); + if (abilities_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - if (qualified_modules) - desc.params.external.param0 |= + if (qualified_modules) + desc.params.external.param0 |= cpu_to_le32(I40E_AQ_PHY_REPORT_QUALIFIED_MODULES); - if (report_init) - desc.params.external.param0 |= + if (report_init) + desc.params.external.param0 |= cpu_to_le32(I40E_AQ_PHY_REPORT_INITIAL_VALUES); - status = i40e_asq_send_command(hw, &desc, abilities, abilities_size, - cmd_details); + status = i40e_asq_send_command(hw, &desc, abilities, + abilities_size, cmd_details); - if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) - status = I40E_ERR_UNKNOWN_PHY; + if (status) + break; + + if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) { + status = I40E_ERR_UNKNOWN_PHY; + break; + } else if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) { + usleep_range(1000, 2000); + total_delay++; + status = I40E_ERR_TIMEOUT; + } 
+ } while ((hw->aq.asq_last_status != I40E_AQ_RC_OK) && + (total_delay < max_delay)); + + if (status) + return status; if (report_init) { if (hw->mac.type == I40E_MAC_XL710 && diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 4b32b1d38a66..0410fcbdbb94 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -46,6 +46,9 @@ /* Max default timeout in ms, */ #define I40E_MAX_NVM_TIMEOUT 18000 +/* Max timeout in ms for the phy to respond */ +#define I40E_MAX_PHY_TIMEOUT 500 + /* Switch from ms to the 1usec global time (this is the GTIME resolution) */ #define I40E_MS_TO_GTIME(time) ((time) * 1000) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 9364b67fff9c..213b773dfad6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -46,6 +46,9 @@ /* Max default timeout in ms, */ #define I40E_MAX_NVM_TIMEOUT 18000 +/* Max timeout in ms for the phy to respond */ +#define I40E_MAX_PHY_TIMEOUT 500 + /* Switch from ms to the 1usec global time (this is the GTIME resolution) */ #define I40E_MS_TO_GTIME(time) ((time) * 1000) From 9bcc07f0651b3078f1c3164c710f72a558665345 Mon Sep 17 00:00:00 2001 From: Lihong Yang Date: Thu, 7 Sep 2017 08:05:56 -0400 Subject: [PATCH 0770/2177] i40e: use a local variable instead of calculating multiple times The computed result of I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES is used more than three times in function i40e_config_irq_link_list. Simply declare a local variable to store it to improve readability. Signed-off-by: Lihong Yang Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 125dcd1d2233..0c4fa225c7be 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -273,7 +273,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, struct i40e_hw *hw = &pf->hw; u16 vsi_queue_id, pf_queue_id; enum i40e_queue_type qtype; - u16 next_q, vector_id; + u16 next_q, vector_id, size; u32 reg, reg_idx; u16 itr_idx = 0; @@ -303,11 +303,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, vsi_queue_id + 1)); } - next_q = find_first_bit(&linklistmap, - (I40E_MAX_VSI_QP * - I40E_VIRTCHNL_SUPPORTED_QTYPES)); - if (unlikely(next_q == (I40E_MAX_VSI_QP * - I40E_VIRTCHNL_SUPPORTED_QTYPES))) + size = I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES; + next_q = find_first_bit(&linklistmap, size); + if (unlikely(next_q == size)) goto irq_list_done; vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; @@ -317,7 +315,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, wr32(hw, reg_idx, reg); - while (next_q < (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) { + while (next_q < size) { switch (qtype) { case I40E_QUEUE_TYPE_RX: reg_idx = I40E_QINT_RQCTL(pf_queue_id); @@ -331,12 +329,8 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, break; } - next_q = find_next_bit(&linklistmap, - (I40E_MAX_VSI_QP * - I40E_VIRTCHNL_SUPPORTED_QTYPES), - next_q + 1); - if (next_q < - (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) { + next_q = find_next_bit(&linklistmap, size, 
next_q + 1); + if (next_q < size) { vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, From 3d7d7a86ec6ea5abaea30194eeb175e2a3d0bdc7 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Sat, 16 Sep 2017 05:49:48 +0300 Subject: [PATCH 0771/2177] i40e: fix a typo This patch fixes a typo in i40e_vsi_alloc_arrays() documentation. The first parameter name should be "vsi" instead of "type". Signed-off-by: Rami Rosen Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b26f615bed5a..4de52001a2b9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7688,7 +7688,7 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) /** * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi - * @type: VSI pointer + * @vsi: VSI pointer * @alloc_qvectors: a bool to specify if q_vectors need to be allocated. * * On error: returns error code (negative) From 2c4d36b7087538704fc9e3464d185dcc4d04e863 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 6 Sep 2017 10:11:39 +0200 Subject: [PATCH 0772/2177] i40e: Avoid some useless variables and initializers in NVM functions Fixes: 09f79fd49d94 ("i40e: avoid NVM acquire deadlock during NVM update") Signed-off-by: Stefano Brivio Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_nvm.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 57505b1df98d..151d9cfb6ea4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -311,13 +311,10 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset, static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, u16 *data) { - i40e_status ret_code = 0; - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) - ret_code = i40e_read_nvm_word_aq(hw, offset, data); - else - ret_code = i40e_read_nvm_word_srctl(hw, offset, data); - return ret_code; + return i40e_read_nvm_word_aq(hw, offset, data); + + return i40e_read_nvm_word_srctl(hw, offset, data); } /** @@ -331,7 +328,7 @@ static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw, i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, u16 *data) { - i40e_status ret_code = 0; + i40e_status ret_code; ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (ret_code) @@ -446,13 +443,10 @@ static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, u16 *words, u16 *data) { - i40e_status ret_code = 0; - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) - ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data); - else - ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); - return ret_code; + return i40e_read_nvm_buffer_aq(hw, offset, words, data); + + return i40e_read_nvm_buffer_srctl(hw, offset, words, data); } /** From d0e60206bea2dec46c0a28fd6b116646aa67c5ae Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 9 Oct 2017 17:17:26 -0700 Subject: [PATCH 0773/2177] ipv6: use rcu_dereference_bh() in ipv6_route_seq_next() This patch replaces rcu_deference() with rcu_dereference_bh() in ipv6_route_seq_next() to avoid 
the following warning: [ 19.431685] WARNING: suspicious RCU usage [ 19.433451] 4.14.0-rc3-00914-g66f5d6c #118 Not tainted [ 19.435509] ----------------------------- [ 19.437267] net/ipv6/ip6_fib.c:2259 suspicious rcu_dereference_check() usage! [ 19.440790] [ 19.440790] other info that might help us debug this: [ 19.440790] [ 19.444734] [ 19.444734] rcu_scheduler_active = 2, debug_locks = 1 [ 19.447757] 2 locks held by odhcpd/3720: [ 19.449480] #0: (&p->lock){+.+.}, at: [] seq_read+0x3c/0x333 [ 19.452720] #1: (rcu_read_lock_bh){....}, at: [] ipv6_route_seq_start+0x5/0xfd [ 19.456323] [ 19.456323] stack backtrace: [ 19.458812] CPU: 0 PID: 3720 Comm: odhcpd Not tainted 4.14.0-rc3-00914-g66f5d6c #118 [ 19.462042] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 19.465414] Call Trace: [ 19.466788] dump_stack+0x86/0xc0 [ 19.468358] lockdep_rcu_suspicious+0xea/0xf3 [ 19.470183] ipv6_route_seq_next+0x71/0x164 [ 19.471963] seq_read+0x244/0x333 [ 19.473522] proc_reg_read+0x48/0x67 [ 19.475152] ? proc_reg_write+0x67/0x67 [ 19.476862] __vfs_read+0x26/0x10b [ 19.478463] ? __might_fault+0x37/0x84 [ 19.480148] vfs_read+0xba/0x146 [ 19.481690] SyS_read+0x51/0x8e [ 19.483197] do_int80_syscall_32+0x66/0x15a [ 19.484969] entry_INT80_compat+0x32/0x50 [ 19.486707] RIP: 0023:0xf7f0be8e [ 19.488244] RSP: 002b:00000000ffa75d04 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 [ 19.491431] RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 0000000008056068 [ 19.493886] RDX: 0000000000001000 RSI: 0000000008056008 RDI: 0000000000001000 [ 19.496331] RBP: 00000000000001ff R08: 0000000000000000 R09: 0000000000000000 [ 19.498768] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 19.501217] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Reported-by: Xiaolong Ye Signed-off-by: Wei Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 52a29ba32928..c2ecd5ec638a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2262,7 +2262,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!v) goto iter_table; - n = rcu_dereference(((struct rt6_info *)v)->dst.rt6_next); + n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next); if (n) { ++*pos; return n; From cf4c950b87ee2f547ad3abd3aca6ae3f3eb3443f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 14:30:52 -0700 Subject: [PATCH 0774/2177] once: switch to new jump label API Switch the DO_ONCE() macro from the deprecated jump label API to the new one. The new one is more readable, and for DO_ONCE() it also makes the generated code more icache-friendly: now the one-time initialization code is placed out-of-line at the jump target, rather than at the inline fallthrough case. Acked-by: Hannes Frederic Sowa Signed-off-by: Eric Biggers Signed-off-by: David S. 
Miller --- include/linux/once.h | 6 +++--- lib/once.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/once.h b/include/linux/once.h index 9c98aaa87cbc..724724918e8b 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -5,7 +5,7 @@ #include bool __do_once_start(bool *done, unsigned long *flags); -void __do_once_done(bool *done, struct static_key *once_key, +void __do_once_done(bool *done, struct static_key_true *once_key, unsigned long *flags); /* Call a function exactly once. The idea of DO_ONCE() is to perform @@ -38,8 +38,8 @@ void __do_once_done(bool *done, struct static_key *once_key, ({ \ bool ___ret = false; \ static bool ___done = false; \ - static struct static_key ___once_key = STATIC_KEY_INIT_TRUE; \ - if (static_key_true(&___once_key)) { \ + static DEFINE_STATIC_KEY_TRUE(___once_key); \ + if (static_branch_unlikely(&___once_key)) { \ unsigned long ___flags; \ ___ret = __do_once_start(&___done, &___flags); \ if (unlikely(___ret)) { \ diff --git a/lib/once.c b/lib/once.c index 05c8604627eb..831c5a6b0bb2 100644 --- a/lib/once.c +++ b/lib/once.c @@ -5,7 +5,7 @@ struct once_work { struct work_struct work; - struct static_key *key; + struct static_key_true *key; }; static void once_deferred(struct work_struct *w) @@ -14,11 +14,11 @@ static void once_deferred(struct work_struct *w) work = container_of(w, struct once_work, work); BUG_ON(!static_key_enabled(work->key)); - static_key_slow_dec(work->key); + static_branch_disable(work->key); kfree(work); } -static void once_disable_jump(struct static_key *key) +static void once_disable_jump(struct static_key_true *key) { struct once_work *w; @@ -51,7 +51,7 @@ bool __do_once_start(bool *done, unsigned long *flags) } EXPORT_SYMBOL(__do_once_start); -void __do_once_done(bool *done, struct static_key *once_key, +void __do_once_done(bool *done, struct static_key_true *once_key, unsigned long *flags) __releases(once_lock) { From ceaa001a170e43608854d5290a48064f57b565ed Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 4 Oct 2017 17:03:12 -0700 Subject: [PATCH 0775/2177] openvswitch: Add erspan tunnel support. Add erspan netlink interface for OVS. Signed-off-by: William Tu Cc: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + net/openvswitch/flow_netlink.c | 51 +++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 156ee4cab82e..efdbfbfd3ee2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -359,6 +359,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */ OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ OVS_TUNNEL_KEY_ATTR_PAD, + OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* be32 ERSPAN index. */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index e8eb427ce6d1..fc0ca9a89b8e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "flow_netlink.h" @@ -319,7 +320,8 @@ size_t ovs_tun_key_attr_size(void) * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 
*/ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ - + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ + + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */ + + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ } size_t ovs_key_attr_size(void) @@ -371,6 +373,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] .next = ovs_vxlan_ext_key_lens }, [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ @@ -593,6 +596,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, return 0; } +static int erspan_tun_opt_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + unsigned long opt_key_offset; + struct erspan_metadata opts; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + memset(&opts, 0, sizeof(opts)); + opts.index = nla_get_be32(attr); + + /* Index has only 20-bit */ + if (ntohl(opts.index) & ~INDEX_MASK) { + OVS_NLERR(log, "ERSPAN index number %x too large.", + ntohl(opts.index)); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask); + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + + return 0; +} + static int ip_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -700,6 +730,19 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_PAD: break; + case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = erspan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_ERSPAN_OPT; + opts_type = type; + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); @@ -824,6 +867,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, else if (output->tun_flags & TUNNEL_VXLAN_OPT && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_ERSPAN_OPT && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, + ((struct erspan_metadata *)tun_opts)->index)) + return -EMSGSIZE; } return 0; @@ -2195,6 +2242,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: break; + case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + break; } }; From 1e6f74536de08b5e50cf0e37e735911c2cef7c62 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 6 Oct 2017 13:22:31 -0400 Subject: [PATCH 0776/2177] vhost_net: do not stall on zerocopy depletion Vhost-net has a hard limit on the number of zerocopy skbs in flight. When reached, transmission stalls. Stalls cause latency, as well as head-of-line blocking of other flows that do not use zerocopy. Instead of stalling, revert to copy-based transmission. Tested by sending two udp flows from guest to host, one with payload of VHOST_GOODCOPY_LEN, the other too small for zerocopy (1B). The large flow is redirected to a netem instance with 1MBps rate limit and deep 1000 entry queue. 
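To make the fallback condition concrete before the ifb/netem test recipe that follows, here is a minimal userspace sketch of the pending-descriptor accounting that decides when zerocopy transmission gives way to copying. UIO_MAXIOV, VHOST_MAX_PEND and the queue size used below are stand-in values chosen only for this sketch, not taken verbatim from the driver.

#include <stdio.h>

#define UIO_MAXIOV     1024   /* size of the circular upend index space (assumed) */
#define VHOST_MAX_PEND 128    /* historic hard limit (assumed) */

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Number of zerocopy buffers still awaiting DMA completion. */
static unsigned int pending(unsigned int upend_idx, unsigned int done_idx)
{
	return (upend_idx + UIO_MAXIOV - done_idx) % UIO_MAXIOV;
}

/* Mirrors the reworked maxpend test: instead of stalling transmission,
 * the caller falls back to copy-based TX when this returns nonzero. */
static int exceeds_maxpend(unsigned int upend_idx, unsigned int done_idx,
			   unsigned int vq_num)
{
	return pending(upend_idx, done_idx) >
	       min_u(VHOST_MAX_PEND, vq_num >> 2);
}

int main(void)
{
	/* vq_num == 256 -> threshold is min(128, 256 / 4) == 64 */
	printf("pending=%u fallback=%d\n", pending(10, 0),
	       exceeds_maxpend(10, 0, 256));
	printf("pending=%u fallback=%d\n", pending(100, 0),
	       exceeds_maxpend(100, 0, 256));
	/* upend_idx wrapped around the end of the index space */
	printf("pending=%u fallback=%d\n", pending(5, 1000),
	       exceeds_maxpend(5, 1000, 256));
	return 0;
}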
modprobe ifb ip link set dev ifb0 up tc qdisc add dev ifb0 root netem limit 1000 rate 1MBit tc qdisc add dev tap0 ingress tc filter add dev tap0 parent ffff: protocol ip \ u32 match ip dport 8000 0xffff \ action mirred egress redirect dev ifb0 Before the delay, both flows process around 80K pps. With the delay, before this patch, both process around 400. After this patch, the large flow is still rate limited, while the small reverts to its original rate. See also discussion in the first link, below. Without rate limiting, {1, 10, 100}x TCP_STREAM tests continued to send at 100% zerocopy. The limit in vhost_exceeds_maxpend must be carefully chosen. With vq->num >> 1, the flows remain correlated. This value happens to correspond to VHOST_MAX_PENDING for vq->num == 256. Allow smaller fractions and ensure correctness also for much smaller values of vq->num, by testing the min() of both explicitly. See also the discussion in the second link below. Changes v1 -> v2 - replaced min with typed min_t - avoid unnecessary whitespace change Link:http://lkml.kernel.org/r/CAF=yD-+Wk9sc9dXMUq1+x_hh=3ThTXa6BnZkygP3tgVpjbp93g@mail.gmail.com Link:http://lkml.kernel.org/r/20170819064129.27272-1-den@klaipeden.com Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/vhost/net.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 58585ec8699e..68677d930e20 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -436,8 +436,8 @@ static bool vhost_exceeds_maxpend(struct vhost_net *net) struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; - return (nvq->upend_idx + vq->num - VHOST_MAX_PEND) % UIO_MAXIOV - == nvq->done_idx; + return (nvq->upend_idx + UIO_MAXIOV - nvq->done_idx) % UIO_MAXIOV > + min_t(unsigned int, VHOST_MAX_PEND, vq->num >> 2); } /* Expects to be always run from workqueue - which acts as @@ -480,11 +480,6 @@ static void handle_tx(struct vhost_net *net) if (zcopy) vhost_zerocopy_signal_used(net, vq); - /* If more outstanding DMAs, queue the work. - * Handle upend_idx wrap around - */ - if (unlikely(vhost_exceeds_maxpend(net))) - break; head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, ARRAY_SIZE(vq->iov), @@ -519,8 +514,7 @@ static void handle_tx(struct vhost_net *net) len = msg_data_left(&msg); zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN - && (nvq->upend_idx + 1) % UIO_MAXIOV != - nvq->done_idx + && !vhost_exceeds_maxpend(net) && vhost_net_tx_select_zcopy(net); /* use msg_control to pass vhost zerocopy ubuf info to skb */ From 812b5ca7d376e7e008ac0c897d1ef94eb05ddc3b Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Sun, 8 Oct 2017 13:40:08 +0000 Subject: [PATCH 0777/2177] Add a driver for Renesas uPD60620 and uPD60620A PHYs Signed-off-by: Bernd Edlinger Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 5 ++ drivers/net/phy/Makefile | 1 + drivers/net/phy/uPD60620.c | 109 +++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 drivers/net/phy/uPD60620.c diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index cd931cf9dcc2..e2cf8ffc5c6f 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -366,6 +366,11 @@ config REALTEK_PHY ---help--- Supports the Realtek 821x PHY. +config RENESAS_PHY + tristate "Driver for Renesas PHYs" + ---help--- + Supports the Renesas PHYs uPD60620 and uPD60620A. 
+ config ROCKCHIP_PHY tristate "Driver for Rockchip Ethernet PHYs" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 416df92fbf4f..1404ad3b77c7 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_MICROSEMI_PHY) += mscc.o obj-$(CONFIG_NATIONAL_PHY) += national.o obj-$(CONFIG_QSEMI_PHY) += qsemi.o obj-$(CONFIG_REALTEK_PHY) += realtek.o +obj-$(CONFIG_RENESAS_PHY) += uPD60620.o obj-$(CONFIG_ROCKCHIP_PHY) += rockchip.o obj-$(CONFIG_SMSC_PHY) += smsc.o obj-$(CONFIG_STE10XP) += ste10Xp.o diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c new file mode 100644 index 000000000000..96b33475ea5e --- /dev/null +++ b/drivers/net/phy/uPD60620.c @@ -0,0 +1,109 @@ +/* + * Driver for the Renesas PHY uPD60620. + * + * Copyright (C) 2015 Softing Industrial Automation GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include +#include +#include + +#define UPD60620_PHY_ID 0xb8242824 + +/* Extended Registers and values */ +/* PHY Special Control/Status */ +#define PHY_PHYSCR 0x1F /* PHY.31 */ +#define PHY_PHYSCR_10MB 0x0004 /* PHY speed = 10mb */ +#define PHY_PHYSCR_100MB 0x0008 /* PHY speed = 100mb */ +#define PHY_PHYSCR_DUPLEX 0x0010 /* PHY Duplex */ + +/* PHY Special Modes */ +#define PHY_SPM 0x12 /* PHY.18 */ + +/* Init PHY */ + +static int upd60620_config_init(struct phy_device *phydev) +{ + /* Enable support for passive HUBs (could be a strap option) */ + /* PHYMODE: All speeds, HD in parallel detect */ + return phy_write(phydev, PHY_SPM, 0x0180 | phydev->mdio.addr); +} + +/* Get PHY status from common registers */ + +static int upd60620_read_status(struct phy_device *phydev) +{ + int phy_state; + + /* Read negotiated state */ + phy_state = phy_read(phydev, MII_BMSR); + if (phy_state < 0) + return phy_state; + + phydev->link = 0; + phydev->lp_advertising = 0; + phydev->pause = 0; + phydev->asym_pause = 0; + + if (phy_state & (BMSR_ANEGCOMPLETE | BMSR_LSTATUS)) { + phy_state = phy_read(phydev, PHY_PHYSCR); + if (phy_state < 0) + return phy_state; + + if (phy_state & (PHY_PHYSCR_10MB | PHY_PHYSCR_100MB)) { + phydev->link = 1; + phydev->speed = SPEED_10; + phydev->duplex = DUPLEX_HALF; + + if (phy_state & PHY_PHYSCR_100MB) + phydev->speed = SPEED_100; + if (phy_state & PHY_PHYSCR_DUPLEX) + phydev->duplex = DUPLEX_FULL; + + phy_state = phy_read(phydev, MII_LPA); + if (phy_state < 0) + return phy_state; + + phydev->lp_advertising + = mii_lpa_to_ethtool_lpa_t(phy_state); + + if (phydev->duplex == DUPLEX_FULL) { + if (phy_state & LPA_PAUSE_CAP) + phydev->pause = 1; + if (phy_state & LPA_PAUSE_ASYM) + phydev->asym_pause = 1; + } + } + } + return 0; +} + +MODULE_DESCRIPTION("Renesas uPD60620 PHY driver"); +MODULE_AUTHOR("Bernd Edlinger "); +MODULE_LICENSE("GPL"); + +static struct phy_driver upd60620_driver[1] = { { + .phy_id = UPD60620_PHY_ID, + .phy_id_mask = 0xfffffffe, + .name = "Renesas uPD60620", + .features = PHY_BASIC_FEATURES, + .flags = 0, + .config_init = upd60620_config_init, + .config_aneg = genphy_config_aneg, + .read_status = upd60620_read_status, +} }; + +module_phy_driver(upd60620_driver); + +static struct mdio_device_id __maybe_unused upd60620_tbl[] = { + { UPD60620_PHY_ID, 0xfffffffe }, + { } +}; + +MODULE_DEVICE_TABLE(mdio, upd60620_tbl); From 
4294625e029028854596865be401b9c5c1f906ef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Oct 2017 10:01:52 +0200 Subject: [PATCH 0778/2177] Bluetooth: avoid silent hci_bcm ACPI PM regression The hci_bcm platform-device hack which was used to implement power management for ACPI devices is being replaced by a serial-device-bus implementation. Unfortunately, when the corresponding change to the ACPI code lands (a change that will stop enumerating and registering the serial-device-node child as a platform device) PM will break silently unless serdev TTY-port controller support has been enabled. Specifically, hciattach (btattach) would still succeed, but power management would no longer work. Although this is strictly a runtime dependency, let's make the driver depend on SERIAL_DEV_CTRL_TTYPORT, which is the particular serdev controller implementation used by the ACPI devices currently managed by this driver, to avoid breaking PM without anyone noticing. Note that the driver already has a (build-time) dependency on the serdev bus code. Signed-off-by: Johan Hovold Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index fae5a74dc737..082e1c7329de 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -169,6 +169,7 @@ config BT_HCIUART_BCM bool "Broadcom protocol support" depends on BT_HCIUART depends on BT_HCIUART_SERDEV + depends on (!ACPI || SERIAL_DEV_CTRL_TTYPORT) select BT_HCIUART_H4 select BT_BCM help From fc09785de0a364427a5df63d703bae9a306ed116 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 26 Sep 2017 17:11:33 +0200 Subject: [PATCH 0779/2177] p54: don't unregister leds when they are not initialized ieee80211_register_hw() in p54_register_common() may fail and leds won't get initialized. Currently p54_unregister_common() doesn't check that and always calls p54_unregister_leds(). The fix is to check priv->registered flag before calling p54_unregister_leds(). Found by syzkaller. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. 
CPU: 1 PID: 1404 Comm: kworker/1:1 Not tainted 4.14.0-rc1-42251-gebb2c2437d80-dirty #205 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x292/0x395 lib/dump_stack.c:52 register_lock_class+0x6c4/0x1a00 kernel/locking/lockdep.c:769 __lock_acquire+0x27e/0x4550 kernel/locking/lockdep.c:3385 lock_acquire+0x259/0x620 kernel/locking/lockdep.c:4002 flush_work+0xf0/0x8c0 kernel/workqueue.c:2886 __cancel_work_timer+0x51d/0x870 kernel/workqueue.c:2961 cancel_delayed_work_sync+0x1f/0x30 kernel/workqueue.c:3081 p54_unregister_leds+0x6c/0xc0 drivers/net/wireless/intersil/p54/led.c:160 p54_unregister_common+0x3d/0xb0 drivers/net/wireless/intersil/p54/main.c:856 p54u_disconnect+0x86/0x120 drivers/net/wireless/intersil/p54/p54usb.c:1073 usb_unbind_interface+0x21c/0xa90 drivers/usb/core/driver.c:423 __device_release_driver drivers/base/dd.c:861 device_release_driver_internal+0x4f4/0x5c0 drivers/base/dd.c:893 device_release_driver+0x1e/0x30 drivers/base/dd.c:918 bus_remove_device+0x2f4/0x4b0 drivers/base/bus.c:565 device_del+0x5c4/0xab0 drivers/base/core.c:1985 usb_disable_device+0x1e9/0x680 drivers/usb/core/message.c:1170 usb_disconnect+0x260/0x7a0 drivers/usb/core/hub.c:2124 hub_port_connect drivers/usb/core/hub.c:4754 hub_port_connect_change drivers/usb/core/hub.c:5009 port_event drivers/usb/core/hub.c:5115 hub_event+0x1318/0x3740 drivers/usb/core/hub.c:5195 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119 process_scheduled_works kernel/workqueue.c:2179 worker_thread+0xb2b/0x1850 kernel/workqueue.c:2255 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Cc: stable@vger.kernel.org Signed-off-by: Andrey Konovalov Acked-by: Christian Lamparter Signed-off-by: Kalle Valo --- drivers/net/wireless/intersil/p54/main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c index d5a3bf91a03e..ab6d39e12069 100644 --- a/drivers/net/wireless/intersil/p54/main.c +++ b/drivers/net/wireless/intersil/p54/main.c @@ -852,12 +852,11 @@ void p54_unregister_common(struct ieee80211_hw *dev) { struct p54_common *priv = dev->priv; -#ifdef CONFIG_P54_LEDS - p54_unregister_leds(priv); -#endif /* CONFIG_P54_LEDS */ - if (priv->registered) { priv->registered = false; +#ifdef CONFIG_P54_LEDS + p54_unregister_leds(priv); +#endif /* CONFIG_P54_LEDS */ ieee80211_unregister_hw(dev); } From 1bdb68b2e841a94b4331046ad6a55e94f214dbbf Mon Sep 17 00:00:00 2001 From: Ian W MORRISON Date: Sat, 7 Oct 2017 17:15:25 +1100 Subject: [PATCH 0780/2177] Bluetooth: hci_bcm: Add support for MINIX Z83-4 based devices The MINIX NEO Z83-4 and MINIX NEO Z83-4 Pro devices use an AP6255 chip for wifi and bluetooth. Bluetooth requires an ACPI device id of BCM2EA4 with BCM4345 rev C0 firmware. 
This patch adds the device id and to use trigger type IRQF_TRIGGER_FALLING as defined by 'GpioInt' in the ACPI DSDT table: Device (BLT0) { Name (_HID, "BCM2EA4") // _HID: Hardware ID Method (_STA, 0, NotSerialized) // _STA: Status { Return (0x0F) } Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings { Name (UBUF, ResourceTemplate () { UartSerialBusV2 (0x0001C200, DataBitsEight, StopBitsOne, 0xFC, LittleEndian, ParityTypeNone, FlowControlHardware, 0x0020, 0x0020, "\\_SB.PCI0.URT1", 0x00, ResourceConsumer, , Exclusive, ) GpioInt (Level, ActiveLow, Exclusive, PullNone, 0x0000, "\\_SB.GPO1", 0x00, ResourceConsumer, , ) { // Pin list 0x0005 } GpioIo (Exclusive, PullDefault, 0x0000, 0x0000, IoRestrictionOutputOnly, "\\_SB.GPO1", 0x00, ResourceConsumer, , ) { // Pin list 0x0007 } GpioIo (Exclusive, PullDefault, 0x0000, 0x0000, IoRestrictionOutputOnly, "\\_SB.GPO1", 0x00, ResourceConsumer, , ) { // Pin list 0x0004 } }) Return (UBUF) /* \_SB_.PCI0.URT1.BLT0._CRS.UBUF */ } } Signed-off-by: Ian W MORRISON Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index d9d0fc09acdb..16c2eaaaf72b 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -726,6 +726,13 @@ static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"), }, }, + { + .ident = "MINIX Z83-4", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "MINIX"), + DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), + }, + }, { } }; @@ -934,6 +941,7 @@ static const struct acpi_device_id bcm_acpi_match[] = { { "BCM2E7C", (kernel_ulong_t)&acpi_bcm_int_last_gpios }, { "BCM2E95", (kernel_ulong_t)&acpi_bcm_int_first_gpios }, { "BCM2E96", (kernel_ulong_t)&acpi_bcm_int_first_gpios }, + { "BCM2EA4", (kernel_ulong_t)&acpi_bcm_int_first_gpios }, { }, }; MODULE_DEVICE_TABLE(acpi, bcm_acpi_match); From 18a39b9ab27026b5ba55f135648635ed3a870e44 Mon Sep 17 00:00:00 2001 From: Ian W MORRISON Date: Fri, 6 Oct 2017 18:34:18 +1100 Subject: [PATCH 0781/2177] Bluetooth: btbcm: Add support for MINIX Z83-4 based devices The MINIX NEO Z83-4 and MINIX NEO Z83-4 Pro devices use an AP6255 chip for wifi and bluetooth. Bluetooth requires an ACPI device id of BCM2EA4 with BCM4345 rev C0 firmware. This patch defines the firmware subversion. Signed-off-by: Ian W MORRISON Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btbcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index cc4bdefa6648..06e8bed4f5eb 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -327,6 +327,7 @@ static const struct { { 0x4406, "BCM4324B3" }, /* 002.004.006 */ { 0x610c, "BCM4354" }, /* 003.001.012 */ { 0x2209, "BCM43430A1" }, /* 001.002.009 */ + { 0x6119, "BCM4345C0" }, /* 003.001.025 */ { } }; From 64e79426c2041b931c053a11743277a4f7f279c4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Sep 2017 10:52:34 +0300 Subject: [PATCH 0782/2177] rtlwifi: silence underflow warning My static checker complains that we have an upper bound but no lower bound. I suspect neither are really required but it doesn't hurt to add a check for negatives. 
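In isolation, the two-sided length check added here looks like the sketch below; the struct layout and the MAX_WOL_PATTERN_SIZE value are assumptions made only for this standalone example. The point is that a signed length must be checked against both bounds before it is handed to memcpy(), where a negative value would be converted to a huge size_t.

#include <stdio.h>
#include <string.h>

#define MAX_WOL_PATTERN_SIZE 128   /* assumed value for the sketch */

struct wow_pattern {
	int pattern_len;                    /* signed, as in the cfg80211 API */
	unsigned char data[MAX_WOL_PATTERN_SIZE];
};

/* Returns 0 on success, -1 if the pattern is rejected. */
static int copy_pattern(unsigned char *dst, const struct wow_pattern *p)
{
	/* Check both bounds: reject negative lengths as well as oversized ones. */
	if (p->pattern_len < 0 || p->pattern_len > MAX_WOL_PATTERN_SIZE)
		return -1;

	memcpy(dst, p->data, (size_t)p->pattern_len);
	return 0;
}

int main(void)
{
	unsigned char buf[MAX_WOL_PATTERN_SIZE];
	struct wow_pattern ok = { .pattern_len = 16 };
	struct wow_pattern bad = { .pattern_len = -1 };

	printf("ok:  %d\n", copy_pattern(buf, &ok));   /* 0  */
	printf("bad: %d\n", copy_pattern(buf, &bad));  /* -1 */
	return 0;
}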
Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index c53cbf3d52bd..294a6b43d1bc 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -453,7 +453,8 @@ static void _rtl_add_wowlan_patterns(struct ieee80211_hw *hw, for (i = 0; i < wow->n_patterns; i++) { memset(&rtl_pattern, 0, sizeof(struct rtl_wow_pattern)); memset(mask, 0, MAX_WOL_BIT_MASK_SIZE); - if (patterns[i].pattern_len > MAX_WOL_PATTERN_SIZE) { + if (patterns[i].pattern_len < 0 || + patterns[i].pattern_len > MAX_WOL_PATTERN_SIZE) { RT_TRACE(rtlpriv, COMP_POWER, DBG_WARNING, "Pattern[%d] is too long\n", i); continue; From 073a435d55a62813d0a871abcb1ae904b5726098 Mon Sep 17 00:00:00 2001 From: Karthik Ananthapadmanabha Date: Fri, 29 Sep 2017 16:23:10 +0530 Subject: [PATCH 0783/2177] mwifiex: Random MAC address during scanning Driver will advertise RANDOM_MAC support only if the device supports this feature. Signed-off-by: Karthik Ananthapadmanabha Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 10 ++++++---- drivers/net/wireless/marvell/mwifiex/fw.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index ac01af4d7bfb..cc7d777eb26c 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4305,10 +4305,12 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->features |= NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_INACTIVITY_TIMER | NL80211_FEATURE_LOW_PRIORITY_SCAN | - NL80211_FEATURE_NEED_OBSS_SCAN | - NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR | - NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR | - NL80211_FEATURE_ND_RANDOM_MAC_ADDR; + NL80211_FEATURE_NEED_OBSS_SCAN; + + if (ISSUPP_RANDOM_MAC(adapter->fw_cap_info)) + wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR | + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR | + NL80211_FEATURE_ND_RANDOM_MAC_ADDR; if (ISSUPP_TDLS_ENABLED(adapter->fw_cap_info)) wiphy->features |= NL80211_FEATURE_TDLS_CHANNEL_SWITCH; diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 9e75522d248a..6b765f3f37fd 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -238,6 +238,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define ISSUPP_DRCS_ENABLED(FwCapInfo) (FwCapInfo & BIT(15)) #define ISSUPP_SDIO_SPA_ENABLED(FwCapInfo) (FwCapInfo & BIT(16)) #define ISSUPP_ADHOC_ENABLED(FwCapInfo) (FwCapInfo & BIT(25)) +#define ISSUPP_RANDOM_MAC(FwCapInfo) (FwCapInfo & BIT(27)) #define MWIFIEX_DEF_HT_CAP (IEEE80211_HT_CAP_DSSSCCK40 | \ (1 << IEEE80211_HT_CAP_RX_STBC_SHIFT) | \ From c4c40e51f9c32c6dd8adf606624c930a1c4d9bbb Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 21 Jul 2017 11:36:23 -0700 Subject: [PATCH 0784/2177] e1000e: Fix error path in link detection In case of error from e1e_rphy(), the loop will exit early and "success" will be set to true erroneously. 
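A minimal userspace sketch of the corrected polling pattern follows, with a fake register reader standing in for e1e_rphy(); the failure step and register values are invented for illustration. Success is reported only when link-up is actually observed, never inferred from how the loop exited.

#include <stdbool.h>
#include <stdio.h>

#define BMSR_LSTATUS 0x0004

/* Simulated PHY read: returns <0 on bus error, else the register value. */
static int fake_read_bmsr(unsigned int step)
{
	if (step == 3)
		return -5;	/* pretend the MDIO read failed here */
	return 0;		/* link not up yet */
}

static int phy_has_link(unsigned int iterations, bool *success)
{
	int ret = 0;

	*success = false;	/* start from "no link" */
	for (unsigned int i = 0; i < iterations; i++) {
		int status = fake_read_bmsr(i);

		if (status < 0) {
			ret = status;
			break;	/* an early exit must NOT imply success */
		}
		if (status & BMSR_LSTATUS) {
			*success = true;	/* link really observed */
			break;
		}
	}
	return ret;
}

int main(void)
{
	bool up;
	int ret = phy_has_link(10, &up);

	printf("ret=%d link=%s\n", ret, up ? "up" : "down");
	return 0;
}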
Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/phy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index d78d47b41a71..86ff0969efb6 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -1744,6 +1744,7 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, s32 ret_val = 0; u16 i, phy_status; + *success = false; for (i = 0; i < iterations; i++) { /* Some PHYs require the MII_BMSR register to be read * twice due to the link bit being sticky. No harm doing @@ -1763,16 +1764,16 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ret_val = e1e_rphy(hw, MII_BMSR, &phy_status); if (ret_val) break; - if (phy_status & BMSR_LSTATUS) + if (phy_status & BMSR_LSTATUS) { + *success = true; break; + } if (usec_interval >= 1000) msleep(usec_interval / 1000); else udelay(usec_interval); } - *success = (i < iterations); - return ret_val; } From 65a29da1f5fd20fdebef3b959bef9b3660807b20 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 21 Jul 2017 11:36:24 -0700 Subject: [PATCH 0785/2177] e1000e: Fix wrong comment related to link detection Reading e1000e_check_for_copper_link() shows that get_link_status is set to false after link has been detected. Therefore, it stays TRUE until then. Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 8436c5f2c3e8..ead4c112580e 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5074,7 +5074,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) /* get_link_status is set on LSC (link status) interrupt or * Rx sequence error interrupt. get_link_status will stay - * false until the check_for_link establishes link + * true until the check_for_link establishes link * for copper adapters ONLY */ switch (hw->phy.media_type) { @@ -5092,7 +5092,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) break; case e1000_media_type_internal_serdes: ret_val = hw->mac.ops.check_for_link(hw); - link_active = adapter->hw.mac.serdes_has_link; + link_active = hw->mac.serdes_has_link; break; default: case e1000_media_type_unknown: From d3509f8bc7b0560044c15f0e3ecfde1d9af757a6 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 21 Jul 2017 11:36:25 -0700 Subject: [PATCH 0786/2177] e1000e: Fix return value test All the helpers return -E1000_ERR_PHY. 
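The sign mix-up is easy to reproduce in isolation. In the sketch below the constant value is an assumption; the point is only that comparing against the positive constant can never match a helper that returns the negated value, so the guarded workaround path was unreachable before the fix.

#include <stdio.h>

#define E1000_ERR_PHY 2   /* assumed value for the sketch */

static int check_for_link(void)
{
	return -E1000_ERR_PHY;	/* helpers report errors negated */
}

int main(void)
{
	int ret_val = check_for_link();

	/* Old test: never true. */
	printf("ret_val == E1000_ERR_PHY  -> %d\n", ret_val == E1000_ERR_PHY);
	/* Fixed test: matches what the helpers actually return. */
	printf("ret_val == -E1000_ERR_PHY -> %d\n", ret_val == -E1000_ERR_PHY);
	return 0;
}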
Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ead4c112580e..a740de6a30b0 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5099,7 +5099,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) break; } - if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && + if ((ret_val == -E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ e_info("Gigabit has been disabled, downgrading speed\n"); From 19110cfbb34d4af0cdfe14cd243f3b09dc95b013 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 21 Jul 2017 11:36:26 -0700 Subject: [PATCH 0787/2177] e1000e: Separate signaling for link check/link up Lennart reported the following race condition: \ e1000_watchdog_task \ e1000e_has_link \ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link /* link is up */ mac->get_link_status = false; /* interrupt */ \ e1000_msix_other hw->mac.get_link_status = true; link_active = !hw->mac.get_link_status /* link_active is false, wrongly */ This problem arises because the single flag get_link_status is used to signal two different states: link status needs checking and link status is down. Avoid the problem by using the return value of .check_for_link to signal the link status to e1000e_has_link(). Reported-by: Lennart Sorensen Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/mac.c | 11 ++++++++--- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index b322011ec282..f457c5703d0c 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -410,6 +410,9 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) { @@ -423,7 +426,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -461,10 +464,12 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * different link partner. 
*/ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } /** diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a740de6a30b0..0a5f95ab0d3c 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5081,7 +5081,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) case e1000_media_type_copper: if (hw->mac.get_link_status) { ret_val = hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; + link_active = ret_val > 0; } else { link_active = true; } From 4aea7a5c5e940c1723add439f4088844cd26196d Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 21 Jul 2017 11:36:27 -0700 Subject: [PATCH 0788/2177] e1000e: Avoid receiver overrun interrupt bursts When e1000e_poll() is not fast enough to keep up with incoming traffic, the adapter (when operating in msix mode) raises the Other interrupt to signal Receiver Overrun. This is a double problem because 1) at the moment e1000_msix_other() assumes that it is only called in case of Link Status Change and 2) if the condition persists, the interrupt is repeatedly raised again in quick succession. Ideally we would configure the Other interrupt to not be raised in case of receiver overrun but this doesn't seem possible on this adapter. Instead, we handle the first part of the problem by reverting to the practice of reading ICR in the other interrupt handler, like before commit 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt"). Thanks to commit 0a8047ac68e5 ("e1000e: Fix msi-x interrupt automask") which cleared IAME from CTRL_EXT, reading ICR doesn't interfere with RxQ0, TxQ0 interrupts anymore. We handle the second part of the problem by not re-enabling the Other interrupt right away when there is overrun. Instead, we wait until traffic subsides, napi polling mode is exited and interrupts are re-enabled. Reported-by: Lennart Sorensen Fixes: 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/defines.h | 1 + drivers/net/ethernet/intel/e1000e/netdev.c | 33 ++++++++++++++++----- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 0641c0098738..afb7ebe20b24 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -398,6 +398,7 @@ #define E1000_ICR_LSC 0x00000004 /* Link Status Change */ #define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. 
threshold (0) */ +#define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */ #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ #define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ /* If this bit asserted, the driver should claim the interrupt */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 0a5f95ab0d3c..ee9de3500331 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1910,14 +1910,30 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + u32 icr; + bool enable = true; - hw->mac.get_link_status = true; - - /* guard against interrupt when we're going down */ - if (!test_bit(__E1000_DOWN, &adapter->state)) { - mod_timer(&adapter->watchdog_timer, jiffies + 1); - ew32(IMS, E1000_IMS_OTHER); + icr = er32(ICR); + if (icr & E1000_ICR_RXO) { + ew32(ICR, E1000_ICR_RXO); + enable = false; + /* napi poll will re-enable Other, make sure it runs */ + if (napi_schedule_prep(&adapter->napi)) { + adapter->total_rx_bytes = 0; + adapter->total_rx_packets = 0; + __napi_schedule(&adapter->napi); + } } + if (icr & E1000_ICR_LSC) { + ew32(ICR, E1000_ICR_LSC); + hw->mac.get_link_status = true; + /* guard against interrupt when we're going down */ + if (!test_bit(__E1000_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + if (enable && !test_bit(__E1000_DOWN, &adapter->state)) + ew32(IMS, E1000_IMS_OTHER); return IRQ_HANDLED; } @@ -2687,7 +2703,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight) napi_complete_done(napi, work_done); if (!test_bit(__E1000_DOWN, &adapter->state)) { if (adapter->msix_entries) - ew32(IMS, adapter->rx_ring->ims_val); + ew32(IMS, adapter->rx_ring->ims_val | + E1000_IMS_OTHER); else e1000_irq_enable(adapter); } @@ -4204,7 +4221,7 @@ static void e1000e_trigger_lsc(struct e1000_adapter *adapter) struct e1000_hw *hw = &adapter->hw; if (adapter->msix_entries) - ew32(ICS, E1000_ICS_OTHER); + ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER); else ew32(ICS, E1000_ICS_LSC); } From b10effb92e272051dd1ec0d7be56bf9ca85ab927 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 6 Aug 2017 16:49:18 +0300 Subject: [PATCH 0789/2177] e1000e: fix buffer overrun while the I219 is processing DMA transactions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel® 100/200 Series Chipset platforms reduced the round-trip latency for the LAN Controller DMA accesses, causing in some high performance cases a buffer overrun while the I219 LAN Connected Device is processing the DMA transactions. I219LM and I219V devices can fall into unrecovered Tx hang under very stressfully UDP traffic and multiple reconnection of Ethernet cable. This Tx hang of the LAN Controller is only recovered if the system is rebooted. Slightly slow down DMA access by reducing the number of outstanding requests. This workaround could have an impact on TCP traffic performance on the platform. Disabling TSO eliminates performance loss for TCP traffic without a noticeable impact on CPU performance. 
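The workaround itself boils down to a read-modify-write on TARC(0): clear bit 28 and set bit 29 to reduce the number of outstanding DMA requests. Here is a standalone sketch of that bit manipulation, with a plain variable standing in for the memory-mapped register and an arbitrary starting value.

#include <stdio.h>
#include <stdint.h>

#define BIT(n) (1U << (n))

static uint32_t apply_tarc0_workaround(uint32_t reg_val)
{
	reg_val &= ~BIT(28);	/* fewer outstanding requests, per the errata */
	reg_val |= BIT(29);
	return reg_val;
}

int main(void)
{
	uint32_t tarc0 = 0x1fffffff;	/* arbitrary pre-workaround value */

	printf("before: 0x%08x\n", tarc0);
	tarc0 = apply_tarc0_workaround(tarc0);
	printf("after:  0x%08x\n", tarc0);
	return 0;
}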
Please, refer to I218/I219 specification update: https://www.intel.com/content/www/us/en/embedded/products/networking/ ethernet-connection-i218-family-documentation.html Signed-off-by: Sasha Neftin Reviewed-by: Dima Ruinskiy Reviewed-by: Raanan Avargil Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ee9de3500331..14b096f3d1da 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3021,8 +3021,8 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) hw->mac.ops.config_collision_dist(hw); - /* SPT and CNP Si errata workaround to avoid data corruption */ - if (hw->mac.type >= e1000_pch_spt) { + /* SPT and KBL Si errata workaround to avoid data corruption */ + if (hw->mac.type == e1000_pch_spt) { u32 reg_val; reg_val = er32(IOSFPC); @@ -3030,7 +3030,9 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) ew32(IOSFPC, reg_val); reg_val = er32(TARC(0)); - reg_val |= E1000_TARC0_CB_MULTIQ_3_REQ; + /* SPT and KBL Si errata workaround to avoid Tx hang */ + reg_val &= ~BIT(28); + reg_val |= BIT(29); ew32(TARC(0), reg_val); } } From 48072ae1ec7a1c778771cad8c1b8dd803c4992ab Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 25 Aug 2017 11:06:26 -0400 Subject: [PATCH 0790/2177] e1000e: apply burst mode settings only on default Devices that support FLAG2_DMA_BURST have different default values for RDTR and RADV. Apply burst mode default settings only when no explicit value was passed at module load. The RDTR default is zero. If the module is loaded for low latency operation with RxIntDelay=0, do not override this value with a burst default of 32. Move the decision to apply burst values earlier, where explicitly initialized module variables can be distinguished from defaults. 
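A userspace sketch of that default-versus-explicit decision is shown below, with names and values assumed for illustration: the burst default is chosen up front when the device supports bursting, but a value the user passed explicitly, including zero, is never overridden.

#include <stdio.h>
#include <stdbool.h>

#define DEFAULT_RDTR 0
#define BURST_RDTR   0x20

struct adapter {
	bool dma_burst;			/* device supports DMA burst mode */
	unsigned int rx_int_delay;
};

static void check_rx_int_delay(struct adapter *a, bool user_set,
			       unsigned int user_val)
{
	unsigned int def = DEFAULT_RDTR;

	/* Pick the burst default early... */
	if (a->dma_burst)
		def = BURST_RDTR;

	/* ...but an explicitly supplied module option always wins. */
	a->rx_int_delay = user_set ? user_val : def;
}

int main(void)
{
	struct adapter a = { .dma_burst = true };

	check_rx_int_delay(&a, false, 0);	/* no option given */
	printf("default picked: 0x%x\n", a.rx_int_delay);	/* 0x20 */

	check_rx_int_delay(&a, true, 0);	/* RxIntDelay=0 passed */
	printf("explicit kept:  0x%x\n", a.rx_int_delay);	/* 0x0  */
	return 0;
}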
Signed-off-by: Willem de Bruijn Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/e1000.h | 4 ---- drivers/net/ethernet/intel/e1000e/netdev.c | 8 -------- drivers/net/ethernet/intel/e1000e/param.c | 16 +++++++++++++++- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 98e68888abb1..2311b31bdcac 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -94,10 +94,6 @@ struct e1000_info; */ #define E1000_CHECK_RESET_COUNT 25 -#define DEFAULT_RDTR 0 -#define DEFAULT_RADV 8 -#define BURST_RDTR 0x20 -#define BURST_RADV 0x20 #define PCICFG_DESC_RING_STATUS 0xe4 #define FLUSH_DESC_REQUIRED 0x100 diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 14b096f3d1da..00f48d4cabec 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3242,14 +3242,6 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) */ ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE); ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE); - - /* override the delay timers for enabling bursting, only if - * the value was not set by the user via module options - */ - if (adapter->rx_int_delay == DEFAULT_RDTR) - adapter->rx_int_delay = BURST_RDTR; - if (adapter->rx_abs_int_delay == DEFAULT_RADV) - adapter->rx_abs_int_delay = BURST_RADV; } /* set the Receive Delay Timer Register */ diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c index 6d8c39abee16..47da51864543 100644 --- a/drivers/net/ethernet/intel/e1000e/param.c +++ b/drivers/net/ethernet/intel/e1000e/param.c @@ -73,17 +73,25 @@ E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay"); /* Receive Interrupt Delay in units of 1.024 microseconds * hardware will likely hang if you set this to anything but zero. * + * Burst variant is used as default if device has FLAG2_DMA_BURST. + * * Valid Range: 0-65535 */ E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); +#define DEFAULT_RDTR 0 +#define BURST_RDTR 0x20 #define MAX_RXDELAY 0xFFFF #define MIN_RXDELAY 0 /* Receive Absolute Interrupt Delay in units of 1.024 microseconds + * + * Burst variant is used as default if device has FLAG2_DMA_BURST. 
* * Valid Range: 0-65535 */ E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay"); +#define DEFAULT_RADV 8 +#define BURST_RADV 0x20 #define MAX_RXABSDELAY 0xFFFF #define MIN_RXABSDELAY 0 @@ -297,6 +305,9 @@ void e1000e_check_options(struct e1000_adapter *adapter) .max = MAX_RXDELAY } } }; + if (adapter->flags2 & FLAG2_DMA_BURST) + opt.def = BURST_RDTR; + if (num_RxIntDelay > bd) { adapter->rx_int_delay = RxIntDelay[bd]; e1000_validate_option(&adapter->rx_int_delay, &opt, @@ -307,7 +318,7 @@ void e1000e_check_options(struct e1000_adapter *adapter) } /* Receive Absolute Interrupt Delay */ { - static const struct e1000_option opt = { + static struct e1000_option opt = { .type = range_option, .name = "Receive Absolute Interrupt Delay", .err = "using default of " @@ -317,6 +328,9 @@ void e1000e_check_options(struct e1000_adapter *adapter) .max = MAX_RXABSDELAY } } }; + if (adapter->flags2 & FLAG2_DMA_BURST) + opt.def = BURST_RADV; + if (num_RxAbsIntDelay > bd) { adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; e1000_validate_option(&adapter->rx_abs_int_delay, &opt, From 377b62736c01f14309141c69caa6d84363c12e12 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 25 Aug 2017 18:14:24 -0700 Subject: [PATCH 0791/2177] e1000e: Be drop monitor friendly e1000e_put_txbuf() can be called from normal reclamation path as well as when a DMA mapping failure, so we need to differentiate these two cases when freeing SKBs to be drop monitor friendly. e1000e_tx_hwtstamp_work() and e1000_remove() are processing TX timestamped SKBs and those should not be accounted as drops either. Signed-off-by: Florian Fainelli Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 00f48d4cabec..bf8f38f76953 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1071,7 +1071,8 @@ next_desc: } static void e1000_put_txbuf(struct e1000_ring *tx_ring, - struct e1000_buffer *buffer_info) + struct e1000_buffer *buffer_info, + bool drop) { struct e1000_adapter *adapter = tx_ring->adapter; @@ -1085,7 +1086,10 @@ static void e1000_put_txbuf(struct e1000_ring *tx_ring, buffer_info->dma = 0; } if (buffer_info->skb) { - dev_kfree_skb_any(buffer_info->skb); + if (drop) + dev_kfree_skb_any(buffer_info->skb); + else + dev_consume_skb_any(buffer_info->skb); buffer_info->skb = NULL; } buffer_info->time_stamp = 0; @@ -1199,7 +1203,7 @@ static void e1000e_tx_hwtstamp_work(struct work_struct *work) wmb(); /* force write prior to skb_tstamp_tx */ skb_tstamp_tx(skb, &shhwtstamps); - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); } else if (time_after(jiffies, adapter->tx_hwtstamp_start + adapter->tx_timeout_factor * HZ)) { dev_kfree_skb_any(adapter->tx_hwtstamp_skb); @@ -1254,7 +1258,7 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring) } } - e1000_put_txbuf(tx_ring, buffer_info); + e1000_put_txbuf(tx_ring, buffer_info, false); tx_desc->upper.data = 0; i++; @@ -2437,7 +2441,7 @@ static void e1000_clean_tx_ring(struct e1000_ring *tx_ring) for (i = 0; i < tx_ring->count; i++) { buffer_info = &tx_ring->buffer_info[i]; - e1000_put_txbuf(tx_ring, buffer_info); + e1000_put_txbuf(tx_ring, buffer_info, false); } netdev_reset_queue(adapter->netdev); @@ -5625,7 +5629,7 @@ dma_error: i += tx_ring->count; i--; buffer_info = &tx_ring->buffer_info[i]; - 
e1000_put_txbuf(tx_ring, buffer_info); + e1000_put_txbuf(tx_ring, buffer_info, true); } return 0; @@ -7419,7 +7423,7 @@ static void e1000_remove(struct pci_dev *pdev) if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) { cancel_work_sync(&adapter->tx_hwtstamp_work); if (adapter->tx_hwtstamp_skb) { - dev_kfree_skb_any(adapter->tx_hwtstamp_skb); + dev_consume_skb_any(adapter->tx_hwtstamp_skb); adapter->tx_hwtstamp_skb = NULL; } } From 18eb86362a52f0af933cc0fd5e37027317eb2d1c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 27 Aug 2017 08:39:51 +0200 Subject: [PATCH 0792/2177] igb: check memory allocation failure Check memory allocation failures and return -ENOMEM in such cases, as already done for other memory allocations in this function. This avoids NULL pointers dereference. Signed-off-by: Christophe JAILLET Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fd4a46b03cc8..837d9b46a390 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3162,6 +3162,8 @@ static int igb_sw_init(struct igb_adapter *adapter) /* Setup and initialize a copy of the hw vlan table array */ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), GFP_ATOMIC); + if (!adapter->shadow_vfta) + return -ENOMEM; /* This call may decrease the number of queues */ if (igb_init_interrupt_scheme(adapter, true)) { From b2427e671766c51379bc882fdf809d905c6f9619 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 10 Oct 2017 18:01:16 +0100 Subject: [PATCH 0793/2177] ipv6: fix dereference of rt6_ex before null check error Currently rt6_ex is being dereferenced before it is null checked hence there is a possible null dereference bug. Fix this by only dereferencing rt6_ex after it has been null checked. Detected by CoverityScan, CID#1457749 ("Dereference before null check") Fixes: 81eb8447daae ("ipv6: take care of rt6_stats") Signed-off-by: Colin Ian King Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 13b0d6d1cc30..c1c44f484286 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1142,10 +1142,12 @@ static DEFINE_SPINLOCK(rt6_exception_lock); static void rt6_remove_exception(struct rt6_exception_bucket *bucket, struct rt6_exception *rt6_ex) { - struct net *net = dev_net(rt6_ex->rt6i->dst.dev); + struct net *net; if (!bucket || !rt6_ex) return; + + net = dev_net(rt6_ex->rt6i->dst.dev); rt6_ex->rt6i->rt6i_node = NULL; hlist_del_rcu(&rt6_ex->hlist); rt6_release(rt6_ex->rt6i); From 442d713baa33db0f78adadee6125c215f10f5a75 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 10 Oct 2017 19:10:30 +0100 Subject: [PATCH 0794/2177] ipv6: fix incorrect bitwise operator used on rt6i_flags The use of the | operator always leads to true which looks rather suspect to me. Fix this by using & instead to just check the RTF_CACHE entry bit. Detected by CoverityScan, CID#1457734, #1457747 ("Wrong operator used") Fixes: 35732d01fe31 ("ipv6: introduce a hash table to store dst cache") Signed-off-by: Colin Ian King Acked-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c1c44f484286..2e8842fa6450 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1415,7 +1415,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt) int err; if (!from || - !(rt->rt6i_flags | RTF_CACHE)) + !(rt->rt6i_flags & RTF_CACHE)) return -EINVAL; if (!rcu_access_pointer(from->rt6i_exception_bucket)) @@ -1459,7 +1459,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) struct rt6_exception *rt6_ex; if (!from || - !(rt->rt6i_flags | RTF_CACHE)) + !(rt->rt6i_flags & RTF_CACHE)) return; rcu_read_lock(); From a99ca6dbf429fe6c035cfb880d91bf6a223fcd3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:09 -0700 Subject: [PATCH 0795/2177] selftests/bpf: add a test for verifier logs Add a test for verifier log handling. Check bad attr combinations but focus on cases when log is truncated. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/Makefile | 2 +- .../testing/selftests/bpf/test_verifier_log.c | 171 ++++++++++++++++++ 2 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_verifier_log.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 924af8d79bde..2e7880ea0add 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -12,7 +12,7 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i LDLIBS += -lcap -lelf TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align + test_align test_verifier_log TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c new file mode 100644 index 000000000000..3cc0b561489e --- /dev/null +++ b/tools/testing/selftests/bpf/test_verifier_log.c @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define LOG_SIZE (1 << 20) + +#define err(str...) 
printf("ERROR: " str) + +static const struct bpf_insn code_sample[] = { + /* We need a few instructions to pass the min log length */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +static int load(char *log, size_t log_len, int log_level) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn)); + attr.insns = ptr_to_u64(code_sample); + attr.license = ptr_to_u64("GPL"); + attr.log_buf = ptr_to_u64(log); + attr.log_size = log_len; + attr.log_level = log_level; + + return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); +} + +static void check_ret(int ret, int exp_errno) +{ + if (ret > 0) { + close(ret); + err("broken sample loaded successfully!?\n"); + exit(1); + } + + if (!ret || errno != exp_errno) { + err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n", + ret, errno, -1, exp_errno); + exit(1); + } +} + +static void check_ones(const char *buf, size_t len, const char *msg) +{ + while (len--) + if (buf[len] != 1) { + err("%s", msg); + exit(1); + } +} + +static void test_log_good(char *log, size_t buf_len, size_t log_len, + size_t exp_len, int exp_errno, const char *full_log) +{ + size_t len; + int ret; + + memset(log, 1, buf_len); + + ret = load(log, log_len, 1); + check_ret(ret, exp_errno); + + len = strnlen(log, buf_len); + if (len == buf_len) { + err("verifier did not NULL terminate the log\n"); + exit(1); + } + if (exp_len && len != exp_len) { + err("incorrect log length expected:%zd have:%zd\n", + exp_len, len); + exit(1); + } + + if (strchr(log, 1)) { + err("verifier leaked a byte through\n"); + exit(1); + } + + check_ones(log + len + 1, buf_len - len - 1, + "verifier wrote bytes past NULL termination\n"); + + if (memcmp(full_log, log, LOG_SIZE)) { + err("log did not match expected output\n"); + exit(1); + } +} + +static void test_log_bad(char *log, size_t log_len, int log_level) +{ + int ret; + + ret = load(log, log_len, log_level); + check_ret(ret, EINVAL); + if (log) + check_ones(log, LOG_SIZE, + "verifier touched log with bad parameters\n"); +} + +int main(int argc, char **argv) +{ + char full_log[LOG_SIZE]; + char log[LOG_SIZE]; + size_t want_len; + int i; + + memset(log, 1, LOG_SIZE); + + /* Test incorrect attr */ + printf("Test log_level 0...\n"); + test_log_bad(log, LOG_SIZE, 0); + + printf("Test log_size < 128...\n"); + test_log_bad(log, 15, 1); + + printf("Test log_buff = NULL...\n"); + test_log_bad(NULL, LOG_SIZE, 1); + + /* Test with log big enough */ + printf("Test oversized buffer...\n"); + test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log); + + want_len = strlen(full_log); + + printf("Test exact buffer...\n"); + test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log); + + printf("Test undersized buffers...\n"); + for (i = 0; i < 64; i++) { + full_log[want_len - i + 
1] = 1; + full_log[want_len - i] = 0; + + test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i, + ENOSPC, full_log); + } + + printf("test_verifier_log: OK\n"); + return 0; +} From e7bf8249e8f1bac64885eeccb55bcf6111901a81 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:10 -0700 Subject: [PATCH 0796/2177] bpf: encapsulate verifier log state into a structure Put the loose log_* variables into a structure. This will make it simpler to remove the global verifier state in following patches. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 13 ++++++++ kernel/bpf/verifier.c | 57 +++++++++++++++++++----------------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b8d200f60a40..163541ba70d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -115,6 +115,19 @@ struct bpf_insn_aux_data { #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +struct bpf_verifer_log { + u32 level; + char *kbuf; + char __user *ubuf; + u32 len_used; + u32 len_total; +}; + +static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log) +{ + return log->len_used >= log->len_total - 1; +} + struct bpf_verifier_env; struct bpf_ext_analyzer_ops { int (*insn_hook)(struct bpf_verifier_env *env, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6352a88ca6d1..e53458b02249 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -156,8 +156,7 @@ struct bpf_call_arg_meta { /* verbose verifier prints what it's seeing * bpf_check() is called under lock, so no race to access these global vars */ -static u32 log_level, log_size, log_len; -static char *log_buf; +static struct bpf_verifer_log verifier_log; static DEFINE_MUTEX(bpf_verifier_lock); @@ -167,13 +166,15 @@ static DEFINE_MUTEX(bpf_verifier_lock); */ static __printf(1, 2) void verbose(const char *fmt, ...) { + struct bpf_verifer_log *log = &verifier_log; va_list args; - if (log_level == 0 || log_len >= log_size - 1) + if (!log->level || bpf_verifier_log_full(log)) return; va_start(args, fmt); - log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args); + log->len_used += vscnprintf(log->kbuf + log->len_used, + log->len_total - log->len_used, fmt, args); va_end(args); } @@ -886,7 +887,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, * need to try adding each of min_value and max_value to off * to make sure our theoretical access will be safe. 
*/ - if (log_level) + if (verifier_log.level) print_verifier_state(state); /* The minimum value is only important with signed * comparisons where we can't assume the floor of a @@ -2956,7 +2957,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; } - if (log_level) + if (verifier_log.level) print_verifier_state(this_branch); return 0; } @@ -3712,7 +3713,7 @@ static int do_check(struct bpf_verifier_env *env) return err; if (err == 1) { /* found equivalent state, can prune the search */ - if (log_level) { + if (verifier_log.level) { if (do_print_state) verbose("\nfrom %d to %d: safe\n", prev_insn_idx, insn_idx); @@ -3725,8 +3726,9 @@ static int do_check(struct bpf_verifier_env *env) if (need_resched()) cond_resched(); - if (log_level > 1 || (log_level && do_print_state)) { - if (log_level > 1) + if (verifier_log.level > 1 || + (verifier_log.level && do_print_state)) { + if (verifier_log.level > 1) verbose("%d:", insn_idx); else verbose("\nfrom %d to %d:", @@ -3735,7 +3737,7 @@ static int do_check(struct bpf_verifier_env *env) do_print_state = false; } - if (log_level) { + if (verifier_log.level) { verbose("%d: ", insn_idx); print_bpf_insn(env, insn); } @@ -4389,7 +4391,7 @@ static void free_states(struct bpf_verifier_env *env) int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { - char __user *log_ubuf = NULL; + struct bpf_verifer_log *log = &verifier_log; struct bpf_verifier_env *env; int ret = -EINVAL; @@ -4414,23 +4416,23 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* user requested verbose verifier output * and supplied buffer to store the verification trace */ - log_level = attr->log_level; - log_ubuf = (char __user *) (unsigned long) attr->log_buf; - log_size = attr->log_size; - log_len = 0; + log->level = attr->log_level; + log->ubuf = (char __user *) (unsigned long) attr->log_buf; + log->len_total = attr->log_size; + log->len_used = 0; ret = -EINVAL; - /* log_* values have to be sane */ - if (log_size < 128 || log_size > UINT_MAX >> 8 || - log_level == 0 || log_ubuf == NULL) + /* log attributes have to be sane */ + if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || + !log->level || !log->ubuf) goto err_unlock; ret = -ENOMEM; - log_buf = vmalloc(log_size); - if (!log_buf) + log->kbuf = vmalloc(log->len_total); + if (!log->kbuf) goto err_unlock; } else { - log_level = 0; + log->level = 0; } env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); @@ -4467,15 +4469,16 @@ skip_full_check: if (ret == 0) ret = fixup_bpf_calls(env); - if (log_level && log_len >= log_size - 1) { - BUG_ON(log_len >= log_size); + if (log->level && bpf_verifier_log_full(log)) { + BUG_ON(log->len_used >= log->len_total); /* verifier log exceeded user supplied buffer */ ret = -ENOSPC; /* fall through to return what was recorded */ } /* copy verifier log back to user space including trailing zero */ - if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) { + if (log->level && copy_to_user(log->ubuf, log->kbuf, + log->len_used + 1) != 0) { ret = -EFAULT; goto free_log_buf; } @@ -4502,8 +4505,8 @@ skip_full_check: } free_log_buf: - if (log_level) - vfree(log_buf); + if (log->level) + vfree(log->kbuf); if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. 
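As an illustration of the pattern this patch (and the follow-up below) introduces, the userspace sketch that follows mirrors the bounded-append behaviour of the new struct bpf_verifer_log and the reworked verbose(): logging is skipped when level is 0 or the buffer is full, and len_used never grows past len_total - 1 so a trailing NUL always fits. The field layout (level, buffer, len_used, len_total) and the full-check come from the patch itself; the sample_* names are invented for illustration, and plain vsnprintf() stands in for the kernel's vscnprintf(), so treat this as an analogy rather than kernel code.

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace analogue of struct bpf_verifer_log from this patch. */
struct sample_log {
	unsigned int level;	/* 0 disables logging entirely */
	char *buf;		/* stands in for log->kbuf */
	unsigned int len_used;	/* bytes already written */
	unsigned int len_total;	/* capacity of buf */
};

/* Mirrors bpf_verifier_log_full(): keep one byte free for the NUL. */
static int sample_log_full(const struct sample_log *log)
{
	return log->len_used >= log->len_total - 1;
}

/* Bounded append in the spirit of the reworked verbose(). */
static void sample_verbose(struct sample_log *log, const char *fmt, ...)
{
	unsigned int room;
	va_list args;
	int n;

	if (!log->level || sample_log_full(log))
		return;

	room = log->len_total - log->len_used;
	va_start(args, fmt);
	n = vsnprintf(log->buf + log->len_used, room, fmt, args);
	va_end(args);

	if (n < 0)
		return;
	/* vsnprintf() reports the untruncated length; clamp like vscnprintf(). */
	if ((unsigned int)n > room - 1)
		n = room - 1;
	log->len_used += n;
}

int main(void)
{
	struct sample_log log = { .level = 1, .len_total = 64 };

	log.buf = calloc(1, log.len_total);
	if (!log.buf)
		return 1;

	sample_verbose(&log, "R%d=%s ", 1, "ctx");
	sample_verbose(&log, "R%d=%s\n", 10, "fp");
	printf("used %u of %u bytes: %s", log.len_used, log.len_total, log.buf);

	free(log.buf);
	return 0;
}

Reserving that final byte matters because bpf_check() later copies len_used + 1 bytes back to user space, i.e. the log including its trailing zero, and the truncation check (bpf_verifier_log_full() followed by -ENOSPC) assumes len_used can reach len_total - 1 but never len_total.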
@@ -4540,7 +4543,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, /* grab the mutex to protect few globals used by verifier */ mutex_lock(&bpf_verifier_lock); - log_level = 0; + verifier_log.level = 0; env->strict_alignment = false; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) From 61bd5218eef349fcacc4976a251bc83a4748b4af Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:11 -0700 Subject: [PATCH 0797/2177] bpf: move global verifier log into verifier environment The biggest piece of global state protected by the verifier lock is the verifier_log. Move that log to struct bpf_verifier_env. struct bpf_verifier_env has to be passed now to all invocations of verbose(). Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 2 + kernel/bpf/verifier.c | 491 ++++++++++++++++++----------------- 2 files changed, 261 insertions(+), 232 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 163541ba70d9..5ddb9a626a51 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -152,6 +152,8 @@ struct bpf_verifier_env { bool allow_ptr_leaks; bool seen_direct_write; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ + + struct bpf_verifer_log log; }; int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e53458b02249..a352f93cd4b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -153,20 +153,16 @@ struct bpf_call_arg_meta { int access_size; }; -/* verbose verifier prints what it's seeing - * bpf_check() is called under lock, so no race to access these global vars - */ -static struct bpf_verifer_log verifier_log; - static DEFINE_MUTEX(bpf_verifier_lock); /* log_level controls verbosity level of eBPF verifier. * verbose() is used to dump the verification trace to the log, so the user * can figure out what's wrong with the program */ -static __printf(1, 2) void verbose(const char *fmt, ...) +static __printf(2, 3) void verbose(struct bpf_verifier_env *env, + const char *fmt, ...) 
{ - struct bpf_verifer_log *log = &verifier_log; + struct bpf_verifer_log *log = &env->log; va_list args; if (!log->level || bpf_verifier_log_full(log)) @@ -214,7 +210,8 @@ static const char *func_id_name(int id) return "unknown"; } -static void print_verifier_state(struct bpf_verifier_state *state) +static void print_verifier_state(struct bpf_verifier_env *env, + struct bpf_verifier_state *state) { struct bpf_reg_state *reg; enum bpf_reg_type t; @@ -225,21 +222,21 @@ static void print_verifier_state(struct bpf_verifier_state *state) t = reg->type; if (t == NOT_INIT) continue; - verbose(" R%d=%s", i, reg_type_str[t]); + verbose(env, " R%d=%s", i, reg_type_str[t]); if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && tnum_is_const(reg->var_off)) { /* reg->off should be 0 for SCALAR_VALUE */ - verbose("%lld", reg->var_off.value + reg->off); + verbose(env, "%lld", reg->var_off.value + reg->off); } else { - verbose("(id=%d", reg->id); + verbose(env, "(id=%d", reg->id); if (t != SCALAR_VALUE) - verbose(",off=%d", reg->off); + verbose(env, ",off=%d", reg->off); if (type_is_pkt_pointer(t)) - verbose(",r=%d", reg->range); + verbose(env, ",r=%d", reg->range); else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL) - verbose(",ks=%d,vs=%d", + verbose(env, ",ks=%d,vs=%d", reg->map_ptr->key_size, reg->map_ptr->value_size); if (tnum_is_const(reg->var_off)) { @@ -247,38 +244,38 @@ static void print_verifier_state(struct bpf_verifier_state *state) * could be a pointer whose offset is too big * for reg->off */ - verbose(",imm=%llx", reg->var_off.value); + verbose(env, ",imm=%llx", reg->var_off.value); } else { if (reg->smin_value != reg->umin_value && reg->smin_value != S64_MIN) - verbose(",smin_value=%lld", + verbose(env, ",smin_value=%lld", (long long)reg->smin_value); if (reg->smax_value != reg->umax_value && reg->smax_value != S64_MAX) - verbose(",smax_value=%lld", + verbose(env, ",smax_value=%lld", (long long)reg->smax_value); if (reg->umin_value != 0) - verbose(",umin_value=%llu", + verbose(env, ",umin_value=%llu", (unsigned long long)reg->umin_value); if (reg->umax_value != U64_MAX) - verbose(",umax_value=%llu", + verbose(env, ",umax_value=%llu", (unsigned long long)reg->umax_value); if (!tnum_is_unknown(reg->var_off)) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(",var_off=%s", tn_buf); + verbose(env, ",var_off=%s", tn_buf); } } - verbose(")"); + verbose(env, ")"); } } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] == STACK_SPILL) - verbose(" fp%d=%s", -MAX_BPF_STACK + i, + verbose(env, " fp%d=%s", -MAX_BPF_STACK + i, reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]); } - verbose("\n"); + verbose(env, "\n"); } static const char *const bpf_class_string[] = { @@ -333,15 +330,15 @@ static const char *const bpf_jmp_string[16] = { [BPF_EXIT >> 4] = "exit", }; -static void print_bpf_end_insn(const struct bpf_verifier_env *env, +static void print_bpf_end_insn(struct bpf_verifier_env *env, const struct bpf_insn *insn) { - verbose("(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg, + verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg, BPF_SRC(insn->code) == BPF_TO_BE ? 
"be" : "le", insn->imm, insn->dst_reg); } -static void print_bpf_insn(const struct bpf_verifier_env *env, +static void print_bpf_insn(struct bpf_verifier_env *env, const struct bpf_insn *insn) { u8 class = BPF_CLASS(insn->code); @@ -349,23 +346,23 @@ static void print_bpf_insn(const struct bpf_verifier_env *env, if (class == BPF_ALU || class == BPF_ALU64) { if (BPF_OP(insn->code) == BPF_END) { if (class == BPF_ALU64) - verbose("BUG_alu64_%02x\n", insn->code); + verbose(env, "BUG_alu64_%02x\n", insn->code); else print_bpf_end_insn(env, insn); } else if (BPF_OP(insn->code) == BPF_NEG) { - verbose("(%02x) r%d = %s-r%d\n", + verbose(env, "(%02x) r%d = %s-r%d\n", insn->code, insn->dst_reg, class == BPF_ALU ? "(u32) " : "", insn->dst_reg); } else if (BPF_SRC(insn->code) == BPF_X) { - verbose("(%02x) %sr%d %s %sr%d\n", + verbose(env, "(%02x) %sr%d %s %sr%d\n", insn->code, class == BPF_ALU ? "(u32) " : "", insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], class == BPF_ALU ? "(u32) " : "", insn->src_reg); } else { - verbose("(%02x) %sr%d %s %s%d\n", + verbose(env, "(%02x) %sr%d %s %s%d\n", insn->code, class == BPF_ALU ? "(u32) " : "", insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], @@ -374,46 +371,46 @@ static void print_bpf_insn(const struct bpf_verifier_env *env, } } else if (class == BPF_STX) { if (BPF_MODE(insn->code) == BPF_MEM) - verbose("(%02x) *(%s *)(r%d %+d) = r%d\n", + verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); else if (BPF_MODE(insn->code) == BPF_XADD) - verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n", + verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); else - verbose("BUG_%02x\n", insn->code); + verbose(env, "BUG_%02x\n", insn->code); } else if (class == BPF_ST) { if (BPF_MODE(insn->code) != BPF_MEM) { - verbose("BUG_st_%02x\n", insn->code); + verbose(env, "BUG_st_%02x\n", insn->code); return; } - verbose("(%02x) *(%s *)(r%d %+d) = %d\n", + verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->imm); } else if (class == BPF_LDX) { if (BPF_MODE(insn->code) != BPF_MEM) { - verbose("BUG_ldx_%02x\n", insn->code); + verbose(env, "BUG_ldx_%02x\n", insn->code); return; } - verbose("(%02x) r%d = *(%s *)(r%d %+d)\n", + verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n", insn->code, insn->dst_reg, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->src_reg, insn->off); } else if (class == BPF_LD) { if (BPF_MODE(insn->code) == BPF_ABS) { - verbose("(%02x) r0 = *(%s *)skb[%d]\n", + verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->imm); } else if (BPF_MODE(insn->code) == BPF_IND) { - verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n", + verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->src_reg, insn->imm); @@ -428,36 +425,37 @@ static void print_bpf_insn(const struct bpf_verifier_env *env, if (map_ptr && !env->allow_ptr_leaks) imm = 0; - verbose("(%02x) r%d = 0x%llx\n", insn->code, + verbose(env, "(%02x) r%d = 0x%llx\n", insn->code, insn->dst_reg, (unsigned long long)imm); } else { - verbose("BUG_ld_%02x\n", insn->code); + verbose(env, "BUG_ld_%02x\n", insn->code); return; } } else if (class == BPF_JMP) { u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { - verbose("(%02x) call %s#%d\n", 
insn->code, + verbose(env, "(%02x) call %s#%d\n", insn->code, func_id_name(insn->imm), insn->imm); } else if (insn->code == (BPF_JMP | BPF_JA)) { - verbose("(%02x) goto pc%+d\n", + verbose(env, "(%02x) goto pc%+d\n", insn->code, insn->off); } else if (insn->code == (BPF_JMP | BPF_EXIT)) { - verbose("(%02x) exit\n", insn->code); + verbose(env, "(%02x) exit\n", insn->code); } else if (BPF_SRC(insn->code) == BPF_X) { - verbose("(%02x) if r%d %s r%d goto pc%+d\n", + verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n", insn->code, insn->dst_reg, bpf_jmp_string[BPF_OP(insn->code) >> 4], insn->src_reg, insn->off); } else { - verbose("(%02x) if r%d %s 0x%x goto pc%+d\n", + verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n", insn->code, insn->dst_reg, bpf_jmp_string[BPF_OP(insn->code) >> 4], insn->imm, insn->off); } } else { - verbose("(%02x) %s\n", insn->code, bpf_class_string[class]); + verbose(env, "(%02x) %s\n", + insn->code, bpf_class_string[class]); } } @@ -496,7 +494,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, env->head = elem; env->stack_size++; if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { - verbose("BPF program is too complex\n"); + verbose(env, "BPF program is too complex\n"); goto err; } return &elem->st; @@ -534,10 +532,11 @@ static void __mark_reg_known_zero(struct bpf_reg_state *reg) __mark_reg_known(reg, 0); } -static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno) +static void mark_reg_known_zero(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, u32 regno) { if (WARN_ON(regno >= MAX_BPF_REG)) { - verbose("mark_reg_known_zero(regs, %u)\n", regno); + verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); /* Something bad happened, let's kill all regs */ for (regno = 0; regno < MAX_BPF_REG; regno++) __mark_reg_not_init(regs + regno); @@ -647,10 +646,11 @@ static void __mark_reg_unknown(struct bpf_reg_state *reg) __mark_reg_unbounded(reg); } -static void mark_reg_unknown(struct bpf_reg_state *regs, u32 regno) +static void mark_reg_unknown(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, u32 regno) { if (WARN_ON(regno >= MAX_BPF_REG)) { - verbose("mark_reg_unknown(regs, %u)\n", regno); + verbose(env, "mark_reg_unknown(regs, %u)\n", regno); /* Something bad happened, let's kill all regs */ for (regno = 0; regno < MAX_BPF_REG; regno++) __mark_reg_not_init(regs + regno); @@ -665,10 +665,11 @@ static void __mark_reg_not_init(struct bpf_reg_state *reg) reg->type = NOT_INIT; } -static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) +static void mark_reg_not_init(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, u32 regno) { if (WARN_ON(regno >= MAX_BPF_REG)) { - verbose("mark_reg_not_init(regs, %u)\n", regno); + verbose(env, "mark_reg_not_init(regs, %u)\n", regno); /* Something bad happened, let's kill all regs */ for (regno = 0; regno < MAX_BPF_REG; regno++) __mark_reg_not_init(regs + regno); @@ -677,22 +678,23 @@ static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) __mark_reg_not_init(regs + regno); } -static void init_reg_state(struct bpf_reg_state *regs) +static void init_reg_state(struct bpf_verifier_env *env, + struct bpf_reg_state *regs) { int i; for (i = 0; i < MAX_BPF_REG; i++) { - mark_reg_not_init(regs, i); + mark_reg_not_init(env, regs, i); regs[i].live = REG_LIVE_NONE; } /* frame pointer */ regs[BPF_REG_FP].type = PTR_TO_STACK; - mark_reg_known_zero(regs, BPF_REG_FP); + mark_reg_known_zero(env, regs, BPF_REG_FP); /* 1st arg to a function */ regs[BPF_REG_1].type = 
PTR_TO_CTX; - mark_reg_known_zero(regs, BPF_REG_1); + mark_reg_known_zero(env, regs, BPF_REG_1); } enum reg_arg_type { @@ -726,26 +728,26 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, struct bpf_reg_state *regs = env->cur_state.regs; if (regno >= MAX_BPF_REG) { - verbose("R%d is invalid\n", regno); + verbose(env, "R%d is invalid\n", regno); return -EINVAL; } if (t == SRC_OP) { /* check whether register used as source operand can be read */ if (regs[regno].type == NOT_INIT) { - verbose("R%d !read_ok\n", regno); + verbose(env, "R%d !read_ok\n", regno); return -EACCES; } mark_reg_read(&env->cur_state, regno); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { - verbose("frame pointer is read only\n"); + verbose(env, "frame pointer is read only\n"); return -EACCES; } regs[regno].live |= REG_LIVE_WRITTEN; if (t == DST_OP) - mark_reg_unknown(regs, regno); + mark_reg_unknown(env, regs, regno); } return 0; } @@ -770,7 +772,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct bpf_verifier_state *state, int off, +static int check_stack_write(struct bpf_verifier_env *env, + struct bpf_verifier_state *state, int off, int size, int value_regno) { int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; @@ -783,7 +786,7 @@ static int check_stack_write(struct bpf_verifier_state *state, int off, /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { - verbose("invalid size of register spill\n"); + verbose(env, "invalid size of register spill\n"); return -EACCES; } @@ -818,7 +821,8 @@ static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slo } } -static int check_stack_read(struct bpf_verifier_state *state, int off, int size, +static int check_stack_read(struct bpf_verifier_env *env, + struct bpf_verifier_state *state, int off, int size, int value_regno) { u8 *slot_type; @@ -828,12 +832,12 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size, if (slot_type[0] == STACK_SPILL) { if (size != BPF_REG_SIZE) { - verbose("invalid size of register spill\n"); + verbose(env, "invalid size of register spill\n"); return -EACCES; } for (i = 1; i < BPF_REG_SIZE; i++) { if (slot_type[i] != STACK_SPILL) { - verbose("corrupted spill memory\n"); + verbose(env, "corrupted spill memory\n"); return -EACCES; } } @@ -849,14 +853,14 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size, } else { for (i = 0; i < size; i++) { if (slot_type[i] != STACK_MISC) { - verbose("invalid read from stack off %d+%d size %d\n", + verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; } } if (value_regno >= 0) /* have read misc data from the stack */ - mark_reg_unknown(state->regs, value_regno); + mark_reg_unknown(env, state->regs, value_regno); return 0; } } @@ -868,7 +872,7 @@ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, struct bpf_map *map = env->cur_state.regs[regno].map_ptr; if (off < 0 || size <= 0 || off + size > map->value_size) { - verbose("invalid access to map value, value_size=%d off=%d size=%d\n", + verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", map->value_size, off, size); return -EACCES; } @@ -887,8 +891,8 @@ static int check_map_access(struct bpf_verifier_env *env, u32 
regno, * need to try adding each of min_value and max_value to off * to make sure our theoretical access will be safe. */ - if (verifier_log.level) - print_verifier_state(state); + if (env->log.level) + print_verifier_state(env, state); /* The minimum value is only important with signed * comparisons where we can't assume the floor of a * value is 0. If we are using signed variables for our @@ -896,13 +900,14 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, * will have a set floor within our range. */ if (reg->smin_value < 0) { - verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno); return -EACCES; } err = __check_map_access(env, regno, reg->smin_value + off, size); if (err) { - verbose("R%d min value is outside of the array range\n", regno); + verbose(env, "R%d min value is outside of the array range\n", + regno); return err; } @@ -911,13 +916,14 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, * If reg->umax_value + off could overflow, treat that as unbounded too. */ if (reg->umax_value >= BPF_MAX_VAR_OFF) { - verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n", + verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n", regno); return -EACCES; } err = __check_map_access(env, regno, reg->umax_value + off, size); if (err) - verbose("R%d max value is outside of the array range\n", regno); + verbose(env, "R%d max value is outside of the array range\n", + regno); return err; } @@ -956,7 +962,7 @@ static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, struct bpf_reg_state *reg = ®s[regno]; if (off < 0 || size <= 0 || (u64)off + size > reg->range) { - verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", + verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, reg->off, reg->range); return -EACCES; } @@ -979,13 +985,13 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, * detail to prove they're safe. 
*/ if (reg->smin_value < 0) { - verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno); return -EACCES; } err = __check_packet_access(env, regno, off, size); if (err) { - verbose("R%d offset is outside of the packet\n", regno); + verbose(env, "R%d offset is outside of the packet\n", regno); return err; } return err; @@ -1021,7 +1027,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, return 0; } - verbose("invalid bpf_context access off=%d size=%d\n", off, size); + verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size); return -EACCES; } @@ -1039,7 +1045,8 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); } -static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, +static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int off, int size, bool strict) { struct tnum reg_off; @@ -1064,7 +1071,8 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose("misaligned packet access off %d+%s+%d+%d size %d\n", + verbose(env, + "misaligned packet access off %d+%s+%d+%d size %d\n", ip_align, tn_buf, reg->off, off, size); return -EACCES; } @@ -1072,7 +1080,8 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, return 0; } -static int check_generic_ptr_alignment(const struct bpf_reg_state *reg, +static int check_generic_ptr_alignment(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, const char *pointer_desc, int off, int size, bool strict) { @@ -1087,7 +1096,7 @@ static int check_generic_ptr_alignment(const struct bpf_reg_state *reg, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose("misaligned %saccess off %s+%d+%d size %d\n", + verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", pointer_desc, tn_buf, reg->off, off, size); return -EACCES; } @@ -1108,7 +1117,7 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, /* Special case, because of NET_IP_ALIGN. Given metadata sits * right in front, treat it the very same way. 
*/ - return check_pkt_ptr_alignment(reg, off, size, strict); + return check_pkt_ptr_alignment(env, reg, off, size, strict); case PTR_TO_MAP_VALUE: pointer_desc = "value "; break; @@ -1121,7 +1130,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, default: break; } - return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict); + return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, + strict); } /* check whether memory at (regno + off) is accessible for t = (read | write) @@ -1153,20 +1163,20 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { - verbose("R%d leaks addr into map\n", value_regno); + verbose(env, "R%d leaks addr into map\n", value_regno); return -EACCES; } err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown(state->regs, value_regno); + mark_reg_unknown(env, state->regs, value_regno); } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { - verbose("R%d leaks addr into ctx\n", value_regno); + verbose(env, "R%d leaks addr into ctx\n", value_regno); return -EACCES; } /* ctx accesses must be at a fixed offset, so that we can @@ -1176,7 +1186,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose("variable ctx access var_off=%s off=%d size=%d", + verbose(env, + "variable ctx access var_off=%s off=%d size=%d", tn_buf, off, size); return -EACCES; } @@ -1188,9 +1199,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * case, we know the offset is zero. 
*/ if (reg_type == SCALAR_VALUE) - mark_reg_unknown(state->regs, value_regno); + mark_reg_unknown(env, state->regs, value_regno); else - mark_reg_known_zero(state->regs, value_regno); + mark_reg_known_zero(env, state->regs, + value_regno); state->regs[value_regno].id = 0; state->regs[value_regno].off = 0; state->regs[value_regno].range = 0; @@ -1206,13 +1218,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose("variable stack access var_off=%s off=%d size=%d", + verbose(env, "variable stack access var_off=%s off=%d size=%d", tn_buf, off, size); return -EACCES; } off += reg->var_off.value; if (off >= 0 || off < -MAX_BPF_STACK) { - verbose("invalid stack off=%d size=%d\n", off, size); + verbose(env, "invalid stack off=%d size=%d\n", off, + size); return -EACCES; } @@ -1223,29 +1236,32 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (!env->allow_ptr_leaks && state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && size != BPF_REG_SIZE) { - verbose("attempt to corrupt spilled pointer on stack\n"); + verbose(env, "attempt to corrupt spilled pointer on stack\n"); return -EACCES; } - err = check_stack_write(state, off, size, value_regno); + err = check_stack_write(env, state, off, size, + value_regno); } else { - err = check_stack_read(state, off, size, value_regno); + err = check_stack_read(env, state, off, size, + value_regno); } } else if (reg_is_pkt_pointer(reg)) { if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { - verbose("cannot write into packet\n"); + verbose(env, "cannot write into packet\n"); return -EACCES; } if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { - verbose("R%d leaks addr into packet\n", value_regno); + verbose(env, "R%d leaks addr into packet\n", + value_regno); return -EACCES; } err = check_packet_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown(state->regs, value_regno); + mark_reg_unknown(env, state->regs, value_regno); } else { - verbose("R%d invalid mem access '%s'\n", - regno, reg_type_str[reg->type]); + verbose(env, "R%d invalid mem access '%s'\n", regno, + reg_type_str[reg->type]); return -EACCES; } @@ -1265,7 +1281,7 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || insn->imm != 0) { - verbose("BPF_XADD uses reserved fields\n"); + verbose(env, "BPF_XADD uses reserved fields\n"); return -EINVAL; } @@ -1280,7 +1296,7 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins return err; if (is_pointer_value(env, insn->src_reg)) { - verbose("R%d leaks addr into mem\n", insn->src_reg); + verbose(env, "R%d leaks addr into mem\n", insn->src_reg); return -EACCES; } @@ -1321,7 +1337,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, register_is_null(regs[regno])) return 0; - verbose("R%d type=%s expected=%s\n", regno, + verbose(env, "R%d type=%s expected=%s\n", regno, reg_type_str[regs[regno].type], reg_type_str[PTR_TO_STACK]); return -EACCES; @@ -1332,13 +1348,13 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); - verbose("invalid variable stack read R%d var_off=%s\n", + verbose(env, "invalid variable stack read R%d var_off=%s\n", regno, tn_buf); } off = regs[regno].off + 
regs[regno].var_off.value; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || access_size <= 0) { - verbose("invalid stack type R%d off=%d access_size=%d\n", + verbose(env, "invalid stack type R%d off=%d access_size=%d\n", regno, off, access_size); return -EACCES; } @@ -1354,7 +1370,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, for (i = 0; i < access_size; i++) { if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { - verbose("invalid indirect read from stack off %d+%d size %d\n", + verbose(env, "invalid indirect read from stack off %d+%d size %d\n", off, i, access_size); return -EACCES; } @@ -1397,7 +1413,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, if (arg_type == ARG_ANYTHING) { if (is_pointer_value(env, regno)) { - verbose("R%d leaks addr into helper function\n", regno); + verbose(env, "R%d leaks addr into helper function\n", + regno); return -EACCES; } return 0; @@ -1405,7 +1422,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, if (type_is_pkt_pointer(type) && !may_access_direct_pkt_data(env, meta, BPF_READ)) { - verbose("helper access to the packet is not allowed\n"); + verbose(env, "helper access to the packet is not allowed\n"); return -EACCES; } @@ -1443,7 +1460,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; } else { - verbose("unsupported arg_type %d\n", arg_type); + verbose(env, "unsupported arg_type %d\n", arg_type); return -EFAULT; } @@ -1461,7 +1478,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, * we have to check map_key here. Otherwise it means * that kernel subsystem misconfigured verifier */ - verbose("invalid map_ptr to access map->key\n"); + verbose(env, "invalid map_ptr to access map->key\n"); return -EACCES; } if (type_is_pkt_pointer(type)) @@ -1477,7 +1494,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, */ if (!meta->map_ptr) { /* kernel subsystem misconfigured verifier */ - verbose("invalid map_ptr to access map->value\n"); + verbose(env, "invalid map_ptr to access map->value\n"); return -EACCES; } if (type_is_pkt_pointer(type)) @@ -1497,7 +1514,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, */ if (regno == 0) { /* kernel subsystem misconfigured verifier */ - verbose("ARG_CONST_SIZE cannot be first argument\n"); + verbose(env, + "ARG_CONST_SIZE cannot be first argument\n"); return -EACCES; } @@ -1514,7 +1532,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, meta = NULL; if (reg->smin_value < 0) { - verbose("R%d min value is negative, either use unsigned or 'var &= const'\n", + verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", regno); return -EACCES; } @@ -1528,7 +1546,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, } if (reg->umax_value >= BPF_MAX_VAR_SIZ) { - verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", + verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", regno); return -EACCES; } @@ -1539,12 +1557,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, return err; err_type: - verbose("R%d type=%s expected=%s\n", regno, + verbose(env, "R%d type=%s expected=%s\n", regno, reg_type_str[type], reg_type_str[expected_type]); return -EACCES; } -static int check_map_func_compatibility(struct bpf_map *map, int func_id) +static int 
check_map_func_compatibility(struct bpf_verifier_env *env, + struct bpf_map *map, int func_id) { if (!map) return 0; @@ -1632,7 +1651,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) return 0; error: - verbose("cannot pass map_type %d into func %s#%d\n", + verbose(env, "cannot pass map_type %d into func %s#%d\n", map->map_type, func_id_name(func_id), func_id); return -EINVAL; } @@ -1666,7 +1685,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) for (i = 0; i < MAX_BPF_REG; i++) if (reg_is_pkt_pointer_any(®s[i])) - mark_reg_unknown(regs, i); + mark_reg_unknown(env, regs, i); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) @@ -1688,7 +1707,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) /* find function prototype */ if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { - verbose("invalid func %s#%d\n", func_id_name(func_id), func_id); + verbose(env, "invalid func %s#%d\n", func_id_name(func_id), + func_id); return -EINVAL; } @@ -1696,13 +1716,14 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) fn = env->prog->aux->ops->get_func_proto(func_id); if (!fn) { - verbose("unknown func %s#%d\n", func_id_name(func_id), func_id); + verbose(env, "unknown func %s#%d\n", func_id_name(func_id), + func_id); return -EINVAL; } /* eBPF programs must be GPL compatible to use GPL-ed functions */ if (!env->prog->gpl_compatible && fn->gpl_only) { - verbose("cannot call GPL only function from proprietary program\n"); + verbose(env, "cannot call GPL only function from proprietary program\n"); return -EINVAL; } @@ -1716,7 +1737,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) */ err = check_raw_mode(fn); if (err) { - verbose("kernel subsystem misconfigured func %s#%d\n", + verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id); return err; } @@ -1749,14 +1770,14 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { - mark_reg_not_init(regs, caller_saved[i]); + mark_reg_not_init(env, regs, caller_saved[i]); check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); } /* update return register (already marked as written above) */ if (fn->ret_type == RET_INTEGER) { /* sets type to SCALAR_VALUE */ - mark_reg_unknown(regs, BPF_REG_0); + mark_reg_unknown(env, regs, BPF_REG_0); } else if (fn->ret_type == RET_VOID) { regs[BPF_REG_0].type = NOT_INIT; } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { @@ -1764,14 +1785,15 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; /* There is no offset yet applied, variable or fixed */ - mark_reg_known_zero(regs, BPF_REG_0); + mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].off = 0; /* remember map_ptr, so that check_map_access() * can check 'value_size' boundary of memory access * to map element returned from bpf_map_lookup_elem() */ if (meta.map_ptr == NULL) { - verbose("kernel subsystem misconfigured verifier\n"); + verbose(env, + "kernel subsystem misconfigured verifier\n"); return -EINVAL; } regs[BPF_REG_0].map_ptr = meta.map_ptr; @@ -1782,12 +1804,12 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) else if (insn_aux->map_ptr != meta.map_ptr) insn_aux->map_ptr = BPF_MAP_PTR_POISON; } else { - verbose("unknown return type %d 
of func %s#%d\n", + verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); return -EINVAL; } - err = check_map_func_compatibility(meta.map_ptr, func_id); + err = check_map_func_compatibility(env, meta.map_ptr, func_id); if (err) return err; @@ -1846,39 +1868,42 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg = ®s[dst]; if (WARN_ON_ONCE(known && (smin_val != smax_val))) { - print_verifier_state(&env->cur_state); - verbose("verifier internal error: known but bad sbounds\n"); + print_verifier_state(env, &env->cur_state); + verbose(env, + "verifier internal error: known but bad sbounds\n"); return -EINVAL; } if (WARN_ON_ONCE(known && (umin_val != umax_val))) { - print_verifier_state(&env->cur_state); - verbose("verifier internal error: known but bad ubounds\n"); + print_verifier_state(env, &env->cur_state); + verbose(env, + "verifier internal error: known but bad ubounds\n"); return -EINVAL; } if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops on pointers produce (meaningless) scalars */ if (!env->allow_ptr_leaks) - verbose("R%d 32-bit pointer arithmetic prohibited\n", + verbose(env, + "R%d 32-bit pointer arithmetic prohibited\n", dst); return -EACCES; } if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { if (!env->allow_ptr_leaks) - verbose("R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", + verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", dst); return -EACCES; } if (ptr_reg->type == CONST_PTR_TO_MAP) { if (!env->allow_ptr_leaks) - verbose("R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", + verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", dst); return -EACCES; } if (ptr_reg->type == PTR_TO_PACKET_END) { if (!env->allow_ptr_leaks) - verbose("R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", + verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", dst); return -EACCES; } @@ -1943,7 +1968,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ if (!env->allow_ptr_leaks) - verbose("R%d tried to subtract pointer from scalar\n", + verbose(env, "R%d tried to subtract pointer from scalar\n", dst); return -EACCES; } @@ -1953,7 +1978,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, */ if (ptr_reg->type == PTR_TO_STACK) { if (!env->allow_ptr_leaks) - verbose("R%d subtraction from stack pointer prohibited\n", + verbose(env, "R%d subtraction from stack pointer prohibited\n", dst); return -EACCES; } @@ -2008,13 +2033,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, * ptr &= ~3 which would reduce min_value by 3.) */ if (!env->allow_ptr_leaks) - verbose("R%d bitwise operator %s on pointer prohibited\n", + verbose(env, "R%d bitwise operator %s on pointer prohibited\n", dst, bpf_alu_string[opcode >> 4]); return -EACCES; default: /* other operators (e.g. MUL,LSH) produce non-pointer results */ if (!env->allow_ptr_leaks) - verbose("R%d pointer arithmetic with %s operator prohibited\n", + verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", dst, bpf_alu_string[opcode >> 4]); return -EACCES; } @@ -2180,7 +2205,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, /* Shifts greater than 63 are undefined. This includes * shifts by a negative number. 
*/ - mark_reg_unknown(regs, insn->dst_reg); + mark_reg_unknown(env, regs, insn->dst_reg); break; } /* We lose all sign bit information (except what we can pick @@ -2208,7 +2233,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, /* Shifts greater than 63 are undefined. This includes * shifts by a negative number. */ - mark_reg_unknown(regs, insn->dst_reg); + mark_reg_unknown(env, regs, insn->dst_reg); break; } /* BPF_RSH is an unsigned shift, so make the appropriate casts */ @@ -2236,7 +2261,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, __update_reg_bounds(dst_reg); break; default: - mark_reg_unknown(regs, insn->dst_reg); + mark_reg_unknown(env, regs, insn->dst_reg); break; } @@ -2268,12 +2293,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, * an arbitrary scalar. */ if (!env->allow_ptr_leaks) { - verbose("R%d pointer %s pointer prohibited\n", + verbose(env, "R%d pointer %s pointer prohibited\n", insn->dst_reg, bpf_alu_string[opcode >> 4]); return -EACCES; } - mark_reg_unknown(regs, insn->dst_reg); + mark_reg_unknown(env, regs, insn->dst_reg); return 0; } else { /* scalar += pointer @@ -2325,13 +2350,13 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(&env->cur_state); - verbose("verifier internal error: unexpected ptr_reg\n"); + print_verifier_state(env, &env->cur_state); + verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(&env->cur_state); - verbose("verifier internal error: no src_reg\n"); + print_verifier_state(env, &env->cur_state); + verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); @@ -2349,14 +2374,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (BPF_SRC(insn->code) != 0 || insn->src_reg != BPF_REG_0 || insn->off != 0 || insn->imm != 0) { - verbose("BPF_NEG uses reserved fields\n"); + verbose(env, "BPF_NEG uses reserved fields\n"); return -EINVAL; } } else { if (insn->src_reg != BPF_REG_0 || insn->off != 0 || (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || BPF_CLASS(insn->code) == BPF_ALU64) { - verbose("BPF_END uses reserved fields\n"); + verbose(env, "BPF_END uses reserved fields\n"); return -EINVAL; } } @@ -2367,7 +2392,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; if (is_pointer_value(env, insn->dst_reg)) { - verbose("R%d pointer arithmetic prohibited\n", + verbose(env, "R%d pointer arithmetic prohibited\n", insn->dst_reg); return -EACCES; } @@ -2381,7 +2406,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (BPF_SRC(insn->code) == BPF_X) { if (insn->imm != 0 || insn->off != 0) { - verbose("BPF_MOV uses reserved fields\n"); + verbose(env, "BPF_MOV uses reserved fields\n"); return -EINVAL; } @@ -2391,7 +2416,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } else { if (insn->src_reg != BPF_REG_0 || insn->off != 0) { - verbose("BPF_MOV uses reserved fields\n"); + verbose(env, "BPF_MOV uses reserved fields\n"); return -EINVAL; } } @@ -2411,11 +2436,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } else { /* R1 = (u32) R2 */ if (is_pointer_value(env, insn->src_reg)) { - verbose("R%d partial copy of pointer\n", + verbose(env, 
+ "R%d partial copy of pointer\n", insn->src_reg); return -EACCES; } - mark_reg_unknown(regs, insn->dst_reg); + mark_reg_unknown(env, regs, insn->dst_reg); /* high 32 bits are known zero. */ regs[insn->dst_reg].var_off = tnum_cast( regs[insn->dst_reg].var_off, 4); @@ -2430,14 +2456,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } } else if (opcode > BPF_END) { - verbose("invalid BPF_ALU opcode %x\n", opcode); + verbose(env, "invalid BPF_ALU opcode %x\n", opcode); return -EINVAL; } else { /* all other ALU ops: and, sub, xor, add, ... */ if (BPF_SRC(insn->code) == BPF_X) { if (insn->imm != 0 || insn->off != 0) { - verbose("BPF_ALU uses reserved fields\n"); + verbose(env, "BPF_ALU uses reserved fields\n"); return -EINVAL; } /* check src1 operand */ @@ -2446,7 +2472,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } else { if (insn->src_reg != BPF_REG_0 || insn->off != 0) { - verbose("BPF_ALU uses reserved fields\n"); + verbose(env, "BPF_ALU uses reserved fields\n"); return -EINVAL; } } @@ -2458,7 +2484,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if ((opcode == BPF_MOD || opcode == BPF_DIV) && BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { - verbose("div by zero\n"); + verbose(env, "div by zero\n"); return -EINVAL; } @@ -2467,7 +2493,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; if (insn->imm < 0 || insn->imm >= size) { - verbose("invalid shift %d\n", insn->imm); + verbose(env, "invalid shift %d\n", insn->imm); return -EINVAL; } } @@ -2820,13 +2846,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, int err; if (opcode > BPF_JSLE) { - verbose("invalid BPF_JMP opcode %x\n", opcode); + verbose(env, "invalid BPF_JMP opcode %x\n", opcode); return -EINVAL; } if (BPF_SRC(insn->code) == BPF_X) { if (insn->imm != 0) { - verbose("BPF_JMP uses reserved fields\n"); + verbose(env, "BPF_JMP uses reserved fields\n"); return -EINVAL; } @@ -2836,13 +2862,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return err; if (is_pointer_value(env, insn->src_reg)) { - verbose("R%d pointer comparison prohibited\n", + verbose(env, "R%d pointer comparison prohibited\n", insn->src_reg); return -EACCES; } } else { if (insn->src_reg != BPF_REG_0) { - verbose("BPF_JMP uses reserved fields\n"); + verbose(env, "BPF_JMP uses reserved fields\n"); return -EINVAL; } } @@ -2954,11 +2980,12 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, find_good_pkt_pointers(this_branch, ®s[insn->src_reg], PTR_TO_PACKET_META); } else if (is_pointer_value(env, insn->dst_reg)) { - verbose("R%d pointer comparison prohibited\n", insn->dst_reg); + verbose(env, "R%d pointer comparison prohibited\n", + insn->dst_reg); return -EACCES; } - if (verifier_log.level) - print_verifier_state(this_branch); + if (env->log.level) + print_verifier_state(env, this_branch); return 0; } @@ -2977,11 +3004,11 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) int err; if (BPF_SIZE(insn->code) != BPF_DW) { - verbose("invalid BPF_LD_IMM insn\n"); + verbose(env, "invalid BPF_LD_IMM insn\n"); return -EINVAL; } if (insn->off != 0) { - verbose("BPF_LD_IMM64 uses reserved fields\n"); + verbose(env, "BPF_LD_IMM64 uses reserved fields\n"); return -EINVAL; } @@ -3039,14 +3066,14 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) int i, err; if 
(!may_access_skb(env->prog->type)) { - verbose("BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); + verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); return -EINVAL; } if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || BPF_SIZE(insn->code) == BPF_DW || (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { - verbose("BPF_LD_[ABS|IND] uses reserved fields\n"); + verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n"); return -EINVAL; } @@ -3056,7 +3083,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; if (regs[BPF_REG_6].type != PTR_TO_CTX) { - verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); + verbose(env, + "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); return -EINVAL; } @@ -3069,7 +3097,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) /* reset caller saved regs to unreadable */ for (i = 0; i < CALLER_SAVED_REGS; i++) { - mark_reg_not_init(regs, caller_saved[i]); + mark_reg_not_init(env, regs, caller_saved[i]); check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); } @@ -3077,7 +3105,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) * the value fetched from the packet. * Already marked as written above. */ - mark_reg_unknown(regs, BPF_REG_0); + mark_reg_unknown(env, regs, BPF_REG_0); return 0; } @@ -3097,22 +3125,22 @@ static int check_return_code(struct bpf_verifier_env *env) reg = &env->cur_state.regs[BPF_REG_0]; if (reg->type != SCALAR_VALUE) { - verbose("At program exit the register R0 is not a known value (%s)\n", + verbose(env, "At program exit the register R0 is not a known value (%s)\n", reg_type_str[reg->type]); return -EINVAL; } if (!tnum_in(range, reg->var_off)) { - verbose("At program exit the register R0 "); + verbose(env, "At program exit the register R0 "); if (!tnum_is_unknown(reg->var_off)) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose("has value %s", tn_buf); + verbose(env, "has value %s", tn_buf); } else { - verbose("has unknown scalar value"); + verbose(env, "has unknown scalar value"); } - verbose(" should have been 0 or 1\n"); + verbose(env, " should have been 0 or 1\n"); return -EINVAL; } return 0; @@ -3178,7 +3206,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) return 0; if (w < 0 || w >= env->prog->len) { - verbose("jump out of range from insn %d to %d\n", t, w); + verbose(env, "jump out of range from insn %d to %d\n", t, w); return -EINVAL; } @@ -3195,13 +3223,13 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) insn_stack[cur_stack++] = w; return 1; } else if ((insn_state[w] & 0xF0) == DISCOVERED) { - verbose("back-edge from insn %d to %d\n", t, w); + verbose(env, "back-edge from insn %d to %d\n", t, w); return -EINVAL; } else if (insn_state[w] == EXPLORED) { /* forward- or cross-edge */ insn_state[t] = DISCOVERED | e; } else { - verbose("insn state internal bug\n"); + verbose(env, "insn state internal bug\n"); return -EFAULT; } return 0; @@ -3295,7 +3323,7 @@ peek_stack: mark_explored: insn_state[t] = EXPLORED; if (cur_stack-- <= 0) { - verbose("pop stack internal bug\n"); + verbose(env, "pop stack internal bug\n"); ret = -EFAULT; goto err_free; } @@ -3304,7 +3332,7 @@ mark_explored: check_state: for (i = 0; i < insn_cnt; i++) { if (insn_state[i] != EXPLORED) { - verbose("unreachable insn %d\n", i); + verbose(env, "unreachable insn %d\n", i); ret = -EINVAL; goto err_free; } @@ -3685,7 +3713,7 @@ 
static int do_check(struct bpf_verifier_env *env) int insn_processed = 0; bool do_print_state = false; - init_reg_state(regs); + init_reg_state(env, regs); state->parent = NULL; insn_idx = 0; for (;;) { @@ -3694,7 +3722,7 @@ static int do_check(struct bpf_verifier_env *env) int err; if (insn_idx >= insn_cnt) { - verbose("invalid insn idx %d insn_cnt %d\n", + verbose(env, "invalid insn idx %d insn_cnt %d\n", insn_idx, insn_cnt); return -EFAULT; } @@ -3703,7 +3731,8 @@ static int do_check(struct bpf_verifier_env *env) class = BPF_CLASS(insn->code); if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { - verbose("BPF program is too large. Processed %d insn\n", + verbose(env, + "BPF program is too large. Processed %d insn\n", insn_processed); return -E2BIG; } @@ -3713,12 +3742,12 @@ static int do_check(struct bpf_verifier_env *env) return err; if (err == 1) { /* found equivalent state, can prune the search */ - if (verifier_log.level) { + if (env->log.level) { if (do_print_state) - verbose("\nfrom %d to %d: safe\n", + verbose(env, "\nfrom %d to %d: safe\n", prev_insn_idx, insn_idx); else - verbose("%d: safe\n", insn_idx); + verbose(env, "%d: safe\n", insn_idx); } goto process_bpf_exit; } @@ -3726,19 +3755,18 @@ static int do_check(struct bpf_verifier_env *env) if (need_resched()) cond_resched(); - if (verifier_log.level > 1 || - (verifier_log.level && do_print_state)) { - if (verifier_log.level > 1) - verbose("%d:", insn_idx); + if (env->log.level > 1 || (env->log.level && do_print_state)) { + if (env->log.level > 1) + verbose(env, "%d:", insn_idx); else - verbose("\nfrom %d to %d:", + verbose(env, "\nfrom %d to %d:", prev_insn_idx, insn_idx); - print_verifier_state(&env->cur_state); + print_verifier_state(env, &env->cur_state); do_print_state = false; } - if (verifier_log.level) { - verbose("%d: ", insn_idx); + if (env->log.level) { + verbose(env, "%d: ", insn_idx); print_bpf_insn(env, insn); } @@ -3795,7 +3823,7 @@ static int do_check(struct bpf_verifier_env *env) * src_reg == stack|map in some other branch. * Reject it. 
*/ - verbose("same insn cannot be used with different pointers\n"); + verbose(env, "same insn cannot be used with different pointers\n"); return -EINVAL; } @@ -3835,14 +3863,14 @@ static int do_check(struct bpf_verifier_env *env) } else if (dst_reg_type != *prev_dst_type && (dst_reg_type == PTR_TO_CTX || *prev_dst_type == PTR_TO_CTX)) { - verbose("same insn cannot be used with different pointers\n"); + verbose(env, "same insn cannot be used with different pointers\n"); return -EINVAL; } } else if (class == BPF_ST) { if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) { - verbose("BPF_ST uses reserved fields\n"); + verbose(env, "BPF_ST uses reserved fields\n"); return -EINVAL; } /* check src operand */ @@ -3865,7 +3893,7 @@ static int do_check(struct bpf_verifier_env *env) insn->off != 0 || insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0) { - verbose("BPF_CALL uses reserved fields\n"); + verbose(env, "BPF_CALL uses reserved fields\n"); return -EINVAL; } @@ -3878,7 +3906,7 @@ static int do_check(struct bpf_verifier_env *env) insn->imm != 0 || insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0) { - verbose("BPF_JA uses reserved fields\n"); + verbose(env, "BPF_JA uses reserved fields\n"); return -EINVAL; } @@ -3890,7 +3918,7 @@ static int do_check(struct bpf_verifier_env *env) insn->imm != 0 || insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0) { - verbose("BPF_EXIT uses reserved fields\n"); + verbose(env, "BPF_EXIT uses reserved fields\n"); return -EINVAL; } @@ -3905,7 +3933,7 @@ static int do_check(struct bpf_verifier_env *env) return err; if (is_pointer_value(env, BPF_REG_0)) { - verbose("R0 leaks addr as return value\n"); + verbose(env, "R0 leaks addr as return value\n"); return -EACCES; } @@ -3940,19 +3968,19 @@ process_bpf_exit: insn_idx++; } else { - verbose("invalid BPF_LD mode\n"); + verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; } } else { - verbose("unknown insn class %d\n", class); + verbose(env, "unknown insn class %d\n", class); return -EINVAL; } insn_idx++; } - verbose("processed %d insns, stack depth %d\n", - insn_processed, env->prog->aux->stack_depth); + verbose(env, "processed %d insns, stack depth %d\n", insn_processed, + env->prog->aux->stack_depth); return 0; } @@ -3964,7 +3992,8 @@ static int check_map_prealloc(struct bpf_map *map) !(map->map_flags & BPF_F_NO_PREALLOC); } -static int check_map_prog_compatibility(struct bpf_map *map, +static int check_map_prog_compatibility(struct bpf_verifier_env *env, + struct bpf_map *map, struct bpf_prog *prog) { @@ -3975,12 +4004,12 @@ static int check_map_prog_compatibility(struct bpf_map *map, */ if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { if (!check_map_prealloc(map)) { - verbose("perf_event programs can only use preallocated hash map\n"); + verbose(env, "perf_event programs can only use preallocated hash map\n"); return -EINVAL; } if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta)) { - verbose("perf_event programs can only use preallocated inner hash map\n"); + verbose(env, "perf_event programs can only use preallocated inner hash map\n"); return -EINVAL; } } @@ -4003,14 +4032,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { if (BPF_CLASS(insn->code) == BPF_LDX && (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { - verbose("BPF_LDX uses reserved fields\n"); + verbose(env, "BPF_LDX uses reserved fields\n"); return -EINVAL; } if (BPF_CLASS(insn->code) == BPF_STX && ((BPF_MODE(insn->code) != BPF_MEM 
&& BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { - verbose("BPF_STX uses reserved fields\n"); + verbose(env, "BPF_STX uses reserved fields\n"); return -EINVAL; } @@ -4021,7 +4050,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || insn[1].off != 0) { - verbose("invalid bpf_ld_imm64 insn\n"); + verbose(env, "invalid bpf_ld_imm64 insn\n"); return -EINVAL; } @@ -4030,19 +4059,20 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) goto next_insn; if (insn->src_reg != BPF_PSEUDO_MAP_FD) { - verbose("unrecognized bpf_ld_imm64 insn\n"); + verbose(env, + "unrecognized bpf_ld_imm64 insn\n"); return -EINVAL; } f = fdget(insn->imm); map = __bpf_map_get(f); if (IS_ERR(map)) { - verbose("fd %d is not pointing to valid bpf_map\n", + verbose(env, "fd %d is not pointing to valid bpf_map\n", insn->imm); return PTR_ERR(map); } - err = check_map_prog_compatibility(map, env->prog); + err = check_map_prog_compatibility(env, map, env->prog); if (err) { fdput(f); return err; @@ -4164,7 +4194,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); if (cnt >= ARRAY_SIZE(insn_buf)) { - verbose("bpf verifier is misconfigured\n"); + verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } else if (cnt) { new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); @@ -4212,7 +4242,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) u8 size_code; if (type == BPF_WRITE) { - verbose("bpf verifier narrow ctx access misconfigured\n"); + verbose(env, "bpf verifier narrow ctx access misconfigured\n"); return -EINVAL; } @@ -4231,7 +4261,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) &target_size); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || (ctx_field_size && !target_size)) { - verbose("bpf verifier is misconfigured\n"); + verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@ -4313,7 +4343,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { - verbose("bpf verifier is misconfigured\n"); + verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@ -4357,7 +4387,8 @@ patch_call_imm: * programs to call them, must be real in-kernel functions */ if (!fn->func) { - verbose("kernel subsystem misconfigured func %s#%d\n", + verbose(env, + "kernel subsystem misconfigured func %s#%d\n", func_id_name(insn->imm), insn->imm); return -EFAULT; } @@ -4391,8 +4422,8 @@ static void free_states(struct bpf_verifier_env *env) int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { - struct bpf_verifer_log *log = &verifier_log; struct bpf_verifier_env *env; + struct bpf_verifer_log *log; int ret = -EINVAL; /* 'struct bpf_verifier_env' can be global, but since it's not small, @@ -4401,6 +4432,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; + log = &env->log; env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * (*prog)->len); @@ -4419,7 +4451,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) log->level = attr->log_level; log->ubuf = (char __user *) (unsigned long) attr->log_buf; log->len_total = attr->log_size; - log->len_used = 0; ret = -EINVAL; /* log attributes have to be sane */ @@ -4431,8 +4462,6 @@ int 
bpf_check(struct bpf_prog **prog, union bpf_attr *attr) log->kbuf = vmalloc(log->len_total); if (!log->kbuf) goto err_unlock; - } else { - log->level = 0; } env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); @@ -4543,8 +4572,6 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, /* grab the mutex to protect few globals used by verifier */ mutex_lock(&bpf_verifier_lock); - verifier_log.level = 0; - env->strict_alignment = false; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; From f4ac7e0b5cc8d16004ac57ff679266d573f30f77 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:12 -0700 Subject: [PATCH 0798/2177] bpf: move instruction printing into a separate file Separate the instruction printing into a standalone source file. This way sneaky code from tools/ can compile it in directly. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/Makefile | 1 + kernel/bpf/disasm.c | 214 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/disasm.h | 32 +++++++ kernel/bpf/verifier.c | 202 +-------------------------------------- 4 files changed, 251 insertions(+), 198 deletions(-) create mode 100644 kernel/bpf/disasm.c create mode 100644 kernel/bpf/disasm.h diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 897daa005b23..53fb09f92e3f 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -2,6 +2,7 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +obj-$(CONFIG_BPF_SYSCALL) += disasm.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o ifeq ($(CONFIG_STREAM_PARSER),y) diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c new file mode 100644 index 000000000000..e682850c9715 --- /dev/null +++ b/kernel/bpf/disasm.c @@ -0,0 +1,214 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include + +#include "disasm.h" + +#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x) +static const char * const func_id_str[] = { + __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN) +}; +#undef __BPF_FUNC_STR_FN + +const char *func_id_name(int id) +{ + BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); + + if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id]) + return func_id_str[id]; + else + return "unknown"; +} + +const char *const bpf_class_string[8] = { + [BPF_LD] = "ld", + [BPF_LDX] = "ldx", + [BPF_ST] = "st", + [BPF_STX] = "stx", + [BPF_ALU] = "alu", + [BPF_JMP] = "jmp", + [BPF_RET] = "BUG", + [BPF_ALU64] = "alu64", +}; + +const char *const bpf_alu_string[16] = { + [BPF_ADD >> 4] = "+=", + [BPF_SUB >> 4] = "-=", + [BPF_MUL >> 4] = "*=", + [BPF_DIV >> 4] = "/=", + [BPF_OR >> 4] = "|=", + [BPF_AND >> 4] = "&=", + [BPF_LSH >> 4] = "<<=", + [BPF_RSH >> 4] = ">>=", + [BPF_NEG >> 4] = "neg", + [BPF_MOD >> 4] = "%=", + [BPF_XOR >> 4] = "^=", + [BPF_MOV >> 4] = "=", + [BPF_ARSH >> 4] = "s>>=", + [BPF_END >> 4] = "endian", +}; + +static const char *const bpf_ldst_string[] = { + [BPF_W >> 3] = "u32", + [BPF_H >> 3] = "u16", + [BPF_B >> 3] = "u8", + [BPF_DW >> 3] = "u64", +}; + +static const char *const bpf_jmp_string[16] = { + [BPF_JA >> 4] = "jmp", + [BPF_JEQ >> 4] = "==", + [BPF_JGT >> 4] = ">", + [BPF_JLT >> 4] = "<", + [BPF_JGE >> 4] = ">=", + [BPF_JLE >> 4] = "<=", + [BPF_JSET >> 4] = "&", + [BPF_JNE >> 4] = "!=", + [BPF_JSGT >> 4] = "s>", + [BPF_JSLT >> 4] = "s<", + [BPF_JSGE >> 4] = "s>=", + [BPF_JSLE >> 4] = "s<=", + [BPF_CALL >> 4] = "call", + [BPF_EXIT >> 4] = "exit", +}; + +static void print_bpf_end_insn(bpf_insn_print_cb verbose, + struct bpf_verifier_env *env, + const struct bpf_insn *insn) +{ + verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg, + BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le", + insn->imm, insn->dst_reg); +} + +void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, + const struct bpf_insn *insn, bool allow_ptr_leaks) +{ + u8 class = BPF_CLASS(insn->code); + + if (class == BPF_ALU || class == BPF_ALU64) { + if (BPF_OP(insn->code) == BPF_END) { + if (class == BPF_ALU64) + verbose(env, "BUG_alu64_%02x\n", insn->code); + else + print_bpf_end_insn(verbose, env, insn); + } else if (BPF_OP(insn->code) == BPF_NEG) { + verbose(env, "(%02x) r%d = %s-r%d\n", + insn->code, insn->dst_reg, + class == BPF_ALU ? "(u32) " : "", + insn->dst_reg); + } else if (BPF_SRC(insn->code) == BPF_X) { + verbose(env, "(%02x) %sr%d %s %sr%d\n", + insn->code, class == BPF_ALU ? "(u32) " : "", + insn->dst_reg, + bpf_alu_string[BPF_OP(insn->code) >> 4], + class == BPF_ALU ? "(u32) " : "", + insn->src_reg); + } else { + verbose(env, "(%02x) %sr%d %s %s%d\n", + insn->code, class == BPF_ALU ? "(u32) " : "", + insn->dst_reg, + bpf_alu_string[BPF_OP(insn->code) >> 4], + class == BPF_ALU ? 
"(u32) " : "", + insn->imm); + } + } else if (class == BPF_STX) { + if (BPF_MODE(insn->code) == BPF_MEM) + verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, + insn->off, insn->src_reg); + else if (BPF_MODE(insn->code) == BPF_XADD) + verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, insn->off, + insn->src_reg); + else + verbose(env, "BUG_%02x\n", insn->code); + } else if (class == BPF_ST) { + if (BPF_MODE(insn->code) != BPF_MEM) { + verbose(env, "BUG_st_%02x\n", insn->code); + return; + } + verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, + insn->off, insn->imm); + } else if (class == BPF_LDX) { + if (BPF_MODE(insn->code) != BPF_MEM) { + verbose(env, "BUG_ldx_%02x\n", insn->code); + return; + } + verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n", + insn->code, insn->dst_reg, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->src_reg, insn->off); + } else if (class == BPF_LD) { + if (BPF_MODE(insn->code) == BPF_ABS) { + verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->imm); + } else if (BPF_MODE(insn->code) == BPF_IND) { + verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->src_reg, insn->imm); + } else if (BPF_MODE(insn->code) == BPF_IMM && + BPF_SIZE(insn->code) == BPF_DW) { + /* At this point, we already made sure that the second + * part of the ldimm64 insn is accessible. + */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; + + if (map_ptr && !allow_ptr_leaks) + imm = 0; + + verbose(env, "(%02x) r%d = 0x%llx\n", insn->code, + insn->dst_reg, (unsigned long long)imm); + } else { + verbose(env, "BUG_ld_%02x\n", insn->code); + return; + } + } else if (class == BPF_JMP) { + u8 opcode = BPF_OP(insn->code); + + if (opcode == BPF_CALL) { + verbose(env, "(%02x) call %s#%d\n", insn->code, + func_id_name(insn->imm), insn->imm); + } else if (insn->code == (BPF_JMP | BPF_JA)) { + verbose(env, "(%02x) goto pc%+d\n", + insn->code, insn->off); + } else if (insn->code == (BPF_JMP | BPF_EXIT)) { + verbose(env, "(%02x) exit\n", insn->code); + } else if (BPF_SRC(insn->code) == BPF_X) { + verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n", + insn->code, insn->dst_reg, + bpf_jmp_string[BPF_OP(insn->code) >> 4], + insn->src_reg, insn->off); + } else { + verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n", + insn->code, insn->dst_reg, + bpf_jmp_string[BPF_OP(insn->code) >> 4], + insn->imm, insn->off); + } + } else { + verbose(env, "(%02x) %s\n", + insn->code, bpf_class_string[class]); + } +} diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h new file mode 100644 index 000000000000..8de977e420b6 --- /dev/null +++ b/kernel/bpf/disasm.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#ifndef __BPF_DISASM_H__ +#define __BPF_DISASM_H__ + +#include +#include +#include + +extern const char *const bpf_alu_string[16]; +extern const char *const bpf_class_string[8]; + +const char *func_id_name(int id); + +struct bpf_verifier_env; +typedef void (*bpf_insn_print_cb)(struct bpf_verifier_env *env, + const char *, ...); +void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, + const struct bpf_insn *insn, bool allow_ptr_leaks); + +#endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a352f93cd4b2..274c6582ec39 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21,6 +21,8 @@ #include #include +#include "disasm.h" + /* bpf_check() is a static code analyzer that walks eBPF program * instruction by instruction and updates register/stack state. * All paths of conditional branches are analyzed until 'bpf_exit' insn. @@ -194,22 +196,6 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; -#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x) -static const char * const func_id_str[] = { - __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN) -}; -#undef __BPF_FUNC_STR_FN - -static const char *func_id_name(int id) -{ - BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); - - if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id]) - return func_id_str[id]; - else - return "unknown"; -} - static void print_verifier_state(struct bpf_verifier_env *env, struct bpf_verifier_state *state) { @@ -278,187 +264,6 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, "\n"); } -static const char *const bpf_class_string[] = { - [BPF_LD] = "ld", - [BPF_LDX] = "ldx", - [BPF_ST] = "st", - [BPF_STX] = "stx", - [BPF_ALU] = "alu", - [BPF_JMP] = "jmp", - [BPF_RET] = "BUG", - [BPF_ALU64] = "alu64", -}; - -static const char *const bpf_alu_string[16] = { - [BPF_ADD >> 4] = "+=", - [BPF_SUB >> 4] = "-=", - [BPF_MUL >> 4] = "*=", - [BPF_DIV >> 4] = "/=", - [BPF_OR >> 4] = "|=", - [BPF_AND >> 4] = "&=", - [BPF_LSH >> 4] = "<<=", - [BPF_RSH >> 4] = ">>=", - [BPF_NEG >> 4] = "neg", - [BPF_MOD >> 4] = "%=", - [BPF_XOR >> 4] = "^=", - [BPF_MOV >> 4] = "=", - [BPF_ARSH >> 4] = "s>>=", - [BPF_END >> 4] = "endian", -}; - -static const char *const bpf_ldst_string[] = { - [BPF_W >> 3] = "u32", - [BPF_H >> 3] = "u16", - [BPF_B >> 3] = "u8", - [BPF_DW >> 3] = "u64", -}; - -static const char *const bpf_jmp_string[16] = { - [BPF_JA >> 4] = "jmp", - [BPF_JEQ >> 4] = "==", - [BPF_JGT >> 4] = ">", - [BPF_JLT >> 4] = "<", - [BPF_JGE >> 4] = ">=", - [BPF_JLE >> 4] = "<=", - [BPF_JSET >> 4] = "&", - [BPF_JNE >> 4] = "!=", - [BPF_JSGT >> 4] = "s>", - [BPF_JSLT >> 4] = "s<", - [BPF_JSGE >> 4] = "s>=", - [BPF_JSLE >> 4] = "s<=", - [BPF_CALL >> 4] = "call", - [BPF_EXIT >> 4] = "exit", -}; - -static void print_bpf_end_insn(struct bpf_verifier_env *env, - const struct bpf_insn *insn) -{ - verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg, - BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le", - insn->imm, insn->dst_reg); -} - -static void print_bpf_insn(struct bpf_verifier_env *env, - const struct bpf_insn *insn) -{ - u8 class = BPF_CLASS(insn->code); - - if (class == BPF_ALU || class == BPF_ALU64) { - if (BPF_OP(insn->code) == BPF_END) { - if (class == BPF_ALU64) - verbose(env, "BUG_alu64_%02x\n", insn->code); - else - print_bpf_end_insn(env, insn); - } else if (BPF_OP(insn->code) == BPF_NEG) { - verbose(env, "(%02x) r%d = %s-r%d\n", - insn->code, insn->dst_reg, - class == BPF_ALU ? 
"(u32) " : "", - insn->dst_reg); - } else if (BPF_SRC(insn->code) == BPF_X) { - verbose(env, "(%02x) %sr%d %s %sr%d\n", - insn->code, class == BPF_ALU ? "(u32) " : "", - insn->dst_reg, - bpf_alu_string[BPF_OP(insn->code) >> 4], - class == BPF_ALU ? "(u32) " : "", - insn->src_reg); - } else { - verbose(env, "(%02x) %sr%d %s %s%d\n", - insn->code, class == BPF_ALU ? "(u32) " : "", - insn->dst_reg, - bpf_alu_string[BPF_OP(insn->code) >> 4], - class == BPF_ALU ? "(u32) " : "", - insn->imm); - } - } else if (class == BPF_STX) { - if (BPF_MODE(insn->code) == BPF_MEM) - verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->dst_reg, - insn->off, insn->src_reg); - else if (BPF_MODE(insn->code) == BPF_XADD) - verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->dst_reg, insn->off, - insn->src_reg); - else - verbose(env, "BUG_%02x\n", insn->code); - } else if (class == BPF_ST) { - if (BPF_MODE(insn->code) != BPF_MEM) { - verbose(env, "BUG_st_%02x\n", insn->code); - return; - } - verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->dst_reg, - insn->off, insn->imm); - } else if (class == BPF_LDX) { - if (BPF_MODE(insn->code) != BPF_MEM) { - verbose(env, "BUG_ldx_%02x\n", insn->code); - return; - } - verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n", - insn->code, insn->dst_reg, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->src_reg, insn->off); - } else if (class == BPF_LD) { - if (BPF_MODE(insn->code) == BPF_ABS) { - verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->imm); - } else if (BPF_MODE(insn->code) == BPF_IND) { - verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->src_reg, insn->imm); - } else if (BPF_MODE(insn->code) == BPF_IMM && - BPF_SIZE(insn->code) == BPF_DW) { - /* At this point, we already made sure that the second - * part of the ldimm64 insn is accessible. 
- */ - u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; - bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; - - if (map_ptr && !env->allow_ptr_leaks) - imm = 0; - - verbose(env, "(%02x) r%d = 0x%llx\n", insn->code, - insn->dst_reg, (unsigned long long)imm); - } else { - verbose(env, "BUG_ld_%02x\n", insn->code); - return; - } - } else if (class == BPF_JMP) { - u8 opcode = BPF_OP(insn->code); - - if (opcode == BPF_CALL) { - verbose(env, "(%02x) call %s#%d\n", insn->code, - func_id_name(insn->imm), insn->imm); - } else if (insn->code == (BPF_JMP | BPF_JA)) { - verbose(env, "(%02x) goto pc%+d\n", - insn->code, insn->off); - } else if (insn->code == (BPF_JMP | BPF_EXIT)) { - verbose(env, "(%02x) exit\n", insn->code); - } else if (BPF_SRC(insn->code) == BPF_X) { - verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n", - insn->code, insn->dst_reg, - bpf_jmp_string[BPF_OP(insn->code) >> 4], - insn->src_reg, insn->off); - } else { - verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n", - insn->code, insn->dst_reg, - bpf_jmp_string[BPF_OP(insn->code) >> 4], - insn->imm, insn->off); - } - } else { - verbose(env, "(%02x) %s\n", - insn->code, bpf_class_string[class]); - } -} - static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) { struct bpf_verifier_stack_elem *elem; @@ -3767,7 +3572,8 @@ static int do_check(struct bpf_verifier_env *env) if (env->log.level) { verbose(env, "%d: ", insn_idx); - print_bpf_insn(env, insn); + print_bpf_insn(verbose, env, insn, + env->allow_ptr_leaks); } err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); From c9c35995bcf812ee8136f634c25bc6ccc3021d4c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:13 -0700 Subject: [PATCH 0799/2177] tools: bpftool: use the kernel's instruction printer Compile the instruction printer from kernel/bpf and use it for disassembling "translated" eBPF code. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- .../bpftool/Documentation/bpftool-prog.rst | 11 +++-- tools/bpf/bpftool/Makefile | 7 ++- tools/bpf/bpftool/main.h | 10 ++--- tools/bpf/bpftool/prog.c | 44 +++++++++++++++---- 4 files changed, 51 insertions(+), 21 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 57fc4b9924ea..04d12f768f06 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -11,7 +11,7 @@ SYNOPSIS ======== | **bpftool** prog show [*PROG*] -| **bpftool** prog dump xlated *PROG* file *FILE* +| **bpftool** prog dump xlated *PROG* [file *FILE*] [opcodes] | **bpftool** prog dump jited *PROG* [file *FILE*] [opcodes] | **bpftool** prog pin *PROG* *FILE* | **bpftool** prog help @@ -28,9 +28,12 @@ DESCRIPTION Output will start with program ID followed by program type and zero or more named attributes (depending on kernel version). - **bpftool prog dump xlated** *PROG* **file** *FILE* - Dump eBPF instructions of the program from the kernel to a - file. + **bpftool prog dump xlated** *PROG* [**file** *FILE*] [**opcodes**] + Dump eBPF instructions of the program from the kernel. + If *FILE* is specified image will be written to a file, + otherwise it will be disassembled and printed to stdout. + + **opcodes** controls if raw opcodes will be printed. **bpftool prog dump jited** *PROG* [**file** *FILE*] [**opcodes**] Dump jited image (host machine code) of the program. 
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8705ee44664d..4f339824ca57 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -51,7 +51,7 @@ CC = gcc CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf +CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ LIBS = -lelf -lbfd -lopcodes $(LIBBPF) include $(wildcard *.d) @@ -59,7 +59,10 @@ include $(wildcard *.d) all: $(OUTPUT)bpftool SRCS=$(wildcard *.c) -OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) +OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o + +$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c + $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 85d2d7870a58..8e809b2bb311 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -36,11 +36,12 @@ #ifndef __BPF_TOOL_H #define __BPF_TOOL_H +/* BFD and kernel.h both define GCC_VERSION, differently */ +#undef GCC_VERSION #include #include #include - -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#include #define err(msg...) fprintf(stderr, "Error: " msg) #define warn(msg...) fprintf(stderr, "Warning: " msg) @@ -48,11 +49,6 @@ #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) -#define min(a, b) \ - ({ typeof(a) _a = (a); typeof(b) _b = (b); _a > _b ? _b : _a; }) -#define max(a, b) \ - ({ typeof(a) _a = (a); typeof(b) _b = (b); _a < _b ? _b : _a; }) - #define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) #define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) #define BAD_ARG() ({ err("what is '%s'?\n", *argv); -1; }) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 421ba89ce86a..9e2681c83717 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -46,6 +47,7 @@ #include #include "main.h" +#include "disasm.h" static const char * const prog_type_name[] = { [BPF_PROG_TYPE_UNSPEC] = "unspec", @@ -297,11 +299,39 @@ static int do_show(int argc, char **argv) return 0; } +static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); +} + +static void dump_xlated(void *buf, unsigned int len, bool opcodes) +{ + struct bpf_insn *insn = buf; + unsigned int i; + + for (i = 0; i < len / sizeof(*insn); i++) { + printf("% 4d: ", i); + print_bpf_insn(print_insn, NULL, insn + i, true); + + if (opcodes) { + printf(" "); + print_hex(insn + i, 8, " "); + printf("\n"); + } + + if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) + i++; + } +} + static int do_dump(int argc, char **argv) { struct bpf_prog_info info = {}; __u32 len = sizeof(info); - bool can_disasm = false; unsigned int buf_size; char *filepath = NULL; bool opcodes = false; @@ -315,7 +345,6 @@ static int do_dump(int argc, char **argv) if (is_prefix(*argv, "jited")) { member_len = &info.jited_prog_len; member_ptr = &info.jited_prog_insns; - can_disasm = true; } else if (is_prefix(*argv, "xlated")) { member_len = &info.xlated_prog_len; member_ptr = &info.xlated_prog_insns; @@ -346,10 +375,6 @@ static int do_dump(int argc, char **argv) NEXT_ARG(); } - if (!filepath && !can_disasm) { - err("expected 'file' got %s\n", *argv); - return -1; - } if (argc) { usage(); return -1; @@ -409,7 +434,10 @@ static int do_dump(int argc, char **argv) goto err_free; } } else { - disasm_print_insn(buf, *member_len, opcodes); + if (member_len == &info.jited_prog_len) + disasm_print_insn(buf, *member_len, opcodes); + else + dump_xlated(buf, *member_len, opcodes); } free(buf); @@ -430,7 +458,7 @@ static int do_help(int argc, char **argv) { fprintf(stderr, "Usage: %s %s show [PROG]\n" - " %s %s dump xlated PROG file FILE\n" + " %s %s dump xlated PROG [file FILE] [opcodes]\n" " %s %s dump jited PROG [file FILE] [opcodes]\n" " %s %s pin PROG FILE\n" " %s %s help\n" From d66f2b91f95b56e31772b9faa0d036cd2e53cb02 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:14 -0700 Subject: [PATCH 0800/2177] bpf: don't rely on the verifier lock for metadata_dst allocation bpf_skb_set_tunnel_*() functions require allocation of per-cpu metadata_dst. The allocation happens upon verification of the first program using those helpers. In preparation for removing the verifier lock, use cmpxchg() to make sure we only allocate the metadata_dsts once. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/net/dst_metadata.h | 1 + net/core/dst.c | 16 ++++++++++++++++ net/core/filter.c | 16 +++++++++------- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 9fba2ebf6dda..87a0bb8d449f 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -87,6 +87,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); +void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); diff --git a/net/core/dst.c b/net/core/dst.c index a6c47da7d0f8..8b2eafac984d 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -322,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) return md_dst; } EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); + +void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) +{ + int cpu; + +#ifdef CONFIG_DST_CACHE + for_each_possible_cpu(cpu) { + struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu); + + if (one_md_dst->type == METADATA_IP_TUNNEL) + dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache); + } +#endif + free_percpu(md_dst); +} +EXPORT_SYMBOL_GPL(metadata_dst_free_percpu); diff --git a/net/core/filter.c b/net/core/filter.c index b7e8caa1e790..140fa9f9c0f4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -2987,14 +2988,15 @@ static const struct bpf_func_proto * bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) { if (!md_dst) { - /* Race is not possible, since it's called from verifier - * that is holding verifier mutex. - */ - md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, - METADATA_IP_TUNNEL, - GFP_KERNEL); - if (!md_dst) + struct metadata_dst __percpu *tmp; + + tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, + METADATA_IP_TUNNEL, + GFP_KERNEL); + if (!tmp) return NULL; + if (cmpxchg(&md_dst, NULL, tmp)) + metadata_dst_free_percpu(tmp); } switch (which) { From a2a7d5701052542cd2260e7659b12443e0a74733 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:15 -0700 Subject: [PATCH 0801/2177] bpf: write back the verifier log buffer as it gets filled Verifier log buffer can be quite large (up to 16MB currently). As Eric Dumazet points out if we allow multiple verification requests to proceed simultaneously, malicious user may use the verifier as a way of allocating large amounts of unswappable memory to OOM the host. Switch to a strategy of allocating a smaller buffer (1024B) and writing it out into the user buffer after every print. While at it remove the old BUG_ON(). This is in preparation of the global verifier lock removal. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/bpf_verifier.h | 4 +++- kernel/bpf/verifier.c | 41 +++++++++++++++++------------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5ddb9a626a51..f00ef751c1c5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -115,9 +115,11 @@ struct bpf_insn_aux_data { #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +#define BPF_VERIFIER_TMP_LOG_SIZE 1024 + struct bpf_verifer_log { u32 level; - char *kbuf; + char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; char __user *ubuf; u32 len_used; u32 len_total; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 274c6582ec39..2cdbcc4f8f6b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -165,15 +165,26 @@ static __printf(2, 3) void verbose(struct bpf_verifier_env *env, const char *fmt, ...) { struct bpf_verifer_log *log = &env->log; + unsigned int n; va_list args; - if (!log->level || bpf_verifier_log_full(log)) + if (!log->level || !log->ubuf || bpf_verifier_log_full(log)) return; va_start(args, fmt); - log->len_used += vscnprintf(log->kbuf + log->len_used, - log->len_total - log->len_used, fmt, args); + n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); va_end(args); + + WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, + "verifier log line truncated - local buffer too short\n"); + + n = min(log->len_total - log->len_used - 1, n); + log->kbuf[n] = '\0'; + + if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) + log->len_used += n; + else + log->ubuf = NULL; } static bool type_is_pkt_pointer(enum bpf_reg_type type) @@ -4263,11 +4274,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || !log->level || !log->ubuf) goto err_unlock; - - ret = -ENOMEM; - log->kbuf = vmalloc(log->len_total); - if (!log->kbuf) - goto err_unlock; } env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); @@ -4304,18 +4310,11 @@ skip_full_check: if (ret == 0) ret = fixup_bpf_calls(env); - if (log->level && bpf_verifier_log_full(log)) { - BUG_ON(log->len_used >= log->len_total); - /* verifier log exceeded user supplied buffer */ + if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; - /* fall through to return what was recorded */ - } - - /* copy verifier log back to user space including trailing zero */ - if (log->level && copy_to_user(log->ubuf, log->kbuf, - log->len_used + 1) != 0) { + if (log->level && !log->ubuf) { ret = -EFAULT; - goto free_log_buf; + goto err_release_maps; } if (ret == 0 && env->used_map_cnt) { @@ -4326,7 +4325,7 @@ skip_full_check: if (!env->prog->aux->used_maps) { ret = -ENOMEM; - goto free_log_buf; + goto err_release_maps; } memcpy(env->prog->aux->used_maps, env->used_maps, @@ -4339,9 +4338,7 @@ skip_full_check: convert_pseudo_ld_imm64(env); } -free_log_buf: - if (log->level) - vfree(log->kbuf); +err_release_maps: if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. From 9f77fad3c2e3b923984fc617ea45b60264115fc3 Mon Sep 17 00:00:00 2001 From: Tim Hansen Date: Mon, 9 Oct 2017 11:37:59 -0400 Subject: [PATCH 0802/2177] net/core: Fix BUG to BUG_ON conditionals. Fix BUG() calls to use BUG_ON(conditional) macros. This was found using make coccicheck M=net/core on linux next tag next-2017092 Signed-off-by: Tim Hansen Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 822a90e56aea..40717501cbdd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1350,8 +1350,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) /* Set the tail pointer and length */ skb_put(n, skb->len); - if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) - BUG(); + BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); copy_skb_header(n, skb); return n; @@ -1449,8 +1448,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, BUG_ON(nhead < 0); - if (skb_shared(skb)) - BUG(); + BUG_ON(skb_shared(skb)); size = SKB_DATA_ALIGN(size); @@ -1595,9 +1593,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, head_copy_off = newheadroom - head_copy_len; /* Copy the linear header and data. */ - if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, - skb->len + head_copy_len)) - BUG(); + BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, + skb->len + head_copy_len)); copy_skb_header(n, skb); @@ -1878,8 +1875,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) return NULL; } - if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) - BUG(); + BUG_ON(skb_copy_bits(skb, skb_headlen(skb), + skb_tail_pointer(skb), delta)); /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. From 96ac18f14a5a721dc4233f1c6ebd07e103ae5a63 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Tue, 10 Oct 2017 12:44:13 +0530 Subject: [PATCH 0803/2177] cxgb4: Add support for new flash parts Add support for new flash parts identification, and also cleanup the flash Part identifying and decoding code. Based on the original work of Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 140 +++++++++++++++++---- 1 file changed, 118 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index b65ce26ff72f..b3fd1f457639 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -8205,7 +8205,7 @@ struct flash_desc { u32 size_mb; }; -static int get_flash_params(struct adapter *adap) +static int t4_get_flash_params(struct adapter *adap) { /* Table for non-Numonix supported flash parts. Numonix parts are left * to the preexisting code. All flash parts have 64KB sectors. @@ -8214,40 +8214,136 @@ static int get_flash_params(struct adapter *adap) { 0x150201, 4 << 20 }, /* Spansion 4MB S25FL032P */ }; + unsigned int part, manufacturer; + unsigned int density, size; + u32 flashid = 0; int ret; - u32 info; + + /* Issue a Read ID Command to the Flash part. We decode supported + * Flash parts and their sizes from this. There's a newer Query + * Command which can retrieve detailed geometry information but many + * Flash parts don't support it. + */ ret = sf1_write(adap, 1, 1, 0, SF_RD_ID); if (!ret) - ret = sf1_read(adap, 3, 0, 1, &info); + ret = sf1_read(adap, 3, 0, 1, &flashid); t4_write_reg(adap, SF_OP_A, 0); /* unlock SF */ if (ret) return ret; - for (ret = 0; ret < ARRAY_SIZE(supported_flash); ++ret) - if (supported_flash[ret].vendor_and_model_id == info) { - adap->params.sf_size = supported_flash[ret].size_mb; + /* Check to see if it's one of our non-standard supported Flash parts. 
+ */ + for (part = 0; part < ARRAY_SIZE(supported_flash); part++) + if (supported_flash[part].vendor_and_model_id == flashid) { + adap->params.sf_size = supported_flash[part].size_mb; adap->params.sf_nsec = adap->params.sf_size / SF_SEC_SIZE; - return 0; + goto found; } - if ((info & 0xff) != 0x20) /* not a Numonix flash */ - return -EINVAL; - info >>= 16; /* log2 of size */ - if (info >= 0x14 && info < 0x18) - adap->params.sf_nsec = 1 << (info - 16); - else if (info == 0x18) - adap->params.sf_nsec = 64; - else - return -EINVAL; - adap->params.sf_size = 1 << info; - adap->params.sf_fw_start = - t4_read_reg(adap, CIM_BOOT_CFG_A) & BOOTADDR_M; + /* Decode Flash part size. The code below looks repetative with + * common encodings, but that's not guaranteed in the JEDEC + * specification for the Read JADEC ID command. The only thing that + * we're guaranteed by the JADEC specification is where the + * Manufacturer ID is in the returned result. After that each + * Manufacturer ~could~ encode things completely differently. + * Note, all Flash parts must have 64KB sectors. + */ + manufacturer = flashid & 0xff; + switch (manufacturer) { + case 0x20: { /* Micron/Numonix */ + /* This Density -> Size decoding table is taken from Micron + * Data Sheets. + */ + density = (flashid >> 16) & 0xff; + switch (density) { + case 0x14: /* 1MB */ + size = 1 << 20; + break; + case 0x15: /* 2MB */ + size = 1 << 21; + break; + case 0x16: /* 4MB */ + size = 1 << 22; + break; + case 0x17: /* 8MB */ + size = 1 << 23; + break; + case 0x18: /* 16MB */ + size = 1 << 24; + break; + case 0x19: /* 32MB */ + size = 1 << 25; + break; + case 0x20: /* 64MB */ + size = 1 << 26; + break; + case 0x21: /* 128MB */ + size = 1 << 27; + break; + case 0x22: /* 256MB */ + size = 1 << 28; + break; + default: + dev_err(adap->pdev_dev, "Micron Flash Part has bad size, ID = %#x, Density code = %#x\n", + flashid, density); + return -EINVAL; + } + break; + } + case 0xc2: { /* Macronix */ + /* This Density -> Size decoding table is taken from Macronix + * Data Sheets. + */ + density = (flashid >> 16) & 0xff; + switch (density) { + case 0x17: /* 8MB */ + size = 1 << 23; + break; + case 0x18: /* 16MB */ + size = 1 << 24; + break; + default: + dev_err(adap->pdev_dev, "Macronix Flash Part has bad size, ID = %#x, Density code = %#x\n", + flashid, density); + return -EINVAL; + } + } + case 0xef: { /* Winbond */ + /* This Density -> Size decoding table is taken from Winbond + * Data Sheets. + */ + density = (flashid >> 16) & 0xff; + switch (density) { + case 0x17: /* 8MB */ + size = 1 << 23; + break; + case 0x18: /* 16MB */ + size = 1 << 24; + break; + default: + dev_err(adap->pdev_dev, "Winbond Flash Part has bad size, ID = %#x, Density code = %#x\n", + flashid, density); + return -EINVAL; + } + break; + } + default: + dev_err(adap->pdev_dev, "Unsupported Flash Part, ID = %#x\n", + flashid); + return -EINVAL; + } + + /* Store decoded Flash size and fall through into vetting code. */ + adap->params.sf_size = size; + adap->params.sf_nsec = size / SF_SEC_SIZE; + +found: if (adap->params.sf_size < FLASH_MIN_SIZE) - dev_warn(adap->pdev_dev, "WARNING!!! 
FLASH size %#x < %#x!!!\n", - adap->params.sf_size, FLASH_MIN_SIZE); + dev_warn(adap->pdev_dev, "WARNING: Flash Part ID %#x, size %#x < %#x\n", + flashid, adap->params.sf_size, FLASH_MIN_SIZE); return 0; } @@ -8285,7 +8381,7 @@ int t4_prep_adapter(struct adapter *adapter) get_pci_mode(adapter, &adapter->params.pci); pl_rev = REV_G(t4_read_reg(adapter, PL_REV_A)); - ret = get_flash_params(adapter); + ret = t4_get_flash_params(adapter); if (ret < 0) { dev_err(adapter->pdev_dev, "error %d identifying flash\n", ret); return ret; From 652faa98ec383c25296fb8493f17060a2c7e3438 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Tue, 10 Oct 2017 12:45:02 +0530 Subject: [PATCH 0804/2177] cxgb4: add new T5 pci device id's Add 0x50aa and 0x50ab T5 device id's. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 633e9751a25e..8c22bb8c9fbf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -181,6 +181,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x50a7), /* Custom T580-CR */ CH_PCI_ID_TABLE_FENTRY(0x50a8), /* Custom T580-KR */ CH_PCI_ID_TABLE_FENTRY(0x50a9), /* Custom T580-KR */ + CH_PCI_ID_TABLE_FENTRY(0x50aa), /* Custom T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50ab), /* Custom T520-CR */ /* T6 adapters: */ From ee83f77645332ce86863e5cef8dd3372b8ee4b87 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 10 Oct 2017 16:42:03 +0800 Subject: [PATCH 0805/2177] net: hns3: fixes the ring index in hns3_fini_ring This patch fixes the ring index in hns3_fini_ring. Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 26bbc91add65..acb82cfc1b9a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2661,7 +2661,7 @@ static int hns3_init_all_ring(struct hns3_nic_priv *priv) out_when_alloc_ring_memory: for (j = i - 1; j >= 0; j--) - hns3_fini_ring(priv->ring_data[i].ring); + hns3_fini_ring(priv->ring_data[j].ring); return -ENOMEM; } From 5668abda0931c61f823b21b1612e1c77b617a734 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 10 Oct 2017 16:42:04 +0800 Subject: [PATCH 0806/2177] net: hns3: add support for set_ringparam This patch supports the ethtool's set_ringparam(). Signed-off-by: Lipeng Signed-off-by: David S. 
Miller --- .../hisilicon/hns3/hns3pf/hns3_enet.c | 4 +- .../hisilicon/hns3/hns3pf/hns3_enet.h | 4 + .../hisilicon/hns3/hns3pf/hns3_ethtool.c | 75 +++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index acb82cfc1b9a..ba550c1b5b01 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2637,7 +2637,7 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring) } } -static int hns3_init_all_ring(struct hns3_nic_priv *priv) +int hns3_init_all_ring(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; int ring_num = h->kinfo.num_tqps * 2; @@ -2666,7 +2666,7 @@ out_when_alloc_ring_memory: return -ENOMEM; } -static int hns3_uninit_all_ring(struct hns3_nic_priv *priv) +int hns3_uninit_all_ring(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; int i; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h index dd8d40ca1dcc..66599890b4d4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h @@ -76,6 +76,8 @@ enum hns3_nic_state { #define HNS3_RING_NAME_LEN 16 #define HNS3_BUFFER_SIZE_2048 2048 #define HNS3_RING_MAX_PENDING 32768 +#define HNS3_RING_MIN_PENDING 8 +#define HNS3_RING_BD_MULTIPLE 8 #define HNS3_MAX_MTU 9728 #define HNS3_BD_SIZE_512_TYPE 0 @@ -593,6 +595,8 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value) void hns3_ethtool_set_ops(struct net_device *netdev); int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); +int hns3_init_all_ring(struct hns3_nic_priv *priv); +int hns3_uninit_all_ring(struct hns3_nic_priv *priv); #ifdef CONFIG_HNS3_DCB void hns3_dcbnl_setup(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index 060bacebf86a..1c5d003ecf29 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -459,10 +459,85 @@ static int hns3_get_rxnfc(struct net_device *netdev, return 0; } +int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv, u32 new_desc_num) +{ + struct hnae3_handle *h = priv->ae_handle; + int i; + + h->kinfo.num_desc = new_desc_num; + + for (i = 0; i < h->kinfo.num_tqps * 2; i++) + priv->ring_data[i].ring->desc_num = new_desc_num; + + return hns3_init_all_ring(priv); +} + +int hns3_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *param) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + bool if_running = netif_running(ndev); + u32 old_desc_num, new_desc_num; + int ret; + + if (param->rx_mini_pending || param->rx_jumbo_pending) + return -EINVAL; + + if (param->tx_pending != param->rx_pending) { + netdev_err(ndev, + "Descriptors of tx and rx must be equal"); + return -EINVAL; + } + + if (param->tx_pending > HNS3_RING_MAX_PENDING || + param->tx_pending < HNS3_RING_MIN_PENDING) { + netdev_err(ndev, + "Descriptors requested (Tx/Rx: %d) out of range [%d-%d]\n", + param->tx_pending, HNS3_RING_MIN_PENDING, + HNS3_RING_MAX_PENDING); + return -EINVAL; + } + + new_desc_num = param->tx_pending; + + /* Hardware requires that its descriptors must be multiple of eight */ + new_desc_num = 
ALIGN(new_desc_num, HNS3_RING_BD_MULTIPLE); + old_desc_num = h->kinfo.num_desc; + if (old_desc_num == new_desc_num) + return 0; + + netdev_info(ndev, + "Changing descriptor count from %d to %d.\n", + old_desc_num, new_desc_num); + + if (if_running) + dev_close(ndev); + + ret = hns3_uninit_all_ring(priv); + if (ret) + return ret; + + ret = hns3_change_all_ring_bd_num(priv, new_desc_num); + if (ret) { + ret = hns3_change_all_ring_bd_num(priv, old_desc_num); + if (ret) { + netdev_err(ndev, + "Revert to old bd num fail, ret=%d.\n", ret); + return ret; + } + } + + if (if_running) + ret = dev_open(ndev); + + return ret; +} + static const struct ethtool_ops hns3_ethtool_ops = { .get_drvinfo = hns3_get_drvinfo, .get_link = hns3_get_link, .get_ringparam = hns3_get_ringparam, + .set_ringparam = hns3_set_ringparam, .get_pauseparam = hns3_get_pauseparam, .get_strings = hns3_get_strings, .get_ethtool_stats = hns3_get_stats, From f7db940afc0a70f72ffcb6bb9c0ad15e6c5349c1 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 10 Oct 2017 16:42:05 +0800 Subject: [PATCH 0807/2177] net: hns3: add support for set_rxnfc This patch supports the ethtool's set_rxnfc(). Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 + .../hisilicon/hns3/hns3pf/hclge_cmd.c | 9 ++ .../hisilicon/hns3/hns3pf/hclge_cmd.h | 1 + .../hisilicon/hns3/hns3pf/hclge_main.c | 95 ++++++++++++++++++- .../hisilicon/hns3/hns3pf/hclge_main.h | 8 ++ .../hisilicon/hns3/hns3pf/hns3_ethtool.c | 16 ++++ 6 files changed, 129 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index c677530841cf..d952d6213024 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -339,6 +339,8 @@ struct hnae3_ae_ops { u8 *hfunc); int (*set_rss)(struct hnae3_handle *handle, const u32 *indir, const u8 *key, const u8 hfunc); + int (*set_rss_tuple)(struct hnae3_handle *handle, + struct ethtool_rxnfc *cmd); int (*get_tc_size)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 8ecd80744767..60960e588b5f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -85,6 +85,15 @@ static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type) return 0; } +void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read) +{ + desc->flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN); + if (is_read) + desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR); + else + desc->flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); +} + void hclge_cmd_setup_basic_desc(struct hclge_desc *desc, enum hclge_opcode_type opcode, bool is_read) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 8f3ba02aea3c..b4373345c2b4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -739,6 +739,7 @@ struct hclge_hw; int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num); void hclge_cmd_setup_basic_desc(struct hclge_desc *desc, enum hclge_opcode_type opcode, bool is_read); +void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read); int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, struct hclge_promisc_param *param); diff --git 
a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c91c779aeeed..5b5e52c7fde0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2595,8 +2595,6 @@ static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid, static int hclge_set_rss_input_tuple(struct hclge_dev *hdev) { -#define HCLGE_RSS_INPUT_TUPLE_OTHER 0xf -#define HCLGE_RSS_INPUT_TUPLE_SCTP 0x1f struct hclge_rss_input_tuple_cmd *req; struct hclge_desc desc; int ret; @@ -2677,6 +2675,98 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir, return ret; } +static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc) +{ + u8 hash_sets = nfc->data & RXH_L4_B_0_1 ? HCLGE_S_PORT_BIT : 0; + + if (nfc->data & RXH_L4_B_2_3) + hash_sets |= HCLGE_D_PORT_BIT; + else + hash_sets &= ~HCLGE_D_PORT_BIT; + + if (nfc->data & RXH_IP_SRC) + hash_sets |= HCLGE_S_IP_BIT; + else + hash_sets &= ~HCLGE_S_IP_BIT; + + if (nfc->data & RXH_IP_DST) + hash_sets |= HCLGE_D_IP_BIT; + else + hash_sets &= ~HCLGE_D_IP_BIT; + + if (nfc->flow_type == SCTP_V4_FLOW || nfc->flow_type == SCTP_V6_FLOW) + hash_sets |= HCLGE_V_TAG_BIT; + + return hash_sets; +} + +static int hclge_set_rss_tuple(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + struct hclge_rss_input_tuple_cmd *req; + struct hclge_desc desc; + u8 tuple_sets; + int ret; + + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + req = (struct hclge_rss_input_tuple_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "Read rss tuple fail, status = %d\n", ret); + return ret; + } + + hclge_cmd_reuse_desc(&desc, false); + + tuple_sets = hclge_get_rss_hash_bits(nfc); + switch (nfc->flow_type) { + case TCP_V4_FLOW: + req->ipv4_tcp_en = tuple_sets; + break; + case TCP_V6_FLOW: + req->ipv6_tcp_en = tuple_sets; + break; + case UDP_V4_FLOW: + req->ipv4_udp_en = tuple_sets; + break; + case UDP_V6_FLOW: + req->ipv6_udp_en = tuple_sets; + break; + case SCTP_V4_FLOW: + req->ipv4_sctp_en = tuple_sets; + break; + case SCTP_V6_FLOW: + if ((nfc->data & RXH_L4_B_0_1) || + (nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + + req->ipv6_sctp_en = tuple_sets; + break; + case IPV4_FLOW: + req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + break; + case IPV6_FLOW: + req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + break; + default: + return -EINVAL; + } + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "Set rss tuple fail, status = %d\n", ret); + + return ret; +} + static int hclge_get_tc_size(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -4344,6 +4434,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_rss_indir_size = hclge_get_rss_indir_size, .get_rss = hclge_get_rss, .set_rss = hclge_set_rss, + .set_rss_tuple = hclge_set_rss_tuple, .get_tc_size = hclge_get_tc_size, .get_mac_addr = hclge_get_mac_addr, .set_mac_addr = hclge_set_mac_addr, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 79c1a06cb941..a7c018c7b0ec 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -41,6 +41,14 @@ #define HCLGE_RSS_CFG_TBL_NUM \ (HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE) +#define HCLGE_RSS_INPUT_TUPLE_OTHER GENMASK(3, 0) +#define HCLGE_RSS_INPUT_TUPLE_SCTP GENMASK(4, 0) +#define HCLGE_D_PORT_BIT BIT(0) +#define HCLGE_S_PORT_BIT BIT(1) +#define HCLGE_D_IP_BIT BIT(2) +#define HCLGE_S_IP_BIT BIT(3) +#define HCLGE_V_TAG_BIT BIT(4) + #define HCLGE_RSS_TC_SIZE_0 1 #define HCLGE_RSS_TC_SIZE_1 2 #define HCLGE_RSS_TC_SIZE_2 4 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index 1c5d003ecf29..f0e88e00a1f6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -533,6 +533,21 @@ int hns3_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *param) return ret; } +static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss_tuple) + return -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + return h->ae_algo->ops->set_rss_tuple(h, cmd); + default: + return -EOPNOTSUPP; + } +} + static const struct ethtool_ops hns3_ethtool_ops = { .get_drvinfo = hns3_get_drvinfo, .get_link = hns3_get_link, @@ -543,6 +558,7 @@ static const struct ethtool_ops hns3_ethtool_ops = { .get_ethtool_stats = hns3_get_stats, .get_sset_count = hns3_get_sset_count, .get_rxnfc = hns3_get_rxnfc, + .set_rxnfc = hns3_set_rxnfc, .get_rxfh_key_size = hns3_get_rss_key_size, .get_rxfh_indir_size = hns3_get_rss_indir_size, .get_rxfh = hns3_get_rss, From 07d2995425eb8eb4874b94bf62fb1490a2014d76 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 10 Oct 2017 16:42:06 +0800 Subject: [PATCH 0808/2177] net: hns3: add support for ETHTOOL_GRXFH This patch add support for ethtool's ETHTOOL_GRXFH in hns3_get_rxnfc(). Signed-off-by: Lipeng Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 + .../hisilicon/hns3/hns3pf/hclge_main.c | 64 +++++++++++++++++++ .../hisilicon/hns3/hns3pf/hns3_ethtool.c | 2 + 3 files changed, 68 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index d952d6213024..575f50df340c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -341,6 +341,8 @@ struct hnae3_ae_ops { const u8 *key, const u8 hfunc); int (*set_rss_tuple)(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd); + int (*get_rss_tuple)(struct hnae3_handle *handle, + struct ethtool_rxnfc *cmd); int (*get_tc_size)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 5b5e52c7fde0..c322b4534148 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2767,6 +2767,69 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle, return ret; } +static int hclge_get_rss_tuple(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + struct hclge_rss_input_tuple_cmd *req; + struct hclge_desc desc; + u8 tuple_sets; + int ret; + + nfc->data = 0; + + req = (struct hclge_rss_input_tuple_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "Read rss tuple fail, status = %d\n", ret); + return ret; + } + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + tuple_sets = req->ipv4_tcp_en; + break; + case UDP_V4_FLOW: + tuple_sets = req->ipv4_udp_en; + break; + case TCP_V6_FLOW: + tuple_sets = req->ipv6_tcp_en; + break; + case UDP_V6_FLOW: + tuple_sets = req->ipv6_udp_en; + break; + case SCTP_V4_FLOW: + tuple_sets = req->ipv4_sctp_en; + break; + case SCTP_V6_FLOW: + tuple_sets = req->ipv6_sctp_en; + break; + case IPV4_FLOW: + case IPV6_FLOW: + tuple_sets = HCLGE_S_IP_BIT | HCLGE_D_IP_BIT; + break; + default: + return -EINVAL; + } + + if (!tuple_sets) + return 0; + + if (tuple_sets & HCLGE_D_PORT_BIT) + nfc->data |= RXH_L4_B_2_3; + if (tuple_sets & HCLGE_S_PORT_BIT) + nfc->data |= RXH_L4_B_0_1; + if (tuple_sets & HCLGE_D_IP_BIT) + nfc->data |= RXH_IP_DST; + if (tuple_sets & HCLGE_S_IP_BIT) + nfc->data |= RXH_IP_SRC; + + return 0; +} + static int hclge_get_tc_size(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -4435,6 +4498,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_rss = hclge_get_rss, .set_rss = hclge_set_rss, .set_rss_tuple = hclge_set_rss_tuple, + .get_rss_tuple = hclge_get_rss_tuple, .get_tc_size = hclge_get_tc_size, .get_mac_addr = hclge_get_mac_addr, .set_mac_addr = hclge_set_mac_addr, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index f0e88e00a1f6..b64fbd3c369a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -452,6 +452,8 @@ static int hns3_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXRINGS: cmd->data = h->ae_algo->ops->get_tc_size(h); break; + case ETHTOOL_GRXFH: + return h->ae_algo->ops->get_rss_tuple(h, cmd); default: return -EOPNOTSUPP; } From abf11d04fd9dbec778219d76a9d38c36f65fbc24 
Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 10 Oct 2017 16:42:07 +0800 Subject: [PATCH 0809/2177] net: hns3: fix the ring count for ETHTOOL_GRXRINGS This patch fix the ring count for ETHTOOL_GRXRINGS. Ring count not TC size should be return for command "ethtool -n ethx". Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index b64fbd3c369a..9b36ce081f62 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -450,7 +450,7 @@ static int hns3_get_rxnfc(struct net_device *netdev, switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - cmd->data = h->ae_algo->ops->get_tc_size(h); + cmd->data = h->kinfo.num_tc * h->kinfo.rss_size; break; case ETHTOOL_GRXFH: return h->ae_algo->ops->get_rss_tuple(h, cmd); From 7e1dc5e926d57a5bc4ac97d6e061e2fe29c266c0 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Tue, 10 Oct 2017 12:28:33 +0300 Subject: [PATCH 0810/2177] net/mlx4_en: Limit the number of TX rings Limit the number of TX rings per UP by the number of cores in the system. Signed-off-by: Inbar Karmy Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/en_main.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 3d4e4a5d00d1..e9432bc1c1bc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1748,7 +1748,7 @@ static void mlx4_en_get_channels(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); channel->max_rx = MAX_RX_RINGS; - channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; + channel->max_tx = priv->mdev->profile.max_num_tx_rings_p_up; channel->rx_count = priv->rx_ring_num; channel->tx_count = priv->tx_ring_num[TX] / @@ -1777,7 +1777,7 @@ static int mlx4_en_set_channels(struct net_device *dev, mutex_lock(&mdev->state_lock); xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0; if (channel->tx_count * priv->prof->num_up + xdp_count > - MAX_TX_RINGS) { + priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) { err = -EINVAL; en_err(priv, "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 686e18de9a97..2c2965497ed3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -153,7 +153,7 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) int i; params->udp_rss = udp_rss; - params->num_tx_rings_p_up = mlx4_low_memory_profile() ? + params->max_num_tx_rings_p_up = mlx4_low_memory_profile() ? 
MLX4_EN_MIN_TX_RING_P_UP : min_t(int, num_online_cpus(), MLX4_EN_MAX_TX_RING_P_UP); @@ -170,8 +170,8 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; params->prof[i].num_up = MLX4_EN_NUM_UP_LOW; - params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up; - params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up * + params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up; + params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up * params->prof[i].num_up; params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 9c218f1cfc6c..e4c7a80ef5a8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3305,7 +3305,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->pflags = MLX4_EN_PRIV_FLAGS_BLUEFLAME; priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | MLX4_WQE_CTRL_SOLICITED); - priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; + priv->num_tx_rings_p_up = mdev->profile.max_num_tx_rings_p_up; priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK; netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key)); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index fdb3ad0cbe54..245e9ea09ab2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -399,7 +399,7 @@ struct mlx4_en_profile { u32 active_ports; u32 small_pkt_int; u8 no_reset; - u8 num_tx_rings_p_up; + u8 max_num_tx_rings_p_up; struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; }; From b8d394367a631c2d749b3114e04dfb4d09624ddf Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Tue, 10 Oct 2017 12:28:34 +0300 Subject: [PATCH 0811/2177] net/mlx4_en: Limit the number of RX rings Limit the number of RX rings by the number of cores in the system. Signed-off-by: Inbar Karmy Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index e9432bc1c1bc..bf1f04164885 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1742,12 +1742,17 @@ static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } +static int mlx4_en_get_max_num_rx_rings(struct net_device *dev) +{ + return min_t(int, num_online_cpus(), MAX_RX_RINGS); +} + static void mlx4_en_get_channels(struct net_device *dev, struct ethtool_channels *channel) { struct mlx4_en_priv *priv = netdev_priv(dev); - channel->max_rx = MAX_RX_RINGS; + channel->max_rx = mlx4_en_get_max_num_rx_rings(dev); channel->max_tx = priv->mdev->profile.max_num_tx_rings_p_up; channel->rx_count = priv->rx_ring_num; From 80a8dc75ee674a111fea2ae1d02ff96535a309c2 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Tue, 10 Oct 2017 12:28:35 +0300 Subject: [PATCH 0812/2177] net/mlx4_en: Increase number of default RX rings Remove limitation of netif_get_num_default_rss_queues() from logic of RX rings default number. Signed-off-by: Inbar Karmy Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 8f9cb8abc497..a7866954d106 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -254,8 +254,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) DEF_RX_RINGS)); num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS : - min_t(int, num_of_eqs, - netif_get_num_default_rss_queues()); + min_t(int, num_of_eqs, num_online_cpus()); mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two(num_rx_rings); } From 8f88f74a1641f57607a2417669e0e02b768796b4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Oct 2017 16:18:05 +0200 Subject: [PATCH 0813/2177] selftests: rtnetlink: test RTM_GETNETCONF exercise RTM_GETNETCONF call path for unspec, inet and inet6 families, they are DOIT_UNLOCKED candidates. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index e8c86c416ed0..a8a8cdf726b2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -37,6 +37,26 @@ kci_del_dummy() check_err $? } +kci_test_netconf() +{ + dev="$1" + r=$ret + + ip netconf show dev "$dev" > /dev/null + check_err $? + + for f in 4 6; do + ip -$f netconf show dev "$dev" > /dev/null + check_err $? + done + + if [ $ret -ne 0 ] ;then + echo "FAIL: ip netconf show $dev" + test $r -eq 0 && ret=0 + return 1 + fi +} + # add a bridge with vlans on top kci_test_bridge() { @@ -63,6 +83,11 @@ kci_test_bridge() check_err $? ip r s t all > /dev/null check_err $? + + for name in "$devbr" "$vlandev" "$devdummy" ; do + kci_test_netconf "$name" + done + ip -6 addr del dev "$vlandev" dead:42::1234/64 check_err $? @@ -100,6 +125,9 @@ kci_test_gre() check_err $? ip addr > /dev/null check_err $? + + kci_test_netconf "$gredev" + ip addr del dev "$devdummy" 10.23.7.11/24 check_err $? From b88d12e4a4ff5b99dcaf9b35a30ad2597cb2fd8a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Oct 2017 17:10:04 +0200 Subject: [PATCH 0814/2177] rtnetlink: bridge: use ext_ack instead of printk We can now piggyback error strings to userspace via extended acks rather than using printk. Before: bridge fdb add 01:02:03:04:05:06 dev br0 vlan 4095 RTNETLINK answers: Invalid argument After: bridge fdb add 01:02:03:04:05:06 dev br0 vlan 4095 Error: invalid vlan id. v3: drop 'RTM_' prefixes, suggested by David Ahern, they are not useful, the add/del in bridge command line is enough. Also reword error in response to malformed/bad vlan id attribute size. Cc: David Ahern Signed-off-by: Florian Westphal Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e84d108cfee4..6a09f3d575af 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3066,21 +3066,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_add); -static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid) +static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid, + struct netlink_ext_ack *extack) { u16 vid = 0; if (vlan_attr) { if (nla_len(vlan_attr) != sizeof(u16)) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n"); + NL_SET_ERR_MSG(extack, "invalid vlan attribute size"); return -EINVAL; } vid = nla_get_u16(vlan_attr); if (!vid || vid >= VLAN_VID_MASK) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n", - vid); + NL_SET_ERR_MSG(extack, "invalid vlan id"); return -EINVAL; } } @@ -3105,24 +3105,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n"); + NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n"); + NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); - err = fdb_vid_parse(tb[NDA_VLAN], &vid); + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; @@ -3209,24 +3209,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n"); + NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); - err = fdb_vid_parse(tb[NDA_VLAN], &vid); + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; @@ -3666,7 +3666,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { - pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } @@ -3741,7 +3741,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { - pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } From 833e0e2f24fd0525090878f71e129a8a4cb8bf78 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Oct 2017 15:05:39 -0700 Subject: [PATCH 0815/2177] net: dst: move cpu inside ifdef to avoid compilation warning If CONFIG_DST_CACHE is not selected cpu variable will be unused and we will see a compilation warning. Move it under the ifdef. 
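A minimal sketch of the pattern relied on here (illustrative only, not part of the patch): when the sole user of a local variable sits inside an #ifdef block, the declaration has to move inside that same block, otherwise builds without the config option trip an unused-variable warning.

	/* hypothetical helper; use_one() is a placeholder, not a real kernel API */
	static void walk_md_dst(struct metadata_dst __percpu *md_dst)
	{
	#ifdef CONFIG_DST_CACHE
		int cpu;	/* only referenced when DST_CACHE is built in */

		for_each_possible_cpu(cpu)
			use_one(per_cpu_ptr(md_dst, cpu));
	#endif
	}
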
Reported-by: kbuild test robot Fixes: d66f2b91f95b ("bpf: don't rely on the verifier lock for metadata_dst allocation") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/dst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index 8b2eafac984d..662a2d4a3d19 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -325,9 +325,9 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) { +#ifdef CONFIG_DST_CACHE int cpu; -#ifdef CONFIG_DST_CACHE for_each_possible_cpu(cpu) { struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu); From b8226962b1c49c784aeddb9d2fafbf53dfdc2190 Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Tue, 10 Oct 2017 16:54:44 -0400 Subject: [PATCH 0816/2177] openvswitch: add ct_clear action This adds a ct_clear action for clearing conntrack state. ct_clear is currently implemented in OVS userspace, but is not backed by an action in the kernel datapath. This is useful for flows that may modify a packet tuple after a ct lookup has already occurred. Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 ++ net/openvswitch/actions.c | 4 ++++ net/openvswitch/conntrack.c | 11 +++++++++++ net/openvswitch/conntrack.h | 7 +++++++ net/openvswitch/flow_netlink.c | 5 +++++ 5 files changed, 29 insertions(+) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index efdbfbfd3ee2..0cd6f8833147 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -807,6 +807,7 @@ struct ovs_action_push_eth { * packet. * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the * packet. + * @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -836,6 +837,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */ OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ OVS_ACTION_ATTR_POP_ETH, /* No argument. */ + OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index a54a556fcdb5..a551232daf61 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1203,6 +1203,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return err == -EINPROGRESS ? 
0 : err; break; + case OVS_ACTION_ATTR_CT_CLEAR: + err = ovs_ct_clear(skb, key); + break; + case OVS_ACTION_ATTR_PUSH_ETH: err = push_eth(skb, key, nla_data(a)); break; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index d558e882ca0c..fe861e2f0deb 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1129,6 +1129,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, return err; } +int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key) +{ + if (skb_nfct(skb)) { + nf_conntrack_put(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); + ovs_ct_fill_key(skb, key); + } + + return 0; +} + static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, const struct sw_flow_key *key, bool log) { diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index bc7efd1867ab..399dfdd2c4f9 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -30,6 +30,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *); int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, const struct ovs_conntrack_info *); +int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key); void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *swkey, @@ -73,6 +74,12 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, return -ENOTSUPP; } +static inline int ovs_ct_clear(struct sk_buff *skb, + struct sw_flow_key *key) +{ + return -ENOTSUPP; +} + static inline void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index fc0ca9a89b8e..dc0d79092e74 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -76,6 +76,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) break; case OVS_ACTION_ATTR_CT: + case OVS_ACTION_ATTR_CT_CLEAR: case OVS_ACTION_ATTR_HASH: case OVS_ACTION_ATTR_POP_ETH: case OVS_ACTION_ATTR_POP_MPLS: @@ -2528,6 +2529,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), [OVS_ACTION_ATTR_CT] = (u32)-1, + [OVS_ACTION_ATTR_CT_CLEAR] = 0, [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), [OVS_ACTION_ATTR_POP_ETH] = 0, @@ -2669,6 +2671,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_CT_CLEAR: + break; + case OVS_ACTION_ATTR_PUSH_ETH: /* Disallow pushing an Ethernet header if one * is already present */ From 8c03145a2e2d98d44dba4bf16ab34636eb60b834 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Oct 2017 09:57:59 +0200 Subject: [PATCH 0817/2177] MAINTAINERS: update Johannes Berg's entries Update my MAINTAINERS file entries to list all the right files. Since I'm also the de-facto wireless extensions maintainer, there's little point in excluding those. 
Signed-off-by: Johannes Berg --- MAINTAINERS | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f0c37be4e04a..e90cdecd7b5d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3329,17 +3329,22 @@ S: Maintained F: drivers/auxdisplay/cfag12864bfb.c F: include/linux/cfag12864b.h -CFG80211 and NL80211 +802.11 (including CFG80211/NL80211) M: Johannes Berg L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained +F: net/wireless/ F: include/uapi/linux/nl80211.h +F: include/linux/ieee80211.h +F: include/net/wext.h F: include/net/cfg80211.h -F: net/wireless/* -X: net/wireless/wext* +F: include/net/iw_handler.h +F: include/net/ieee80211_radiotap.h +F: Documentation/driver-api/80211/cfg80211.rst +F: Documentation/networking/regulatory.txt CHAR and MISC DRIVERS M: Arnd Bergmann @@ -8207,6 +8212,7 @@ F: Documentation/networking/mac80211-injection.txt F: include/net/mac80211.h F: net/mac80211/ F: drivers/net/wireless/mac80211_hwsim.[ch] +F: Documentation/networking/mac80211_hwsim/README MAILBOX API M: Jassi Brar @@ -11491,6 +11497,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: Documentation/rfkill.txt +F: Documentation/ABI/stable/sysfs-class-rfkill F: net/rfkill/ RHASHTABLE From 4133da73067af0417c623eb4ad5e85081ccbf4b4 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Tue, 10 Oct 2017 22:31:49 -0400 Subject: [PATCH 0818/2177] mac80211: aead api to reduce redundancy Currently, the aes_ccm.c and aes_gcm.c are almost line by line copy of each other. This patch reduce code redundancy by moving the code in these two files to crypto/aead_api.c to make it a higher level aead api. The file aes_ccm.c and aes_gcm.c are removed and all the functions there are now implemented in their headers using the newly added aead api. Signed-off-by: Xiang Gao Signed-off-by: Johannes Berg --- net/mac80211/Makefile | 3 +- net/mac80211/{aes_ccm.c => aead_api.c} | 40 ++++----- net/mac80211/aead_api.h | 27 ++++++ net/mac80211/aes_ccm.h | 42 +++++++--- net/mac80211/aes_gcm.c | 109 ------------------------- net/mac80211/aes_gcm.h | 38 +++++++-- net/mac80211/wpa.c | 4 +- 7 files changed, 111 insertions(+), 152 deletions(-) rename net/mac80211/{aes_ccm.c => aead_api.c} (67%) create mode 100644 net/mac80211/aead_api.h delete mode 100644 net/mac80211/aes_gcm.c diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 282912245938..80f25ff2f24b 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -6,6 +6,7 @@ mac80211-y := \ driver-ops.o \ sta_info.o \ wep.o \ + aead_api.o \ wpa.o \ scan.o offchannel.o \ ht.o agg-tx.o agg-rx.o \ @@ -15,8 +16,6 @@ mac80211-y := \ rate.o \ michael.o \ tkip.o \ - aes_ccm.o \ - aes_gcm.o \ aes_cmac.o \ aes_gmac.o \ fils_aead.o \ diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aead_api.c similarity index 67% rename from net/mac80211/aes_ccm.c rename to net/mac80211/aead_api.c index a4e0d59a40dd..347f13953b2c 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aead_api.c @@ -1,6 +1,7 @@ /* * Copyright 2003-2004, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2014-2015, Qualcomm Atheros, Inc. 
* * Rewrite: Copyright (C) 2013 Linaro Ltd * @@ -12,30 +13,29 @@ #include #include #include +#include #include -#include -#include "key.h" -#include "aes_ccm.h" +#include "aead_api.h" -int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len) +int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len, + u8 *data, size_t data_len, u8 *mic) { + size_t mic_len = tfm->authsize; struct scatterlist sg[3]; struct aead_request *aead_req; int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); u8 *__aad; - aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC); if (!aead_req) return -ENOMEM; __aad = (u8 *)aead_req + reqsize; - memcpy(__aad, aad, CCM_AAD_LEN); + memcpy(__aad, aad, aad_len); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); + sg_set_buf(&sg[0], __aad, aad_len); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -49,10 +49,10 @@ int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, return 0; } -int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len) +int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len, + u8 *data, size_t data_len, u8 *mic) { + size_t mic_len = tfm->authsize; struct scatterlist sg[3]; struct aead_request *aead_req; int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); @@ -62,15 +62,15 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, if (data_len == 0) return -EINVAL; - aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC); if (!aead_req) return -ENOMEM; __aad = (u8 *)aead_req + reqsize; - memcpy(__aad, aad, CCM_AAD_LEN); + memcpy(__aad, aad, aad_len); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); + sg_set_buf(&sg[0], __aad, aad_len); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -84,14 +84,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, return err; } -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], - size_t key_len, - size_t mic_len) +struct crypto_aead * +aead_key_setup_encrypt(const char *alg, const u8 key[], + size_t key_len, size_t mic_len) { struct crypto_aead *tfm; int err; - tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_aead(alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) return tfm; @@ -109,7 +109,7 @@ free_aead: return ERR_PTR(err); } -void ieee80211_aes_key_free(struct crypto_aead *tfm) +void aead_key_free(struct crypto_aead *tfm) { crypto_free_aead(tfm); } diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h new file mode 100644 index 000000000000..5e39ea843bbf --- /dev/null +++ b/net/mac80211/aead_api.h @@ -0,0 +1,27 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _AEAD_API_H +#define _AEAD_API_H + +#include +#include + +struct crypto_aead * +aead_key_setup_encrypt(const char *alg, const u8 key[], + size_t key_len, size_t mic_len); + +int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + size_t aad_len, u8 *data, + size_t data_len, u8 *mic); + +int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + size_t aad_len, u8 *data, + size_t data_len, u8 *mic); + +void aead_key_free(struct crypto_aead *tfm); + +#endif /* _AEAD_API_H */ diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index fcd3254c5cf0..e9b7ca0bde5b 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -10,19 +10,39 @@ #ifndef AES_CCM_H #define AES_CCM_H -#include +#include "aead_api.h" #define CCM_AAD_LEN 32 -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], - size_t key_len, - size_t mic_len); -int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len); -int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len); -void ieee80211_aes_key_free(struct crypto_aead *tfm); +static inline struct crypto_aead * +ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len) +{ + return aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len); +} + +static inline int +ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, + u8 *b_0, u8 *aad, u8 *data, + size_t data_len, u8 *mic) +{ + return aead_encrypt(tfm, b_0, aad + 2, + be16_to_cpup((__be16 *)aad), + data, data_len, mic); +} + +static inline int +ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, + u8 *b_0, u8 *aad, u8 *data, + size_t data_len, u8 *mic) +{ + return aead_decrypt(tfm, b_0, aad + 2, + be16_to_cpup((__be16 *)aad), + data, data_len, mic); +} + +static inline void ieee80211_aes_key_free(struct crypto_aead *tfm) +{ + return aead_key_free(tfm); +} #endif /* AES_CCM_H */ diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c deleted file mode 100644 index 8a4397cc1b08..000000000000 --- a/net/mac80211/aes_gcm.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2014-2015, Qualcomm Atheros, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include - -#include -#include "key.h" -#include "aes_gcm.h" - -int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) -{ - struct scatterlist sg[3]; - struct aead_request *aead_req; - int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); - u8 *__aad; - - aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); - if (!aead_req) - return -ENOMEM; - - __aad = (u8 *)aead_req + reqsize; - memcpy(__aad, aad, GCM_AAD_LEN); - - sg_init_table(sg, 3); - sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); - sg_set_buf(&sg[1], data, data_len); - sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); - - aead_request_set_tfm(aead_req, tfm); - aead_request_set_crypt(aead_req, sg, sg, data_len, j_0); - aead_request_set_ad(aead_req, sg[0].length); - - crypto_aead_encrypt(aead_req); - kzfree(aead_req); - return 0; -} - -int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) -{ - struct scatterlist sg[3]; - struct aead_request *aead_req; - int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); - u8 *__aad; - int err; - - if (data_len == 0) - return -EINVAL; - - aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); - if (!aead_req) - return -ENOMEM; - - __aad = (u8 *)aead_req + reqsize; - memcpy(__aad, aad, GCM_AAD_LEN); - - sg_init_table(sg, 3); - sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); - sg_set_buf(&sg[1], data, data_len); - sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); - - aead_request_set_tfm(aead_req, tfm); - aead_request_set_crypt(aead_req, sg, sg, - data_len + IEEE80211_GCMP_MIC_LEN, j_0); - aead_request_set_ad(aead_req, sg[0].length); - - err = crypto_aead_decrypt(aead_req); - kzfree(aead_req); - - return err; -} - -struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], - size_t key_len) -{ - struct crypto_aead *tfm; - int err; - - tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - return tfm; - - err = crypto_aead_setkey(tfm, key, key_len); - if (err) - goto free_aead; - err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN); - if (err) - goto free_aead; - - return tfm; - -free_aead: - crypto_free_aead(tfm); - return ERR_PTR(err); -} - -void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm) -{ - crypto_free_aead(tfm); -} diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h index 55aed5352494..d2b096033009 100644 --- a/net/mac80211/aes_gcm.h +++ b/net/mac80211/aes_gcm.h @@ -9,16 +9,38 @@ #ifndef AES_GCM_H #define AES_GCM_H -#include +#include "aead_api.h" #define GCM_AAD_LEN 32 -int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); -int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); -struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], - size_t key_len); -void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm); +static inline int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, + u8 *j_0, u8 *aad, u8 *data, + size_t data_len, u8 *mic) +{ + return aead_encrypt(tfm, j_0, aad + 2, + be16_to_cpup((__be16 *)aad), + data, data_len, mic); +} + +static inline int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, + u8 *j_0, u8 *aad, u8 *data, + size_t data_len, u8 *mic) +{ + return aead_decrypt(tfm, j_0, aad + 2, + be16_to_cpup((__be16 *)aad), + data, data_len, mic); +} + +static inline struct crypto_aead * 
+ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], size_t key_len) +{ + return aead_key_setup_encrypt("gcm(aes)", key, + key_len, IEEE80211_GCMP_MIC_LEN); +} + +static inline void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm) +{ + return aead_key_free(tfm); +} #endif /* AES_GCM_H */ diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 0d722ea98a1b..b58722d9de37 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -464,7 +464,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad); return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, - skb_put(skb, mic_len), mic_len); + skb_put(skb, mic_len)); } @@ -543,7 +543,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, key->u.ccmp.tfm, b_0, aad, skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, data_len, - skb->data + skb->len - mic_len, mic_len)) + skb->data + skb->len - mic_len)) return RX_DROP_UNUSABLE; } From 8c418b5b15747eda05d086e80fa0a767982fbf37 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Oct 2017 11:53:32 +0200 Subject: [PATCH 0819/2177] fq: support filtering a given tin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add to the FQ API a way to filter a given tin, in order to remove frames that fulfil certain criteria according to a filter function. This will be used by mac80211 to remove frames belonging to an AP VLAN interface that's being removed. Signed-off-by: Johannes Berg Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/fq.h | 7 +++++ include/net/fq_impl.h | 72 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 69 insertions(+), 10 deletions(-) diff --git a/include/net/fq.h b/include/net/fq.h index 6d8521a30c5c..ac944a686840 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -90,6 +90,13 @@ typedef void fq_skb_free_t(struct fq *, struct fq_flow *, struct sk_buff *); +/* Return %true to filter (drop) the frame. 
*/ +typedef bool fq_skb_filter_t(struct fq *, + struct fq_tin *, + struct fq_flow *, + struct sk_buff *, + void *); + typedef struct fq_flow *fq_flow_get_default_t(struct fq *, struct fq_tin *, int idx, diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 4e6131cd3f43..8b237e4afee6 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -12,24 +12,22 @@ /* functions that are embedded into includer */ -static struct sk_buff *fq_flow_dequeue(struct fq *fq, - struct fq_flow *flow) +static void fq_adjust_removal(struct fq *fq, + struct fq_flow *flow, + struct sk_buff *skb) { struct fq_tin *tin = flow->tin; - struct fq_flow *i; - struct sk_buff *skb; - - lockdep_assert_held(&fq->lock); - - skb = __skb_dequeue(&flow->queue); - if (!skb) - return NULL; tin->backlog_bytes -= skb->len; tin->backlog_packets--; flow->backlog -= skb->len; fq->backlog--; fq->memory_usage -= skb->truesize; +} + +static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) +{ + struct fq_flow *i; if (flow->backlog == 0) { list_del_init(&flow->backlogchain); @@ -43,6 +41,21 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, list_move_tail(&flow->backlogchain, &i->backlogchain); } +} + +static struct sk_buff *fq_flow_dequeue(struct fq *fq, + struct fq_flow *flow) +{ + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + + skb = __skb_dequeue(&flow->queue); + if (!skb) + return NULL; + + fq_adjust_removal(fq, flow, skb); + fq_rejigger_backlog(fq, flow); return skb; } @@ -188,6 +201,45 @@ static void fq_tin_enqueue(struct fq *fq, } } +static void fq_flow_filter(struct fq *fq, + struct fq_flow *flow, + fq_skb_filter_t filter_func, + void *filter_data, + fq_skb_free_t free_func) +{ + struct fq_tin *tin = flow->tin; + struct sk_buff *skb, *tmp; + + lockdep_assert_held(&fq->lock); + + skb_queue_walk_safe(&flow->queue, skb, tmp) { + if (!filter_func(fq, tin, flow, skb, filter_data)) + continue; + + __skb_unlink(skb, &flow->queue); + fq_adjust_removal(fq, flow, skb); + free_func(fq, tin, flow, skb); + } + + fq_rejigger_backlog(fq, flow); +} + +static void fq_tin_filter(struct fq *fq, + struct fq_tin *tin, + fq_skb_filter_t filter_func, + void *filter_data, + fq_skb_free_t free_func) +{ + struct fq_flow *flow; + + lockdep_assert_held(&fq->lock); + + list_for_each_entry(flow, &tin->new_flows, flowchain) + fq_flow_filter(fq, flow, filter_func, filter_data, free_func); + list_for_each_entry(flow, &tin->old_flows, flowchain) + fq_flow_filter(fq, flow, filter_func, filter_data, free_func); +} + static void fq_flow_reset(struct fq *fq, struct fq_flow *flow, fq_skb_free_t free_func) From 2a9e25796b289f71c0802eca46005c750c57af95 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Oct 2017 11:53:33 +0200 Subject: [PATCH 0820/2177] mac80211: only remove AP VLAN frames from TXQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When removing an AP VLAN interface, mac80211 currently purges the entire TXQ for the AP interface. Fix this by using the FQ API introduced in the previous patch to filter frames. 
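A condensed view of the calling convention (a sketch based on the API added above; the concrete user is ieee80211_txq_remove_vlan() in the diff below): the caller supplies a fq_skb_filter_t that returns true for every frame to drop, plus a fq_skb_free_t for the dropped frames, and invokes fq_tin_filter() with fq->lock held.

	/* illustrative filter; frame_matches(), ctx and free_func are placeholders */
	static bool drop_matching(struct fq *fq, struct fq_tin *tin,
				  struct fq_flow *flow, struct sk_buff *skb,
				  void *data)
	{
		return frame_matches(skb, data);	/* true => unlink and free */
	}

	spin_lock_bh(&fq->lock);
	fq_tin_filter(fq, tin, drop_matching, ctx, free_func);
	spin_unlock_bh(&fq->lock);
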
Signed-off-by: Johannes Berg Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/iface.c | 25 +++---------------------- net/mac80211/tx.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9675814f64db..68f874e73561 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2009,6 +2009,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct txq_info *txq, int tid); void ieee80211_txq_purge(struct ieee80211_local *local, struct txq_info *txqi); +void ieee80211_txq_remove_vlan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *bssid, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 2619daa29961..13b16f90e1cf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -793,9 +793,7 @@ static int ieee80211_open(struct net_device *dev) static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down) { - struct ieee80211_sub_if_data *txq_sdata = sdata; struct ieee80211_local *local = sdata->local; - struct fq *fq = &local->fq; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; @@ -939,9 +937,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: - txq_sdata = container_of(sdata->bss, - struct ieee80211_sub_if_data, u.ap); - mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); mutex_unlock(&local->mtx); @@ -998,8 +993,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, skb_queue_purge(&sdata->skb_queue); } - sdata->bss = NULL; - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_walk_safe(&local->pending[i], skb, tmp) { @@ -1012,22 +1005,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - if (txq_sdata->vif.txq) { - struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + ieee80211_txq_remove_vlan(local, sdata); - /* - * FIXME FIXME - * - * We really shouldn't purge the *entire* txqi since that - * contains frames for the other AP_VLANs (and possibly - * the AP itself) as well, but there's no API in FQ now - * to be able to filter. 
- */ - - spin_lock_bh(&fq->lock); - ieee80211_txq_purge(local, txqi); - spin_unlock_bh(&fq->lock); - } + sdata->bss = NULL; if (local->open_count == 0) ieee80211_clear_tx_pending(local); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 94826680cf2b..7b8154474b9e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1396,6 +1396,40 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local, fq_flow_get_default_func); } +static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin, + struct fq_flow *flow, struct sk_buff *skb, + void *data) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + return info->control.vif == data; +} + +void ieee80211_txq_remove_vlan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + struct fq *fq = &local->fq; + struct txq_info *txqi; + struct fq_tin *tin; + struct ieee80211_sub_if_data *ap; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) + return; + + ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); + + if (!ap->vif.txq) + return; + + txqi = to_txq_info(ap->vif.txq); + tin = &txqi->tin; + + spin_lock_bh(&fq->lock); + fq_tin_filter(fq, tin, fq_vlan_filter_func, &sdata->vif, + fq_skb_free_func); + spin_unlock_bh(&fq->lock); +} + void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct txq_info *txqi, int tid) From 007f6c5e6eb45c81ee89368a5f226572ae638831 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Oct 2015 11:22:58 +0200 Subject: [PATCH 0821/2177] cfg80211: support loading regulatory database as firmware file As the current regulatory database is only about 4k big, and already difficult to extend, we decided that overall it would be better to get rid of the complications with CRDA and load the database into the kernel directly, but in a new format that is extensible. The new file format can be extended since it carries a length field on all the structs that need to be extensible. In order to be able to request firmware when the module initializes, move cfg80211 from subsys_initcall() to the later fs_initcall(); the firmware loader is at the same level but linked earlier, so it can be called from there. Otherwise, when both the firmware loader and cfg80211 are built-in, the request will crash the kernel. We also need to be before device_initcall() so that cfg80211 is available for devices when they initialize. Signed-off-by: Johannes Berg --- Documentation/networking/regulatory.txt | 8 + net/wireless/Kconfig | 4 +- net/wireless/core.c | 2 +- net/wireless/reg.c | 294 ++++++++++++++++++++++-- 4 files changed, 284 insertions(+), 24 deletions(-) diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt index 7818b5fe448b..46c8d8b1cc66 100644 --- a/Documentation/networking/regulatory.txt +++ b/Documentation/networking/regulatory.txt @@ -19,6 +19,14 @@ core regulatory domain all wireless devices should adhere to. How to get regulatory domains to the kernel ------------------------------------------- +When the regulatory domain is first set up, the kernel will request a +database file (regulatory.db) containing all the regulatory rules. It +will then use that database when it needs to look up the rules for a +given country. + +How to get regulatory domains to the kernel (old CRDA solution) +--------------------------------------------------------------- + Userspace gets a regulatory domain in the kernel by having a userspace agent build it and send it via nl80211. 
Only expected regulatory domains will be respected by the kernel. diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 6c606120abfe..24eec5516649 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -19,6 +19,7 @@ config WEXT_PRIV config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL + select FW_LOADER ---help--- cfg80211 is the Linux wireless LAN (802.11) configuration API. Enable this if you have a wireless device. @@ -167,7 +168,8 @@ config CFG80211_CRDA_SUPPORT depends on CFG80211 help You should enable this option unless you know for sure you have no - need for it, for example when using internal regdb (above.) + need for it, for example when using internal regdb (above) or the + database loaded as a firmware file. If unsure, say Y. diff --git a/net/wireless/core.c b/net/wireless/core.c index 7b33e8c366bc..fdde0d98fde1 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1384,7 +1384,7 @@ out_fail_sysfs: out_fail_pernet: return err; } -subsys_initcall(cfg80211_init); +fs_initcall(cfg80211_init); static void __exit cfg80211_exit(void) { diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6e94f6934a0e..e9aeb05aaf3e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include "core.h" #include "reg.h" @@ -100,7 +101,7 @@ static struct regulatory_request core_request_world = { static struct regulatory_request __rcu *last_request = (void __force __rcu *)&core_request_world; -/* To trigger userspace events */ +/* To trigger userspace events and load firmware */ static struct platform_device *reg_pdev; /* @@ -443,7 +444,6 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd) return regd; } -#ifdef CONFIG_CFG80211_INTERNAL_REGDB struct reg_regdb_apply_request { struct list_head list; const struct ieee80211_regdomain *regdom; @@ -475,41 +475,44 @@ static void reg_regdb_apply(struct work_struct *work) static DECLARE_WORK(reg_regdb_work, reg_regdb_apply); -static int reg_query_builtin(const char *alpha2) +static int reg_schedule_apply(const struct ieee80211_regdomain *regdom) { - const struct ieee80211_regdomain *regdom = NULL; struct reg_regdb_apply_request *request; - unsigned int i; - - for (i = 0; i < reg_regdb_size; i++) { - if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) { - regdom = reg_regdb[i]; - break; - } - } - - if (!regdom) - return -ENODATA; request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL); - if (!request) - return -ENOMEM; - - request->regdom = reg_copy_regd(regdom); - if (IS_ERR_OR_NULL(request->regdom)) { - kfree(request); + if (!request) { + kfree(regdom); return -ENOMEM; } + request->regdom = regdom; + mutex_lock(®_regdb_apply_mutex); list_add_tail(&request->list, ®_regdb_apply_list); mutex_unlock(®_regdb_apply_mutex); schedule_work(®_regdb_work); - return 0; } +#ifdef CONFIG_CFG80211_INTERNAL_REGDB +static int reg_query_builtin(const char *alpha2) +{ + const struct ieee80211_regdomain *regdom = NULL; + unsigned int i; + + for (i = 0; i < reg_regdb_size; i++) { + if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) { + regdom = reg_copy_regd(reg_regdb[i]); + break; + } + } + if (!regdom) + return -ENODATA; + + return reg_schedule_apply(regdom); +} + /* Feel free to add any other sanity checks here */ static void reg_regdb_size_check(void) { @@ -599,12 +602,256 @@ static inline int call_crda(const char *alpha2) } #endif /* CONFIG_CFG80211_CRDA_SUPPORT */ +/* code to directly load a firmware database through 
request_firmware */ +static const struct fwdb_header *regdb; + +struct fwdb_country { + u8 alpha2[2]; + __be16 coll_ptr; + /* this struct cannot be extended */ +} __packed __aligned(4); + +struct fwdb_collection { + u8 len; + u8 n_rules; + u8 dfs_region; + /* no optional data yet */ + /* aligned to 2, then followed by __be16 array of rule pointers */ +} __packed __aligned(4); + +enum fwdb_flags { + FWDB_FLAG_NO_OFDM = BIT(0), + FWDB_FLAG_NO_OUTDOOR = BIT(1), + FWDB_FLAG_DFS = BIT(2), + FWDB_FLAG_NO_IR = BIT(3), + FWDB_FLAG_AUTO_BW = BIT(4), +}; + +struct fwdb_rule { + u8 len; + u8 flags; + __be16 max_eirp; + __be32 start, end, max_bw; + /* start of optional data */ + __be16 cac_timeout; +} __packed __aligned(4); + +#define FWDB_MAGIC 0x52474442 +#define FWDB_VERSION 20 + +struct fwdb_header { + __be32 magic; + __be32 version; + struct fwdb_country country[]; +} __packed __aligned(4); + +static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr) +{ + struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2)); + + if ((u8 *)rule + sizeof(rule->len) > data + size) + return false; + + /* mandatory fields */ + if (rule->len < offsetofend(struct fwdb_rule, max_bw)) + return false; + + return true; +} + +static bool valid_country(const u8 *data, unsigned int size, + const struct fwdb_country *country) +{ + unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; + struct fwdb_collection *coll = (void *)(data + ptr); + __be16 *rules_ptr; + unsigned int i; + + /* make sure we can read len/n_rules */ + if ((u8 *)coll + offsetofend(typeof(*coll), n_rules) > data + size) + return false; + + /* make sure base struct and all rules fit */ + if ((u8 *)coll + ALIGN(coll->len, 2) + + (coll->n_rules * 2) > data + size) + return false; + + /* mandatory fields must exist */ + if (coll->len < offsetofend(struct fwdb_collection, dfs_region)) + return false; + + rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2)); + + for (i = 0; i < coll->n_rules; i++) { + u16 rule_ptr = be16_to_cpu(rules_ptr[i]); + + if (!valid_rule(data, size, rule_ptr)) + return false; + } + + return true; +} + +static bool valid_regdb(const u8 *data, unsigned int size) +{ + const struct fwdb_header *hdr = (void *)data; + const struct fwdb_country *country; + + if (size < sizeof(*hdr)) + return false; + + if (hdr->magic != cpu_to_be32(FWDB_MAGIC)) + return false; + + if (hdr->version != cpu_to_be32(FWDB_VERSION)) + return false; + + country = &hdr->country[0]; + while ((u8 *)(country + 1) <= data + size) { + if (!country->coll_ptr) + break; + if (!valid_country(data, size, country)) + return false; + country++; + } + + return true; +} + +static int regdb_query_country(const struct fwdb_header *db, + const struct fwdb_country *country) +{ + unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; + struct fwdb_collection *coll = (void *)((u8 *)db + ptr); + struct ieee80211_regdomain *regdom; + unsigned int size_of_regd; + unsigned int i; + + size_of_regd = + sizeof(struct ieee80211_regdomain) + + coll->n_rules * sizeof(struct ieee80211_reg_rule); + + regdom = kzalloc(size_of_regd, GFP_KERNEL); + if (!regdom) + return -ENOMEM; + + regdom->n_reg_rules = coll->n_rules; + regdom->alpha2[0] = country->alpha2[0]; + regdom->alpha2[1] = country->alpha2[1]; + regdom->dfs_region = coll->dfs_region; + + for (i = 0; i < regdom->n_reg_rules; i++) { + __be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2)); + unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2; + struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr); + struct 
ieee80211_reg_rule *rrule = ®dom->reg_rules[i]; + + rrule->freq_range.start_freq_khz = be32_to_cpu(rule->start); + rrule->freq_range.end_freq_khz = be32_to_cpu(rule->end); + rrule->freq_range.max_bandwidth_khz = be32_to_cpu(rule->max_bw); + + rrule->power_rule.max_antenna_gain = 0; + rrule->power_rule.max_eirp = be16_to_cpu(rule->max_eirp); + + rrule->flags = 0; + if (rule->flags & FWDB_FLAG_NO_OFDM) + rrule->flags |= NL80211_RRF_NO_OFDM; + if (rule->flags & FWDB_FLAG_NO_OUTDOOR) + rrule->flags |= NL80211_RRF_NO_OUTDOOR; + if (rule->flags & FWDB_FLAG_DFS) + rrule->flags |= NL80211_RRF_DFS; + if (rule->flags & FWDB_FLAG_NO_IR) + rrule->flags |= NL80211_RRF_NO_IR; + if (rule->flags & FWDB_FLAG_AUTO_BW) + rrule->flags |= NL80211_RRF_AUTO_BW; + + rrule->dfs_cac_ms = 0; + + /* handle optional data */ + if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout)) + rrule->dfs_cac_ms = + 1000 * be16_to_cpu(rule->cac_timeout); + } + + return reg_schedule_apply(regdom); +} + +static int query_regdb(const char *alpha2) +{ + const struct fwdb_header *hdr = regdb; + const struct fwdb_country *country; + + if (IS_ERR(regdb)) + return PTR_ERR(regdb); + + country = &hdr->country[0]; + while (country->coll_ptr) { + if (alpha2_equal(alpha2, country->alpha2)) + return regdb_query_country(regdb, country); + country++; + } + + return -ENODATA; +} + +static void regdb_fw_cb(const struct firmware *fw, void *context) +{ + void *db; + + if (!fw) { + pr_info("failed to load regulatory.db\n"); + regdb = ERR_PTR(-ENODATA); + goto restore; + } + + if (!valid_regdb(fw->data, fw->size)) { + pr_info("loaded regulatory.db is malformed\n"); + release_firmware(fw); + regdb = ERR_PTR(-EINVAL); + goto restore; + } + + db = kmemdup(fw->data, fw->size, GFP_KERNEL); + release_firmware(fw); + + if (!db) + goto restore; + regdb = db; + + if (query_regdb(context)) + goto restore; + goto free; + restore: + rtnl_lock(); + restore_regulatory_settings(true); + rtnl_unlock(); + free: + kfree(context); +} + +static int query_regdb_file(const char *alpha2) +{ + if (regdb) + return query_regdb(alpha2); + + alpha2 = kmemdup(alpha2, 2, GFP_KERNEL); + if (!alpha2) + return -ENOMEM; + + return request_firmware_nowait(THIS_MODULE, true, "regulatory.db", + ®_pdev->dev, GFP_KERNEL, + (void *)alpha2, regdb_fw_cb); +} + static bool reg_query_database(struct regulatory_request *request) { /* query internal regulatory database (if it exists) */ if (reg_query_builtin(request->alpha2) == 0) return true; + if (query_regdb_file(request->alpha2) == 0) + return true; + if (call_crda(request->alpha2) == 0) return true; @@ -3360,4 +3607,7 @@ void regulatory_exit(void) list_del(®_request->list); kfree(reg_request); } + + if (!IS_ERR_OR_NULL(regdb)) + kfree(regdb); } From 1ea4ff3e9f0b8d53e680a2bb9e8e644bf03aeb4d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Sep 2017 16:07:22 +0200 Subject: [PATCH 0822/2177] cfg80211: support reloading regulatory database If the regulatory database is loaded, and then updated, it may be necessary to reload it. Add an nl80211 command to do this. Note that this just reloads the database, it doesn't re-apply the rules from it immediately. 
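As a user-space illustration only (not part of the patch), the new command can be driven with libnl-3's generic netlink helpers; the sketch below omits error handling, does not wait for the netlink ACK, and needs CAP_NET_ADMIN since the op is registered with GENL_ADMIN_PERM in the nl80211 ops below.

	#include <netlink/netlink.h>
	#include <netlink/genl/genl.h>
	#include <netlink/genl/ctrl.h>
	#include <linux/nl80211.h>

	static int reload_regdb(void)
	{
		struct nl_sock *sk = nl_socket_alloc();
		struct nl_msg *msg = nlmsg_alloc();
		int family;

		genl_connect(sk);
		family = genl_ctrl_resolve(sk, "nl80211");

		/* NL80211_CMD_RELOAD_REGDB carries no attributes */
		genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
			    0, 0, NL80211_CMD_RELOAD_REGDB, 0);
		nl_send_auto(sk, msg);

		nlmsg_free(msg);
		nl_socket_free(sk);
		return 0;
	}
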
Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++ net/wireless/nl80211.c | 11 +++++ net/wireless/reg.c | 82 +++++++++++++++++++++++++++--------- net/wireless/reg.h | 6 +++ 4 files changed, 82 insertions(+), 21 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 95832ce03a44..f882fe1f9709 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -990,6 +990,8 @@ * &NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed * &NL80211_CMD_DISCONNECT should be indicated instead. * + * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1194,6 +1196,8 @@ enum nl80211_commands { NL80211_CMD_PORT_AUTHORIZED, + NL80211_CMD_RELOAD_REGDB, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5129342151e6..67a03f2885a4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5678,6 +5678,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) } } +static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info) +{ + return reg_reload_regdb(); +} + static int nl80211_get_mesh_config(struct sk_buff *skb, struct genl_info *info) { @@ -12708,6 +12713,12 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_RELOAD_REGDB, + .doit = nl80211_reload_regdb, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, { .cmd = NL80211_CMD_GET_MESH_CONFIG, .doit = nl80211_get_mesh_config, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e9aeb05aaf3e..180addda52af 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -781,6 +781,8 @@ static int query_regdb(const char *alpha2) const struct fwdb_header *hdr = regdb; const struct fwdb_country *country; + ASSERT_RTNL(); + if (IS_ERR(regdb)) return PTR_ERR(regdb); @@ -796,41 +798,47 @@ static int query_regdb(const char *alpha2) static void regdb_fw_cb(const struct firmware *fw, void *context) { + int set_error = 0; + bool restore = true; void *db; if (!fw) { pr_info("failed to load regulatory.db\n"); - regdb = ERR_PTR(-ENODATA); - goto restore; - } - - if (!valid_regdb(fw->data, fw->size)) { + set_error = -ENODATA; + } else if (!valid_regdb(fw->data, fw->size)) { pr_info("loaded regulatory.db is malformed\n"); - release_firmware(fw); - regdb = ERR_PTR(-EINVAL); - goto restore; + set_error = -EINVAL; } - db = kmemdup(fw->data, fw->size, GFP_KERNEL); - release_firmware(fw); - - if (!db) - goto restore; - regdb = db; - - if (query_regdb(context)) - goto restore; - goto free; - restore: rtnl_lock(); - restore_regulatory_settings(true); + if (WARN_ON(regdb && !IS_ERR(regdb))) { + /* just restore and free new db */ + } else if (set_error) { + regdb = ERR_PTR(set_error); + } else if (fw) { + db = kmemdup(fw->data, fw->size, GFP_KERNEL); + if (db) { + regdb = db; + restore = context && query_regdb(context); + } else { + restore = true; + } + } + + if (restore) + restore_regulatory_settings(true); + rtnl_unlock(); - free: + kfree(context); + + release_firmware(fw); } static int query_regdb_file(const char *alpha2) { + ASSERT_RTNL(); + if (regdb) return query_regdb(alpha2); @@ -843,6 +851,38 @@ static int query_regdb_file(const char *alpha2) (void *)alpha2, regdb_fw_cb); } +int reg_reload_regdb(void) +{ + const struct firmware *fw; 
+ void *db; + int err; + + err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev); + if (err) + return err; + + if (!valid_regdb(fw->data, fw->size)) { + err = -ENODATA; + goto out; + } + + db = kmemdup(fw->data, fw->size, GFP_KERNEL); + if (!db) { + err = -ENOMEM; + goto out; + } + + rtnl_lock(); + if (!IS_ERR_OR_NULL(regdb)) + kfree(regdb); + regdb = db; + rtnl_unlock(); + + out: + release_firmware(fw); + return err; +} + static bool reg_query_database(struct regulatory_request *request) { /* query internal regulatory database (if it exists) */ diff --git a/net/wireless/reg.h b/net/wireless/reg.h index ca7fedf2e7a1..9529c522611a 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -179,4 +179,10 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1 */ bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2); + +/** + * reg_reload_regdb - reload the regulatory.db firmware file + */ +int reg_reload_regdb(void); + #endif /* __NET_WIRELESS_REG_H */ From c8c240e284b3d821011b4f680b3eaa99569b3756 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Oct 2015 14:35:41 +0200 Subject: [PATCH 0823/2177] cfg80211: reg: remove support for built-in regdb Parsing and building C structures from a regdb is no longer needed since the "firmware" file (regulatory.db) can be linked into the kernel image to achieve the same effect. Signed-off-by: Johannes Berg --- Documentation/networking/regulatory.txt | 22 +--- net/wireless/.gitignore | 1 - net/wireless/Kconfig | 24 +--- net/wireless/Makefile | 6 - net/wireless/db.txt | 17 --- net/wireless/genregdb.awk | 158 ------------------------ net/wireless/reg.c | 39 ------ net/wireless/regdb.h | 23 ---- 8 files changed, 3 insertions(+), 287 deletions(-) delete mode 100644 net/wireless/.gitignore delete mode 100644 net/wireless/db.txt delete mode 100644 net/wireless/genregdb.awk delete mode 100644 net/wireless/regdb.h diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt index 46c8d8b1cc66..381e5b23d61d 100644 --- a/Documentation/networking/regulatory.txt +++ b/Documentation/networking/regulatory.txt @@ -200,23 +200,5 @@ Then in some part of your code after your wiphy has been registered: Statically compiled regulatory database --------------------------------------- -In most situations the userland solution using CRDA as described -above is the preferred solution. However in some cases a set of -rules built into the kernel itself may be desirable. To account -for this situation, a configuration option has been provided -(i.e. CONFIG_CFG80211_INTERNAL_REGDB). With this option enabled, -the wireless database information contained in net/wireless/db.txt is -used to generate a data structure encoded in net/wireless/regdb.c. -That option also enables code in net/wireless/reg.c which queries -the data in regdb.c as an alternative to using CRDA. - -The file net/wireless/db.txt should be kept up-to-date with the db.txt -file available in the git repository here: - - git://git.kernel.org/pub/scm/linux/kernel/git/sforshee/wireless-regdb.git - -Again, most users in most situations should be using the CRDA package -provided with their distribution, and in most other situations users -should be building and using CRDA on their own rather than using -this option. If you are not absolutely sure that you should be using -CONFIG_CFG80211_INTERNAL_REGDB then _DO_NOT_USE_IT_.
+When a database should be fixed into the kernel, it can be provided as a +firmware file at build time that is then linked into the kernel. diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore deleted file mode 100644 index c33451b896d9..000000000000 --- a/net/wireless/.gitignore +++ /dev/null @@ -1 +0,0 @@ -regdb.c diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 24eec5516649..f050030055c5 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -140,30 +140,8 @@ config CFG80211_DEBUGFS If unsure, say N. -config CFG80211_INTERNAL_REGDB - bool "use statically compiled regulatory rules database" if EXPERT - default n - depends on CFG80211 - ---help--- - This option generates an internal data structure representing - the wireless regulatory rules described in net/wireless/db.txt - and includes code to query that database. This is an alternative - to using CRDA for defining regulatory rules for the kernel. - - Using this option requires some parsing of the db.txt at build time, - the parser will be upkept with the latest wireless-regdb updates but - older wireless-regdb formats will be ignored. The parser may later - be replaced to avoid issues with conflicts on versions of - wireless-regdb. - - For details see: - - http://wireless.kernel.org/en/developers/Regulatory - - Most distributions have a CRDA package. So if unsure, say N. - config CFG80211_CRDA_SUPPORT - bool "support CRDA" if CFG80211_INTERNAL_REGDB + bool "support CRDA" if EXPERT default y depends on CFG80211 help diff --git a/net/wireless/Makefile b/net/wireless/Makefile index d06e5015751a..5f20dac5d8c6 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -14,11 +14,5 @@ cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o cfg80211-$(CONFIG_OF) += of.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o -cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o CFLAGS_trace.o := -I$(src) - -$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk - @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@ - -clean-files := regdb.c diff --git a/net/wireless/db.txt b/net/wireless/db.txt deleted file mode 100644 index a2fc3a09ccdc..000000000000 --- a/net/wireless/db.txt +++ /dev/null @@ -1,17 +0,0 @@ -# -# This file is a placeholder to prevent accidental build breakage if someone -# enables CONFIG_CFG80211_INTERNAL_REGDB. Almost no one actually needs to -# enable that build option. -# -# You should be using CRDA instead. It is even better if you use the CRDA -# package provided by your distribution, since they will probably keep it -# up-to-date on your behalf. -# -# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will -# need to replace this file with one containing appropriately formatted -# regulatory rules that cover the regulatory domains you will be using. Your -# best option is to extract the db.txt file from the wireless-regdb git -# repository: -# -# git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git -# diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk deleted file mode 100644 index baf2426b555a..000000000000 --- a/net/wireless/genregdb.awk +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/awk -f -# -# genregdb.awk -- generate regdb.c from db.txt -# -# Actually, it reads from stdin (presumed to be db.txt) and writes -# to stdout (presumed to be regdb.c), but close enough... -# -# Copyright 2009 John W. 
Linville -# -# Permission to use, copy, modify, and/or distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -BEGIN { - active = 0 - rules = 0; - print "/*" - print " * DO NOT EDIT -- file generated from data in db.txt" - print " */" - print "" - print "#include " - print "#include " - print "#include \"regdb.h\"" - print "" - regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n" -} - -function parse_country_head() { - country=$2 - sub(/:/, "", country) - printf "static const struct ieee80211_regdomain regdom_%s = {\n", country - printf "\t.alpha2 = \"%s\",\n", country - if ($NF ~ /DFS-ETSI/) - printf "\t.dfs_region = NL80211_DFS_ETSI,\n" - else if ($NF ~ /DFS-FCC/) - printf "\t.dfs_region = NL80211_DFS_FCC,\n" - else if ($NF ~ /DFS-JP/) - printf "\t.dfs_region = NL80211_DFS_JP,\n" - printf "\t.reg_rules = {\n" - active = 1 - regdb = regdb "\t®dom_" country ",\n" -} - -function parse_reg_rule() -{ - flag_starts_at = 7 - - start = $1 - sub(/\(/, "", start) - end = $3 - bw = $5 - sub(/\),/, "", bw) - gain = 0 - power = $6 - # power might be in mW... - units = $7 - dfs_cac = 0 - - sub(/\(/, "", power) - sub(/\),/, "", power) - sub(/\),/, "", units) - sub(/\)/, "", units) - - if (units == "mW") { - flag_starts_at = 8 - power = 10 * log(power)/log(10) - if ($8 ~ /[[:digit:]]/) { - flag_starts_at = 9 - dfs_cac = $8 - } - } else { - if ($7 ~ /[[:digit:]]/) { - flag_starts_at = 8 - dfs_cac = $7 - } - } - sub(/\(/, "", dfs_cac) - sub(/\),/, "", dfs_cac) - flagstr = "" - for (i=flag_starts_at; i<=NF; i++) - flagstr = flagstr $i - split(flagstr, flagarray, ",") - flags = "" - for (arg in flagarray) { - if (flagarray[arg] == "NO-OFDM") { - flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | " - } else if (flagarray[arg] == "NO-CCK") { - flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | " - } else if (flagarray[arg] == "NO-INDOOR") { - flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | " - } else if (flagarray[arg] == "NO-OUTDOOR") { - flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | " - } else if (flagarray[arg] == "DFS") { - flags = flags "\n\t\t\tNL80211_RRF_DFS | " - } else if (flagarray[arg] == "PTP-ONLY") { - flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | " - } else if (flagarray[arg] == "PTMP-ONLY") { - flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | " - } else if (flagarray[arg] == "PASSIVE-SCAN") { - flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " - } else if (flagarray[arg] == "NO-IBSS") { - flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " - } else if (flagarray[arg] == "NO-IR") { - flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " - } else if (flagarray[arg] == "AUTO-BW") { - flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | " - } - - } - flags = flags "0" - printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags - rules++ -} - -function print_tail_country() -{ - active = 0 - printf "\t},\n" - printf "\t.n_reg_rules = %d\n", rules - printf "};\n\n" - 
rules = 0; -} - -/^[ \t]*#/ { - # Ignore -} - -!active && /^[ \t]*$/ { - # Ignore -} - -!active && /country/ { - parse_country_head() -} - -active && /^[ \t]*\(/ { - parse_reg_rule() -} - -active && /^[ \t]*$/ { - print_tail_country() -} - -END { - if (active) - print_tail_country() - print regdb "};" - print "" - print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);" -} diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 180addda52af..ebf8267ffbc9 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -59,7 +59,6 @@ #include "core.h" #include "reg.h" #include "rdev-ops.h" -#include "regdb.h" #include "nl80211.h" /* @@ -495,38 +494,6 @@ static int reg_schedule_apply(const struct ieee80211_regdomain *regdom) return 0; } -#ifdef CONFIG_CFG80211_INTERNAL_REGDB -static int reg_query_builtin(const char *alpha2) -{ - const struct ieee80211_regdomain *regdom = NULL; - unsigned int i; - - for (i = 0; i < reg_regdb_size; i++) { - if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) { - regdom = reg_copy_regd(reg_regdb[i]); - break; - } - } - if (!regdom) - return -ENODATA; - - return reg_schedule_apply(regdom); -} - -/* Feel free to add any other sanity checks here */ -static void reg_regdb_size_check(void) -{ - /* We should ideally BUILD_BUG_ON() but then random builds would fail */ - WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it..."); -} -#else -static inline void reg_regdb_size_check(void) {} -static inline int reg_query_builtin(const char *alpha2) -{ - return -ENODATA; -} -#endif /* CONFIG_CFG80211_INTERNAL_REGDB */ - #ifdef CONFIG_CFG80211_CRDA_SUPPORT /* Max number of consecutive attempts to communicate with CRDA */ #define REG_MAX_CRDA_TIMEOUTS 10 @@ -885,10 +852,6 @@ int reg_reload_regdb(void) static bool reg_query_database(struct regulatory_request *request) { - /* query internal regulatory database (if it exists) */ - if (reg_query_builtin(request->alpha2) == 0) - return true; - if (query_regdb_file(request->alpha2) == 0) return true; @@ -3580,8 +3543,6 @@ int __init regulatory_init(void) spin_lock_init(&reg_pending_beacons_lock); spin_lock_init(&reg_indoor_lock); - reg_regdb_size_check(); - rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); user_alpha2[0] = '9'; diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h deleted file mode 100644 index 3279cfcefb0c..000000000000 --- a/net/wireless/regdb.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __REGDB_H__ -#define __REGDB_H__ - -/* - * Copyright 2009 John W. Linville - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */ - -extern const struct ieee80211_regdomain *reg_regdb[]; -extern int reg_regdb_size; - -#endif /* __REGDB_H__ */ From 90a53e4432b12288316efaa5f308adafb8d304b0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Sep 2017 22:21:08 +0200 Subject: [PATCH 0824/2177] cfg80211: implement regdb signature checking Currently CRDA implements the signature checking, and the previous commits added the ability to load the whole regulatory database into the kernel. However, we really can't lose the signature checking, so implement it in the kernel by loading a detached signature (regulatory.db.p7s) and check it against built-in keys. Signed-off-by: Johannes Berg --- net/wireless/.gitignore | 2 + net/wireless/Kconfig | 30 ++++++++ net/wireless/Makefile | 22 ++++++ net/wireless/certs/sforshee.x509 | Bin 0 -> 680 bytes net/wireless/reg.c | 121 ++++++++++++++++++++++++++++++- net/wireless/reg.h | 8 ++ 6 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 net/wireless/.gitignore create mode 100644 net/wireless/certs/sforshee.x509 diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore new file mode 100644 index 000000000000..61cbc304a3d3 --- /dev/null +++ b/net/wireless/.gitignore @@ -0,0 +1,2 @@ +shipped-certs.c +extra-certs.c diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index f050030055c5..da91bb547db3 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -83,6 +83,36 @@ config CFG80211_CERTIFICATION_ONUS you are a wireless researcher and are working in a controlled and approved environment by your local regulatory agency. +config CFG80211_REQUIRE_SIGNED_REGDB + bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS + default y + select SYSTEM_DATA_VERIFICATION + help + Require that in addition to the "regulatory.db" file a + "regulatory.db.p7s" can be loaded with a valid PKCS#7 + signature for the regulatory.db file made by one of the + keys in the certs/ directory. + +config CFG80211_USE_KERNEL_REGDB_KEYS + bool "allow regdb keys shipped with the kernel" if CFG80211_CERTIFICATION_ONUS + default y + depends on CFG80211_REQUIRE_SIGNED_REGDB + help + Allow the regulatory database to be signed by one of the keys for + which certificates are part of the kernel sources + (in net/wireless/certs/). + + This is currently only Seth Forshee's key, who is the regulatory + database maintainer. + +config CFG80211_EXTRA_REGDB_KEYDIR + string "additional regdb key directory" if CFG80211_CERTIFICATION_ONUS + depends on CFG80211_REQUIRE_SIGNED_REGDB + help + If selected, point to a directory with DER-encoded X.509 + certificates like in the kernel sources (net/wireless/certs/) + that shall be accepted for a signed regulatory database. 
+ config CFG80211_REG_CELLULAR_HINTS bool "cfg80211 regulatory support for cellular base station hints" depends on CFG80211_CERTIFICATION_ONUS diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 5f20dac5d8c6..219baea57e4e 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -16,3 +16,25 @@ cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o CFLAGS_trace.o := -I$(src) + +cfg80211-$(CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS) += shipped-certs.o +ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),) +cfg80211-y += extra-certs.o +endif + +$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) + @echo " GEN $@" + @echo '#include "reg.h"' > $@ + @echo 'const u8 shipped_regdb_certs[] = {' >> $@ + @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done + @echo '};' >> $@ + @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ + +$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ + $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) + @echo " GEN $@" + @echo '#include "reg.h"' > $@ + @echo 'const u8 extra_regdb_certs[] = {' >> $@ + @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done + @echo '};' >> $@ + @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509 new file mode 100644 index 0000000000000000000000000000000000000000..c6f8f9d6b98839048822ebbe27ecb831614ccf3d GIT binary patch literal 680 zcmXqLVp?L*#Mr~c$*`&SzWchL=aw7rvTPs z1zb!`jEoFh9UjloXt8AeWO+SJ$I(J`P2JMwLe;tnH5qsF?QdR>w3uI$6?BMMOSdlV zi`-_R0)^-+(~WEkyRD@;#6_|bkA!zm6O;L?a+RC&XNF+Q?^A(17hNT93Al9K{8zT} zZ-TA_x5m^>y01EB?6?@F_#s&SBUAoMx7m~9H74+{G5eLFTo@kq?abx-wOTi&i(Oyu zQg3}-}=;a=vvvRoQQq5|7x<;&pS~YD#Y&_6&F5Z@hi_o39R~2i%lCEQp;`DZKFij>Y=beQfq8 zwmr$x_+%2JY;Sbn*;@WJ=R-@}i!U>_Zs%4CQ>mTLxstDKo_X|~T&9~nCjzn_MSd1z zd$q}FYs9}@7aPN+-fyz#i1@bZh+cP;`je$EmYhnDSyPmLIA8d%u4(1nTO z{kHgKW!NWvf$y~!0w?Rc|ETrmY6%tMs`ay$*Vg}|u{qP^VMD|2N9%W9Gx#VQ(ylyn seju}tYb{f1@#??lr<~!nO89FdqAzB=Qc?bNz{Y;&cMH;1idBjL01XcsegFUf literal 0 HcmV?d00001 diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ebf8267ffbc9..58319c82ecb3 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -659,6 +660,115 @@ static bool valid_country(const u8 *data, unsigned int size, return true; } +#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB +static struct key *builtin_regdb_keys; + +static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen) +{ + const u8 *end = p + buflen; + size_t plen; + key_ref_t key; + + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. 
+ */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1), + "asymmetric", NULL, p, plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); +} + +static int __init load_builtin_regdb_keys(void) +{ + builtin_regdb_keys = + keyring_alloc(".builtin_regdb_keys", + KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH), + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + if (IS_ERR(builtin_regdb_keys)) + return PTR_ERR(builtin_regdb_keys); + + pr_notice("Loading compiled-in X.509 certificates for regulatory database\n"); + +#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS + load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len); +#endif +#ifdef CFG80211_EXTRA_REGDB_KEYDIR + if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0') + load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len); +#endif + + return 0; +} + +static bool regdb_has_valid_signature(const u8 *data, unsigned int size) +{ + const struct firmware *sig; + bool result; + + if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev)) + return false; + + result = verify_pkcs7_signature(data, size, sig->data, sig->size, + builtin_regdb_keys, + VERIFYING_UNSPECIFIED_SIGNATURE, + NULL, NULL) == 0; + + release_firmware(sig); + + return result; +} + +static void free_regdb_keyring(void) +{ + key_put(builtin_regdb_keys); +} +#else +static int load_builtin_regdb_keys(void) +{ + return 0; +} + +static bool regdb_has_valid_signature(const u8 *data, unsigned int size) +{ + return true; +} + +static void free_regdb_keyring(void) +{ +} +#endif /* CONFIG_CFG80211_REQUIRE_SIGNED_REGDB */ + static bool valid_regdb(const u8 *data, unsigned int size) { const struct fwdb_header *hdr = (void *)data; @@ -673,6 +783,9 @@ static bool valid_regdb(const u8 *data, unsigned int size) if (hdr->version != cpu_to_be32(FWDB_VERSION)) return false; + if (!regdb_has_valid_signature(data, size)) + return false; + country = &hdr->country[0]; while ((u8 *)(country + 1) <= data + size) { if (!country->coll_ptr) @@ -773,7 +886,7 @@ static void regdb_fw_cb(const struct firmware *fw, void *context) pr_info("failed to load regulatory.db\n"); set_error = -ENODATA; } else if (!valid_regdb(fw->data, fw->size)) { - pr_info("loaded regulatory.db is malformed\n"); + pr_info("loaded regulatory.db is malformed or signature is missing/invalid\n"); set_error = -EINVAL; } @@ -3535,6 +3648,10 @@ int __init regulatory_init(void) { int err = 0; + err = load_builtin_regdb_keys(); + if (err) + return err; + reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); @@ -3611,4 +3728,6 @@ void regulatory_exit(void) if (!IS_ERR_OR_NULL(regdb)) kfree(regdb); + + free_regdb_keyring(); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 9529c522611a..9ceeb5f3a7cb 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -1,5 +1,8 @@ #ifndef 
__NET_WIRELESS_REG_H #define __NET_WIRELESS_REG_H + +#include + /* * Copyright 2008-2011 Luis R. Rodriguez * @@ -185,4 +188,9 @@ bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2); */ int reg_reload_regdb(void); +extern const u8 shipped_regdb_certs[]; +extern unsigned int shipped_regdb_certs_len; +extern const u8 extra_regdb_certs[]; +extern unsigned int extra_regdb_certs_len; + #endif /* __NET_WIRELESS_REG_H */ From 9e97964d5e500d8d0df94e1a79ad715ad4f9c995 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Oct 2017 15:46:45 +0200 Subject: [PATCH 0825/2177] mac80211: use crypto_aead_authsize() Evidently this API is intended to be used to isolate against API changes, so use it instead of accessing ->authsize. Signed-off-by: Johannes Berg --- net/mac80211/aead_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/aead_api.c b/net/mac80211/aead_api.c index 347f13953b2c..160f9df30402 100644 --- a/net/mac80211/aead_api.c +++ b/net/mac80211/aead_api.c @@ -21,7 +21,7 @@ int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len, u8 *data, size_t data_len, u8 *mic) { - size_t mic_len = tfm->authsize; + size_t mic_len = crypto_aead_authsize(tfm); struct scatterlist sg[3]; struct aead_request *aead_req; int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); @@ -52,7 +52,7 @@ int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len, int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len, u8 *data, size_t data_len, u8 *mic) { - size_t mic_len = tfm->authsize; + size_t mic_len = crypto_aead_authsize(tfm); struct scatterlist sg[3]; struct aead_request *aead_req; int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); From 81a1905382a380beeba201ce41276afd9035dc64 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Oct 2017 15:46:21 +0200 Subject: [PATCH 0826/2177] Bluetooth: hci_bcm: fix build error without CONFIG_PM This was introduced by the rework adding PM support: drivers/bluetooth/hci_bcm.c: In function 'bcm_device_exists': drivers/bluetooth/hci_bcm.c:156:22: error: 'struct bcm_device' has no member named 'hu' if (device && device->hu && device->hu->serdev) ^~ The pointer is not available otherwise, so I'm enclosing all references in an #ifdef here. 
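The shape of the fix, reduced to a sketch with made-up names (foo_device and foo_uart are hypothetical, not the hci_bcm structures): when a struct member only exists under CONFIG_PM, every dereference of it has to be compiled out under the same condition, since a plain if (IS_ENABLED(CONFIG_PM) && ...) would still reference the missing member and fail to build:

#include <linux/types.h>	/* bool, for a kernel-style fragment */

struct serdev_device;		/* opaque here; only pointers are used */

struct foo_uart {
	struct serdev_device *serdev;
};

struct foo_device {
#ifdef CONFIG_PM
	struct foo_uart *hu;	/* member exists only in PM builds */
#endif
	struct foo_uart serdev_hu;
};

static bool foo_device_uses_serdev(struct foo_device *d)
{
#ifdef CONFIG_PM
	/* guarded access: d->hu does not exist when CONFIG_PM is off */
	if (d && d->hu && d->hu->serdev)
		return true;
#endif
	return false;
}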
Fixes: 8a92056837fd ("Bluetooth: hci_bcm: Add (runtime)pm support to the serdev driver") Signed-off-by: Arnd Bergmann Reviewed-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 16c2eaaaf72b..c69da0e8b79f 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -152,9 +152,11 @@ static bool bcm_device_exists(struct bcm_device *device) { struct list_head *p; +#ifdef CONFIG_PM /* Devices using serdev always exist */ if (device && device->hu && device->hu->serdev) return true; +#endif list_for_each(p, &bcm_device_list) { struct bcm_device *dev = list_entry(p, struct bcm_device, list); @@ -973,7 +975,9 @@ static int bcm_serdev_probe(struct serdev_device *serdev) return -ENOMEM; bcmdev->dev = &serdev->dev; +#ifdef CONFIG_PM bcmdev->hu = &bcmdev->serdev_hu; +#endif bcmdev->serdev_hu.serdev = serdev; serdev_device_set_drvdata(serdev, bcmdev); From 05e89fb576f580ac95e7a5d00bdb34830b09671a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Oct 2017 15:47:54 +0200 Subject: [PATCH 0827/2177] Bluetooth: BT_HCIUART now depends on SERIAL_DEV_BUS It is no longer possible to build BT_HCIUART into the kernel when SERIAL_DEV_BUS is a loadable module, even if none of the SERIAL_DEV_BUS based implementations are selected: drivers/bluetooth/hci_ldisc.o: In function `hci_uart_set_flow_control': hci_ldisc.c:(.text+0xb40): undefined reference to `serdev_device_set_flow_control' hci_ldisc.c:(.text+0xb5c): undefined reference to `serdev_device_set_tiocm' This adds a dependency to avoid the broken configuration. Fixes: 7841d554809b ("Bluetooth: hci_uart_set_flow_control: Fix NULL deref when using serdev") Signed-off-by: Arnd Bergmann Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 082e1c7329de..6475f8c0d3b2 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -65,6 +65,7 @@ config BT_HCIBTSDIO config BT_HCIUART tristate "HCI UART driver" + depends on SERIAL_DEV_BUS || !SERIAL_DEV_BUS depends on TTY help Bluetooth HCI UART driver. @@ -79,7 +80,6 @@ config BT_HCIUART config BT_HCIUART_SERDEV bool depends on SERIAL_DEV_BUS && BT_HCIUART - depends on SERIAL_DEV_BUS=y || SERIAL_DEV_BUS=BT_HCIUART default y config BT_HCIUART_H4 From 4a269818a7eb8577d32d8b2879099c689ddbd856 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Oct 2017 13:27:29 -0700 Subject: [PATCH 0828/2177] tcp: fix tcp_unlink_write_queue() Yury reported crash with this signature : [ 554.034021] [] 0xffff80003ccd5a58 [ 554.034156] [] skb_release_all+0x14/0x30 [ 554.034288] [] __kfree_skb+0x14/0x28 [ 554.034409] [] tcp_sendmsg_locked+0x4dc/0xcc8 [ 554.034541] [] tcp_sendmsg+0x34/0x58 [ 554.034659] [] inet_sendmsg+0x2c/0xf8 [ 554.034783] [] sock_sendmsg+0x18/0x30 [ 554.034928] [] SyS_sendto+0x84/0xf8 Problem is that skb->destructor contains garbage, and this is because I accidentally removed tcp_skb_tsorted_anchor_cleanup() from tcp_unlink_write_queue() This would trigger with a write(fd, , len) attempt, and we will add to packetdrill this capability to avoid future regressions. Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") Reported-by: Yury Norov Tested-by: Yury Norov Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5a95e5886b55..15163454174b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1712,6 +1712,7 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new, static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) { + tcp_skb_tsorted_anchor_cleanup(skb); __skb_unlink(skb, &sk->sk_write_queue); } From f7e3bfa14daa35ea393ad6389c9e01e61196e93f Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 10 Oct 2017 14:49:52 +0200 Subject: [PATCH 0829/2177] net: dsa: lan9303: Move tag setup to new lan9303_setup_tagging Prepare for next patch: Move tag setup from lan9303_separate_ports() to new function lan9303_setup_tagging() Signed-off-by: Egil Hjelmeland Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303-core.c | 42 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 07355db2ad81..2215ec1fbe1e 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -157,6 +157,7 @@ # define LAN9303_SWE_PORT_MIRROR_ENABLE_RX_MIRRORING BIT(1) # define LAN9303_SWE_PORT_MIRROR_ENABLE_TX_MIRRORING BIT(0) #define LAN9303_SWE_INGRESS_PORT_TYPE 0x1847 +#define LAN9303_SWE_INGRESS_PORT_TYPE_VLAN 3 #define LAN9303_BM_CFG 0x1c00 #define LAN9303_BM_EGRSS_PORT_TYPE 0x1c0c # define LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT2 (BIT(17) | BIT(16)) @@ -510,11 +511,30 @@ static int lan9303_enable_processing_port(struct lan9303 *chip, LAN9303_MAC_TX_CFG_X_TX_ENABLE); } +/* forward special tagged packets from port 0 to port 1 *or* port 2 */ +static int lan9303_setup_tagging(struct lan9303 *chip) +{ + int ret; + u32 val; + /* enable defining the destination port via special VLAN tagging + * for port 0 + */ + ret = lan9303_write_switch_reg(chip, LAN9303_SWE_INGRESS_PORT_TYPE, + LAN9303_SWE_INGRESS_PORT_TYPE_VLAN); + if (ret) + return ret; + + /* tag incoming packets at port 1 and 2 on their way to port 0 to be + * able to discover their source port + */ + val = LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT0; + return lan9303_write_switch_reg(chip, LAN9303_BM_EGRSS_PORT_TYPE, val); +} + /* We want a special working switch: * - do not forward packets between port 1 and 2 * - forward everything from port 1 to port 0 * - forward everything from port 2 to port 0 - * - forward special tagged packets from port 0 to port 1 *or* port 2 */ static int lan9303_separate_ports(struct lan9303 *chip) { @@ -529,22 +549,6 @@ static int lan9303_separate_ports(struct lan9303 *chip) if (ret) return ret; - /* enable defining the destination port via special VLAN tagging - * for port 0 - */ - ret = lan9303_write_switch_reg(chip, LAN9303_SWE_INGRESS_PORT_TYPE, - 0x03); - if (ret) - return ret; - - /* tag incoming packets at port 1 and 2 on their way to port 0 to be - * able to discover their source port - */ - ret = lan9303_write_switch_reg(chip, LAN9303_BM_EGRSS_PORT_TYPE, - LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT0); - if (ret) - return ret; - /* prevent port 1 and 2 from forwarding packets by their own */ return lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_STATE, LAN9303_SWE_PORT_STATE_FORWARDING_PORT0 | @@ -644,6 +648,10 @@ static int lan9303_setup(struct dsa_switch *ds) return -EINVAL; } + ret = lan9303_setup_tagging(chip); + if (ret) + dev_err(chip->dev, "failed to setup port tagging %d\n", ret); + ret = lan9303_separate_ports(chip); if 
(ret) dev_err(chip->dev, "failed to separate ports %d\n", ret); From d99a86ae83d28ad08bcd46570d81033b48db2ca0 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 10 Oct 2017 14:49:53 +0200 Subject: [PATCH 0830/2177] net: dsa: lan9303: Add basic offloading of unicast traffic When both user ports are joined to the same bridge, the normal HW MAC learning is enabled. This means that unicast traffic is forwarded in HW. If one of the user ports leave the bridge, the ports goes back to the initial separated operation. Port separation relies on disabled HW MAC learning. Hence the condition that both ports must join same bridge. Add brigde methods port_bridge_join, port_bridge_leave and port_stp_state_set. Signed-off-by: Egil Hjelmeland Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303-core.c | 82 ++++++++++++++++++++++++++++++++++ drivers/net/dsa/lan9303.h | 2 + 2 files changed, 84 insertions(+) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 2215ec1fbe1e..fecfe1fe67ea 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "lan9303.h" @@ -146,6 +147,7 @@ # define LAN9303_SWE_PORT_STATE_FORWARDING_PORT0 (0) # define LAN9303_SWE_PORT_STATE_LEARNING_PORT0 BIT(1) # define LAN9303_SWE_PORT_STATE_BLOCKING_PORT0 BIT(0) +# define LAN9303_SWE_PORT_STATE_DISABLED_PORT0 (3) #define LAN9303_SWE_PORT_MIRROR 0x1846 # define LAN9303_SWE_PORT_MIRROR_SNIFF_ALL BIT(8) # define LAN9303_SWE_PORT_MIRROR_SNIFFER_PORT2 BIT(7) @@ -156,6 +158,7 @@ # define LAN9303_SWE_PORT_MIRROR_MIRRORED_PORT0 BIT(2) # define LAN9303_SWE_PORT_MIRROR_ENABLE_RX_MIRRORING BIT(1) # define LAN9303_SWE_PORT_MIRROR_ENABLE_TX_MIRRORING BIT(0) +# define LAN9303_SWE_PORT_MIRROR_DISABLED 0 #define LAN9303_SWE_INGRESS_PORT_TYPE 0x1847 #define LAN9303_SWE_INGRESS_PORT_TYPE_VLAN 3 #define LAN9303_BM_CFG 0x1c00 @@ -556,6 +559,16 @@ static int lan9303_separate_ports(struct lan9303 *chip) LAN9303_SWE_PORT_STATE_BLOCKING_PORT2); } +static void lan9303_bridge_ports(struct lan9303 *chip) +{ + /* ports bridged: remove mirroring */ + lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_MIRROR, + LAN9303_SWE_PORT_MIRROR_DISABLED); + + lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_STATE, + chip->swe_port_state); +} + static int lan9303_handle_reset(struct lan9303 *chip) { if (!chip->reset_gpio) @@ -844,6 +857,72 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port, } } +static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, + struct net_device *br) +{ + struct lan9303 *chip = ds->priv; + + dev_dbg(chip->dev, "%s(port %d)\n", __func__, port); + if (ds->ports[1].bridge_dev == ds->ports[2].bridge_dev) { + lan9303_bridge_ports(chip); + chip->is_bridged = true; /* unleash stp_state_set() */ + } + + return 0; +} + +static void lan9303_port_bridge_leave(struct dsa_switch *ds, int port, + struct net_device *br) +{ + struct lan9303 *chip = ds->priv; + + dev_dbg(chip->dev, "%s(port %d)\n", __func__, port); + if (chip->is_bridged) { + lan9303_separate_ports(chip); + chip->is_bridged = false; + } +} + +static void lan9303_port_stp_state_set(struct dsa_switch *ds, int port, + u8 state) +{ + int portmask, portstate; + struct lan9303 *chip = ds->priv; + + dev_dbg(chip->dev, "%s(port %d, state %d)\n", + __func__, port, state); + + switch (state) { + case BR_STATE_DISABLED: + portstate = LAN9303_SWE_PORT_STATE_DISABLED_PORT0; + break; + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + portstate = 
LAN9303_SWE_PORT_STATE_BLOCKING_PORT0; + break; + case BR_STATE_LEARNING: + portstate = LAN9303_SWE_PORT_STATE_LEARNING_PORT0; + break; + case BR_STATE_FORWARDING: + portstate = LAN9303_SWE_PORT_STATE_FORWARDING_PORT0; + break; + default: + portstate = LAN9303_SWE_PORT_STATE_DISABLED_PORT0; + dev_err(chip->dev, "unknown stp state: port %d, state %d\n", + port, state); + } + + portmask = 0x3 << (port * 2); + portstate <<= (port * 2); + + chip->swe_port_state = (chip->swe_port_state & ~portmask) | portstate; + + if (chip->is_bridged) + lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_STATE, + chip->swe_port_state); + /* else: touching SWE_PORT_STATE would break port separation */ +} + static const struct dsa_switch_ops lan9303_switch_ops = { .get_tag_protocol = lan9303_get_tag_protocol, .setup = lan9303_setup, @@ -855,6 +934,9 @@ static const struct dsa_switch_ops lan9303_switch_ops = { .get_sset_count = lan9303_get_sset_count, .port_enable = lan9303_port_enable, .port_disable = lan9303_port_disable, + .port_bridge_join = lan9303_port_bridge_join, + .port_bridge_leave = lan9303_port_bridge_leave, + .port_stp_state_set = lan9303_port_stp_state_set, }; static int lan9303_register_switch(struct lan9303 *chip) diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h index 4d8be555ff4d..68ecd544b658 100644 --- a/drivers/net/dsa/lan9303.h +++ b/drivers/net/dsa/lan9303.h @@ -21,6 +21,8 @@ struct lan9303 { struct dsa_switch *ds; struct mutex indirect_mutex; /* protect indexed register access */ const struct lan9303_phy_ops *ops; + bool is_bridged; /* true if port 1 and 2 are bridged */ + u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */ }; extern const struct regmap_access_table lan9303_register_set; From 87461f7a58ab694e638ac52afa543b427751a9d0 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Tue, 10 Oct 2017 12:42:55 -0500 Subject: [PATCH 0831/2177] net: phy: DP83822 initial driver submission Add support for the TI DP83822 10/100Mbit ethernet phy. The DP83822 provides flexibility to connect to a MAC through a standard MII, RMII or RGMII interface. In addition the DP83822 needs to be removed from the DP83848 driver as the WoL support is added here for this device. Datasheet: http://www.ti.com/product/DP83822I/datasheet Signed-off-by: Dan Murphy Acked-by: Andrew F. Davis Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 5 + drivers/net/phy/Makefile | 1 + drivers/net/phy/dp83822.c | 344 ++++++++++++++++++++++++++++++++++++++ drivers/net/phy/dp83848.c | 3 - 4 files changed, 350 insertions(+), 3 deletions(-) create mode 100644 drivers/net/phy/dp83822.c diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index e2cf8ffc5c6f..8125412c8814 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -277,6 +277,11 @@ config DAVICOM_PHY ---help--- Currently supports dm9161e and dm9131 +config DP83822_PHY + tristate "Texas Instruments DP83822 PHY" + ---help--- + Supports the DP83822 PHY. 
+ config DP83848_PHY tristate "Texas Instruments DP83848 PHY" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 1404ad3b77c7..4b983be83566 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_CICADA_PHY) += cicada.o obj-$(CONFIG_CORTINA_PHY) += cortina.o obj-$(CONFIG_DAVICOM_PHY) += davicom.o obj-$(CONFIG_DP83640_PHY) += dp83640.o +obj-$(CONFIG_DP83822_PHY) += dp83822.o obj-$(CONFIG_DP83848_PHY) += dp83848.o obj-$(CONFIG_DP83867_PHY) += dp83867.o obj-$(CONFIG_FIXED_PHY) += fixed_phy.o diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c new file mode 100644 index 000000000000..14335d14e9e4 --- /dev/null +++ b/drivers/net/phy/dp83822.c @@ -0,0 +1,344 @@ +/* + * Driver for the Texas Instruments DP83822 PHY + * + * Copyright (C) 2017 Texas Instruments Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DP83822_PHY_ID 0x2000a240 +#define DP83822_DEVADDR 0x1f + +#define MII_DP83822_PHYSCR 0x11 +#define MII_DP83822_MISR1 0x12 +#define MII_DP83822_MISR2 0x13 +#define MII_DP83822_RESET_CTRL 0x1f + +#define DP83822_HW_RESET BIT(15) +#define DP83822_SW_RESET BIT(14) + +/* PHYSCR Register Fields */ +#define DP83822_PHYSCR_INT_OE BIT(0) /* Interrupt Output Enable */ +#define DP83822_PHYSCR_INTEN BIT(1) /* Interrupt Enable */ + +/* MISR1 bits */ +#define DP83822_RX_ERR_HF_INT_EN BIT(0) +#define DP83822_FALSE_CARRIER_HF_INT_EN BIT(1) +#define DP83822_ANEG_COMPLETE_INT_EN BIT(2) +#define DP83822_DUP_MODE_CHANGE_INT_EN BIT(3) +#define DP83822_SPEED_CHANGED_INT_EN BIT(4) +#define DP83822_LINK_STAT_INT_EN BIT(5) +#define DP83822_ENERGY_DET_INT_EN BIT(6) +#define DP83822_LINK_QUAL_INT_EN BIT(7) + +/* MISR2 bits */ +#define DP83822_JABBER_DET_INT_EN BIT(0) +#define DP83822_WOL_PKT_INT_EN BIT(1) +#define DP83822_SLEEP_MODE_INT_EN BIT(2) +#define DP83822_MDI_XOVER_INT_EN BIT(3) +#define DP83822_LB_FIFO_INT_EN BIT(4) +#define DP83822_PAGE_RX_INT_EN BIT(5) +#define DP83822_ANEG_ERR_INT_EN BIT(6) +#define DP83822_EEE_ERROR_CHANGE_INT_EN BIT(7) + +/* INT_STAT1 bits */ +#define DP83822_WOL_INT_EN BIT(4) +#define DP83822_WOL_INT_STAT BIT(12) + +#define MII_DP83822_RXSOP1 0x04a5 +#define MII_DP83822_RXSOP2 0x04a6 +#define MII_DP83822_RXSOP3 0x04a7 + +/* WoL Registers */ +#define MII_DP83822_WOL_CFG 0x04a0 +#define MII_DP83822_WOL_STAT 0x04a1 +#define MII_DP83822_WOL_DA1 0x04a2 +#define MII_DP83822_WOL_DA2 0x04a3 +#define MII_DP83822_WOL_DA3 0x04a4 + +/* WoL bits */ +#define DP83822_WOL_MAGIC_EN BIT(0) +#define DP83822_WOL_SECURE_ON BIT(5) +#define DP83822_WOL_EN BIT(7) +#define DP83822_WOL_INDICATION_SEL BIT(8) +#define DP83822_WOL_CLR_INDICATION BIT(11) + +static int dp83822_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read(phydev, MII_DP83822_MISR1); + if (err < 0) + return err; + + err = phy_read(phydev, MII_DP83822_MISR2); + if (err < 0) + return err; + + return 0; +} + +static int dp83822_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + struct net_device *ndev = 
phydev->attached_dev; + u16 value; + const u8 *mac; + + if (wol->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { + mac = (const u8 *)ndev->dev_addr; + + if (!is_valid_ether_addr(mac)) + return -EINVAL; + + /* MAC addresses start with byte 5, but stored in mac[0]. + * 822 PHYs store bytes 4|5, 2|3, 0|1 + */ + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA1, + (mac[1] << 8) | mac[0]); + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA2, + (mac[3] << 8) | mac[2]); + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA3, + (mac[5] << 8) | mac[4]); + + value = phy_read_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_WOL_CFG); + if (wol->wolopts & WAKE_MAGIC) + value |= DP83822_WOL_MAGIC_EN; + else + value &= ~DP83822_WOL_MAGIC_EN; + + if (wol->wolopts & WAKE_MAGICSECURE) { + phy_write_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RXSOP1, + (wol->sopass[1] << 8) | wol->sopass[0]); + phy_write_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RXSOP2, + (wol->sopass[3] << 8) | wol->sopass[2]); + phy_write_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RXSOP3, + (wol->sopass[5] << 8) | wol->sopass[4]); + value |= DP83822_WOL_SECURE_ON; + } else { + value &= ~DP83822_WOL_SECURE_ON; + } + + value |= (DP83822_WOL_EN | DP83822_WOL_INDICATION_SEL | + DP83822_WOL_CLR_INDICATION); + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, + value); + } else { + value = phy_read_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_WOL_CFG); + value &= ~DP83822_WOL_EN; + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, + value); + } + + return 0; +} + +static void dp83822_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int value; + u16 sopass_val; + + wol->supported = (WAKE_MAGIC | WAKE_MAGICSECURE); + wol->wolopts = 0; + + value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG); + + if (value & DP83822_WOL_MAGIC_EN) + wol->wolopts |= WAKE_MAGIC; + + if (value & DP83822_WOL_SECURE_ON) { + sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RXSOP1); + wol->sopass[0] = (sopass_val & 0xff); + wol->sopass[1] = (sopass_val >> 8); + + sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RXSOP2); + wol->sopass[2] = (sopass_val & 0xff); + wol->sopass[3] = (sopass_val >> 8); + + sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RXSOP3); + wol->sopass[4] = (sopass_val & 0xff); + wol->sopass[5] = (sopass_val >> 8); + + wol->wolopts |= WAKE_MAGICSECURE; + } + + /* WoL is not enabled so set wolopts to 0 */ + if (!(value & DP83822_WOL_EN)) + wol->wolopts = 0; +} + +static int dp83822_config_intr(struct phy_device *phydev) +{ + int misr_status; + int physcr_status; + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + misr_status = phy_read(phydev, MII_DP83822_MISR1); + if (misr_status < 0) + return misr_status; + + misr_status |= (DP83822_RX_ERR_HF_INT_EN | + DP83822_FALSE_CARRIER_HF_INT_EN | + DP83822_ANEG_COMPLETE_INT_EN | + DP83822_DUP_MODE_CHANGE_INT_EN | + DP83822_SPEED_CHANGED_INT_EN | + DP83822_LINK_STAT_INT_EN | + DP83822_ENERGY_DET_INT_EN | + DP83822_LINK_QUAL_INT_EN); + + err = phy_write(phydev, MII_DP83822_MISR1, misr_status); + if (err < 0) + return err; + + misr_status = phy_read(phydev, MII_DP83822_MISR2); + if (misr_status < 0) + return misr_status; + + misr_status |= (DP83822_JABBER_DET_INT_EN | + DP83822_WOL_PKT_INT_EN | + DP83822_SLEEP_MODE_INT_EN | + DP83822_MDI_XOVER_INT_EN | + DP83822_LB_FIFO_INT_EN | + DP83822_PAGE_RX_INT_EN | + DP83822_ANEG_ERR_INT_EN | + DP83822_EEE_ERROR_CHANGE_INT_EN); + + err = 
phy_write(phydev, MII_DP83822_MISR2, misr_status); + if (err < 0) + return err; + + physcr_status = phy_read(phydev, MII_DP83822_PHYSCR); + if (physcr_status < 0) + return physcr_status; + + physcr_status |= DP83822_PHYSCR_INT_OE | DP83822_PHYSCR_INTEN; + + } else { + err = phy_write(phydev, MII_DP83822_MISR1, 0); + if (err < 0) + return err; + + err = phy_write(phydev, MII_DP83822_MISR1, 0); + if (err < 0) + return err; + + physcr_status = phy_read(phydev, MII_DP83822_PHYSCR); + if (physcr_status < 0) + return physcr_status; + + physcr_status &= ~DP83822_PHYSCR_INTEN; + } + + return phy_write(phydev, MII_DP83822_PHYSCR, physcr_status); +} + +static int dp83822_config_init(struct phy_device *phydev) +{ + int err; + int value; + + err = genphy_config_init(phydev); + if (err < 0) + return err; + + value = DP83822_WOL_MAGIC_EN | DP83822_WOL_SECURE_ON | DP83822_WOL_EN; + + return phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, + value); +} + +static int dp83822_phy_reset(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, MII_DP83822_RESET_CTRL, DP83822_HW_RESET); + if (err < 0) + return err; + + dp83822_config_init(phydev); + + return 0; +} + +static int dp83822_suspend(struct phy_device *phydev) +{ + int value; + + value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG); + + if (!(value & DP83822_WOL_EN)) + genphy_suspend(phydev); + + return 0; +} + +static int dp83822_resume(struct phy_device *phydev) +{ + int value; + + genphy_resume(phydev); + + value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG); + + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, value | + DP83822_WOL_CLR_INDICATION); + + return 0; +} + +static struct phy_driver dp83822_driver[] = { + { + .phy_id = DP83822_PHY_ID, + .phy_id_mask = 0xfffffff0, + .name = "TI DP83822", + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = dp83822_config_init, + .soft_reset = dp83822_phy_reset, + .get_wol = dp83822_get_wol, + .set_wol = dp83822_set_wol, + .ack_interrupt = dp83822_ack_interrupt, + .config_intr = dp83822_config_intr, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = dp83822_suspend, + .resume = dp83822_resume, + }, +}; +module_phy_driver(dp83822_driver); + +static struct mdio_device_id __maybe_unused dp83822_tbl[] = { + { DP83822_PHY_ID, 0xfffffff0 }, + { }, +}; +MODULE_DEVICE_TABLE(mdio, dp83822_tbl); + +MODULE_DESCRIPTION("Texas Instruments DP83822 PHY driver"); +MODULE_AUTHOR("Dan Murphy Date: Tue, 10 Oct 2017 12:42:56 -0500 Subject: [PATCH 0832/2177] net: phy: at803x: Change error to EINVAL for invalid MAC Change the return error code to EINVAL if the MAC address is not valid in the set_wol function. Signed-off-by: Dan Murphy Signed-off-by: David S. 
Miller --- drivers/net/phy/at803x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index c1e52b9dc58d..5f93e6add563 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -167,7 +167,7 @@ static int at803x_set_wol(struct phy_device *phydev, mac = (const u8 *) ndev->dev_addr; if (!is_valid_ether_addr(mac)) - return -EFAULT; + return -EINVAL; for (i = 0; i < 3; i++) { phy_write(phydev, AT803X_MMD_ACCESS_CONTROL, From b784ecba51da465bc15fdd1d6715479f3adc560e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Oct 2017 12:25:48 -0700 Subject: [PATCH 0833/2177] atm: idt77105: Drop needless setup_timer() Calling setup_timer() is redundant when DEFINE_TIMER() has been used. Cc: Chas Williams <3chas3@gmail.com> Cc: linux-atm-general@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/atm/idt77105.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c index 082aa02abc57..d781b3f87693 100644 --- a/drivers/atm/idt77105.c +++ b/drivers/atm/idt77105.c @@ -306,11 +306,9 @@ static int idt77105_start(struct atm_dev *dev) if (start_timer) { start_timer = 0; - setup_timer(&stats_timer, idt77105_stats_timer_func, 0UL); stats_timer.expires = jiffies+IDT77105_STATS_TIMER_PERIOD; add_timer(&stats_timer); - setup_timer(&restart_timer, idt77105_restart_timer_func, 0UL); restart_timer.expires = jiffies+IDT77105_RESTART_TIMER_PERIOD; add_timer(&restart_timer); } From 7822b0836d2121d7de3d0f9ec636338d7496e5dc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 11 Oct 2017 02:35:23 +0000 Subject: [PATCH 0834/2177] net: hns3: make local functions static Fixes the following sparse warnings: drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c:464:5: warning: symbol 'hns3_change_all_ring_bd_num' was not declared. Should it be static? drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c:477:5: warning: symbol 'hns3_set_ringparam' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index 9b36ce081f62..ddbd7f30c6a4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -461,7 +461,8 @@ static int hns3_get_rxnfc(struct net_device *netdev, return 0; } -int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv, u32 new_desc_num) +static int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv, + u32 new_desc_num) { struct hnae3_handle *h = priv->ae_handle; int i; @@ -474,7 +475,8 @@ int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv, u32 new_desc_num) return hns3_init_all_ring(priv); } -int hns3_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *param) +static int hns3_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *param) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; From ae85bfa87821b9fa60bf846c09a6a0056d87cdb2 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 10 Oct 2017 23:45:17 -0700 Subject: [PATCH 0835/2177] net: qrtr: Invoke sk_error_report() after setting sk_err Rather than manually waking up any context sleeping on the sock to signal an error we should call sk_error_report(). This has the added benefit that in-kernel consumers can override this notification with its own callback. Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c2f5c13550c0..7e4b49a8349e 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -541,7 +541,7 @@ static void qrtr_reset_ports(void) sock_hold(&ipc->sk); ipc->sk.sk_err = ENETRESET; - wake_up_interruptible(sk_sleep(&ipc->sk)); + ipc->sk.sk_error_report(&ipc->sk); sock_put(&ipc->sk); } mutex_unlock(&qrtr_port_lock); From 28978713c51b0a70acf748f76f9d6d2d20dcf980 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 10 Oct 2017 23:45:18 -0700 Subject: [PATCH 0836/2177] net: qrtr: Move constants to header file The constants are used by both the name server and clients, so clarify their value and move them to the uapi header. Signed-off-by: Bjorn Andersson Signed-off-by: David S. 
Miller --- include/uapi/linux/qrtr.h | 3 +++ net/qrtr/qrtr.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h index 9d76c566f66e..63e8803e4d90 100644 --- a/include/uapi/linux/qrtr.h +++ b/include/uapi/linux/qrtr.h @@ -4,6 +4,9 @@ #include #include +#define QRTR_NODE_BCAST 0xffffffffu +#define QRTR_PORT_CTRL 0xfffffffeu + struct sockaddr_qrtr { __kernel_sa_family_t sq_family; __u32 sq_node; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 7e4b49a8349e..15981abc042c 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -61,8 +61,6 @@ struct qrtr_hdr { } __packed; #define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) -#define QRTR_NODE_BCAST ((unsigned int)-1) -#define QRTR_PORT_CTRL ((unsigned int)-2) struct qrtr_sock { /* WARNING: sk must be the first member */ From da7653f0faabbe45eb2d3fd6e4b400fe003e81ae Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 10 Oct 2017 23:45:19 -0700 Subject: [PATCH 0837/2177] net: qrtr: Add control packet definition to uapi The QMUX protocol specification defines structure of the special control packet messages being sent between handlers of the control port. Add these to the uapi header, as this structure and the associated types are shared between the kernel and all userspace handlers of control messages. Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- include/uapi/linux/qrtr.h | 32 ++++++++++++++++++++++++++++++++ net/qrtr/qrtr.c | 12 ------------ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h index 63e8803e4d90..179af64846e0 100644 --- a/include/uapi/linux/qrtr.h +++ b/include/uapi/linux/qrtr.h @@ -13,4 +13,36 @@ struct sockaddr_qrtr { __u32 sq_port; }; +enum qrtr_pkt_type { + QRTR_TYPE_DATA = 1, + QRTR_TYPE_HELLO = 2, + QRTR_TYPE_BYE = 3, + QRTR_TYPE_NEW_SERVER = 4, + QRTR_TYPE_DEL_SERVER = 5, + QRTR_TYPE_DEL_CLIENT = 6, + QRTR_TYPE_RESUME_TX = 7, + QRTR_TYPE_EXIT = 8, + QRTR_TYPE_PING = 9, + QRTR_TYPE_NEW_LOOKUP = 10, + QRTR_TYPE_DEL_LOOKUP = 11, +}; + +struct qrtr_ctrl_pkt { + __le32 cmd; + + union { + struct { + __le32 service; + __le32 instance; + __le32 node; + __le32 port; + } server; + + struct { + __le32 node; + __le32 port; + } client; + }; +} __packed; + #endif /* _LINUX_QRTR_H */ diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 15981abc042c..d85ca7170b8f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -26,18 +26,6 @@ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff -enum qrtr_pkt_type { - QRTR_TYPE_DATA = 1, - QRTR_TYPE_HELLO = 2, - QRTR_TYPE_BYE = 3, - QRTR_TYPE_NEW_SERVER = 4, - QRTR_TYPE_DEL_SERVER = 5, - QRTR_TYPE_DEL_CLIENT = 6, - QRTR_TYPE_RESUME_TX = 7, - QRTR_TYPE_EXIT = 8, - QRTR_TYPE_PING = 9, -}; - /** * struct qrtr_hdr - (I|R)PCrouter packet header * @version: protocol version From e7044482c8ac5081f4775063995647787d5082a4 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 10 Oct 2017 23:45:20 -0700 Subject: [PATCH 0838/2177] net: qrtr: Pass source and destination to enqueue functions Defer writing the message header to the skb until its time to enqueue the packet. As the receive path is reworked to decode the message header as it's received from the transport and only pass around the payload in the skb this change means that we do not have to fill out the full message header just to decode it immediately in qrtr_local_enqueue(). 
In the future this change also makes it possible to prepend message headers based on the version of each link. Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 120 ++++++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 51 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index d85ca7170b8f..82dc83789310 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -97,8 +97,12 @@ struct qrtr_node { struct list_head item; }; -static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb); -static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb); +static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to); +static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to); /* Release node resources and free the node. * @@ -136,10 +140,27 @@ static void qrtr_node_release(struct qrtr_node *node) } /* Pass an outgoing packet socket buffer to the endpoint driver. */ -static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) +static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to) { + struct qrtr_hdr *hdr; + size_t len = skb->len; int rc = -ENODEV; + hdr = skb_push(skb, QRTR_HDR_SIZE); + hdr->version = cpu_to_le32(QRTR_PROTO_VER); + hdr->type = cpu_to_le32(type); + hdr->src_node_id = cpu_to_le32(from->sq_node); + hdr->src_port_id = cpu_to_le32(from->sq_port); + hdr->dst_node_id = cpu_to_le32(to->sq_node); + hdr->dst_port_id = cpu_to_le32(to->sq_port); + + hdr->size = cpu_to_le32(len); + hdr->confirm_rx = 0; + + skb_put_padto(skb, ALIGN(len, 4)); + mutex_lock(&node->ep_lock); if (node->ep) rc = node->ep->xmit(node->ep, skb); @@ -237,23 +258,13 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_post); static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len, u32 src_node, u32 dst_node) { - struct qrtr_hdr *hdr; struct sk_buff *skb; skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); if (!skb) return NULL; - skb_reset_transport_header(skb); - hdr = skb_put(skb, QRTR_HDR_SIZE); - hdr->version = cpu_to_le32(QRTR_PROTO_VER); - hdr->type = cpu_to_le32(type); - hdr->src_node_id = cpu_to_le32(src_node); - hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); - hdr->confirm_rx = cpu_to_le32(0); - hdr->size = cpu_to_le32(pkt_len); - hdr->dst_node_id = cpu_to_le32(dst_node); - hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); + skb_reserve(skb, QRTR_HDR_SIZE); return skb; } @@ -326,6 +337,8 @@ static void qrtr_port_put(struct qrtr_sock *ipc); static void qrtr_node_rx_work(struct work_struct *work) { struct qrtr_node *node = container_of(work, struct qrtr_node, work); + struct sockaddr_qrtr dst; + struct sockaddr_qrtr src; struct sk_buff *skb; while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { @@ -341,6 +354,11 @@ static void qrtr_node_rx_work(struct work_struct *work) dst_port = le32_to_cpu(phdr->dst_port_id); confirm = !!phdr->confirm_rx; + src.sq_node = src_node; + src.sq_port = le32_to_cpu(phdr->src_port_id); + dst.sq_node = dst_node; + dst.sq_port = dst_port; + qrtr_node_assign(node, src_node); ipc = qrtr_port_lookup(dst_port); @@ -357,7 +375,9 @@ static void qrtr_node_rx_work(struct work_struct *work) skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); if (!skb) break; - if (qrtr_node_enqueue(node, skb)) + + if (qrtr_node_enqueue(node, skb, 
QRTR_TYPE_RESUME_TX, + &dst, &src)) break; } } @@ -407,6 +427,8 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register); void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) { struct qrtr_node *node = ep->node; + struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL}; + struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL}; struct sk_buff *skb; mutex_lock(&node->ep_lock); @@ -416,7 +438,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) /* Notify the local controller about the event */ skb = qrtr_alloc_local_bye(node->nid); if (skb) - qrtr_local_enqueue(NULL, skb); + qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst); qrtr_node_release(node); ep->node = NULL; @@ -454,11 +476,17 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) { struct sk_buff *skb; int port = ipc->us.sq_port; + struct sockaddr_qrtr to; + + to.sq_family = AF_QIPCRTR; + to.sq_node = QRTR_NODE_BCAST; + to.sq_port = QRTR_PORT_CTRL; skb = qrtr_alloc_del_client(&ipc->us); if (skb) { skb_set_owner_w(skb, &ipc->sk); - qrtr_bcast_enqueue(NULL, skb); + qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us, + &to); } if (port == QRTR_PORT_CTRL) @@ -606,19 +634,25 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len) } /* Queue packet to local peer socket. */ -static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) +static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to) { - const struct qrtr_hdr *phdr; struct qrtr_sock *ipc; + struct qrtr_hdr *phdr; - phdr = (const struct qrtr_hdr *)skb_transport_header(skb); - - ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id)); + ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ kfree_skb(skb); return -ENODEV; } + phdr = skb_push(skb, QRTR_HDR_SIZE); + skb_reset_transport_header(skb); + + phdr->src_node_id = cpu_to_le32(from->sq_node); + phdr->src_port_id = cpu_to_le32(from->sq_port); + if (sock_queue_rcv_skb(&ipc->sk, skb)) { qrtr_port_put(ipc); kfree_skb(skb); @@ -631,7 +665,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) } /* Queue packet for broadcast. 
*/ -static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) +static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to) { struct sk_buff *skbn; @@ -641,11 +677,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) if (!skbn) break; skb_set_owner_w(skbn, skb->sk); - qrtr_node_enqueue(node, skbn); + qrtr_node_enqueue(node, skbn, type, from, to); } mutex_unlock(&qrtr_node_lock); - qrtr_local_enqueue(node, skb); + qrtr_local_enqueue(node, skb, type, from, to); return 0; } @@ -653,13 +689,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); - int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); + int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int, + struct sockaddr_qrtr *, struct sockaddr_qrtr *); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; struct qrtr_node *node; - struct qrtr_hdr *hdr; struct sk_buff *skb; size_t plen; + u32 type = QRTR_TYPE_DATA; int rc; if (msg->msg_flags & ~(MSG_DONTWAIT)) @@ -713,32 +750,14 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (!skb) goto out_node; - skb_reset_transport_header(skb); - skb_put(skb, len + QRTR_HDR_SIZE); + skb_reserve(skb, QRTR_HDR_SIZE); - hdr = (struct qrtr_hdr *)skb_transport_header(skb); - hdr->version = cpu_to_le32(QRTR_PROTO_VER); - hdr->src_node_id = cpu_to_le32(ipc->us.sq_node); - hdr->src_port_id = cpu_to_le32(ipc->us.sq_port); - hdr->confirm_rx = cpu_to_le32(0); - hdr->size = cpu_to_le32(len); - hdr->dst_node_id = cpu_to_le32(addr->sq_node); - hdr->dst_port_id = cpu_to_le32(addr->sq_port); - - rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, - &msg->msg_iter, len); + rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc) { kfree_skb(skb); goto out_node; } - if (plen != len) { - rc = skb_pad(skb, plen - len); - if (rc) - goto out_node; - skb_put(skb, plen - len); - } - if (ipc->us.sq_port == QRTR_PORT_CTRL) { if (len < 4) { rc = -EINVAL; @@ -747,12 +766,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } /* control messages already require the type as 'command' */ - skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); - } else { - hdr->type = cpu_to_le32(QRTR_TYPE_DATA); + skb_copy_bits(skb, 0, &type, 4); + type = le32_to_cpu(type); } - rc = enqueue_fn(node, skb); + rc = enqueue_fn(node, skb, type, &ipc->us, addr); if (rc >= 0) rc = len; From 1a7959c76641674991ba3a49f25432f1e105391e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 10 Oct 2017 23:45:21 -0700 Subject: [PATCH 0839/2177] net: qrtr: Clean up control packet handling As the message header generation is deferred the internal functions for generating control packets can be simplified. This patch modifies qrtr_alloc_ctrl_packet() to, in addition to the sk_buff, return a reference to a struct qrtr_ctrl_pkt, which clarifies and simplifies the helpers to the point that these functions can be folded back into the callers. Signed-off-by: Bjorn Andersson Signed-off-by: David S. 
Miller --- net/qrtr/qrtr.c | 93 +++++++++++++++---------------------------------- 1 file changed, 29 insertions(+), 64 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 82dc83789310..a84edba7b1ef 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -255,9 +255,18 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) } EXPORT_SYMBOL_GPL(qrtr_endpoint_post); -static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len, - u32 src_node, u32 dst_node) +/** + * qrtr_alloc_ctrl_packet() - allocate control packet skb + * @pkt: reference to qrtr_ctrl_pkt pointer + * + * Returns newly allocated sk_buff, or NULL on failure + * + * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and + * on success returns a reference to the control packet in @pkt. + */ +static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt) { + const int pkt_len = sizeof(struct qrtr_ctrl_pkt); struct sk_buff *skb; skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); @@ -265,64 +274,7 @@ static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len, return NULL; skb_reserve(skb, QRTR_HDR_SIZE); - - return skb; -} - -/* Allocate and construct a resume-tx packet. */ -static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, - u32 dst_node, u32 port) -{ - const int pkt_len = 20; - struct sk_buff *skb; - __le32 *buf; - - skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len, - src_node, dst_node); - if (!skb) - return NULL; - - buf = skb_put_zero(skb, pkt_len); - buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); - buf[1] = cpu_to_le32(src_node); - buf[2] = cpu_to_le32(port); - - return skb; -} - -/* Allocate and construct a BYE message to signal remote termination */ -static struct sk_buff *qrtr_alloc_local_bye(u32 src_node) -{ - const int pkt_len = 20; - struct sk_buff *skb; - __le32 *buf; - - skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len, - src_node, qrtr_local_nid); - if (!skb) - return NULL; - - buf = skb_put_zero(skb, pkt_len); - buf[0] = cpu_to_le32(QRTR_TYPE_BYE); - - return skb; -} - -static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq) -{ - const int pkt_len = 20; - struct sk_buff *skb; - __le32 *buf; - - skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len, - sq->sq_node, QRTR_NODE_BCAST); - if (!skb) - return NULL; - - buf = skb_put_zero(skb, pkt_len); - buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); - buf[1] = cpu_to_le32(sq->sq_node); - buf[2] = cpu_to_le32(sq->sq_port); + *pkt = skb_put_zero(skb, pkt_len); return skb; } @@ -337,6 +289,7 @@ static void qrtr_port_put(struct qrtr_sock *ipc); static void qrtr_node_rx_work(struct work_struct *work) { struct qrtr_node *node = container_of(work, struct qrtr_node, work); + struct qrtr_ctrl_pkt *pkt; struct sockaddr_qrtr dst; struct sockaddr_qrtr src; struct sk_buff *skb; @@ -372,10 +325,14 @@ static void qrtr_node_rx_work(struct work_struct *work) } if (confirm) { - skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); + skb = qrtr_alloc_ctrl_packet(&pkt); if (!skb) break; + pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX); + pkt->client.node = cpu_to_le32(dst.sq_node); + pkt->client.port = cpu_to_le32(dst.sq_port); + if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX, &dst, &src)) break; @@ -429,6 +386,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) struct qrtr_node *node = ep->node; struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL}; struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL}; + struct 
qrtr_ctrl_pkt *pkt; struct sk_buff *skb; mutex_lock(&node->ep_lock); @@ -436,9 +394,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) mutex_unlock(&node->ep_lock); /* Notify the local controller about the event */ - skb = qrtr_alloc_local_bye(node->nid); - if (skb) + skb = qrtr_alloc_ctrl_packet(&pkt); + if (skb) { + pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE); qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst); + } qrtr_node_release(node); ep->node = NULL; @@ -474,6 +434,7 @@ static void qrtr_port_put(struct qrtr_sock *ipc) /* Remove port assignment. */ static void qrtr_port_remove(struct qrtr_sock *ipc) { + struct qrtr_ctrl_pkt *pkt; struct sk_buff *skb; int port = ipc->us.sq_port; struct sockaddr_qrtr to; @@ -482,8 +443,12 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) to.sq_node = QRTR_NODE_BCAST; to.sq_port = QRTR_PORT_CTRL; - skb = qrtr_alloc_del_client(&ipc->us); + skb = qrtr_alloc_ctrl_packet(&pkt); if (skb) { + pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); + pkt->client.node = cpu_to_le32(ipc->us.sq_node); + pkt->client.port = cpu_to_le32(ipc->us.sq_port); + skb_set_owner_w(skb, &ipc->sk); qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us, &to); From f507a9b6e63b9f41b61d199c36ae046d17b5fe4b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 10 Oct 2017 23:45:22 -0700 Subject: [PATCH 0840/2177] net: qrtr: Use sk_buff->cb in receive path Rather than parsing the header of incoming messages throughout the implementation do it once when we retrieve the message and store the relevant information in the "cb" member of the sk_buff. This allows us to, in a later commit, decode version 2 messages into this same structure. Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 70 ++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index a84edba7b1ef..7bca6ec892a5 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -48,6 +48,16 @@ struct qrtr_hdr { __le32 dst_port_id; } __packed; +struct qrtr_cb { + u32 src_node; + u32 src_port; + u32 dst_node; + u32 dst_port; + + u8 type; + u8 confirm_rx; +}; + #define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) struct qrtr_sock { @@ -216,6 +226,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) struct qrtr_node *node = ep->node; const struct qrtr_hdr *phdr = data; struct sk_buff *skb; + struct qrtr_cb *cb; unsigned int psize; unsigned int size; unsigned int type; @@ -245,8 +256,15 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (!skb) return -ENOMEM; - skb_reset_transport_header(skb); - skb_put_data(skb, data, len); + cb = (struct qrtr_cb *)skb->cb; + cb->src_node = le32_to_cpu(phdr->src_node_id); + cb->src_port = le32_to_cpu(phdr->src_port_id); + cb->dst_node = le32_to_cpu(phdr->dst_node_id); + cb->dst_port = le32_to_cpu(phdr->dst_port_id); + cb->type = type; + cb->confirm_rx = !!phdr->confirm_rx; + + skb_put_data(skb, data + QRTR_HDR_SIZE, size); skb_queue_tail(&node->rx_queue, skb); schedule_work(&node->work); @@ -295,26 +313,20 @@ static void qrtr_node_rx_work(struct work_struct *work) struct sk_buff *skb; while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { - const struct qrtr_hdr *phdr; - u32 dst_node, dst_port; struct qrtr_sock *ipc; - u32 src_node; + struct qrtr_cb *cb; int confirm; - phdr = (const struct qrtr_hdr *)skb_transport_header(skb); - src_node = le32_to_cpu(phdr->src_node_id); - dst_node = 
le32_to_cpu(phdr->dst_node_id); - dst_port = le32_to_cpu(phdr->dst_port_id); - confirm = !!phdr->confirm_rx; + cb = (struct qrtr_cb *)skb->cb; + src.sq_node = cb->src_node; + src.sq_port = cb->src_port; + dst.sq_node = cb->dst_node; + dst.sq_port = cb->dst_port; + confirm = !!cb->confirm_rx; - src.sq_node = src_node; - src.sq_port = le32_to_cpu(phdr->src_port_id); - dst.sq_node = dst_node; - dst.sq_port = dst_port; + qrtr_node_assign(node, cb->src_node); - qrtr_node_assign(node, src_node); - - ipc = qrtr_port_lookup(dst_port); + ipc = qrtr_port_lookup(cb->dst_port); if (!ipc) { kfree_skb(skb); } else { @@ -604,7 +616,7 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, struct sockaddr_qrtr *to) { struct qrtr_sock *ipc; - struct qrtr_hdr *phdr; + struct qrtr_cb *cb; ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ @@ -612,11 +624,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, return -ENODEV; } - phdr = skb_push(skb, QRTR_HDR_SIZE); - skb_reset_transport_header(skb); - - phdr->src_node_id = cpu_to_le32(from->sq_node); - phdr->src_port_id = cpu_to_le32(from->sq_port); + cb = (struct qrtr_cb *)skb->cb; + cb->src_node = from->sq_node; + cb->src_port = from->sq_port; if (sock_queue_rcv_skb(&ipc->sk, skb)) { qrtr_port_put(ipc); @@ -750,9 +760,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); - const struct qrtr_hdr *phdr; struct sock *sk = sock->sk; struct sk_buff *skb; + struct qrtr_cb *cb; int copied, rc; lock_sock(sk); @@ -769,22 +779,22 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, return rc; } - phdr = (const struct qrtr_hdr *)skb_transport_header(skb); - copied = le32_to_cpu(phdr->size); + copied = skb->len; if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; } - rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); + rc = skb_copy_datagram_msg(skb, 0, msg, copied); if (rc < 0) goto out; rc = copied; if (addr) { + cb = (struct qrtr_cb *)skb->cb; addr->sq_family = AF_QIPCRTR; - addr->sq_node = le32_to_cpu(phdr->src_node_id); - addr->sq_port = le32_to_cpu(phdr->src_port_id); + addr->sq_node = cb->src_node; + addr->sq_port = cb->src_port; msg->msg_namelen = sizeof(*addr); } @@ -877,7 +887,7 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: skb = skb_peek(&sk->sk_receive_queue); if (skb) - len = skb->len - QRTR_HDR_SIZE; + len = skb->len; rc = put_user(len, (int __user *)argp); break; case SIOCGIFADDR: From 194ccc88297ae78d0803adad83c6dcc369787c9e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 10 Oct 2017 23:45:23 -0700 Subject: [PATCH 0841/2177] net: qrtr: Support decoding incoming v2 packets Add the necessary logic for decoding incoming messages of version 2 as well. Also make sure there's room for the bigger of version 1 and 2 headers in the code allocating skbs for outgoing messages. Signed-off-by: Bjorn Andersson Signed-off-by: David S. 
Miller --- net/qrtr/qrtr.c | 134 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 39 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 7bca6ec892a5..e458ece96d3d 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -20,14 +20,15 @@ #include "qrtr.h" -#define QRTR_PROTO_VER 1 +#define QRTR_PROTO_VER_1 1 +#define QRTR_PROTO_VER_2 3 /* auto-bind range */ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff /** - * struct qrtr_hdr - (I|R)PCrouter packet header + * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1 * @version: protocol version * @type: packet type; one of QRTR_TYPE_* * @src_node_id: source node @@ -37,7 +38,7 @@ * @dst_node_id: destination node * @dst_port_id: destination port */ -struct qrtr_hdr { +struct qrtr_hdr_v1 { __le32 version; __le32 type; __le32 src_node_id; @@ -48,6 +49,32 @@ struct qrtr_hdr { __le32 dst_port_id; } __packed; +/** + * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions + * @version: protocol version + * @type: packet type; one of QRTR_TYPE_* + * @flags: bitmask of QRTR_FLAGS_* + * @optlen: length of optional header data + * @size: length of packet, excluding this header and optlen + * @src_node_id: source node + * @src_port_id: source port + * @dst_node_id: destination node + * @dst_port_id: destination port + */ +struct qrtr_hdr_v2 { + u8 version; + u8 type; + u8 flags; + u8 optlen; + __le32 size; + __le16 src_node_id; + __le16 src_port_id; + __le16 dst_node_id; + __le16 dst_port_id; +}; + +#define QRTR_FLAGS_CONFIRM_RX BIT(0) + struct qrtr_cb { u32 src_node; u32 src_port; @@ -58,7 +85,8 @@ struct qrtr_cb { u8 confirm_rx; }; -#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) +#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \ + sizeof(struct qrtr_hdr_v2)) struct qrtr_sock { /* WARNING: sk must be the first member */ @@ -154,12 +182,12 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to) { - struct qrtr_hdr *hdr; + struct qrtr_hdr_v1 *hdr; size_t len = skb->len; int rc = -ENODEV; - hdr = skb_push(skb, QRTR_HDR_SIZE); - hdr->version = cpu_to_le32(QRTR_PROTO_VER); + hdr = skb_push(skb, sizeof(*hdr)); + hdr->version = cpu_to_le32(QRTR_PROTO_VER_1); hdr->type = cpu_to_le32(type); hdr->src_node_id = cpu_to_le32(from->sq_node); hdr->src_port_id = cpu_to_le32(from->sq_port); @@ -224,32 +252,15 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) { struct qrtr_node *node = ep->node; - const struct qrtr_hdr *phdr = data; + const struct qrtr_hdr_v1 *v1; + const struct qrtr_hdr_v2 *v2; struct sk_buff *skb; struct qrtr_cb *cb; - unsigned int psize; unsigned int size; - unsigned int type; unsigned int ver; - unsigned int dst; + size_t hdrlen; - if (len < QRTR_HDR_SIZE || len & 3) - return -EINVAL; - - ver = le32_to_cpu(phdr->version); - size = le32_to_cpu(phdr->size); - type = le32_to_cpu(phdr->type); - dst = le32_to_cpu(phdr->dst_port_id); - - psize = (size + 3) & ~3; - - if (ver != QRTR_PROTO_VER) - return -EINVAL; - - if (len != psize + QRTR_HDR_SIZE) - return -EINVAL; - - if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA) + if (len & 3) return -EINVAL; skb = netdev_alloc_skb(NULL, len); @@ -257,19 +268,64 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) return -ENOMEM; cb = (struct qrtr_cb *)skb->cb; - cb->src_node = 
le32_to_cpu(phdr->src_node_id); - cb->src_port = le32_to_cpu(phdr->src_port_id); - cb->dst_node = le32_to_cpu(phdr->dst_node_id); - cb->dst_port = le32_to_cpu(phdr->dst_port_id); - cb->type = type; - cb->confirm_rx = !!phdr->confirm_rx; - skb_put_data(skb, data + QRTR_HDR_SIZE, size); + /* Version field in v1 is little endian, so this works for both cases */ + ver = *(u8*)data; + + switch (ver) { + case QRTR_PROTO_VER_1: + v1 = data; + hdrlen = sizeof(*v1); + + cb->type = le32_to_cpu(v1->type); + cb->src_node = le32_to_cpu(v1->src_node_id); + cb->src_port = le32_to_cpu(v1->src_port_id); + cb->confirm_rx = !!v1->confirm_rx; + cb->dst_node = le32_to_cpu(v1->dst_node_id); + cb->dst_port = le32_to_cpu(v1->dst_port_id); + + size = le32_to_cpu(v1->size); + break; + case QRTR_PROTO_VER_2: + v2 = data; + hdrlen = sizeof(*v2) + v2->optlen; + + cb->type = v2->type; + cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX); + cb->src_node = le16_to_cpu(v2->src_node_id); + cb->src_port = le16_to_cpu(v2->src_port_id); + cb->dst_node = le16_to_cpu(v2->dst_node_id); + cb->dst_port = le16_to_cpu(v2->dst_port_id); + + if (cb->src_port == (u16)QRTR_PORT_CTRL) + cb->src_port = QRTR_PORT_CTRL; + if (cb->dst_port == (u16)QRTR_PORT_CTRL) + cb->dst_port = QRTR_PORT_CTRL; + + size = le32_to_cpu(v2->size); + break; + default: + pr_err("qrtr: Invalid version %d\n", ver); + goto err; + } + + if (len != ALIGN(size, 4) + hdrlen) + goto err; + + if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA) + goto err; + + skb_put_data(skb, data + hdrlen, size); skb_queue_tail(&node->rx_queue, skb); schedule_work(&node->work); return 0; + +err: + kfree_skb(skb); + return -EINVAL; + } EXPORT_SYMBOL_GPL(qrtr_endpoint_post); @@ -287,11 +343,11 @@ static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt) const int pkt_len = sizeof(struct qrtr_ctrl_pkt); struct sk_buff *skb; - skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); + skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL); if (!skb) return NULL; - skb_reserve(skb, QRTR_HDR_SIZE); + skb_reserve(skb, QRTR_HDR_MAX_SIZE); *pkt = skb_put_zero(skb, pkt_len); return skb; @@ -720,12 +776,12 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } plen = (len + 3) & ~3; - skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, + skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) goto out_node; - skb_reserve(skb, QRTR_HDR_SIZE); + skb_reserve(skb, QRTR_HDR_MAX_SIZE); rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc) { From d7e6b347560d1824d7bccfa307ad34bd3f133706 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 11 Oct 2017 14:52:23 -0500 Subject: [PATCH 0842/2177] net: qcom/emac: specify the correct DMA mask The 64/32-bit DMA mask hackery in the EMAC driver is not actually necessary, and is technically not accurate. The EMAC hardware is limited to a 45-bit DMA address. Although no EMAC-enabled system can have that much DDR, an IOMMU could possibly provide a larger address. Rather than play games with the DMA mappings, the driver should provide a correct value and trust the DMA/IOMMU layers to do the right thing. Signed-off-by: Timur Tabi Signed-off-by: David S.
Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index f477ba29c569..ee6f2d27502c 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -615,20 +615,11 @@ static int emac_probe(struct platform_device *pdev) u32 reg; int ret; - /* The EMAC itself is capable of 64-bit DMA, so try that first. */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + /* The TPD buffer address is limited to 45 bits. */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(45)); if (ret) { - /* Some platforms may restrict the EMAC's address bus to less - * then the size of DDR. In this case, we need to try a - * smaller mask. We could try every possible smaller mask, - * but that's overkill. Instead, just fall to 32-bit, which - * should always work. - */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (ret) { - dev_err(&pdev->dev, "could not set DMA mask\n"); - return ret; - } + dev_err(&pdev->dev, "could not set DMA mask\n"); + return ret; } netdev = alloc_etherdev(sizeof(struct emac_adapter)); From 3958ffcd85060967a9c70bb92b21741073578d66 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 11 Oct 2017 14:52:24 -0500 Subject: [PATCH 0843/2177] net: qcom/emac: remove unused address arrays The EMAC is capable of multiple TX and RX rings, but the driver only supports one ring for each. One function had some left-over unused code that supports multiple rings, but all it did was make the code harder to read. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 3ed9033e56db..9cbb27263742 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -309,22 +309,12 @@ void emac_mac_mode_config(struct emac_adapter *adpt) /* Config descriptor rings */ static void emac_mac_dma_rings_config(struct emac_adapter *adpt) { - static const unsigned short tpd_q_offset[] = { - EMAC_DESC_CTRL_8, EMAC_H1TPD_BASE_ADDR_LO, - EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO}; - static const unsigned short rfd_q_offset[] = { - EMAC_DESC_CTRL_2, EMAC_DESC_CTRL_10, - EMAC_DESC_CTRL_12, EMAC_DESC_CTRL_13}; - static const unsigned short rrd_q_offset[] = { - EMAC_DESC_CTRL_5, EMAC_DESC_CTRL_14, - EMAC_DESC_CTRL_15, EMAC_DESC_CTRL_16}; - /* TPD (Transmit Packet Descriptor) */ writel(upper_32_bits(adpt->tx_q.tpd.dma_addr), adpt->base + EMAC_DESC_CTRL_1); writel(lower_32_bits(adpt->tx_q.tpd.dma_addr), - adpt->base + tpd_q_offset[0]); + adpt->base + EMAC_DESC_CTRL_8); writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK, adpt->base + EMAC_DESC_CTRL_9); @@ -334,9 +324,9 @@ static void emac_mac_dma_rings_config(struct emac_adapter *adpt) adpt->base + EMAC_DESC_CTRL_0); writel(lower_32_bits(adpt->rx_q.rfd.dma_addr), - adpt->base + rfd_q_offset[0]); + adpt->base + EMAC_DESC_CTRL_2); writel(lower_32_bits(adpt->rx_q.rrd.dma_addr), - adpt->base + rrd_q_offset[0]); + adpt->base + EMAC_DESC_CTRL_5); writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK, adpt->base + EMAC_DESC_CTRL_3); From df1ec1b9d0df57e96011f175418dc95b1af46821 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 11 Oct 2017 14:52:25 -0500 
Subject: [PATCH 0844/2177] net: qcom/emac: enforce DMA address restrictions The EMAC has a restriction that the upper 32 bits of the base addresses for the RFD and RRD rings must be the same. To ensure that restriction, we allocate twice the space for the RRD and locate it at an appropriate address. We also re-arrange the allocations so that invalid addresses are even less likely. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 9cbb27263742..0f5ece5d9507 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -734,6 +734,11 @@ static int emac_rx_descs_alloc(struct emac_adapter *adpt) rx_q->rrd.size = rx_q->rrd.count * (adpt->rrd_size * 4); rx_q->rfd.size = rx_q->rfd.count * (adpt->rfd_size * 4); + /* Check if the RRD and RFD are aligned properly, and if not, adjust. */ + if (upper_32_bits(ring_header->dma_addr) != + upper_32_bits(ring_header->dma_addr + ALIGN(rx_q->rrd.size, 8))) + ring_header->used = ALIGN(rx_q->rrd.size, 8); + rx_q->rrd.dma_addr = ring_header->dma_addr + ring_header->used; rx_q->rrd.v_addr = ring_header->v_addr + ring_header->used; ring_header->used += ALIGN(rx_q->rrd.size, 8); @@ -767,11 +772,18 @@ int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt) /* Ring DMA buffer. Each ring may need up to 8 bytes for alignment, * hence the additional padding bytes are allocated. + * + * Also double the memory allocated for the RRD so that we can + * re-align it if necessary. The EMAC has a restriction that the + * upper 32 bits of the base addresses for the RFD and RRD rings + * must be the same. It is extremely unlikely that this is not the + * case, since the rings are only a few KB in size. However, we + * need to check for this anyway, and if the two rings are not + * compliant, then we re-align. 
*/ - ring_header->size = num_tx_descs * (adpt->tpd_size * 4) + - num_rx_descs * (adpt->rfd_size * 4) + - num_rx_descs * (adpt->rrd_size * 4) + - 8 + 2 * 8; /* 8 byte per one Tx and two Rx rings */ + ring_header->size = ALIGN(num_tx_descs * (adpt->tpd_size * 4), 8) + + ALIGN(num_rx_descs * (adpt->rfd_size * 4), 8) + + ALIGN(num_rx_descs * (adpt->rrd_size * 4), 8) * 2; ring_header->used = 0; ring_header->v_addr = dma_zalloc_coherent(dev, ring_header->size, @@ -780,26 +792,23 @@ int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt) if (!ring_header->v_addr) return -ENOMEM; - ring_header->used = ALIGN(ring_header->dma_addr, 8) - - ring_header->dma_addr; - - ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q); - if (ret) { - netdev_err(adpt->netdev, "error: Tx Queue alloc failed\n"); - goto err_alloc_tx; - } - ret = emac_rx_descs_alloc(adpt); if (ret) { netdev_err(adpt->netdev, "error: Rx Queue alloc failed\n"); goto err_alloc_rx; } + ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q); + if (ret) { + netdev_err(adpt->netdev, "transmit queue allocation failed\n"); + goto err_alloc_tx; + } + return 0; -err_alloc_rx: - emac_tx_q_bufs_free(adpt); err_alloc_tx: + emac_rx_q_bufs_free(adpt); +err_alloc_rx: dma_free_coherent(dev, ring_header->size, ring_header->v_addr, ring_header->dma_addr); From 740d6f188fb71ae13e3e9f7208b6b3094517509d Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 11 Oct 2017 14:52:26 -0500 Subject: [PATCH 0845/2177] net: qcom/emac: clean up some TX/RX error messages Some of the error messages that are printed by the interrupt handlers are poorly written. For example, many don't include a device prefix, so there's no indication that they are EMAC errors. Also use rate limiting for all messages that could be printed from interrupt context. Signed-off-by: Timur Tabi Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 15 ++++++--------- drivers/net/ethernet/qualcomm/emac/emac.c | 8 ++++---- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 29ba37a08372..e8ab512ee7e3 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -68,10 +68,10 @@ static void emac_sgmii_link_init(struct emac_adapter *adpt) writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); } -static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits) +static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u8 irq_bits) { struct emac_sgmii *phy = &adpt->phy; - u32 status; + u8 status; writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR); writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD); @@ -86,9 +86,8 @@ static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits) EMAC_SGMII_PHY_INTERRUPT_STATUS, status, !(status & irq_bits), 1, SGMII_PHY_IRQ_CLR_WAIT_TIME)) { - netdev_err(adpt->netdev, - "error: failed clear SGMII irq: status:0x%x bits:0x%x\n", - status, irq_bits); + net_err_ratelimited("%s: failed to clear SGMII irq: status:0x%x bits:0x%x\n", + adpt->netdev->name, status, irq_bits); return -EIO; } @@ -109,7 +108,7 @@ static irqreturn_t emac_sgmii_interrupt(int irq, void *data) { struct emac_adapter *adpt = data; struct emac_sgmii *phy = &adpt->phy; - u32 status; + u8 status; status = readl(phy->base + EMAC_SGMII_PHY_INTERRUPT_STATUS); status &= SGMII_ISR_MASK; @@ -139,10 +138,8 @@ static irqreturn_t emac_sgmii_interrupt(int irq, void *data) atomic_set(&phy->decode_error_count, 0); } - if (emac_sgmii_irq_clear(adpt, status)) { - netdev_warn(adpt->netdev, "failed to clear SGMII interrupt\n"); + if (emac_sgmii_irq_clear(adpt, status)) schedule_work(&adpt->work_thread); - } return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index ee6f2d27502c..70c92b649b29 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -148,9 +148,8 @@ static irqreturn_t emac_isr(int _irq, void *data) goto exit; if (status & ISR_ERROR) { - netif_warn(adpt, intr, adpt->netdev, - "warning: error irq status 0x%lx\n", - status & ISR_ERROR); + net_err_ratelimited("%s: error interrupt 0x%lx\n", + adpt->netdev->name, status & ISR_ERROR); /* reset MAC */ schedule_work(&adpt->work_thread); } @@ -169,7 +168,8 @@ static irqreturn_t emac_isr(int _irq, void *data) emac_mac_tx_process(adpt, &adpt->tx_q); if (status & ISR_OVER) - net_warn_ratelimited("warning: TX/RX overflow\n"); + net_warn_ratelimited("%s: TX/RX overflow interrupt\n", + adpt->netdev->name); exit: /* enable the interrupt */ From d8bbb07adbfab5a8e03c361c7dd67fe0003d3757 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 11 Oct 2017 18:43:52 -0600 Subject: [PATCH 0846/2177] net: qualcomm: rmnet: Remove existing logic for bridge mode This will be rewritten in the following patches. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. 
Miller --- .../ethernet/qualcomm/rmnet/rmnet_config.h | 1 - .../ethernet/qualcomm/rmnet/rmnet_handlers.c | 77 +++---------------- 2 files changed, 9 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index dde4e9f14f4a..0b0c5a79c1dc 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -34,7 +34,6 @@ struct rmnet_endpoint { */ struct rmnet_port { struct net_device *dev; - struct rmnet_endpoint local_ep; struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP]; u32 ingress_data_format; u32 egress_data_format; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 540c7622dcb1..b50f40181661 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -44,56 +44,18 @@ static void rmnet_set_skb_proto(struct sk_buff *skb) /* Generic handler */ static rx_handler_result_t -rmnet_bridge_handler(struct sk_buff *skb, struct rmnet_endpoint *ep) +rmnet_deliver_skb(struct sk_buff *skb) { - if (!ep->egress_dev) - kfree_skb(skb); - else - rmnet_egress_handler(skb, ep); + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + rmnet_vnd_rx_fixup(skb, skb->dev); + skb->pkt_type = PACKET_HOST; + skb_set_mac_header(skb, 0); + netif_receive_skb(skb); return RX_HANDLER_CONSUMED; } -static rx_handler_result_t -rmnet_deliver_skb(struct sk_buff *skb, struct rmnet_endpoint *ep) -{ - switch (ep->rmnet_mode) { - case RMNET_EPMODE_NONE: - return RX_HANDLER_PASS; - - case RMNET_EPMODE_BRIDGE: - return rmnet_bridge_handler(skb, ep); - - case RMNET_EPMODE_VND: - skb_reset_transport_header(skb); - skb_reset_network_header(skb); - rmnet_vnd_rx_fixup(skb, skb->dev); - - skb->pkt_type = PACKET_HOST; - skb_set_mac_header(skb, 0); - netif_receive_skb(skb); - return RX_HANDLER_CONSUMED; - - default: - kfree_skb(skb); - return RX_HANDLER_CONSUMED; - } -} - -static rx_handler_result_t -rmnet_ingress_deliver_packet(struct sk_buff *skb, - struct rmnet_port *port) -{ - if (!port) { - kfree_skb(skb); - return RX_HANDLER_CONSUMED; - } - - skb->dev = port->local_ep.egress_dev; - - return rmnet_deliver_skb(skb, &port->local_ep); -} - /* MAP handler */ static rx_handler_result_t @@ -130,7 +92,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb, skb_pull(skb, sizeof(struct rmnet_map_header)); skb_trim(skb, len); rmnet_set_skb_proto(skb); - return rmnet_deliver_skb(skb, ep); + return rmnet_deliver_skb(skb); } static rx_handler_result_t @@ -204,29 +166,8 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) dev = skb->dev; port = rmnet_get_port(dev); - if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) { + if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) rc = rmnet_map_ingress_handler(skb, port); - } else { - switch (ntohs(skb->protocol)) { - case ETH_P_MAP: - if (port->local_ep.rmnet_mode == - RMNET_EPMODE_BRIDGE) { - rc = rmnet_ingress_deliver_packet(skb, port); - } else { - kfree_skb(skb); - rc = RX_HANDLER_CONSUMED; - } - break; - - case ETH_P_IP: - case ETH_P_IPV6: - rc = rmnet_ingress_deliver_packet(skb, port); - break; - - default: - rc = RX_HANDLER_PASS; - } - } return rc; } From 1281726ec341702b16c3e67ea37ba485cf72ee66 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 11 Oct 2017 18:43:53 -0600 Subject: [PATCH 0847/2177] net: qualcomm: rmnet: Remove some unused defines Most 
of these constants were used in the initial patchset where custom netlink configuration was used and hence are no longer relevant. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h index 7967198fdd90..49102f922b31 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h @@ -19,23 +19,15 @@ #define RMNET_TX_QUEUE_LEN 1000 /* Constants */ -#define RMNET_EGRESS_FORMAT__RESERVED__ BIT(0) #define RMNET_EGRESS_FORMAT_MAP BIT(1) #define RMNET_EGRESS_FORMAT_AGGREGATION BIT(2) #define RMNET_EGRESS_FORMAT_MUXING BIT(3) -#define RMNET_EGRESS_FORMAT_MAP_CKSUMV3 BIT(4) -#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4 BIT(5) -#define RMNET_INGRESS_FIX_ETHERNET BIT(0) #define RMNET_INGRESS_FORMAT_MAP BIT(1) #define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(2) #define RMNET_INGRESS_FORMAT_DEMUXING BIT(3) #define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(4) -#define RMNET_INGRESS_FORMAT_MAP_CKSUMV3 BIT(5) -#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4 BIT(6) -/* Pass the frame up the stack with no modifications to skb->dev */ -#define RMNET_EPMODE_NONE (0) /* Replace skb->dev to a virtual rmnet device and pass up the stack */ #define RMNET_EPMODE_VND (1) /* Pass the frame directly to another device with dev_queue_xmit() */ From 9148963201a4627a632d2c769805c9278f6d22d7 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 11 Oct 2017 18:43:54 -0600 Subject: [PATCH 0848/2177] net: qualcomm: rmnet: Move rmnet_mode to rmnet_port Mode information on the real device makes it easier to route packets to rmnet device or bridged device based on the configuration. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 12 +++++------- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 2 +- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 8403eea08d0e..85fce9c4d234 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -124,20 +124,17 @@ static int rmnet_register_real_device(struct net_device *real_dev) } static void rmnet_set_endpoint_config(struct net_device *dev, - u8 mux_id, u8 rmnet_mode, - struct net_device *egress_dev) + u8 mux_id, struct net_device *egress_dev) { struct rmnet_endpoint *ep; - netdev_dbg(dev, "id %d mode %d dev %s\n", - mux_id, rmnet_mode, egress_dev->name); + netdev_dbg(dev, "id %d dev %s\n", mux_id, egress_dev->name); ep = rmnet_get_endpoint(dev, mux_id); /* This config is cleared on every set, so its ok to not * clear it on a device delete. 
*/ memset(ep, 0, sizeof(struct rmnet_endpoint)); - ep->rmnet_mode = rmnet_mode; ep->egress_dev = egress_dev; ep->mux_id = mux_id; } @@ -183,9 +180,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, ingress_format, egress_format); port->egress_data_format = egress_format; port->ingress_data_format = ingress_format; + port->rmnet_mode = mode; - rmnet_set_endpoint_config(real_dev, mux_id, mode, dev); - rmnet_set_endpoint_config(dev, mux_id, mode, real_dev); + rmnet_set_endpoint_config(real_dev, mux_id, dev); + rmnet_set_endpoint_config(dev, mux_id, real_dev); return 0; err2: diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 0b0c5a79c1dc..03d473f39476 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -24,7 +24,6 @@ * Exact usage of this parameter depends on the rmnet_mode. */ struct rmnet_endpoint { - u8 rmnet_mode; u8 mux_id; struct net_device *egress_dev; }; @@ -39,6 +38,7 @@ struct rmnet_port { u32 egress_data_format; struct net_device *rmnet_devices[RMNET_MAX_LOGICAL_EP]; u8 nr_rmnet_devs; + u8 rmnet_mode; }; extern struct rtnl_link_ops rmnet_link_ops; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index b50f40181661..86e37cc3b52c 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -205,8 +205,7 @@ void rmnet_egress_handler(struct sk_buff *skb, } } - if (ep->rmnet_mode == RMNET_EPMODE_VND) - rmnet_vnd_tx_fixup(skb, orig_dev); + rmnet_vnd_tx_fixup(skb, orig_dev); dev_queue_xmit(skb); } From 56470c927f1ba1e101b5e5a93e02d23a14fd99b7 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 11 Oct 2017 18:43:55 -0600 Subject: [PATCH 0849/2177] net: qualcomm: rmnet: Remove duplicate setting of rmnet private info The end point is set twice in the local_ep as well as the mux_id and the real_dev in the rmnet private structure. Remove the local_ep. While these elements are equivalent, rmnet_endpoint will be used only as part of the rmnet_port for muxed scenarios in VND mode. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. 
Miller --- .../ethernet/qualcomm/rmnet/rmnet_config.c | 10 ++-------- .../ethernet/qualcomm/rmnet/rmnet_config.h | 4 ---- .../ethernet/qualcomm/rmnet/rmnet_handlers.c | 18 ++++++++++-------- .../ethernet/qualcomm/rmnet/rmnet_handlers.h | 3 +-- .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 19 ++----------------- .../net/ethernet/qualcomm/rmnet/rmnet_vnd.h | 1 - 6 files changed, 15 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 85fce9c4d234..96058bbccf71 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -67,13 +67,8 @@ rmnet_get_endpoint(struct net_device *dev, int config_id) struct rmnet_endpoint *ep; struct rmnet_port *port; - if (!rmnet_is_real_dev_registered(dev)) { - ep = rmnet_vnd_get_endpoint(dev); - } else { - port = rmnet_get_port_rtnl(dev); - - ep = &port->muxed_ep[config_id]; - } + port = rmnet_get_port_rtnl(dev); + ep = &port->muxed_ep[config_id]; return ep; } @@ -183,7 +178,6 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, port->rmnet_mode = mode; rmnet_set_endpoint_config(real_dev, mux_id, dev); - rmnet_set_endpoint_config(dev, mux_id, real_dev); return 0; err2: diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 03d473f39476..c5f5c6d957c0 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -20,9 +20,6 @@ #define RMNET_MAX_LOGICAL_EP 255 -/* Information about the next device to deliver the packet to. - * Exact usage of this parameter depends on the rmnet_mode. - */ struct rmnet_endpoint { u8 mux_id; struct net_device *egress_dev; @@ -44,7 +41,6 @@ struct rmnet_port { extern struct rtnl_link_ops rmnet_link_ops; struct rmnet_priv { - struct rmnet_endpoint local_ep; u8 mux_id; struct net_device *real_dev; }; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 86e37cc3b52c..e0802d32d6a5 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -116,8 +116,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb, } static int rmnet_map_egress_handler(struct sk_buff *skb, - struct rmnet_port *port, - struct rmnet_endpoint *ep, + struct rmnet_port *port, u8 mux_id, struct net_device *orig_dev) { int required_headroom, additional_header_len; @@ -136,10 +135,10 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, return RMNET_MAP_CONSUMED; if (port->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) { - if (ep->mux_id == 0xff) + if (mux_id == 0xff) map_header->mux_id = 0; else - map_header->mux_id = ep->mux_id; + map_header->mux_id = mux_id; } skb->protocol = htons(ETH_P_MAP); @@ -176,14 +175,17 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) * for egress device configured in logical endpoint. Packet is then transmitted * on the egress device. 
*/ -void rmnet_egress_handler(struct sk_buff *skb, - struct rmnet_endpoint *ep) +void rmnet_egress_handler(struct sk_buff *skb) { struct net_device *orig_dev; struct rmnet_port *port; + struct rmnet_priv *priv; + u8 mux_id; orig_dev = skb->dev; - skb->dev = ep->egress_dev; + priv = netdev_priv(orig_dev); + skb->dev = priv->real_dev; + mux_id = priv->mux_id; port = rmnet_get_port(skb->dev); if (!port) { @@ -192,7 +194,7 @@ void rmnet_egress_handler(struct sk_buff *skb, } if (port->egress_data_format & RMNET_EGRESS_FORMAT_MAP) { - switch (rmnet_map_egress_handler(skb, port, ep, orig_dev)) { + switch (rmnet_map_egress_handler(skb, port, mux_id, orig_dev)) { case RMNET_MAP_CONSUMED: return; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h index f2638cf5693c..3537e4ceedb3 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h @@ -18,8 +18,7 @@ #include "rmnet_config.h" -void rmnet_egress_handler(struct sk_buff *skb, - struct rmnet_endpoint *ep); +void rmnet_egress_handler(struct sk_buff *skb); rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 7f90d5587653..4ca59a4389b8 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -45,8 +45,8 @@ static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb, struct rmnet_priv *priv; priv = netdev_priv(dev); - if (priv->local_ep.egress_dev) { - rmnet_egress_handler(skb, &priv->local_ep); + if (priv->real_dev) { + rmnet_egress_handler(skb); } else { dev->stats.tx_dropped++; kfree_skb(skb); @@ -143,21 +143,6 @@ u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev) return priv->mux_id; } -/* Gets the logical endpoint configuration for a RmNet virtual network device - * node. Caller should confirm that devices is a RmNet VND before calling. - */ -struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *rmnet_dev) -{ - struct rmnet_priv *priv; - - if (!rmnet_dev) - return NULL; - - priv = netdev_priv(rmnet_dev); - - return &priv->local_ep; -} - int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable) { netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h index 8a4042f0f6bf..cae134d35774 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h @@ -17,7 +17,6 @@ #define _RMNET_VND_H_ int rmnet_vnd_do_flow_control(struct net_device *dev, int enable); -struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *dev); int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct rmnet_port *port, struct net_device *real_dev); From 5451237ff7a77ded1d81538e3daa76dc3ee60538 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 11 Oct 2017 18:43:56 -0600 Subject: [PATCH 0850/2177] net: qualcomm: rmnet: Remove duplicate setting of rmnet_devices The rmnet_devices information is already stored in muxed_ep, so storing this in rmnet_devices[] again is redundant. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 1 - drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index c5f5c6d957c0..123ccf41fc95 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -33,7 +33,6 @@ struct rmnet_port { struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP]; u32 ingress_data_format; u32 egress_data_format; - struct net_device *rmnet_devices[RMNET_MAX_LOGICAL_EP]; u8 nr_rmnet_devs; u8 rmnet_mode; }; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 4ca59a4389b8..8b8497b17f52 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -105,12 +105,12 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct rmnet_priv *priv; int rc; - if (port->rmnet_devices[id]) + if (port->muxed_ep[id].egress_dev) return -EINVAL; rc = register_netdevice(rmnet_dev); if (!rc) { - port->rmnet_devices[id] = rmnet_dev; + port->muxed_ep[id].egress_dev = rmnet_dev; port->nr_rmnet_devs++; rmnet_dev->rtnl_link_ops = &rmnet_link_ops; @@ -127,10 +127,10 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, int rmnet_vnd_dellink(u8 id, struct rmnet_port *port) { - if (id >= RMNET_MAX_LOGICAL_EP || !port->rmnet_devices[id]) + if (id >= RMNET_MAX_LOGICAL_EP || !port->muxed_ep[id].egress_dev) return -EINVAL; - port->rmnet_devices[id] = NULL; + port->muxed_ep[id].egress_dev = NULL; port->nr_rmnet_devs--; return 0; } From 3352e6c45760fd6675468a35ef699ab94617cab4 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 11 Oct 2017 18:43:57 -0600 Subject: [PATCH 0851/2177] net: qualcomm: rmnet: Convert the muxed endpoint to hlist Rather than using a static array, use a hlist to store the muxed endpoints and use the mux id to query the rmnet_device. This is useful as usually very few mux ids are used. Signed-off-by: Subash Abhinov Kasiviswanathan Cc: Dan Williams Signed-off-by: David S. 
Miller --- .../ethernet/qualcomm/rmnet/rmnet_config.c | 75 ++++++++++--------- .../ethernet/qualcomm/rmnet/rmnet_config.h | 4 +- .../ethernet/qualcomm/rmnet/rmnet_handlers.c | 17 +++-- .../qualcomm/rmnet/rmnet_map_command.c | 4 +- .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 15 ++-- .../net/ethernet/qualcomm/rmnet/rmnet_vnd.h | 6 +- 6 files changed, 68 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 96058bbccf71..b5fe3f4d22a6 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -61,18 +61,6 @@ rmnet_get_port_rtnl(const struct net_device *real_dev) return rtnl_dereference(real_dev->rx_handler_data); } -static struct rmnet_endpoint* -rmnet_get_endpoint(struct net_device *dev, int config_id) -{ - struct rmnet_endpoint *ep; - struct rmnet_port *port; - - port = rmnet_get_port_rtnl(dev); - ep = &port->muxed_ep[config_id]; - - return ep; -} - static int rmnet_unregister_real_device(struct net_device *real_dev, struct rmnet_port *port) { @@ -93,7 +81,7 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, static int rmnet_register_real_device(struct net_device *real_dev) { struct rmnet_port *port; - int rc; + int rc, entry; ASSERT_RTNL(); @@ -114,26 +102,13 @@ static int rmnet_register_real_device(struct net_device *real_dev) /* hold on to real dev for MAP data */ dev_hold(real_dev); + for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++) + INIT_HLIST_HEAD(&port->muxed_ep[entry]); + netdev_dbg(real_dev, "registered with rmnet\n"); return 0; } -static void rmnet_set_endpoint_config(struct net_device *dev, - u8 mux_id, struct net_device *egress_dev) -{ - struct rmnet_endpoint *ep; - - netdev_dbg(dev, "id %d dev %s\n", mux_id, egress_dev->name); - - ep = rmnet_get_endpoint(dev, mux_id); - /* This config is cleared on every set, so its ok to not - * clear it on a device delete. 
- */ - memset(ep, 0, sizeof(struct rmnet_endpoint)); - ep->egress_dev = egress_dev; - ep->mux_id = mux_id; -} - static int rmnet_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -145,6 +120,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, RMNET_EGRESS_FORMAT_MAP; struct net_device *real_dev; int mode = RMNET_EPMODE_VND; + struct rmnet_endpoint *ep; struct rmnet_port *port; int err = 0; u16 mux_id; @@ -156,6 +132,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (!data[IFLA_VLAN_ID]) return -EINVAL; + ep = kzalloc(sizeof(*ep), GFP_ATOMIC); + if (!ep) + return -ENOMEM; + mux_id = nla_get_u16(data[IFLA_VLAN_ID]); err = rmnet_register_real_device(real_dev); @@ -163,7 +143,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, goto err0; port = rmnet_get_port_rtnl(real_dev); - err = rmnet_vnd_newlink(mux_id, dev, port, real_dev); + err = rmnet_vnd_newlink(mux_id, dev, port, real_dev, ep); if (err) goto err1; @@ -177,11 +157,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, port->ingress_data_format = ingress_format; port->rmnet_mode = mode; - rmnet_set_endpoint_config(real_dev, mux_id, dev); + hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); return 0; err2: - rmnet_vnd_dellink(mux_id, port); + rmnet_vnd_dellink(mux_id, port, ep); err1: rmnet_unregister_real_device(real_dev, port); err0: @@ -191,6 +171,7 @@ err0: static void rmnet_dellink(struct net_device *dev, struct list_head *head) { struct net_device *real_dev; + struct rmnet_endpoint *ep; struct rmnet_port *port; u8 mux_id; @@ -204,8 +185,15 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) port = rmnet_get_port_rtnl(real_dev); mux_id = rmnet_vnd_get_mux(dev); - rmnet_vnd_dellink(mux_id, port); netdev_upper_dev_unlink(dev, real_dev); + + ep = rmnet_get_endpoint(port, mux_id); + if (ep) { + hlist_del_init_rcu(&ep->hlnode); + rmnet_vnd_dellink(mux_id, port, ep); + kfree(ep); + } + rmnet_unregister_real_device(real_dev, port); unregister_netdevice_queue(dev, head); @@ -214,11 +202,16 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data) { struct rmnet_walk_data *d = data; + struct rmnet_endpoint *ep; u8 mux_id; mux_id = rmnet_vnd_get_mux(rmnet_dev); - - rmnet_vnd_dellink(mux_id, d->port); + ep = rmnet_get_endpoint(d->port, mux_id); + if (ep) { + hlist_del_init_rcu(&ep->hlnode); + rmnet_vnd_dellink(mux_id, d->port, ep); + kfree(ep); + } netdev_upper_dev_unlink(rmnet_dev, d->real_dev); unregister_netdevice_queue(rmnet_dev, d->head); @@ -316,6 +309,18 @@ struct rmnet_port *rmnet_get_port(struct net_device *real_dev) return NULL; } +struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id) +{ + struct rmnet_endpoint *ep; + + hlist_for_each_entry_rcu(ep, &port->muxed_ep[mux_id], hlnode) { + if (ep->mux_id == mux_id) + return ep; + } + + return NULL; +} + /* Startup/Shutdown */ static int __init rmnet_init(void) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 123ccf41fc95..8849986980f8 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -23,6 +23,7 @@ struct rmnet_endpoint { u8 mux_id; struct net_device *egress_dev; + struct hlist_node hlnode; }; /* One instance of this 
structure is instantiated for each real_dev associated @@ -30,11 +31,11 @@ struct rmnet_endpoint { */ struct rmnet_port { struct net_device *dev; - struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP]; u32 ingress_data_format; u32 egress_data_format; u8 nr_rmnet_devs; u8 rmnet_mode; + struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP]; }; extern struct rtnl_link_ops rmnet_link_ops; @@ -45,5 +46,6 @@ struct rmnet_priv { }; struct rmnet_port *rmnet_get_port(struct net_device *real_dev); +struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id); #endif /* _RMNET_CONFIG_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index e0802d32d6a5..fa24ffb69713 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -71,19 +71,18 @@ __rmnet_map_ingress_handler(struct sk_buff *skb, & RMNET_INGRESS_FORMAT_MAP_COMMANDS) return rmnet_map_command(skb, port); - kfree_skb(skb); - return RX_HANDLER_CONSUMED; + goto free_skb; } mux_id = RMNET_MAP_GET_MUX_ID(skb); len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb); - if (mux_id >= RMNET_MAX_LOGICAL_EP) { - kfree_skb(skb); - return RX_HANDLER_CONSUMED; - } + if (mux_id >= RMNET_MAX_LOGICAL_EP) + goto free_skb; - ep = &port->muxed_ep[mux_id]; + ep = rmnet_get_endpoint(port, mux_id); + if (!ep) + goto free_skb; if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING) skb->dev = ep->egress_dev; @@ -93,6 +92,10 @@ __rmnet_map_ingress_handler(struct sk_buff *skb, skb_trim(skb, len); rmnet_set_skb_proto(skb); return rmnet_deliver_skb(skb); + +free_skb: + kfree_skb(skb); + return RX_HANDLER_CONSUMED; } static rx_handler_result_t diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c index d1ea5e21b982..74d362f71cce 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c @@ -17,7 +17,7 @@ #include "rmnet_vnd.h" static u8 rmnet_map_do_flow_control(struct sk_buff *skb, - struct rmnet_port *rdinfo, + struct rmnet_port *port, int enable) { struct rmnet_map_control_command *cmd; @@ -37,7 +37,7 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb, return RX_HANDLER_CONSUMED; } - ep = &rdinfo->muxed_ep[mux_id]; + ep = rmnet_get_endpoint(port, mux_id); vnd = ep->egress_dev; ip_family = cmd->flow_control.ip_family; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 8b8497b17f52..1b6747d7154f 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -100,17 +100,19 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev) int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct rmnet_port *port, - struct net_device *real_dev) + struct net_device *real_dev, + struct rmnet_endpoint *ep) { struct rmnet_priv *priv; int rc; - if (port->muxed_ep[id].egress_dev) + if (ep->egress_dev) return -EINVAL; rc = register_netdevice(rmnet_dev); if (!rc) { - port->muxed_ep[id].egress_dev = rmnet_dev; + ep->egress_dev = rmnet_dev; + ep->mux_id = id; port->nr_rmnet_devs++; rmnet_dev->rtnl_link_ops = &rmnet_link_ops; @@ -125,12 +127,13 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, return rc; } -int rmnet_vnd_dellink(u8 id, struct rmnet_port *port) +int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, + struct rmnet_endpoint *ep) { 
- if (id >= RMNET_MAX_LOGICAL_EP || !port->muxed_ep[id].egress_dev) + if (id >= RMNET_MAX_LOGICAL_EP || !ep->egress_dev) return -EINVAL; - port->muxed_ep[id].egress_dev = NULL; + ep->egress_dev = NULL; port->nr_rmnet_devs--; return 0; } diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h index cae134d35774..71e4c3286951 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h @@ -19,8 +19,10 @@ int rmnet_vnd_do_flow_control(struct net_device *dev, int enable); int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct rmnet_port *port, - struct net_device *real_dev); -int rmnet_vnd_dellink(u8 id, struct rmnet_port *port); + struct net_device *real_dev, + struct rmnet_endpoint *ep); +int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, + struct rmnet_endpoint *ep); void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev); void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev); u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev); From 60d58f971c1077a0f2467b2d5bc38058df43a819 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 11 Oct 2017 18:43:58 -0600 Subject: [PATCH 0852/2177] net: qualcomm: rmnet: Implement bridge mode Add support to bridge two devices which can send multiplexing and aggregation (MAP) data. This is done only when the data itself is not going to be consumed in the stack but is being passed on to a different endpoint. This is mainly used for testing. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- .../ethernet/qualcomm/rmnet/rmnet_config.c | 93 ++++++++++++++++++- .../ethernet/qualcomm/rmnet/rmnet_config.h | 7 +- .../ethernet/qualcomm/rmnet/rmnet_handlers.c | 26 +++++- .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 2 + 4 files changed, 122 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index b5fe3f4d22a6..71bee1af71ef 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -109,6 +109,36 @@ static int rmnet_register_real_device(struct net_device *real_dev) return 0; } +static void rmnet_unregister_bridge(struct net_device *dev, + struct rmnet_port *port) +{ + struct net_device *rmnet_dev, *bridge_dev; + struct rmnet_port *bridge_port; + + if (port->rmnet_mode != RMNET_EPMODE_BRIDGE) + return; + + /* bridge slave handling */ + if (!port->nr_rmnet_devs) { + rmnet_dev = netdev_master_upper_dev_get_rcu(dev); + netdev_upper_dev_unlink(dev, rmnet_dev); + + bridge_dev = port->bridge_ep; + + bridge_port = rmnet_get_port_rtnl(bridge_dev); + bridge_port->bridge_ep = NULL; + bridge_port->rmnet_mode = RMNET_EPMODE_VND; + } else { + bridge_dev = port->bridge_ep; + + bridge_port = rmnet_get_port_rtnl(bridge_dev); + rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev); + netdev_upper_dev_unlink(bridge_dev, rmnet_dev); + + rmnet_unregister_real_device(bridge_dev, bridge_port); + } +} + static int rmnet_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -190,10 +220,10 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) ep = rmnet_get_endpoint(port, mux_id); if (ep) { hlist_del_init_rcu(&ep->hlnode); + rmnet_unregister_bridge(dev, port); rmnet_vnd_dellink(mux_id, port, ep); kfree(ep); } - 
rmnet_unregister_real_device(real_dev, port); unregister_netdevice_queue(dev, head); @@ -237,6 +267,8 @@ static void rmnet_force_unassociate_device(struct net_device *dev) d.port = port; rcu_read_lock(); + rmnet_unregister_bridge(dev, port); + netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d); rcu_read_unlock(); unregister_netdevice_many(&list); @@ -321,6 +353,65 @@ struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id) return NULL; } +int rmnet_add_bridge(struct net_device *rmnet_dev, + struct net_device *slave_dev, + struct netlink_ext_ack *extack) +{ + struct rmnet_priv *priv = netdev_priv(rmnet_dev); + struct net_device *real_dev = priv->real_dev; + struct rmnet_port *port, *slave_port; + int err; + + port = rmnet_get_port(real_dev); + + /* If there is more than one rmnet dev attached, its probably being + * used for muxing. Skip the briding in that case + */ + if (port->nr_rmnet_devs > 1) + return -EINVAL; + + if (rmnet_is_real_dev_registered(slave_dev)) + return -EBUSY; + + err = rmnet_register_real_device(slave_dev); + if (err) + return -EBUSY; + + err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL, + extack); + if (err) + return -EINVAL; + + slave_port = rmnet_get_port(slave_dev); + slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; + slave_port->bridge_ep = real_dev; + + port->rmnet_mode = RMNET_EPMODE_BRIDGE; + port->bridge_ep = slave_dev; + + netdev_dbg(slave_dev, "registered with rmnet as slave\n"); + return 0; +} + +int rmnet_del_bridge(struct net_device *rmnet_dev, + struct net_device *slave_dev) +{ + struct rmnet_priv *priv = netdev_priv(rmnet_dev); + struct net_device *real_dev = priv->real_dev; + struct rmnet_port *port, *slave_port; + + port = rmnet_get_port(real_dev); + port->rmnet_mode = RMNET_EPMODE_VND; + port->bridge_ep = NULL; + + netdev_upper_dev_unlink(slave_dev, rmnet_dev); + slave_port = rmnet_get_port(slave_dev); + rmnet_unregister_real_device(slave_dev, slave_port); + + netdev_dbg(slave_dev, "removed from rmnet as slave\n"); + return 0; +} + /* Startup/Shutdown */ static int __init rmnet_init(void) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 8849986980f8..60115e69e415 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -36,6 +36,7 @@ struct rmnet_port { u8 nr_rmnet_devs; u8 rmnet_mode; struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP]; + struct net_device *bridge_ep; }; extern struct rtnl_link_ops rmnet_link_ops; @@ -47,5 +48,9 @@ struct rmnet_priv { struct rmnet_port *rmnet_get_port(struct net_device *real_dev); struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id); - +int rmnet_add_bridge(struct net_device *rmnet_dev, + struct net_device *slave_dev, + struct netlink_ext_ack *extack); +int rmnet_del_bridge(struct net_device *rmnet_dev, + struct net_device *slave_dev); #endif /* _RMNET_CONFIG_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index fa24ffb69713..df3d2d16ce55 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -149,6 +149,17 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, return RMNET_MAP_SUCCESS; } +static rx_handler_result_t +rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev) +{ + if (bridge_dev) { + skb->dev = bridge_dev; + 
dev_queue_xmit(skb); + } + + return RX_HANDLER_CONSUMED; +} + /* Ingress / Egress Entry Points */ /* Processes packet as per ingress data format for receiving device. Logical @@ -157,10 +168,10 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, */ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) { - struct rmnet_port *port; + int rc = RX_HANDLER_CONSUMED; struct sk_buff *skb = *pskb; + struct rmnet_port *port; struct net_device *dev; - int rc; if (!skb) return RX_HANDLER_CONSUMED; @@ -168,8 +179,15 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) dev = skb->dev; port = rmnet_get_port(dev); - if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) - rc = rmnet_map_ingress_handler(skb, port); + switch (port->rmnet_mode) { + case RMNET_EPMODE_VND: + if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) + rc = rmnet_map_ingress_handler(skb, port); + break; + case RMNET_EPMODE_BRIDGE: + rc = rmnet_bridge_handler(skb, port->bridge_ep); + break; + } return rc; } diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 1b6747d7154f..12bd0bbd5235 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -74,6 +74,8 @@ static const struct net_device_ops rmnet_vnd_ops = { .ndo_start_xmit = rmnet_vnd_start_xmit, .ndo_change_mtu = rmnet_vnd_change_mtu, .ndo_get_iflink = rmnet_vnd_get_iflink, + .ndo_add_slave = rmnet_add_bridge, + .ndo_del_slave = rmnet_del_bridge, }; /* Called by kernel whenever a new rmnet device is created. Sets MTU, From 843e79d05addd8eb06992cd6dfafc7b9d53f2bc8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 11 Oct 2017 09:41:07 +0200 Subject: [PATCH 0853/2177] net: sched: make tc_action_ops->get_dev return dev and avoid passing net Return dev directly, NULL if not possible. That is enough. Makes no sense to pass struct net * to get_dev op, as there is only one net possible, the one the action was created in. So just store it in mirred priv and use directly. Rename the mirred op callback function. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
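For any other action type that can name an egress device, the new op boils down to: record the namespace once at init time, resolve the ifindex on demand, and return NULL if the device has gone away. A minimal sketch of what that looks like for a hypothetical action (tcf_foo, to_foo and act_foo_ops are invented names; the mirred hunk below is the real instance):

static struct net_device *tcf_foo_get_dev(const struct tc_action *a)
{
	struct tcf_foo *f = to_foo(a);

	/* f->net and f->ifindex were stored by the action's init callback,
	 * the same way mirred now keeps m->net and m->tcfm_ifindex
	 */
	return __dev_get_by_index(f->net, f->ifindex);
}

static struct tc_action_ops act_foo_ops = {
	/* ... */
	.get_dev	= tcf_foo_get_dev,
};

The single caller in cls_api.c then only has to check the returned pointer instead of juggling an output parameter and an error code.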
Miller --- include/net/act_api.h | 3 +-- include/net/tc_act/tc_mirred.h | 1 + net/sched/act_mirred.c | 13 +++++-------- net/sched/cls_api.c | 6 ++---- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index b944e0eb93be..900168a9901e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -93,8 +93,7 @@ struct tc_action_ops { int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); - int (*get_dev)(const struct tc_action *a, struct net *net, - struct net_device **mirred_dev); + struct net_device *(*get_dev)(const struct tc_action *a); }; struct tc_action_net { diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 604bc31e23ab..21a656569840 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -10,6 +10,7 @@ struct tcf_mirred { int tcfm_ifindex; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; + struct net *net; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 416627c66f08..8b3e59388480 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -140,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, m->tcfm_eaction = parm->eaction; if (dev != NULL) { m->tcfm_ifindex = parm->ifindex; + m->net = net; if (ret != ACT_P_CREATED) dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); @@ -313,15 +314,11 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; -static int tcf_mirred_device(const struct tc_action *a, struct net *net, - struct net_device **mirred_dev) +static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) { - int ifindex = tcf_mirred_ifindex(a); + struct tcf_mirred *m = to_mirred(a); - *mirred_dev = __dev_get_by_index(net, ifindex); - if (!*mirred_dev) - return -EINVAL; - return 0; + return __dev_get_by_index(m->net, m->tcfm_ifindex); } static struct tc_action_ops act_mirred_ops = { @@ -336,7 +333,7 @@ static struct tc_action_ops act_mirred_ops = { .walk = tcf_mirred_walker, .lookup = tcf_mirred_search, .size = sizeof(struct tcf_mirred), - .get_dev = tcf_mirred_device, + .get_dev = tcf_mirred_get_dev, }; static __net_init int mirred_init_net(struct net *net) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0b2219adf520..450873b0c4b9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1016,10 +1016,8 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - if (a->ops->get_dev) { - a->ops->get_dev(a, dev_net(dev), hw_dev); - break; - } + if (a->ops->get_dev) + *hw_dev = a->ops->get_dev(a); } if (*hw_dev) return 0; From b3f55bdda8df55a563005e00b1b71212d8546541 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 11 Oct 2017 09:41:08 +0200 Subject: [PATCH 0854/2177] net: sched: introduce per-egress action device callbacks Introduce infrastructure that allows drivers to register callbacks that are called whenever tc would offload inserted rule and specified device acts as tc action egress device. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
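From a driver's point of view the contract is small: register one callback per netdev it wants to act as an egress-device offloader for, and handle the same tc_setup_type/type_data pairs that ndo_setup_tc() would see. A rough sketch, with foo_priv and foo_offload_flower standing in for driver-specific pieces (not part of this patch):

static int foo_setup_tc_egdev_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	struct foo_priv *priv = cb_priv;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		/* a classifier whose action egresses to this netdev is
		 * being offloaded; type_data is the flower offload struct
		 */
		return foo_offload_flower(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}

	/* while bringing the port up */
	err = tc_setup_cb_egdev_register(netdev, foo_setup_tc_egdev_cb, priv);
	if (err)
		goto err_egdev;

	/* and symmetrically on teardown */
	tc_setup_cb_egdev_unregister(netdev, foo_setup_tc_egdev_cb, priv);

Both register and unregister take the RTNL lock internally, so callers do not need to hold it.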
Miller --- include/net/act_api.h | 34 +++++++ include/net/pkt_cls.h | 2 + net/sched/act_api.c | 220 ++++++++++++++++++++++++++++++++++++++++++ net/sched/cls_api.c | 30 ++++++ 4 files changed, 286 insertions(+) diff --git a/include/net/act_api.h b/include/net/act_api.h index 900168a9901e..f5e8c9048fb0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -174,4 +174,38 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, #endif } +typedef int tc_setup_cb_t(enum tc_setup_type type, + void *type_data, void *cb_priv); + +#ifdef CONFIG_NET_CLS_ACT +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv); +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv); +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop); +#else +static inline +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + return 0; +} + +static inline +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ +} + +static inline +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop) +{ + return 0; +} +#endif + #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e80edd8879ef..6f8149c82571 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -206,6 +206,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, struct net_device **hw_dev); +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, + void *type_data, bool err_stop); /** * struct tcf_pkt_info - packet information diff --git a/net/sched/act_api.c b/net/sched/act_api.c index da6fa82c98a8..ac97db92ab68 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -1249,8 +1251,226 @@ out_module_put: return skb->len; } +struct tcf_action_net { + struct rhashtable egdev_ht; +}; + +static unsigned int tcf_action_net_id; + +struct tcf_action_egdev_cb { + struct list_head list; + tc_setup_cb_t *cb; + void *cb_priv; +}; + +struct tcf_action_egdev { + struct rhash_head ht_node; + const struct net_device *dev; + unsigned int refcnt; + struct list_head cb_list; +}; + +static const struct rhashtable_params tcf_action_egdev_ht_params = { + .key_offset = offsetof(struct tcf_action_egdev, dev), + .head_offset = offsetof(struct tcf_action_egdev, ht_node), + .key_len = sizeof(const struct net_device *), +}; + +static struct tcf_action_egdev * +tcf_action_egdev_lookup(const struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + return rhashtable_lookup_fast(&tan->egdev_ht, &dev, + tcf_action_egdev_ht_params); +} + +static struct tcf_action_egdev * +tcf_action_egdev_get(const struct net_device *dev) +{ + struct tcf_action_egdev *egdev; + struct tcf_action_net *tan; + + egdev = tcf_action_egdev_lookup(dev); + if (egdev) + goto inc_ref; + + egdev = kzalloc(sizeof(*egdev), GFP_KERNEL); + if (!egdev) + return NULL; + INIT_LIST_HEAD(&egdev->cb_list); + tan = net_generic(dev_net(dev), tcf_action_net_id); + rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node, + tcf_action_egdev_ht_params); 
+ +inc_ref: + egdev->refcnt++; + return egdev; +} + +static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) +{ + struct tcf_action_net *tan; + + if (--egdev->refcnt) + return; + tan = net_generic(dev_net(egdev->dev), tcf_action_net_id); + rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node, + tcf_action_egdev_ht_params); + kfree(egdev); +} + +static struct tcf_action_egdev_cb * +tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + list_for_each_entry(egdev_cb, &egdev->cb_list, list) + if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv) + return egdev_cb; + return NULL; +} + +static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev, + enum tc_setup_type type, + void *type_data, bool err_stop) +{ + struct tcf_action_egdev_cb *egdev_cb; + int ok_count = 0; + int err; + + list_for_each_entry(egdev_cb, &egdev->cb_list, list) { + err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv); + if (err) { + if (err_stop) + return err; + } else { + ok_count++; + } + } + return ok_count; +} + +static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); + if (WARN_ON(egdev_cb)) + return -EEXIST; + egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL); + if (!egdev_cb) + return -ENOMEM; + egdev_cb->cb = cb; + egdev_cb->cb_priv = cb_priv; + list_add(&egdev_cb->list, &egdev->cb_list); + return 0; +} + +static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); + if (WARN_ON(!egdev_cb)) + return; + list_del(&egdev_cb->list); + kfree(egdev_cb); +} + +static int __tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev); + int err; + + if (!egdev) + return -ENOMEM; + err = tcf_action_egdev_cb_add(egdev, cb, cb_priv); + if (err) + goto err_cb_add; + return 0; + +err_cb_add: + tcf_action_egdev_put(egdev); + return err; +} +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + int err; + + rtnl_lock(); + err = __tc_setup_cb_egdev_register(dev, cb, cb_priv); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register); + +static void __tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); + + if (WARN_ON(!egdev)) + return; + tcf_action_egdev_cb_del(egdev, cb, cb_priv); + tcf_action_egdev_put(egdev); +} +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + rtnl_lock(); + __tc_setup_cb_egdev_unregister(dev, cb, cb_priv); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister); + +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); + + if (!egdev) + return 0; + return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call); + +static __net_init int tcf_action_net_init(struct net *net) +{ + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + return rhashtable_init(&tan->egdev_ht, 
&tcf_action_egdev_ht_params); +} + +static void __net_exit tcf_action_net_exit(struct net *net) +{ + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + rhashtable_destroy(&tan->egdev_ht); +} + +static struct pernet_operations tcf_action_net_ops = { + .init = tcf_action_net_init, + .exit = tcf_action_net_exit, + .id = &tcf_action_net_id, + .size = sizeof(struct tcf_action_net), +}; + static int __init tc_action_init(void) { + int err; + + err = register_pernet_subsys(&tcf_action_net_ops); + if (err) + return err; + rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 450873b0c4b9..99f9432f63cf 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1026,6 +1026,36 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, } EXPORT_SYMBOL(tcf_exts_get_dev); +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, + void *type_data, bool err_stop) +{ + int ok_count = 0; +#ifdef CONFIG_NET_CLS_ACT + const struct tc_action *a; + struct net_device *dev; + LIST_HEAD(actions); + int ret; + + if (!tcf_exts_has_actions(exts)) + return 0; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + if (!a->ops->get_dev) + continue; + dev = a->ops->get_dev(a); + if (!dev || !tc_can_offload(dev)) + continue; + ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count += ret; + } +#endif + return ok_count; +} +EXPORT_SYMBOL(tcf_exts_egdev_cb_call); + static int __init tc_filter_init(void) { rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); From 717503b9cf57c0bb7ea4d3a9f5699c9a04adf988 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 11 Oct 2017 09:41:09 +0200 Subject: [PATCH 0855/2177] net: sched: convert cls_flower->egress_dev users to tc_setup_cb_egdev infra The only user of cls_flower->egress_dev is mlx5. So do the conversion there alongside with the code originating the call in cls_flower function fl_hw_replace_filter to the newly introduced egress device callback infrastucture. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
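Worth spelling out, because it drives the classifier-side logic: tc_setup_cb_call() returns the number of callbacks that accepted the rule, and only propagates a negative error when err_stop (i.e. skip_sw) is set. Condensed from the fl_hw_replace_filter() hunk further down, the caller therefore ends up with:

	err = tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER,
			       &cls_flower, skip_sw);
	if (err < 0) {		/* skip_sw rule and an egdev callback failed */
		fl_hw_destroy_filter(tp, f);
		return err;
	} else if (err > 0) {	/* at least one device installed the rule */
		f->flags |= TCA_CLS_FLAGS_IN_HW;
	}

	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
		return -EINVAL;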
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 +- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 31 ++++++--- include/net/pkt_cls.h | 5 +- net/sched/cls_api.c | 13 +++- net/sched/cls_flower.c | 63 ++++++++++--------- 6 files changed, 73 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc13d3dbd366..5ec6d3e8dc89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1081,6 +1081,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); +int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); + /* mlx5e generic netdev management API */ struct net_device* mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cc11bbbd0309..2a32102e7648 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3108,8 +3108,8 @@ static int mlx5e_setup_tc_cls_flower(struct net_device *dev, } #endif -static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { switch (type) { #ifdef CONFIG_MLX5_ESWITCH diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 45e03c427faf..765fc74fbb1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -667,14 +668,6 @@ mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, cls_flower->common.chain_index) return -EOPNOTSUPP; - if (cls_flower->egress_dev) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - - dev = mlx5_eswitch_get_uplink_netdev(esw); - return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, - cls_flower); - } - switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: return mlx5e_configure_flower(priv, cls_flower); @@ -698,6 +691,14 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct net_device *dev = cb_priv; + + return mlx5e_setup_tc(dev, type, type_data); +} + bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1017,15 +1018,24 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } + err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb, + mlx5_eswitch_get_uplink_netdev(esw)); + if (err) + goto err_neigh_cleanup; + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_neigh_cleanup; + goto err_egdev_cleanup; } return 0; +err_egdev_cleanup: + tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb, + mlx5_eswitch_get_uplink_netdev(esw)); + err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1047,7 +1057,8 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) void *ppriv = priv->ppriv; unregister_netdev(rep->netdev); - + 
tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb, + mlx5_eswitch_get_uplink_netdev(esw)); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); mlx5e_destroy_netdev(priv); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 6f8149c82571..c0bdf5cad727 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -206,8 +206,6 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, struct net_device **hw_dev); -int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, - void *type_data, bool err_stop); /** * struct tcf_pkt_info - packet information @@ -407,6 +405,9 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type, + void *type_data, bool err_stop); + struct tc_cls_common_offload { u32 chain_index; __be16 protocol; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 99f9432f63cf..51994a202585 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1026,8 +1026,9 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, } EXPORT_SYMBOL(tcf_exts_get_dev); -int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, - void *type_data, bool err_stop) +static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, + enum tc_setup_type type, + void *type_data, bool err_stop) { int ok_count = 0; #ifdef CONFIG_NET_CLS_ACT @@ -1054,7 +1055,13 @@ int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, #endif return ok_count; } -EXPORT_SYMBOL(tcf_exts_egdev_cb_call); + +int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type, + void *type_data, bool err_stop) +{ + return tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); +} +EXPORT_SYMBOL(tc_setup_cb_call); static int __init tc_filter_init(void) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index db831ac708f6..5b7bb968d1d4 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -88,7 +88,6 @@ struct cls_fl_filter { u32 handle; u32 flags; struct rcu_head rcu; - struct net_device *hw_dev; }; static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) @@ -201,16 +200,17 @@ static void fl_destroy_filter(struct rcu_head *head) static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) { struct tc_cls_flower_offload cls_flower = {}; - struct net_device *dev = f->hw_dev; - - if (!tc_can_offload(dev)) - return; + struct net_device *dev = tp->q->dev_queue->dev; tc_cls_common_offload_init(&cls_flower.common, tp); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); + if (tc_can_offload(dev)) + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, + &cls_flower); + tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, false); } static int fl_hw_replace_filter(struct tcf_proto *tp, @@ -220,20 +220,9 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_flower_offload cls_flower = {}; + bool skip_sw = tc_skip_sw(f->flags); int err; - if (!tc_can_offload(dev)) { - if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) || - (f->hw_dev && !tc_can_offload(f->hw_dev))) { - f->hw_dev = dev; - return tc_skip_sw(f->flags) ? 
-EINVAL : 0; - } - dev = f->hw_dev; - cls_flower.egress_dev = true; - } else { - f->hw_dev = dev; - } - tc_cls_common_offload_init(&cls_flower.common, tp); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; @@ -242,31 +231,47 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.key = &f->mkey; cls_flower.exts = &f->exts; - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, - &cls_flower); - if (!err) - f->flags |= TCA_CLS_FLAGS_IN_HW; + if (tc_can_offload(dev)) { + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, + &cls_flower); + if (err) { + if (skip_sw) + return err; + } else { + f->flags |= TCA_CLS_FLAGS_IN_HW; + } + } - if (tc_skip_sw(f->flags)) + err = tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, skip_sw); + if (err < 0) { + fl_hw_destroy_filter(tp, f); return err; + } else if (err > 0) { + f->flags |= TCA_CLS_FLAGS_IN_HW; + } + + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; + return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) { struct tc_cls_flower_offload cls_flower = {}; - struct net_device *dev = f->hw_dev; - - if (!tc_can_offload(dev)) - return; + struct net_device *dev = tp->q->dev_queue->dev; tc_cls_common_offload_init(&cls_flower.common, tp); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.exts = &f->exts; - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, - &cls_flower); + if (tc_can_offload(dev)) + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, + &cls_flower); + tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, false); } static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) From 7578d7b45ed870b13a8ace57e32feaed623c2a94 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 11 Oct 2017 09:41:10 +0200 Subject: [PATCH 0856/2177] net: sched: remove unused tcf_exts_get_dev helper and cls_flower->egress_dev The helper and the struct field ares no longer used by any code, so remove them. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 3 --- net/sched/cls_api.c | 22 ---------------------- 2 files changed, 25 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c0bdf5cad727..f5263743076b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -204,8 +204,6 @@ void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); -int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, - struct net_device **hw_dev); /** * struct tcf_pkt_info - packet information @@ -517,7 +515,6 @@ struct tc_cls_flower_offload { struct fl_flow_key *mask; struct fl_flow_key *key; struct tcf_exts *exts; - bool egress_dev; }; enum tc_matchall_command { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 51994a202585..2977b8a90851 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1004,28 +1004,6 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, - struct net_device **hw_dev) -{ -#ifdef CONFIG_NET_CLS_ACT - const struct tc_action *a; - LIST_HEAD(actions); - - if (!tcf_exts_has_actions(exts)) - return -EINVAL; - - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { - if (a->ops->get_dev) - *hw_dev = a->ops->get_dev(a); - } - if (*hw_dev) - return 0; -#endif - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(tcf_exts_get_dev); - static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, enum tc_setup_type type, void *type_data, bool err_stop) From 4ea2607f7871ca433ab0c5300289215974213f26 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2017 10:28:00 +0200 Subject: [PATCH 0857/2177] ipv6: addrconf: don't use rtnl mutex in RTM_GETNETCONF Instead of relying on rtnl mutex bump device reference count. After this change, values reported can change in parallel, but thats not much different from current state, as anyone can change the settings right after rtnl_unlock (and before userspace processed reply). While at it, switch to GFP_KERNEL allocation. Signed-off-by: Florian Westphal Signed-off-by: David S. 
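Stripped of the NETCONFA_IFINDEX_ALL/DEFAULT special cases, the conversion follows a simple recipe: take real references on the objects the handler reads, drop them on every exit path, and register the doit with RTNL_FLAG_DOIT_UNLOCKED. In outline (condensed from the hunk below):

	dev = dev_get_by_index(net, ifindex);	/* was __dev_get_by_index(), now refcounted */
	if (!dev)
		return -EINVAL;

	in6_dev = in6_dev_get(dev);		/* was __in6_dev_get() */
	if (!in6_dev)
		goto errout;
	devconf = &in6_dev->cnf;

	skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	/* ... fill and rtnl_unicast() ... */
errout:
	if (in6_dev)
		in6_dev_put(in6_dev);
	if (dev)
		dev_put(dev);
	return err;

The same recipe is applied to RTM_GETADDR in the next patch.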
Miller --- net/ipv6/addrconf.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d9f6226694eb..5207f567ef28 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -616,23 +616,23 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; + struct inet6_dev *in6_dev = NULL; + struct net_device *dev = NULL; struct netconfmsg *ncm; struct sk_buff *skb; struct ipv6_devconf *devconf; - struct inet6_dev *in6_dev; - struct net_device *dev; int ifindex; int err; err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, devconf_ipv6_policy, extack); if (err < 0) - goto errout; + return err; + + if (!tb[NETCONFA_IFINDEX]) + return -EINVAL; err = -EINVAL; - if (!tb[NETCONFA_IFINDEX]) - goto errout; - ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); switch (ifindex) { case NETCONFA_IFINDEX_ALL: @@ -642,10 +642,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, devconf = net->ipv6.devconf_dflt; break; default: - dev = __dev_get_by_index(net, ifindex); + dev = dev_get_by_index(net, ifindex); if (!dev) - goto errout; - in6_dev = __in6_dev_get(dev); + return -EINVAL; + in6_dev = in6_dev_get(dev); if (!in6_dev) goto errout; devconf = &in6_dev->cnf; @@ -653,7 +653,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, } err = -ENOBUFS; - skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC); + skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); if (!skb) goto errout; @@ -669,6 +669,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: + if (in6_dev) + in6_dev_put(in6_dev); + if (dev) + dev_put(dev); return err; } @@ -6570,7 +6574,7 @@ int __init addrconf_init(void) __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr, 0); __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, - inet6_netconf_dump_devconf, 0); + inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED); ipv6_addr_label_rtnl_register(); From c24675f871d3bca569532b3744962dd49938ddff Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2017 10:28:01 +0200 Subject: [PATCH 0858/2177] ipv6: addrconf: don't use rtnl mutex in RTM_GETADDR Similar to the previous patch, use the device lookup functions that bump device refcount and flag this as DOIT_UNLOCKED to avoid rtnl mutex. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5207f567ef28..4603aa488f4f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4890,17 +4890,15 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); if (err < 0) - goto errout; + return err; addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (!addr) { - err = -EINVAL; - goto errout; - } + if (!addr) + return -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(net, ifm->ifa_index); + dev = dev_get_by_index(net, ifm->ifa_index); ifa = ipv6_get_ifaddr(net, addr, dev, 1); if (!ifa) { @@ -4926,6 +4924,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, errout_ifa: in6_ifa_put(ifa); errout: + if (dev) + dev_put(dev); return err; } @@ -6568,7 +6568,7 @@ int __init addrconf_init(void) __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, - inet6_dump_ifaddr, 0); + inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED); __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr, 0); __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, From 1a37b770cf78dca77c386f8a95e05403233e4d52 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Oct 2017 11:17:57 +0100 Subject: [PATCH 0859/2177] sctp: make array sctp_sched_ops static The array sctp_sched_ops is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol 'sctp_sched_ops' was not declared. Should it be static? Signed-off-by: Colin Ian King Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/stream_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 03513a9fa110..0b83ec51e43b 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -124,7 +124,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = { extern struct sctp_sched_ops sctp_sched_prio; extern struct sctp_sched_ops sctp_sched_rr; -struct sctp_sched_ops *sctp_sched_ops[] = { +static struct sctp_sched_ops *sctp_sched_ops[] = { &sctp_sched_fcfs, &sctp_sched_prio, &sctp_sched_rr, From 14c68c43b7695cbcfe148bf3c513b598c2886e7d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Oct 2017 10:53:28 +0100 Subject: [PATCH 0860/2177] net: mpls: make function ipgre_mpls_encap_hlen static The function ipgre_mpls_encap_hlen is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol 'ipgre_mpls_encap_hlen' was not declared. Should it be static? Fixes: bdc476413dcdb ("ip_tunnel: add mpls over gre support") Signed-off-by: Colin Ian King Acked-by: David Ahern Signed-off-by: David S. 
Miller --- net/mpls/af_mpls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 9745e8f69810..8ca9915befc8 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -41,7 +41,7 @@ static int label_limit = (1 << 20) - 1; static int ttl_max = 255; #if IS_ENABLED(CONFIG_NET_IP_TUNNEL) -size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e) +static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e) { return sizeof(struct mpls_shim_hdr); } From 5dad61b83840d7eceaba5bf316419be11bbb993a Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 11 Oct 2017 13:17:25 +0300 Subject: [PATCH 0861/2177] net/mlx4_en: Replace netdev parameter with priv in XDP xmit function The struct net_device parameter was passed only to extract struct mlx4_en_priv out of it. Here we pass the priv parameter directly. Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 3 +-- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a7866954d106..92aec17f4b4d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -778,7 +778,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (likely(!mlx4_en_xmit_frame(ring, frags, dev, + if (likely(!mlx4_en_xmit_frame(ring, frags, priv, length, cq_ring, &doorbell_pending))) { frags[0].page = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 2cc82dc07397..f16774c9c347 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -1087,10 +1087,9 @@ tx_drop: netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, - struct net_device *dev, unsigned int length, + struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending) { - struct mlx4_en_priv *priv = netdev_priv(dev); union mlx4_wqe_qpn_vlan qpn_vlan = {}; struct mlx4_en_tx_desc *tx_desc; struct mlx4_en_tx_info *tx_info; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 245e9ea09ab2..8cad9b4f1936 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, - struct net_device *dev, unsigned int length, + struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, From f6f0aa97413a420606aabe8142f8a5c0f15b9246 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 11 Oct 2017 13:17:26 +0300 Subject: [PATCH 0862/2177] net/mlx4_en: Obsolete call to generic write_desc in XDP xmit flow Function mlx4_en_tx_write_desc() is not optimized to use of XDP xmit. Use the relevant parts inline instead. Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index f16774c9c347..ac7254e3f909 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -1090,7 +1090,9 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending) { - union mlx4_wqe_qpn_vlan qpn_vlan = {}; + union mlx4_wqe_qpn_vlan qpn_vlan = { + .fence_size = MLX4_EN_XDP_TX_REAL_SZ, + }; struct mlx4_en_tx_desc *tx_desc; struct mlx4_en_tx_info *tx_info; struct mlx4_wqe_data_seg *data; @@ -1140,7 +1142,6 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, data->byte_count = cpu_to_be32(length); /* tx completion can avoid cache line miss for common cases */ - tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; op_own = cpu_to_be32(MLX4_OPCODE_SEND) | ((ring->prod & ring->size) ? @@ -1151,10 +1152,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, ring->prod += MLX4_EN_XDP_TX_NRTXBB; - qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; + tx_desc->ctrl.qpn_vlan = qpn_vlan; + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + ring->xmit_more++; - mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0, - op_own, false, false); *doorbell_pending = true; return NETDEV_TX_OK; From f025fd6061e120713d6c11c92983804c6805d6cb Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 11 Oct 2017 13:17:27 +0300 Subject: [PATCH 0863/2177] net/mlx4_en: XDP_TX, assign constant values of TX descs on ring creaion In XDP_TX, some fields in tx_info and tx_desc are constants across all entries of the different XDP_TX rings. Assign values to these fields on ring creation time, rather than in data-path. Patchset performance tests: Tested on ConnectX3Pro, Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz Single queue no-RSS optimization ON. XDP_TX packet rate: ------------------------------ Before | After | Gain | 13.7 Mpps | 14.0 Mpps | %2.2 | ------------------------------ Signed-off-by: Tariq Toukan Signed-off-by: David S. 
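With the invariant fields (lkey, fence_size, srcrb_flags, nr_txbb, data_offset, nr_maps, ...) now written once per descriptor in mlx4_en_init_tx_xdp_ring_descs(), the per-packet XDP_TX path only touches what really varies; condensed from mlx4_en_xmit_frame() in the hunk below:

	tx_info->page     = frame->page;
	tx_info->map0_dma = dma;
	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);

	data->addr       = cpu_to_be64(dma + frame->page_offset);
	dma_wmb();
	data->byte_count = cpu_to_be32(length);

	/* ownership is flipped last, once the descriptor is fully written */
	dma_wmb();
	tx_desc->ctrl.owner_opcode = op_own;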
Miller --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_tx.c | 38 ++++++++++++------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 + 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e4c7a80ef5a8..d611df2f274d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_arm_cq(priv, cq); } else { + mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring); mlx4_en_init_recycle_ring(priv, i); /* XDP TX CQ should never be armed */ } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index ac7254e3f909..596445a4a241 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -1085,14 +1085,35 @@ tx_drop: #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \ / 16) & 0x3f) +void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring) +{ + int i; + + for (i = 0; i < ring->size; i++) { + struct mlx4_en_tx_info *tx_info = &ring->tx_info[i]; + struct mlx4_en_tx_desc *tx_desc = ring->buf + + (i << LOG_TXBB_SIZE); + + tx_info->map0_byte_count = PAGE_SIZE; + tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; + tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); + tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + tx_desc->data.lkey = ring->mr_key; + tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + } +} + netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending) { - union mlx4_wqe_qpn_vlan qpn_vlan = { - .fence_size = MLX4_EN_XDP_TX_REAL_SZ, - }; struct mlx4_en_tx_desc *tx_desc; struct mlx4_en_tx_info *tx_info; struct mlx4_wqe_data_seg *data; @@ -1124,20 +1145,12 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, tx_info->page = frame->page; frame->page = NULL; tx_info->map0_dma = dma; - tx_info->map0_byte_count = PAGE_SIZE; - tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); - tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); - tx_info->ts_requested = 0; - tx_info->nr_maps = 1; - tx_info->linear = 1; - tx_info->inl = 0; dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset, length, PCI_DMA_TODEVICE); data->addr = cpu_to_be64(dma + frame->page_offset); - data->lkey = ring->mr_key; dma_wmb(); data->byte_count = cpu_to_be32(length); @@ -1152,9 +1165,6 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, ring->prod += MLX4_EN_XDP_TX_NRTXBB; - tx_desc->ctrl.qpn_vlan = qpn_vlan; - tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; - /* Ensure new descriptor hits memory * before setting ownership of this descriptor to HW */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 8cad9b4f1936..1856e279a7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, int node, int queue_index); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring 
**pring); +void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio); From 952925dec0f276b407b2abce4aee82cba7c700c3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Oct 2017 11:56:23 +0100 Subject: [PATCH 0864/2177] bpf: remove redundant variable old_flags Variable old_flags is being assigned but is never read; it is redundant and can be removed. Cleans up clang warning: Value stored to 'old_flags' is never read Signed-off-by: Colin Ian King Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/cgroup.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index e88abc0865d5..3db5a17fcfe8 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -192,7 +192,6 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, struct cgroup_subsys_state *css; struct bpf_prog_list *pl; bool pl_was_allocated; - u32 old_flags; int err; if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) @@ -239,7 +238,6 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, pl->prog = prog; } - old_flags = cgrp->bpf.flags[type]; cgrp->bpf.flags[type] = flags; /* allocate and recompute effective prog arrays */ From a67a4893f37d5389c812d85ccf1e258b87ef4ead Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Oct 2017 11:23:04 +0200 Subject: [PATCH 0865/2177] cfg80211: remove set but never used variable cf_offset Perhaps it had been intended to be used, but it clearly isn't. Signed-off-by: Johannes Berg --- net/wireless/chan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index b8aa5a7d5c77..eb824270f6e3 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -464,7 +464,7 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan) { int width; - u32 cf_offset, freq; + u32 freq; if (chandef->chan->center_freq == chan->center_freq) return true; @@ -473,8 +473,6 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, if (width <= 20) return false; - cf_offset = width / 2 - 10; - for (freq = chandef->center_freq1 - width / 2 + 10; freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) { if (chan->center_freq == freq) From bad5680b5052f9d3008a8cfcc0b34ba0da08401d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 7 Oct 2017 14:21:22 +0200 Subject: [PATCH 0866/2177] batman-adv: Add missing kerneldoc for extack The parameter extack was added to batadv_softif_slave_add without adding the kernel-doc for it. This caused kernel-doc warnings. Signed-off-by: Sven Eckelmann Acked-by: David Ahern Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 543d2c3e0f0d..9f673cdfecf8 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -863,6 +863,7 @@ free_bat_counters: * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface * @dev: batadv_soft_interface used as master interface * @slave_dev: net_device which should become the slave interface + * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. 
*/ From 437d2762ba07f0fc639d5a09acb323fe4106a61f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Oct 2017 20:45:40 -0700 Subject: [PATCH 0867/2177] tcp: remove obsolete helpers Remove three inline helpers that are no longer needed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 15163454174b..3b3b9b968e2d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1629,18 +1629,6 @@ static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) return skb_peek_tail(&sk->sk_write_queue); } -static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb_queue_next(&sk->sk_write_queue, skb); -} - -static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb_queue_prev(&sk->sk_write_queue, skb); -} - #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) @@ -1697,11 +1685,6 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb } } -static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb) -{ - __skb_queue_head(&sk->sk_write_queue, skb); -} - /* Insert new before skb on the write queue of sk. */ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sk_buff *skb, From 3f7832c26cc0cad2245981f777f3ee684399ce93 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 12 Oct 2017 12:42:04 -0500 Subject: [PATCH 0868/2177] Revert "net: qcom/emac: enforce DMA address restrictions" This reverts commit df1ec1b9d0df57e96011f175418dc95b1af46821. It turns out that memory allocated via dma_alloc_coherent is always aligned to the size of the buffer, so there's no way the RRD and RFD can ever be in separate 32-bit regions. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 39 +++++++------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 0f5ece5d9507..9cbb27263742 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -734,11 +734,6 @@ static int emac_rx_descs_alloc(struct emac_adapter *adpt) rx_q->rrd.size = rx_q->rrd.count * (adpt->rrd_size * 4); rx_q->rfd.size = rx_q->rfd.count * (adpt->rfd_size * 4); - /* Check if the RRD and RFD are aligned properly, and if not, adjust. */ - if (upper_32_bits(ring_header->dma_addr) != - upper_32_bits(ring_header->dma_addr + ALIGN(rx_q->rrd.size, 8))) - ring_header->used = ALIGN(rx_q->rrd.size, 8); - rx_q->rrd.dma_addr = ring_header->dma_addr + ring_header->used; rx_q->rrd.v_addr = ring_header->v_addr + ring_header->used; ring_header->used += ALIGN(rx_q->rrd.size, 8); @@ -772,18 +767,11 @@ int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt) /* Ring DMA buffer. Each ring may need up to 8 bytes for alignment, * hence the additional padding bytes are allocated. - * - * Also double the memory allocated for the RRD so that we can - * re-align it if necessary. The EMAC has a restriction that the - * upper 32 bits of the base addresses for the RFD and RRD rings - * must be the same. It is extremely unlikely that this is not the - * case, since the rings are only a few KB in size. 
However, we - * need to check for this anyway, and if the two rings are not - * compliant, then we re-align. */ - ring_header->size = ALIGN(num_tx_descs * (adpt->tpd_size * 4), 8) + - ALIGN(num_rx_descs * (adpt->rfd_size * 4), 8) + - ALIGN(num_rx_descs * (adpt->rrd_size * 4), 8) * 2; + ring_header->size = num_tx_descs * (adpt->tpd_size * 4) + + num_rx_descs * (adpt->rfd_size * 4) + + num_rx_descs * (adpt->rrd_size * 4) + + 8 + 2 * 8; /* 8 byte per one Tx and two Rx rings */ ring_header->used = 0; ring_header->v_addr = dma_zalloc_coherent(dev, ring_header->size, @@ -792,23 +780,26 @@ int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt) if (!ring_header->v_addr) return -ENOMEM; + ring_header->used = ALIGN(ring_header->dma_addr, 8) - + ring_header->dma_addr; + + ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q); + if (ret) { + netdev_err(adpt->netdev, "error: Tx Queue alloc failed\n"); + goto err_alloc_tx; + } + ret = emac_rx_descs_alloc(adpt); if (ret) { netdev_err(adpt->netdev, "error: Rx Queue alloc failed\n"); goto err_alloc_rx; } - ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q); - if (ret) { - netdev_err(adpt->netdev, "transmit queue allocation failed\n"); - goto err_alloc_tx; - } - return 0; -err_alloc_tx: - emac_rx_q_bufs_free(adpt); err_alloc_rx: + emac_tx_q_bufs_free(adpt); +err_alloc_tx: dma_free_coherent(dev, ring_header->size, ring_header->v_addr, ring_header->dma_addr); From 60724d4bae14cd295b27b1610cad9a2720eb0860 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 10:57:48 -0700 Subject: [PATCH 0869/2177] net: dsa: Add support for DSA specific notifiers In preparation for communicating a given DSA network device's port number and switch index, create a specialized DSA notifier and two events: DSA_PORT_REGISTER and DSA_PORT_UNREGISTER that communicate: the slave network device (slave_dev), port number and switch number in the tree. This will be later used for network device drivers like bcmsysport which needs to cooperate with its DSA network devices to set-up queue mapping and scheduling. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/net/dsa.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/dsa/dsa.c | 23 +++++++++++++++++++++++ net/dsa/slave.c | 13 +++++++++++++ 3 files changed, 81 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 10dceccd9ce8..40a709a0754d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -471,4 +471,49 @@ static inline int dsa_switch_resume(struct dsa_switch *ds) } #endif /* CONFIG_PM_SLEEP */ +enum dsa_notifier_type { + DSA_PORT_REGISTER, + DSA_PORT_UNREGISTER, +}; + +struct dsa_notifier_info { + struct net_device *dev; +}; + +struct dsa_notifier_register_info { + struct dsa_notifier_info info; /* must be first */ + struct net_device *master; + unsigned int port_number; + unsigned int switch_number; +}; + +static inline struct net_device * +dsa_notifier_info_to_dev(const struct dsa_notifier_info *info) +{ + return info->dev; +} + +#if IS_ENABLED(CONFIG_NET_DSA) +int register_dsa_notifier(struct notifier_block *nb); +int unregister_dsa_notifier(struct notifier_block *nb); +int call_dsa_notifiers(unsigned long val, struct net_device *dev, + struct dsa_notifier_info *info); +#else +static inline int register_dsa_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_dsa_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, + struct dsa_notifier_info *info) +{ + return NOTIFY_DONE; +} +#endif + #endif diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 51ca2a524a27..832c659ff993 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -261,6 +262,28 @@ bool dsa_schedule_work(struct work_struct *work) return queue_work(dsa_owq, work); } +static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain); + +int register_dsa_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&dsa_notif_chain, nb); +} +EXPORT_SYMBOL_GPL(register_dsa_notifier); + +int unregister_dsa_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&dsa_notif_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_dsa_notifier); + +int call_dsa_notifiers(unsigned long val, struct net_device *dev, + struct dsa_notifier_info *info) +{ + info->dev = dev; + return atomic_notifier_call_chain(&dsa_notif_chain, val, info); +} +EXPORT_SYMBOL_GPL(call_dsa_notifiers); + static int __init dsa_init_module(void) { int rc; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fb2954ff198c..45f4ea845c07 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1116,6 +1116,7 @@ int dsa_slave_resume(struct net_device *slave_dev) int dsa_slave_create(struct dsa_port *port, const char *name) { + struct dsa_notifier_register_info rinfo = { }; struct dsa_switch *ds = port->ds; struct net_device *master; struct net_device *slave_dev; @@ -1177,6 +1178,12 @@ int dsa_slave_create(struct dsa_port *port, const char *name) goto out_free; } + rinfo.info.dev = slave_dev; + rinfo.master = master; + rinfo.port_number = p->dp->index; + rinfo.switch_number = p->dp->ds->index; + call_dsa_notifiers(DSA_PORT_REGISTER, slave_dev, &rinfo.info); + ret = register_netdev(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", @@ -1200,6 +1207,7 @@ out_free: void dsa_slave_destroy(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + struct dsa_notifier_register_info rinfo = { }; struct device_node *port_dn; port_dn = p->dp->dn; @@ -1211,6 +1219,11 @@ void 
dsa_slave_destroy(struct net_device *slave_dev) if (of_phy_is_fixed_link(port_dn)) of_phy_deregister_fixed_link(port_dn); } + rinfo.info.dev = slave_dev; + rinfo.master = p->dp->cpu_dp->netdev; + rinfo.port_number = p->dp->index; + rinfo.switch_number = p->dp->ds->index; + call_dsa_notifiers(DSA_PORT_UNREGISTER, slave_dev, &rinfo.info); unregister_netdev(slave_dev); free_percpu(p->stats64); free_netdev(slave_dev); From 0a5f14ce67a6e093e651d3cd75e6ac281123d93a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 10:57:49 -0700 Subject: [PATCH 0870/2177] net: dsa: tag_brcm: Indicate to master netdevice port + queue We need to tell the DSA master network device doing the actual transmission what the desired switch port and queue number is for it to resolve that to the internal transmit queue it is mapped to. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ net/dsa/tag_brcm.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 40a709a0754d..ce1d622734d7 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -516,4 +516,9 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, } #endif +/* Broadcom tag specific helpers to insert and extract queue/port number */ +#define BRCM_TAG_SET_PORT_QUEUE(p, q) ((p) << 8 | q) +#define BRCM_TAG_GET_PORT(v) ((v) >> 8) +#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) + #endif diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 8e4bdb9d9ae3..cc4f472fbd77 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -86,6 +86,12 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK; + /* Now tell the master network device about the desired output queue + * as well + */ + skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(p->dp->index, + queue)); + return skb; } From d156576362c07e954dc36e07b0d7b0733a010f7d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 10:57:50 -0700 Subject: [PATCH 0871/2177] net: systemport: Establish lower/upper queue mapping Establish a queue mapping between the DSA slave network device queues created that correspond to switch port queues, and the transmit queue that SYSTEMPORT manages. We need to configure the SYSTEMPORT transmit queue with the switch port number and switch port queue number in order for the switch and SYSTEMPORT hardware to utilize the out of band congestion notification. This hardware mechanism works by looking at the switch port egress queue and determines whether there is enough buffers for this queue, with that class of service for a successful transmission and if not, backpressures the SYSTEMPORT queue that is being used. For this to work, we implement a notifier which looks at the DSA_PORT_REGISTER event. When DSA network devices are registered, the framework calls the DSA notifiers when that happens, extracts the number of queues for these devices and their associated port number, remembers that in the driver private structure and linearly maps those queues to TX rings/queues that we manage. This scheme works because DSA slave network deviecs always transmit through SYSTEMPORT so when DSA slave network devices are destroyed/brought down, the corresponding SYSTEMPORT queues are no longer used. Also, by design of the DSA framework, the master network device (SYSTEMPORT) is registered first. 
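As a quick illustration of how the pieces in these patches fit together (the DSA_PORT_REGISTER/DSA_PORT_UNREGISTER notifier, the Broadcom tag port/queue encoding, and the SYSTEMPORT queue mapping in the diff that follows), here is a minimal hypothetical sketch of a master Ethernet driver consuming the new API. The names struct my_priv, MY_QUEUES_PER_PORT, my_dsa_notifier_cb and my_select_queue are invented for the example, and the mapping logic is deliberately simpler than the real bcm_sysport code shown below.

#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <net/dsa.h>

#define MY_QUEUES_PER_PORT 8	/* invented for this sketch */

struct my_priv {
	struct notifier_block dsa_notifier;
	/* (switch port, switch queue) -> driver TX ring index */
	u16 ring_map[DSA_MAX_PORTS * MY_QUEUES_PER_PORT];
};

static int my_dsa_notifier_cb(struct notifier_block *nb,
			      unsigned long event, void *ptr)
{
	struct my_priv *priv = container_of(nb, struct my_priv, dsa_notifier);
	struct dsa_notifier_register_info *info = ptr;
	unsigned int q;

	if (event != DSA_PORT_REGISTER)
		return NOTIFY_DONE;

	/* Only ports of the directly attached switch matter here. */
	if (info->switch_number)
		return NOTIFY_DONE;

	/* Remember which internal TX ring serves each (port, queue) pair.
	 * A real driver (see bcm_sysport_map_queues below) picks rings from
	 * a bitmap; the identity mapping here is a simplification.
	 */
	for (q = 0; q < info->info.dev->real_num_tx_queues; q++)
		priv->ring_map[q + info->port_number * MY_QUEUES_PER_PORT] = q;

	return NOTIFY_OK;
}

static u16 my_select_queue(struct net_device *dev, struct sk_buff *skb,
			   void *accel_priv, select_queue_fallback_t fallback)
{
	struct my_priv *priv = netdev_priv(dev);
	u16 qm = skb_get_queue_mapping(skb);

	if (!netdev_uses_dsa(dev))
		return fallback(dev, skb);

	/* The Broadcom tagger encoded switch port and queue via
	 * BRCM_TAG_SET_PORT_QUEUE(); decode them to pick our ring.
	 */
	return priv->ring_map[BRCM_TAG_GET_QUEUE(qm) +
			      BRCM_TAG_GET_PORT(qm) * MY_QUEUES_PER_PORT];
}

/* At probe time the driver would do something like:
 *	priv->dsa_notifier.notifier_call = my_dsa_notifier_cb;
 *	register_dsa_notifier(&priv->dsa_notifier);
 * and plug my_select_queue into its net_device_ops.ndo_select_queue.
 */
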
For faster lookups we use an array of up to DSA_MAX_PORTS * number of queues per port, and then map pointers to bcm_sysport_tx_ring such that our ndo_select_queue() implementation can just index into that array to locate the corresponding ring index. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 115 ++++++++++++++++++++- drivers/net/ethernet/broadcom/bcmsysport.h | 11 +- 2 files changed, 121 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 83eec9a8c275..78bed9a84e81 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1416,7 +1416,14 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, tdma_writel(priv, 0, TDMA_DESC_RING_COUNT(index)); tdma_writel(priv, 1, TDMA_DESC_RING_INTR_CONTROL(index)); tdma_writel(priv, 0, TDMA_DESC_RING_PROD_CONS_INDEX(index)); - tdma_writel(priv, RING_IGNORE_STATUS, TDMA_DESC_RING_MAPPING(index)); + + /* Configure QID and port mapping */ + reg = tdma_readl(priv, TDMA_DESC_RING_MAPPING(index)); + reg &= ~(RING_QID_MASK | RING_PORT_ID_MASK << RING_PORT_ID_SHIFT); + reg |= ring->switch_queue & RING_QID_MASK; + reg |= ring->switch_port << RING_PORT_ID_SHIFT; + reg |= RING_IGNORE_STATUS; + tdma_writel(priv, reg, TDMA_DESC_RING_MAPPING(index)); tdma_writel(priv, 0, TDMA_DESC_RING_PCP_DEI_VID(index)); /* Do not use tdma_control_bit() here because TSB_SWAP1 collides @@ -1447,8 +1454,9 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, napi_enable(&ring->napi); netif_dbg(priv, hw, priv->netdev, - "TDMA cfg, size=%d, desc_cpu=%p\n", - ring->size, ring->desc_cpu); + "TDMA cfg, size=%d, desc_cpu=%p switch q=%d,port=%d\n", + ring->size, ring->desc_cpu, ring->switch_queue, + ring->switch_port); return 0; } @@ -2011,6 +2019,92 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, }; +static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + u16 queue = skb_get_queue_mapping(skb); + struct bcm_sysport_tx_ring *tx_ring; + unsigned int q, port; + + if (!netdev_uses_dsa(dev)) + return fallback(dev, skb); + + /* DSA tagging layer will have configured the correct queue */ + q = BRCM_TAG_GET_QUEUE(queue); + port = BRCM_TAG_GET_PORT(queue); + tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues]; + + return tx_ring->index; +} + +static int bcm_sysport_map_queues(struct net_device *dev, + struct dsa_notifier_register_info *info) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + struct bcm_sysport_tx_ring *ring; + struct net_device *slave_dev; + unsigned int num_tx_queues; + unsigned int q, start, port; + + /* We can't be setting up queue inspection for non directly attached + * switches + */ + if (info->switch_number) + return 0; + + port = info->port_number; + slave_dev = info->info.dev; + + /* On SYSTEMPORT Lite we have twice as less queues, so we cannot do a + * 1:1 mapping, we can only do a 2:1 mapping. By reducing the number of + * per-port (slave_dev) network devices queue, we achieve just that. + * This need to happen now before any slave network device is used such + * it accurately reflects the number of real TX queues. 
+ */ + if (priv->is_lite) + netif_set_real_num_tx_queues(slave_dev, + slave_dev->num_tx_queues / 2); + num_tx_queues = slave_dev->real_num_tx_queues; + + if (priv->per_port_num_tx_queues && + priv->per_port_num_tx_queues != num_tx_queues) + netdev_warn(slave_dev, "asymetric number of per-port queues\n"); + + priv->per_port_num_tx_queues = num_tx_queues; + + start = find_first_zero_bit(&priv->queue_bitmap, dev->num_tx_queues); + for (q = 0; q < num_tx_queues; q++) { + ring = &priv->tx_rings[q + start]; + + /* Just remember the mapping actual programming done + * during bcm_sysport_init_tx_ring + */ + ring->switch_queue = q; + ring->switch_port = port; + priv->ring_map[q + port * num_tx_queues] = ring; + + /* Set all queues as being used now */ + set_bit(q + start, &priv->queue_bitmap); + } + + return 0; +} + +static int bcm_sysport_dsa_notifier(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct dsa_notifier_register_info *info; + + if (event != DSA_PORT_REGISTER) + return NOTIFY_DONE; + + info = ptr; + + return notifier_from_errno(bcm_sysport_map_queues(info->master, info)); +} + static const struct net_device_ops bcm_sysport_netdev_ops = { .ndo_start_xmit = bcm_sysport_xmit, .ndo_tx_timeout = bcm_sysport_tx_timeout, @@ -2023,6 +2117,7 @@ static const struct net_device_ops bcm_sysport_netdev_ops = { .ndo_poll_controller = bcm_sysport_poll_controller, #endif .ndo_get_stats64 = bcm_sysport_get_stats64, + .ndo_select_queue = bcm_sysport_select_queue, }; #define REV_FMT "v%2x.%02x" @@ -2172,10 +2267,18 @@ static int bcm_sysport_probe(struct platform_device *pdev) u64_stats_init(&priv->syncp); + priv->dsa_notifier.notifier_call = bcm_sysport_dsa_notifier; + + ret = register_dsa_notifier(&priv->dsa_notifier); + if (ret) { + dev_err(&pdev->dev, "failed to register DSA notifier\n"); + goto err_deregister_fixed_link; + } + ret = register_netdev(dev); if (ret) { dev_err(&pdev->dev, "failed to register net_device\n"); - goto err_deregister_fixed_link; + goto err_deregister_notifier; } priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK; @@ -2188,6 +2291,8 @@ static int bcm_sysport_probe(struct platform_device *pdev) return 0; +err_deregister_notifier: + unregister_dsa_notifier(&priv->dsa_notifier); err_deregister_fixed_link: if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); @@ -2199,11 +2304,13 @@ err_free_netdev: static int bcm_sysport_remove(struct platform_device *pdev) { struct net_device *dev = dev_get_drvdata(&pdev->dev); + struct bcm_sysport_priv *priv = netdev_priv(dev); struct device_node *dn = pdev->dev.of_node; /* Not much to do, ndo_close has been called * and we use managed allocations */ + unregister_dsa_notifier(&priv->dsa_notifier); unregister_netdev(dev); if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 82e401df199e..82f70a6783cb 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -404,7 +404,7 @@ struct bcm_rsb { #define RING_CONS_INDEX_MASK 0xffff #define RING_MAPPING 0x14 -#define RING_QID_MASK 0x3 +#define RING_QID_MASK 0x7 #define RING_PORT_ID_SHIFT 3 #define RING_PORT_ID_MASK 0x7 #define RING_IGNORE_STATUS (1 << 6) @@ -712,6 +712,8 @@ struct bcm_sysport_tx_ring { struct bcm_sysport_priv *priv; /* private context backpointer */ unsigned long packets; /* packets statistics */ unsigned long bytes; /* bytes statistics */ + unsigned int switch_queue; /* switch port 
queue number */ + unsigned int switch_port; /* switch port queue number */ }; /* Driver private structure */ @@ -765,5 +767,12 @@ struct bcm_sysport_priv { /* For atomic update generic 64bit value on 32bit Machine */ struct u64_stats_sync syncp; + + /* map information between switch port queues and local queues */ + struct notifier_block dsa_notifier; + unsigned int per_port_num_tx_queues; + unsigned long queue_bitmap; + struct bcm_sysport_tx_ring *ring_map[DSA_MAX_PORTS * 8]; + }; #endif /* __BCM_SYSPORT_H */ From 32e47ff0cd236d5fcab8ffcce85d1dcded663f61 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 10:57:51 -0700 Subject: [PATCH 0872/2177] net: dsa: bcm_sf2: Turn on ACB at the switch level Turn on the out of band Advanced Congestion Buffering (ACB) mechanism at the switch level now that we have properly established the queue mapping between the switch egress queues and the SYSTEMPORT egress queues. This allows the switch to correctly backpressure the host system when one of its queue drops below the configured thresholds. This is also helping achieve so called "lossless" behavior by adapting the TX interrupt pacing to the actual speed and capacity of the switch port. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 30 ++++++++++++++++++++++++++++++ drivers/net/dsa/bcm_sf2_regs.h | 23 +++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 7aecc98d0a18..32025b990437 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -205,6 +205,19 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, if (port == priv->moca_port) bcm_sf2_port_intr_enable(priv, port); + /* Set per-queue pause threshold to 32 */ + core_writel(priv, 32, CORE_TXQ_THD_PAUSE_QN_PORT(port)); + + /* Set ACB threshold to 24 */ + for (i = 0; i < SF2_NUM_EGRESS_QUEUES; i++) { + reg = acb_readl(priv, ACB_QUEUE_CFG(port * + SF2_NUM_EGRESS_QUEUES + i)); + reg &= ~XOFF_THRESHOLD_MASK; + reg |= 24; + acb_writel(priv, reg, ACB_QUEUE_CFG(port * + SF2_NUM_EGRESS_QUEUES + i)); + } + return b53_enable_port(ds, port, phy); } @@ -613,6 +626,20 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, status->pause = 1; } +static void bcm_sf2_enable_acb(struct dsa_switch *ds) +{ + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + u32 reg; + + /* Enable ACB globally */ + reg = acb_readl(priv, ACB_CONTROL); + reg |= (ACB_FLUSH_MASK << ACB_FLUSH_SHIFT); + acb_writel(priv, reg, ACB_CONTROL); + reg &= ~(ACB_FLUSH_MASK << ACB_FLUSH_SHIFT); + reg |= ACB_EN | ACB_ALGORITHM; + acb_writel(priv, reg, ACB_CONTROL); +} + static int bcm_sf2_sw_suspend(struct dsa_switch *ds) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); @@ -655,6 +682,8 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) bcm_sf2_imp_setup(ds, port); } + bcm_sf2_enable_acb(ds); + return 0; } @@ -766,6 +795,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) } bcm_sf2_sw_configure_vlan(ds); + bcm_sf2_enable_acb(ds); return 0; } diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index d8b8074a47b9..d1596dfca323 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -115,6 +115,24 @@ enum bcm_sf2_reg_offs { #define P7_IRQ_OFF 0 #define P_IRQ_OFF(x) ((6 - (x)) * P_NUM_IRQ) +/* Register set relative to 'ACB' */ +#define ACB_CONTROL 0x00 +#define ACB_EN (1 << 0) +#define ACB_ALGORITHM (1 << 1) +#define ACB_FLUSH_SHIFT 2 +#define 
ACB_FLUSH_MASK 0x3 + +#define ACB_QUEUE_0_CFG 0x08 +#define XOFF_THRESHOLD_MASK 0x7ff +#define XON_EN (1 << 11) +#define TOTAL_XOFF_THRESHOLD_SHIFT 12 +#define TOTAL_XOFF_THRESHOLD_MASK 0x7ff +#define TOTAL_XOFF_EN (1 << 23) +#define TOTAL_XON_EN (1 << 24) +#define PKTLEN_SHIFT 25 +#define PKTLEN_MASK 0x3f +#define ACB_QUEUE_CFG(x) (ACB_QUEUE_0_CFG + ((x) * 0x4)) + /* Register set relative to 'CORE' */ #define CORE_G_PCTL_PORT0 0x00000 #define CORE_G_PCTL_PORT(x) (CORE_G_PCTL_PORT0 + (x * 0x4)) @@ -237,6 +255,11 @@ enum bcm_sf2_reg_offs { #define CORE_PORT_VLAN_CTL_PORT(x) (0xc400 + ((x) * 0x8)) #define PORT_VLAN_CTRL_MASK 0x1ff +#define CORE_TXQ_THD_PAUSE_QN_PORT_0 0x2c80 +#define TXQ_PAUSE_THD_MASK 0x7ff +#define CORE_TXQ_THD_PAUSE_QN_PORT(x) (CORE_TXQ_THD_PAUSE_QN_PORT_0 + \ + (x) * 0x8) + #define CORE_DEFAULT_1Q_TAG_P(x) (0xd040 + ((x) * 8)) #define CFI_SHIFT 12 #define PRI_SHIFT 13 From 723934fb792f2dbc76ee3ac334fcde95136bf409 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 10:57:52 -0700 Subject: [PATCH 0873/2177] net: systemport: Turn on ACB at the SYSTEMPORT level Now that we have established the queue mapping between the switch port egress queues and the SYSTEMPORT egress queues, we can turn on Advanced Congestion Buffering (ACB) at the SYSTEMPORT level. This enables the Ethernet MAC controller to get out of band flow control information directly from the switch port and queue that it monitors such that its internal TDMA can be appropriately backpressured. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 78bed9a84e81..dafc26690555 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1422,10 +1422,14 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, reg &= ~(RING_QID_MASK | RING_PORT_ID_MASK << RING_PORT_ID_SHIFT); reg |= ring->switch_queue & RING_QID_MASK; reg |= ring->switch_port << RING_PORT_ID_SHIFT; - reg |= RING_IGNORE_STATUS; tdma_writel(priv, reg, TDMA_DESC_RING_MAPPING(index)); tdma_writel(priv, 0, TDMA_DESC_RING_PCP_DEI_VID(index)); + /* Enable ACB algorithm 2 */ + reg = tdma_readl(priv, TDMA_CONTROL); + reg |= tdma_control_bit(priv, ACB_ALGO); + tdma_writel(priv, reg, TDMA_CONTROL); + /* Do not use tdma_control_bit() here because TSB_SWAP1 collides * with the original definition of ACB_ALGO */ From d921c420d2efa5731011b922e5a2f2ad33203c0b Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 11 Oct 2017 13:47:22 +0200 Subject: [PATCH 0874/2177] net/smc: replace function pointer get_netdev() SMC should not open code the function pointer get_netdev of the IB device. Replacing ib_query_gid(..., NULL) with ib_query_gid(..., gid_attr) allows access to the netdev. Signed-off-by: Ursula Braun Suggested-by: Parav Pandit Reviewed-by: Parav Pandit Signed-off-by: David S. 
Miller --- net/smc/smc_ib.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0b5852299158..468e1d725d97 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -369,26 +369,17 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) { - struct net_device *ndev; + struct ib_gid_attr gattr; int rc; rc = ib_query_gid(smcibdev->ibdev, ibport, 0, - &smcibdev->gid[ibport - 1], NULL); - /* the SMC protocol requires specification of the roce MAC address; - * if net_device cannot be determined, it can be derived from gid 0 - */ - ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport); - if (ndev) { - memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN); - dev_put(ndev); - } else if (!rc) { - memcpy(&smcibdev->mac[ibport - 1][0], - &smcibdev->gid[ibport - 1].raw[8], 3); - memcpy(&smcibdev->mac[ibport - 1][3], - &smcibdev->gid[ibport - 1].raw[13], 3); - smcibdev->mac[ibport - 1][0] &= ~0x02; - } - return rc; + &smcibdev->gid[ibport - 1], &gattr); + if (rc || !gattr.ndev) + return -ENODEV; + + memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); + dev_put(gattr.ndev); + return 0; } /* Create an identifier unique for this instance of SMC-R. @@ -419,6 +410,7 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) &smcibdev->pattr[ibport - 1]); if (rc) goto out; + /* the SMC protocol requires specification of the RoCE MAC address */ rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); if (rc) goto out; From 43e2ada3e06aefe3596be75bd05b34ef14fd1f7c Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 11 Oct 2017 13:47:23 +0200 Subject: [PATCH 0875/2177] net/smc: dev_put for netdev after usage of ib_query_gid() For RoCEs ib_query_gid() takes a reference count on the net_device. This reference count must be decreased by the caller. Signed-off-by: Ursula Braun Reported-by: Parav Pandit Reviewed-by: Parav Pandit Fixes: 0cfdd8f92cac ("smc: connection and link group creation") Signed-off-by: David S. Miller --- net/smc/smc_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 20b66e79c5d6..5f6a20084157 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -380,10 +380,14 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, &gattr)) continue; - if (gattr.ndev && - (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) { - lnk->gid = gid; - return 0; + if (gattr.ndev) { + if (is_vlan_dev(gattr.ndev) && + vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { + lnk->gid = gid; + dev_put(gattr.ndev); + return 0; + } + dev_put(gattr.ndev); } } return -ENODEV; From 0eb16f82ecd40d4ce344ec882d6fb5c330f8ef39 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Oct 2017 15:55:31 +0200 Subject: [PATCH 0876/2177] ip_tunnel: fix building with NET_IP_TUNNEL=m When af_mpls is built-in but the tunnel support is a module, we get a link failure: net/mpls/af_mpls.o: In function `mpls_init': af_mpls.c:(.init.text+0xdc): undefined reference to `ip_tunnel_encap_add_ops' This adds a Kconfig statement to prevent the broken configuration and force mpls to be a module as well in this case. Fixes: bdc476413dcd ("ip_tunnel: add mpls over gre support") Signed-off-by: Arnd Bergmann Acked-by: Amine Kherbouche Signed-off-by: David S. 
Miller --- net/mpls/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index 5c467ef97311..801ea9098387 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -24,6 +24,7 @@ config NET_MPLS_GSO config MPLS_ROUTING tristate "MPLS: routing support" + depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n ---help--- Add support for forwarding of mpls packets. From ad2d116c5242875bba27522682ec5ba7f0df75f0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 11:14:49 -0700 Subject: [PATCH 0877/2177] sched: tc_mirred: Remove whitespaces This file contains unnecessary whitespaces as newlines, remove them, found by looking at what struct tc_mirred looks like. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_mirred.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h index 3d7a2b352a62..69038c29e8a9 100644 --- a/include/uapi/linux/tc_act/tc_mirred.h +++ b/include/uapi/linux/tc_act/tc_mirred.h @@ -9,13 +9,13 @@ #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */ #define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/ #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */ - + struct tc_mirred { tc_gen; int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ __u32 ifindex; /* ifindex of egress port */ }; - + enum { TCA_MIRRED_UNSPEC, TCA_MIRRED_TM, @@ -24,5 +24,5 @@ enum { __TCA_MIRRED_MAX }; #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1) - + #endif From 47f25464122bd7aebba35bfb0a26ee24d8026885 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Wed, 11 Oct 2017 20:26:58 +0100 Subject: [PATCH 0878/2177] vxge: Clean up unused variables in vxge-traffic Delete unused channel variables in vxge-traffic. Signed-off-by: Christos Gkekas Signed-off-by: David S. Miller --- .../net/ethernet/neterion/vxge/vxge-traffic.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c index 5f630a24e491..0c3b5dea2858 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c @@ -1209,9 +1209,6 @@ void vxge_hw_ring_rxd_pre_post(struct __vxge_hw_ring *ring, void *rxdh) void vxge_hw_ring_rxd_post_post(struct __vxge_hw_ring *ring, void *rxdh) { struct vxge_hw_ring_rxd_1 *rxdp = (struct vxge_hw_ring_rxd_1 *)rxdh; - struct __vxge_hw_channel *channel; - - channel = &ring->channel; rxdp->control_0 = VXGE_HW_RING_RXD_LIST_OWN_ADAPTER; @@ -1359,11 +1356,8 @@ exit: enum vxge_hw_status vxge_hw_ring_handle_tcode( struct __vxge_hw_ring *ring, void *rxdh, u8 t_code) { - struct __vxge_hw_channel *channel; enum vxge_hw_status status = VXGE_HW_OK; - channel = &ring->channel; - /* If the t_code is not supported and if the * t_code is other than 0x5 (unparseable packet * such as unknown UPV6 header), Drop it !!! 
@@ -1399,10 +1393,6 @@ exit: static void __vxge_hw_non_offload_db_post(struct __vxge_hw_fifo *fifo, u64 txdl_ptr, u32 num_txds, u32 no_snoop) { - struct __vxge_hw_channel *channel; - - channel = &fifo->channel; - writeq(VXGE_HW_NODBW_TYPE(VXGE_HW_NODBW_TYPE_NODBW) | VXGE_HW_NODBW_LAST_TXD_NUMBER(num_txds) | VXGE_HW_NODBW_GET_NO_SNOOP(no_snoop), @@ -1506,9 +1496,6 @@ void vxge_hw_fifo_txdl_buffer_set(struct __vxge_hw_fifo *fifo, { struct __vxge_hw_fifo_txdl_priv *txdl_priv; struct vxge_hw_fifo_txd *txdp, *txdp_last; - struct __vxge_hw_channel *channel; - - channel = &fifo->channel; txdl_priv = __vxge_hw_fifo_txdl_priv(fifo, txdlh); txdp = (struct vxge_hw_fifo_txd *)txdlh + txdl_priv->frags; @@ -1554,9 +1541,6 @@ void vxge_hw_fifo_txdl_post(struct __vxge_hw_fifo *fifo, void *txdlh) struct __vxge_hw_fifo_txdl_priv *txdl_priv; struct vxge_hw_fifo_txd *txdp_last; struct vxge_hw_fifo_txd *txdp_first; - struct __vxge_hw_channel *channel; - - channel = &fifo->channel; txdl_priv = __vxge_hw_fifo_txdl_priv(fifo, txdlh); txdp_first = txdlh; @@ -1672,10 +1656,7 @@ enum vxge_hw_status vxge_hw_fifo_handle_tcode(struct __vxge_hw_fifo *fifo, void *txdlh, enum vxge_hw_fifo_tcode t_code) { - struct __vxge_hw_channel *channel; - enum vxge_hw_status status = VXGE_HW_OK; - channel = &fifo->channel; if (((t_code & 0x7) < 0) || ((t_code & 0x7) > 0x4)) { status = VXGE_HW_ERR_INVALID_TCODE; From 8f04748016f3b583e675e0f649d42cfc10812a8b Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 11 Oct 2017 10:50:29 -0400 Subject: [PATCH 0879/2177] net sched actions: change IFE modules alias names Make style of module alias name consistent with other subsystems in kernel, for example net devices. Fixes: 084e2f6566d2 ("Support to encoding decoding skb mark on IFE action") Fixes: 200e10f46936 ("Support to encoding decoding skb prio on IFE action") Fixes: 408fbc22ef1e ("net sched ife action: Introduce skb tcindex metadata encap decap") Signed-off-by: Roman Mashak Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_ife.h | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_meta_mark.c | 2 +- net/sched/act_meta_skbprio.c | 2 +- net/sched/act_meta_skbtcindex.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 30ba459ddd34..104578f16062 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -40,7 +40,7 @@ struct tcf_meta_ops { struct module *owner; }; -#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ifemeta" __stringify_1(metan)) +#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ife-meta-" metan) int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 8ccd35825b6b..791aeee11c7e 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -263,7 +263,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (exists) spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); - request_module("ifemeta%u", metaid); + request_module("ife-meta-%u", metaid); rtnl_lock(); if (exists) spin_lock_bh(&ife->tcf_lock); diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c index 82892170ce4f..1e3f10e5da99 100644 --- a/net/sched/act_meta_mark.c +++ b/net/sched/act_meta_mark.c @@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2015)"); MODULE_DESCRIPTION("Inter-FE skb mark metadata module"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_SKBMARK); +MODULE_ALIAS_IFE_META("skbmark"); diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c index 26bf4d86030b..4033f9fc4d4a 100644 --- a/net/sched/act_meta_skbprio.c +++ b/net/sched/act_meta_skbprio.c @@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2015)"); MODULE_DESCRIPTION("Inter-FE skb prio metadata action"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_PRIO); +MODULE_ALIAS_IFE_META("skbprio"); diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c index 3b35774ce890..2ea1f26c9e96 100644 --- a/net/sched/act_meta_skbtcindex.c +++ b/net/sched/act_meta_skbtcindex.c @@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2016)"); MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); +MODULE_ALIAS_IFE_META("tcindex"); From d3f24ba895f0bbbc8ab0ecb03de7daa6eccc7ceb Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 11 Oct 2017 10:50:30 -0400 Subject: [PATCH 0880/2177] net sched actions: fix module auto-loading Macro __stringify_1() can stringify a macro argument, however IFE_META_* are enums, so they never expand, however request_module expects an integer in IFE module name, so as a result it always fails to auto-load. Fixes: ef6980b6becb ("introduce IFE action") Signed-off-by: Roman Mashak Acked-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/act_ife.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 791aeee11c7e..e0bc228c1218 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -248,6 +248,20 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) return ret; } +static const char *ife_meta_id2name(u32 metaid) +{ + switch (metaid) { + case IFE_META_SKBMARK: + return "skbmark"; + case IFE_META_PRIO: + return "skbprio"; + case IFE_META_TCINDEX: + return "tcindex"; + default: + return "unknown"; + } +} + /* called when adding new meta information * under ife->tcf_lock for existing action */ @@ -263,7 +277,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (exists) spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); - request_module("ife-meta-%u", metaid); + request_module("ife-meta-%s", ife_meta_id2name(metaid)); rtnl_lock(); if (exists) spin_lock_bh(&ife->tcf_lock); From 734534e9a8e537d33d3598fa03b98eb089819961 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 11 Oct 2017 17:16:06 -0400 Subject: [PATCH 0881/2177] sched: act: ife: move encode/decode check to init This patch adds the check of the two possible ife handlings encode and decode to the init callback. The decode value is for usability aspect and used in userspace code only. The current code offers encode else decode only. This patch avoids any other option than this. Signed-off-by: Alexander Aring Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_ife.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e0bc228c1218..21c10f5d1247 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -464,6 +464,13 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_IFE_PARMS]); + /* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because + * they cannot run as the same time. Check on all other values which + * are not supported right now. + */ + if (parm->flags & ~IFE_ENCODE) + return -EINVAL; + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -786,17 +793,7 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, if (ife->flags & IFE_ENCODE) return tcf_ife_encode(skb, a, res); - if (!(ife->flags & IFE_ENCODE)) - return tcf_ife_decode(skb, a, res); - - pr_info_ratelimited("unknown failure(policy neither de/encode\n"); - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); - tcf_lastuse_update(&ife->tcf_tm); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); - - return TC_ACT_SHOT; + return tcf_ife_decode(skb, a, res); } static int tcf_ife_walker(struct net *net, struct sk_buff *skb, From ced273eacfe1876e2c3c4ea1244a2e386e20eadb Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 11 Oct 2017 17:16:07 -0400 Subject: [PATCH 0882/2177] sched: act: ife: migrate to use per-cpu counters This patch migrates the current counter handling which is protected by a spinlock to a per-cpu counter handling. This reduce the time where the spinlock is being held. Signed-off-by: Alexander Aring Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/act_ife.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 21c10f5d1247..f59d78918cf9 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -477,7 +477,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops, - bind, false); + bind, true); if (ret) return ret; ret = ACT_P_CREATED; @@ -638,19 +638,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u8 *tlv_data; u16 metalen; - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); + bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); - spin_unlock(&ife->tcf_lock); if (skb_at_tc_ingress(skb)) skb_push(skb, skb->dev->hard_header_len); tlv_data = ife_decode(skb, &metalen); if (unlikely(!tlv_data)) { - spin_lock(&ife->tcf_lock); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -668,14 +664,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, */ pr_info_ratelimited("Unknown metaid %d dlen %d\n", mtype, dlen); - ife->tcf_qstats.overlimits++; + qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats)); } } if (WARN_ON(tlv_data != ifehdr_end)) { - spin_lock(&ife->tcf_lock); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -727,23 +721,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, exceed_mtu = true; } - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); + bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (!metalen) { /* no metadata to send */ /* abuse overlimits to count when we allow packet * with no metadata */ - ife->tcf_qstats.overlimits++; - spin_unlock(&ife->tcf_lock); + qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats)); return action; } /* could be stupid policy setup or mtu config * so lets be conservative.. */ if ((action == TC_ACT_SHOT) || exceed_mtu) { - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -752,6 +743,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, ife_meta = ife_encode(skb, metalen); + spin_lock(&ife->tcf_lock); + /* XXX: we dont have a clever way of telling encode to * not repeat some of the computations that are done by * ops->presence_check... @@ -763,8 +756,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, } if (err < 0) { /* too corrupt to keep around if overwritten */ - ife->tcf_qstats.drops++; spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } skboff += err; From aa9fd9a325d51fa0b11153b03b8fefff569fa955 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 11 Oct 2017 17:16:08 -0400 Subject: [PATCH 0883/2177] sched: act: ife: update parameters via rcu handling This patch changes the parameter updating via RCU and not protected by a spinlock anymore. This reduce the time that the spinlock is being held. Signed-off-by: Alexander Aring Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_ife.h | 10 ++++- net/sched/act_ife.c | 85 +++++++++++++++++++++++++------------ 2 files changed, 66 insertions(+), 29 deletions(-) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 104578f16062..c7fb99c3f76c 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -6,12 +6,18 @@ #include #include -struct tcf_ife_info { - struct tc_action common; +struct tcf_ife_params { u8 eth_dst[ETH_ALEN]; u8 eth_src[ETH_ALEN]; u16 eth_type; u16 flags; + + struct rcu_head rcu; +}; + +struct tcf_ife_info { + struct tc_action common; + struct tcf_ife_params __rcu *params; /* list of metaids allowed */ struct list_head metalist; }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index f59d78918cf9..252ee7d8c731 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -406,10 +406,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind) static void tcf_ife_cleanup(struct tc_action *a, int bind) { struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p; spin_lock_bh(&ife->tcf_lock); _tcf_ife_cleanup(a, bind); spin_unlock_bh(&ife->tcf_lock); + + p = rcu_dereference_protected(ife->params, 1); + kfree_rcu(p, rcu); } /* under ife->tcf_lock for existing action */ @@ -446,6 +450,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; + struct tcf_ife_params *p, *p_old; struct tcf_ife_info *ife; u16 ife_type = ETH_P_IFE; struct tc_ife *parm; @@ -471,24 +476,34 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (parm->flags & ~IFE_ENCODE) return -EINVAL; + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + exists = tcf_idr_check(tn, parm->index, a, bind); - if (exists && bind) + if (exists && bind) { + kfree(p); return 0; + } if (!exists) { ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops, bind, true); - if (ret) + if (ret) { + kfree(p); return ret; + } ret = ACT_P_CREATED; } else { tcf_idr_release(*a, bind); - if (!ovr) + if (!ovr) { + kfree(p); return -EEXIST; + } } ife = to_ife(*a); - ife->flags = parm->flags; + p->flags = parm->flags; if (parm->flags & IFE_ENCODE) { if (tb[TCA_IFE_TYPE]) @@ -499,24 +514,25 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, saddr = nla_data(tb[TCA_IFE_SMAC]); } - if (exists) - spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { if (daddr) - ether_addr_copy(ife->eth_dst, daddr); + ether_addr_copy(p->eth_dst, daddr); else - eth_zero_addr(ife->eth_dst); + eth_zero_addr(p->eth_dst); if (saddr) - ether_addr_copy(ife->eth_src, saddr); + ether_addr_copy(p->eth_src, saddr); else - eth_zero_addr(ife->eth_src); + eth_zero_addr(p->eth_src); - ife->eth_type = ife_type; + p->eth_type = ife_type; } + if (exists) + spin_lock_bh(&ife->tcf_lock); + if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&ife->metalist); @@ -532,6 +548,7 @@ metadata_parse_err: if (exists) spin_unlock_bh(&ife->tcf_lock); + kfree(p); return err; } @@ -552,6 +569,7 @@ metadata_parse_err: if (exists) spin_unlock_bh(&ife->tcf_lock); + kfree(p); return err; } } @@ -559,6 +577,11 @@ metadata_parse_err: if (exists) spin_unlock_bh(&ife->tcf_lock); + p_old = rtnl_dereference(ife->params); + rcu_assign_pointer(ife->params, p); + if (p_old) + kfree_rcu(p_old, rcu); + if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -570,12 +593,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action 
*a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p = rtnl_dereference(ife->params); struct tc_ife opt = { .index = ife->tcf_index, .refcnt = ife->tcf_refcnt - ref, .bindcnt = ife->tcf_bindcnt - bind, .action = ife->tcf_action, - .flags = ife->flags, + .flags = p->flags, }; struct tcf_t t; @@ -586,17 +610,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD)) goto nla_put_failure; - if (!is_zero_ether_addr(ife->eth_dst)) { - if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst)) + if (!is_zero_ether_addr(p->eth_dst)) { + if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst)) goto nla_put_failure; } - if (!is_zero_ether_addr(ife->eth_src)) { - if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src)) + if (!is_zero_ether_addr(p->eth_src)) { + if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src)) goto nla_put_failure; } - if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type)) + if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type)) goto nla_put_failure; if (dump_metalist(skb, ife)) { @@ -698,7 +722,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife) } static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) + struct tcf_result *res, struct tcf_ife_params *p) { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; @@ -762,19 +786,18 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, } skboff += err; } + spin_unlock(&ife->tcf_lock); oethh = (struct ethhdr *)skb->data; - if (!is_zero_ether_addr(ife->eth_src)) - ether_addr_copy(oethh->h_source, ife->eth_src); - if (!is_zero_ether_addr(ife->eth_dst)) - ether_addr_copy(oethh->h_dest, ife->eth_dst); - oethh->h_proto = htons(ife->eth_type); + if (!is_zero_ether_addr(p->eth_src)) + ether_addr_copy(oethh->h_source, p->eth_src); + if (!is_zero_ether_addr(p->eth_dst)) + ether_addr_copy(oethh->h_dest, p->eth_dst); + oethh->h_proto = htons(p->eth_type); if (skb_at_tc_ingress(skb)) skb_pull(skb, skb->dev->hard_header_len); - spin_unlock(&ife->tcf_lock); - return action; } @@ -782,9 +805,17 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p; + int ret; - if (ife->flags & IFE_ENCODE) - return tcf_ife_encode(skb, a, res); + rcu_read_lock(); + p = rcu_dereference(ife->params); + if (p->flags & IFE_ENCODE) { + ret = tcf_ife_encode(skb, a, res, p); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); return tcf_ife_decode(skb, a, res); } From 32c10bbfe914c76d8802be33c97b59be9582df1b Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 11 Oct 2017 17:16:26 -0700 Subject: [PATCH 0884/2177] ipvlan: always use the current L2 addr of the master If the underlying master ever changes its L2 (e.g. bonding device), then make sure that the IPvlan slaves always emit packets with the current L2 of the master instead of the stale mac addr which was copied during the device creation. The problem can be seen with following script - #!/bin/bash # Create a vEth pair ip link add dev veth0 type veth peer name veth1 ip link set veth0 up ip link set veth1 up ip link show veth0 ip link show veth1 # Create an IPvlan device on one end of this vEth pair. ip link add link veth0 dev ipvl0 type ipvlan mode l2 ip link show ipvl0 # Change the mac-address of the vEth master. 
ip link set veth0 address 02:11:22:33:44:55 Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 57c3856bab05..3cf67db513e2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -407,7 +407,7 @@ static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, * while the packets use the mac-addr on the physical device. */ return dev_hard_header(skb, phy_dev, type, daddr, - saddr ? : dev->dev_addr, len); + saddr ? : phy_dev->dev_addr, len); } static const struct header_ops ipvlan_header_ops = { @@ -730,6 +730,11 @@ static int ipvlan_device_event(struct notifier_block *unused, ipvlan_adjust_mtu(ipvlan, dev); break; + case NETDEV_CHANGEADDR: + list_for_each_entry(ipvlan, &port->ipvlans, pnode) + ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr); + break; + case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlying device to change its type. */ return NOTIFY_BAD; From 2355a6546a053b1c16ebefd6ce1f0cccc00e1da5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 12 Oct 2017 10:21:25 +0200 Subject: [PATCH 0885/2177] net: phy: broadcom: support new device flag for setting master mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of Broadcom's PHYs run by default in slave mode with Automatic Slave/Master configuration disabled. It stops them from working properly with some devices. So far it has been verified for BCM54210E and BCM50212E which don't work well with Intel's I217-LM and I218-LM: http://ark.intel.com/products/60019/Intel-Ethernet-Connection-I217-LM http://ark.intel.com/products/71307/Intel-Ethernet-Connection-I218-LM I was told there is massive ping loss. This commit adds support for a new flag which can be set by an ethernet driver to fixup PHY setup. Signed-off-by: Rafał Miłecki Signed-off-by: David S. 
Miller --- drivers/net/phy/broadcom.c | 6 ++++++ include/linux/brcmphy.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 1e9ad30a35c8..d7ed69deabfb 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -43,6 +43,12 @@ static int bcm54210e_config_init(struct phy_device *phydev) val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); + if (phydev->dev_flags & PHY_BRCM_EN_MASTER_MODE) { + val = phy_read(phydev, MII_CTRL1000); + val |= CTL1000_AS_MASTER | CTL1000_ENABLE_MASTER; + phy_write(phydev, MII_CTRL1000, val); + } + return 0; } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index abcda9b458ab..9ac9e3e3d1e5 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -63,6 +63,7 @@ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 +#define PHY_BRCM_EN_MASTER_MODE 0x00010000 /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) From 12acd136913ccdf394eeb2bc8686ff5505368119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 12 Oct 2017 10:21:26 +0200 Subject: [PATCH 0886/2177] net: bgmac: enable master mode for BCM54210E and B50212E PHYs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are 4 very similar PHYs: 0x600d84a1: BCM54210E (rev B0) 0x600d84a2: BCM54210E (rev B1) 0x600d84a5: B50212E (rev B0) 0x600d84a6: B50212E (rev B1) that need setting master mode manually. It's because they run in slave mode by default with Automatic Slave/Master configuration disabled which can lead to unreliable connection with massive ping loss. So far it was reported for a board with BCM47189 SoC and B50212E B1 PHY connected to the bgmac supported ethernet device. Telling PHY driver to setup PHY properly solves this issue. Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac-bcma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 6322594ab260..6fe074c1588b 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -184,13 +184,19 @@ static int bgmac_probe(struct bcma_device *core) if (!bgmac_is_bcm4707_family(core) && !(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) { + struct phy_device *phydev; + mii_bus = bcma_mdio_mii_register(bgmac); if (IS_ERR(mii_bus)) { err = PTR_ERR(mii_bus); goto err; } - bgmac->mii_bus = mii_bus; + + phydev = mdiobus_get_phy(bgmac->mii_bus, bgmac->phyaddr); + if (ci->id == BCMA_CHIP_ID_BCM53573 && phydev && + (phydev->drv->phy_id & phydev->drv->phy_id_mask) == PHY_ID_BCM54210E) + phydev->dev_flags |= PHY_BRCM_EN_MASTER_MODE; } if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) { From ab104615e01c2c4cbe9ea4073a430d51f6547dd2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 12 Oct 2017 10:24:53 +0200 Subject: [PATCH 0887/2177] ravb: Consolidate clock handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The module clock is used for two purposes: - Wake-on-LAN (WoL), which is optional, - gPTP Timer Increment (GTI) configuration, which is mandatory. As the clock is needed for GTI configuration anyway, WoL is always available. 
Hence remove duplication and repeated obtaining of the clock by making GTI use the stored clock for WoL use. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 35 +++++++----------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index a8822a756e08..2b962d349f5f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1354,20 +1354,15 @@ static void ravb_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct ravb_private *priv = netdev_priv(ndev); - wol->supported = 0; - wol->wolopts = 0; - - if (priv->clk) { - wol->supported = WAKE_MAGIC; - wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0; - } + wol->supported = WAKE_MAGIC; + wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0; } static int ravb_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct ravb_private *priv = netdev_priv(ndev); - if (!priv->clk || wol->wolopts & ~WAKE_MAGIC) + if (wol->wolopts & ~WAKE_MAGIC) return -EOPNOTSUPP; priv->wol_enabled = !!(wol->wolopts & WAKE_MAGIC); @@ -1962,22 +1957,12 @@ MODULE_DEVICE_TABLE(of, ravb_match_table); static int ravb_set_gti(struct net_device *ndev) { - + struct ravb_private *priv = netdev_priv(ndev); struct device *dev = ndev->dev.parent; - struct device_node *np = dev->of_node; unsigned long rate; - struct clk *clk; uint64_t inc; - clk = of_clk_get(np, 0); - if (IS_ERR(clk)) { - dev_err(dev, "could not get clock\n"); - return PTR_ERR(clk); - } - - rate = clk_get_rate(clk); - clk_put(clk); - + rate = clk_get_rate(priv->clk); if (!rate) return -EINVAL; @@ -2126,10 +2111,11 @@ static int ravb_probe(struct platform_device *pdev) priv->chip_id = chip_id; - /* Get clock, if not found that's OK but Wake-On-Lan is unavailable */ priv->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(priv->clk)) - priv->clk = NULL; + if (IS_ERR(priv->clk)) { + error = PTR_ERR(priv->clk); + goto out_release; + } /* Set function */ ndev->netdev_ops = &ravb_netdev_ops; @@ -2197,8 +2183,7 @@ static int ravb_probe(struct platform_device *pdev) if (error) goto out_napi_del; - if (priv->clk) - device_set_wakeup_capable(&pdev->dev, 1); + device_set_wakeup_capable(&pdev->dev, 1); /* Print device information */ netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n", From 2d0d21c12dfa3851620f1fa9fe2d444538f1fad4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 Oct 2017 11:11:22 +0200 Subject: [PATCH 0888/2177] selftests: rtnetlink: add a small macsec test case Signed-off-by: Florian Westphal Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 42 ++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index a8a8cdf726b2..5215493166c9 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -461,6 +461,47 @@ kci_test_encap() ip netns del "$testns" } +kci_test_macsec() +{ + msname="test_macsec0" + ret=0 + + ip macsec help 2>&1 | grep -q "^Usage: ip macsec" + if [ $? -ne 0 ]; then + echo "SKIP: macsec: iproute2 too old" + return 0 + fi + + ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on + check_err $? 
+ if [ $ret -ne 0 ];then + echo "FAIL: can't add macsec interface, skipping test" + return 1 + fi + + ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 + check_err $? + + ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" + check_err $? + + ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef + check_err $? + + ip macsec show > /dev/null + check_err $? + + ip link del dev "$msname" + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: macsec" + return 1 + fi + + echo "PASS: macsec" +} + kci_test_rtnl() { kci_add_dummy @@ -478,6 +519,7 @@ kci_test_rtnl() kci_test_ifalias kci_test_vrf kci_test_encap + kci_test_macsec kci_del_dummy } From 0f0a0af826260a267d11a66e4b13543784bc9c56 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Sep 2017 13:01:49 -0700 Subject: [PATCH 0889/2177] bcma: keep *config menu together MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use "if BCMA"/"endif" around all Kconfig symbols so that they are kept together in *config menus instead of showing up in unexpected places. Also remove "depends on BCMA" since this is handled by the "if BCMA" addition. Tested with ARCH={x86_64,MIPS} using make {n,menu,g,x}config. Signed-off-by: Randy Dunlap Cc: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/Kconfig | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig index 54f81c554815..56ee55b66dfe 100644 --- a/drivers/bcma/Kconfig +++ b/drivers/bcma/Kconfig @@ -10,14 +10,15 @@ menuconfig BCMA Bus driver for Broadcom specific Advanced Microcontroller Bus Architecture. +if BCMA + # Support for Block-I/O. SELECT this from the driver that needs it. config BCMA_BLOCKIO bool - depends on BCMA config BCMA_HOST_PCI_POSSIBLE bool - depends on BCMA && PCI = y + depends on PCI = y default y config BCMA_HOST_PCI @@ -28,7 +29,6 @@ config BCMA_HOST_PCI config BCMA_HOST_SOC bool "Support for BCMA in a SoC" - depends on BCMA help Host interface for a Broadcom AIX bus directly mapped into the memory. This only works with the Broadcom SoCs from the @@ -38,7 +38,7 @@ config BCMA_HOST_SOC config BCMA_DRIVER_PCI bool "BCMA Broadcom PCI core driver" - depends on BCMA && PCI + depends on PCI default y help BCMA bus may have many versions of PCIe core. This driver @@ -54,13 +54,13 @@ config BCMA_DRIVER_PCI config BCMA_DRIVER_PCI_HOSTMODE bool "Driver for PCI core working in hostmode" - depends on BCMA && MIPS && BCMA_DRIVER_PCI + depends on MIPS && BCMA_DRIVER_PCI help PCI core hostmode operation (external PCI bus). config BCMA_DRIVER_MIPS bool "BCMA Broadcom MIPS core driver" - depends on BCMA && MIPS + depends on MIPS help Driver for the Broadcom MIPS core attached to Broadcom specific Advanced Microcontroller Bus. @@ -91,7 +91,6 @@ config BCMA_NFLASH config BCMA_DRIVER_GMAC_CMN bool "BCMA Broadcom GBIT MAC COMMON core driver" - depends on BCMA help Driver for the Broadcom GBIT MAC COMMON core attached to Broadcom specific Advanced Microcontroller Bus. @@ -100,7 +99,7 @@ config BCMA_DRIVER_GMAC_CMN config BCMA_DRIVER_GPIO bool "BCMA GPIO driver" - depends on BCMA && GPIOLIB + depends on GPIOLIB select GPIOLIB_IRQCHIP if BCMA_HOST_SOC help Driver to provide access to the GPIO pins of the bcma bus. @@ -109,8 +108,9 @@ config BCMA_DRIVER_GPIO config BCMA_DEBUG bool "BCMA debugging" - depends on BCMA help This turns on additional debugging messages. 
If unsure, say N + +endif # BCMA From 40351051d022762d95142a5a9e37220dbbce39f5 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 3 Oct 2017 20:49:43 +0530 Subject: [PATCH 0890/2177] mwifiex: kill useless list_empty checks There's absolutely no reason to check to see if a list is empty before iterating through it. It's just like writing code like this: if (count != 0) { for (i = 0; i < count; i++) { ... } } The loop will already be avoided if "count == 0" so there was no reason to check. Signed-off-by: Douglas Anderson Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/11n.c | 9 --------- drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 6 ------ drivers/net/wireless/marvell/mwifiex/init.c | 4 ---- drivers/net/wireless/marvell/mwifiex/tdls.c | 7 ------- 4 files changed, 26 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index 725206914911..8772e3949327 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -658,12 +658,6 @@ void mwifiex_11n_delba(struct mwifiex_private *priv, int tid) unsigned long flags; spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); - if (list_empty(&priv->rx_reorder_tbl_ptr)) { - dev_dbg(priv->adapter->dev, - "mwifiex_11n_delba: rx_reorder_tbl_ptr empty\n"); - goto exit; - } - list_for_each_entry(rx_reor_tbl_ptr, &priv->rx_reorder_tbl_ptr, list) { if (rx_reor_tbl_ptr->tid == tid) { dev_dbg(priv->adapter->dev, @@ -854,9 +848,6 @@ mwifiex_send_delba_txbastream_tbl(struct mwifiex_private *priv, u8 tid) struct mwifiex_adapter *adapter = priv->adapter; struct mwifiex_tx_ba_stream_tbl *tx_ba_stream_tbl_ptr; - if (list_empty(&priv->tx_ba_stream_tbl_ptr)) - return; - list_for_each_entry(tx_ba_stream_tbl_ptr, &priv->tx_ba_stream_tbl_ptr, list) { if (tx_ba_stream_tbl_ptr->ba_status == BA_SETUP_COMPLETE) { diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 274dd5a1574a..d87df2dfcfa4 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -835,12 +835,6 @@ void mwifiex_update_rxreor_flags(struct mwifiex_adapter *adapter, u8 flags) continue; spin_lock_irqsave(&priv->rx_reorder_tbl_lock, lock_flags); - if (list_empty(&priv->rx_reorder_tbl_ptr)) { - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, - lock_flags); - continue; - } - list_for_each_entry(tbl, &priv->rx_reorder_tbl_ptr, list) tbl->flags = flags; spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, lock_flags); diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index e11919db7818..1176706ab094 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -579,10 +579,6 @@ static void mwifiex_delete_bss_prio_tbl(struct mwifiex_private *priv) { spin_lock_irqsave(lock, flags); - if (list_empty(head)) { - spin_unlock_irqrestore(lock, flags); - continue; - } list_for_each_entry_safe(bssprio_node, tmp_node, head, list) { if (bssprio_node->priv == priv) { diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index e76af2866a19..9fe0bae957b7 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -1413,13 +1413,6 @@ void mwifiex_check_auto_tdls(unsigned long context) priv->check_tdls_tx = false; - if 
(list_empty(&priv->auto_tdls_list)) { - mod_timer(&priv->auto_tdls_timer, - jiffies + - msecs_to_jiffies(MWIFIEX_TIMER_10S)); - return; - } - spin_lock_irqsave(&priv->auto_tdls_lock, flags); list_for_each_entry(tdls_peer, &priv->auto_tdls_list, list) { if ((jiffies - tdls_peer->rssi_jiffies) > From f0f7c2275fb925ea287e2da0585d17bde3d1fd2e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 3 Oct 2017 20:49:44 +0530 Subject: [PATCH 0891/2177] mwifiex: minor cleanups w/ sta_list_spinlock in cfg80211.c The sta_list_spinlock looks to be used to control locking of the list. Specifically when someone has the lock they may be allowed to modify or delete elements of the list. That implies that we shouldn't access the fields of the elements returned by mwifiex_get_sta_entry() after we've released the spinlock. Let's make some small changes so this is true. It's unlikely that this matters since it looks to be just error handling, but it's nice to be clean. Signed-off-by: Douglas Anderson Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 14 +++++++++----- drivers/net/wireless/marvell/mwifiex/sta_event.c | 6 ++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index cc7d777eb26c..3638b6130216 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -3794,9 +3794,8 @@ mwifiex_cfg80211_tdls_chan_switch(struct wiphy *wiphy, struct net_device *dev, spin_lock_irqsave(&priv->sta_list_spinlock, flags); sta_ptr = mwifiex_get_sta_entry(priv, addr); - spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); - if (!sta_ptr) { + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); wiphy_err(wiphy, "%s: Invalid TDLS peer %pM\n", __func__, addr); return -ENOENT; @@ -3804,15 +3803,18 @@ mwifiex_cfg80211_tdls_chan_switch(struct wiphy *wiphy, struct net_device *dev, if (!(sta_ptr->tdls_cap.extcap.ext_capab[3] & WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH)) { + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); wiphy_err(wiphy, "%pM do not support tdls cs\n", addr); return -ENOENT; } if (sta_ptr->tdls_status == TDLS_CHAN_SWITCHING || sta_ptr->tdls_status == TDLS_IN_OFF_CHAN) { + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); wiphy_err(wiphy, "channel switch is running, abort request\n"); return -EALREADY; } + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); chan = chandef->chan->hw_value; second_chan_offset = mwifiex_get_sec_chan_offset(chan); @@ -3833,18 +3835,20 @@ mwifiex_cfg80211_tdls_cancel_chan_switch(struct wiphy *wiphy, spin_lock_irqsave(&priv->sta_list_spinlock, flags); sta_ptr = mwifiex_get_sta_entry(priv, addr); - spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); - if (!sta_ptr) { + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); wiphy_err(wiphy, "%s: Invalid TDLS peer %pM\n", __func__, addr); } else if (!(sta_ptr->tdls_status == TDLS_CHAN_SWITCHING || sta_ptr->tdls_status == TDLS_IN_BASE_CHAN || sta_ptr->tdls_status == TDLS_IN_OFF_CHAN)) { + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); wiphy_err(wiphy, "tdls chan switch not initialize by %pM\n", addr); - } else + } else { + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); mwifiex_stop_tdls_cs(priv, addr); + } } static int diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index 839df8a9634e..d8db412b76c6 
100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -359,13 +359,12 @@ static void mwifiex_process_uap_tx_pause(struct mwifiex_private *priv, } else { spin_lock_irqsave(&priv->sta_list_spinlock, flags); sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac); - spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); - if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) { sta_ptr->tx_pause = tp->tx_pause; mwifiex_update_ralist_tx_pause(priv, tp->peermac, tp->tx_pause); } + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); } } @@ -396,14 +395,13 @@ static void mwifiex_process_sta_tx_pause(struct mwifiex_private *priv, if (mwifiex_is_tdls_link_setup(status)) { spin_lock_irqsave(&priv->sta_list_spinlock, flags); sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac); - spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); - if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) { sta_ptr->tx_pause = tp->tx_pause; mwifiex_update_ralist_tx_pause(priv, tp->peermac, tp->tx_pause); } + spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); } } } From 2d5cc60949e0d2de3cb05d5772b58a5aa7f4abc3 Mon Sep 17 00:00:00 2001 From: Rohit Fule Date: Wed, 4 Oct 2017 17:36:06 +0530 Subject: [PATCH 0892/2177] mwifiex: double the size of chan_stats array in adapter When a user requests scan, driver sends multiple scan requests to firmware, which might be active or passive. Firmware will send channel statistics for each channel in the request. This will be stored in chan_stats array. Few channels might report hidden SSIDs in passive scan results. So, once the original scan request is finished, driver issues an active scan request for all channels which reported hidden SSIDs. This will cause duplicates in the chan_stats array. At worst, every channel will have a hidden SSID, in which case the driver can issue active scan requests for each channel. So the complete scan statistics size will be twice of existing limit. At present maximum number of channels returned in scan statistics is 31(BG) + 14(A) = 45. Clearly there will be an overflow of the chan_stats array in the above mentioned scenario. To fix this double the size of chan_stats array. Signed-off-by: Rohit Fule Signed-off-by: Mangesh Malusare Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 3638b6130216..6e0d9a9c5cfb 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4205,7 +4205,10 @@ int mwifiex_init_channel_scan_gap(struct mwifiex_adapter *adapter) if (adapter->config_bands & BAND_A) n_channels_a = mwifiex_band_5ghz.n_channels; - adapter->num_in_chan_stats = n_channels_bg + n_channels_a; + /* allocate twice the number total channels, since the driver issues an + * additional active scan request for hidden SSIDs on passive channels. + */ + adapter->num_in_chan_stats = 2 * (n_channels_bg + n_channels_a); adapter->chan_stats = vmalloc(sizeof(*adapter->chan_stats) * adapter->num_in_chan_stats); From 317049204cd3dfd831ffd15691eece3fe9064444 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Fri, 6 Oct 2017 20:34:39 +0530 Subject: [PATCH 0893/2177] mwifiex: Use put_unaligned_le32 Use put_unaligned_le32 rather than using byte ordering function and memcpy which makes code clear. 
Also, add the header file where it is declared. Done using Coccinelle and semantic patch used is : @ rule1 @ identifier tmp; expression ptr,x; type T; @@ - tmp = cpu_to_le32(x); <+... when != tmp - memcpy(ptr, (T)&tmp, ...); + put_unaligned_le32(x,ptr); ...+> @ depends on rule1 @ type j; identifier tmp; @@ - j tmp; ...when != tmp Signed-off-by: Himanshu Jha Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cmdevt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index 0edc5d621304..a9a1a736e6e8 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -17,6 +17,7 @@ * this warranty disclaimer. */ +#include #include "decl.h" #include "ioctl.h" #include "util.h" @@ -183,7 +184,6 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv, uint16_t cmd_code; uint16_t cmd_size; unsigned long flags; - __le32 tmp; if (!adapter || !cmd_node) return -1; @@ -249,9 +249,9 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv, mwifiex_dbg_dump(adapter, CMD_D, "cmd buffer:", host_cmd, cmd_size); if (adapter->iface_type == MWIFIEX_USB) { - tmp = cpu_to_le32(MWIFIEX_USB_TYPE_CMD); skb_push(cmd_node->cmd_skb, MWIFIEX_TYPE_LEN); - memcpy(cmd_node->cmd_skb->data, &tmp, MWIFIEX_TYPE_LEN); + put_unaligned_le32(MWIFIEX_USB_TYPE_CMD, + cmd_node->cmd_skb->data); adapter->cmd_sent = true; ret = adapter->if_ops.host_to_card(adapter, MWIFIEX_USB_EP_CMD_EVENT, @@ -317,7 +317,6 @@ static int mwifiex_dnld_sleep_confirm_cmd(struct mwifiex_adapter *adapter) (struct mwifiex_opt_sleep_confirm *) adapter->sleep_cfm->data; struct sk_buff *sleep_cfm_tmp; - __le32 tmp; priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); @@ -342,8 +341,7 @@ static int mwifiex_dnld_sleep_confirm_cmd(struct mwifiex_adapter *adapter) + MWIFIEX_TYPE_LEN); skb_put(sleep_cfm_tmp, sizeof(struct mwifiex_opt_sleep_confirm) + MWIFIEX_TYPE_LEN); - tmp = cpu_to_le32(MWIFIEX_USB_TYPE_CMD); - memcpy(sleep_cfm_tmp->data, &tmp, MWIFIEX_TYPE_LEN); + put_unaligned_le32(MWIFIEX_USB_TYPE_CMD, sleep_cfm_tmp->data); memcpy(sleep_cfm_tmp->data + MWIFIEX_TYPE_LEN, adapter->sleep_cfm->data, sizeof(struct mwifiex_opt_sleep_confirm)); From f06eb3f9c03eda3bf7e40b49f5a4b032752bb176 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:51 -0500 Subject: [PATCH 0894/2177] rtlwifi: Fix MAX MPDU of VHT capability We must choose only one of VHT_CAP among IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895, IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 and IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454. 
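For reference: the maximum-MPDU-length setting is a two-bit enumerated field in the VHT capabilities word, not three independent flag bits, which is why only one of the values can be kept. A minimal user-space sketch, assuming the macro values defined in include/linux/ieee80211.h (0x0, 0x1, 0x2, with a 0x3 field mask), shows that OR-ing all three produces the reserved encoding 0x3 rather than a union of the three lengths:

/* illustration only; macro values assumed from include/linux/ieee80211.h */
#include <stdio.h>

#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895   0x00000000
#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991   0x00000001
#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454  0x00000002
#define IEEE80211_VHT_CAP_MAX_MPDU_MASK          0x00000003

int main(void)
{
        unsigned int ored = IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 |
                            IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
                            IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454;

        /* prints 0x3 -- the reserved encoding, not "all three supported" */
        printf("0x%x\n", ored & IEEE80211_VHT_CAP_MAX_MPDU_MASK);
        return 0;
}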
Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index ea18aa7afecb..fcf6e31d0fb9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -249,8 +249,6 @@ static void _rtl_init_hw_vht_capab(struct ieee80211_hw *hw, vht_cap->vht_supported = true; vht_cap->cap = - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_TXSTBC | @@ -283,8 +281,6 @@ static void _rtl_init_hw_vht_capab(struct ieee80211_hw *hw, vht_cap->vht_supported = true; vht_cap->cap = - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_TXSTBC | From ecf4000e0d925c6ba074d11801df4a4cdd8d5324 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:52 -0500 Subject: [PATCH 0895/2177] rtlwifi: Remove redundant semicolon in wifi.h. The semicolon can cause compiler error, if it exists in if...else statement. Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/wifi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 1ab1024330fb..90e875beff66 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2857,19 +2857,19 @@ value to host byte ordering.*/ cpu_to_le32( \ LE_BITS_CLEARED_TO_4BYTE(__pstart, __bitoffset, __bitlen) | \ ((((u32)__val) & BIT_LEN_MASK_32(__bitlen)) << (__bitoffset)) \ - ); + ) #define SET_BITS_TO_LE_2BYTE(__pstart, __bitoffset, __bitlen, __val) \ *((__le16 *)(__pstart)) = \ cpu_to_le16( \ LE_BITS_CLEARED_TO_2BYTE(__pstart, __bitoffset, __bitlen) | \ ((((u16)__val) & BIT_LEN_MASK_16(__bitlen)) << (__bitoffset)) \ - ); + ) #define SET_BITS_TO_LE_1BYTE(__pstart, __bitoffset, __bitlen, __val) \ *((u8 *)(__pstart)) = EF1BYTE \ ( \ LE_BITS_CLEARED_TO_1BYTE(__pstart, __bitoffset, __bitlen) | \ ((((u8)__val) & BIT_LEN_MASK_8(__bitlen)) << (__bitoffset)) \ - ); + ) #define N_BYTE_ALIGMENT(__value, __aligment) ((__aligment == 1) ? \ (__value) : (((__value + __aligment - 1) / __aligment) * __aligment)) From 0c07bd7457607b2bf846824424e78703c27ffd92 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:53 -0500 Subject: [PATCH 0896/2177] rtlwifi: rtl8192ee: Make driver support 64bits DMA. 1. Both 32-bit and 64-bit use the same TX/RX buffer desc layout 2. Extend set_desc() and get_desc() to set and get 64-bit address 3. Remove directive DMA_IS_64BIT 4. 
Add module parameter to turn on 64-bit dma Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.c | 49 ++++-- drivers/net/wireless/realtek/rtlwifi/pci.h | 10 +- .../wireless/realtek/rtlwifi/rtl8188ee/hw.c | 1 + .../wireless/realtek/rtlwifi/rtl8188ee/trx.c | 5 +- .../wireless/realtek/rtlwifi/rtl8188ee/trx.h | 3 +- .../wireless/realtek/rtlwifi/rtl8192ce/trx.c | 5 +- .../wireless/realtek/rtlwifi/rtl8192ce/trx.h | 3 +- .../wireless/realtek/rtlwifi/rtl8192de/fw.c | 2 +- .../wireless/realtek/rtlwifi/rtl8192de/trx.c | 3 +- .../wireless/realtek/rtlwifi/rtl8192de/trx.h | 3 +- .../wireless/realtek/rtlwifi/rtl8192ee/hw.c | 31 +++- .../wireless/realtek/rtlwifi/rtl8192ee/sw.c | 3 + .../wireless/realtek/rtlwifi/rtl8192ee/trx.c | 49 +++--- .../wireless/realtek/rtlwifi/rtl8192ee/trx.h | 140 +++--------------- .../wireless/realtek/rtlwifi/rtl8192se/sw.c | 2 +- .../wireless/realtek/rtlwifi/rtl8192se/trx.c | 3 +- .../wireless/realtek/rtlwifi/rtl8192se/trx.h | 3 +- .../wireless/realtek/rtlwifi/rtl8723ae/trx.c | 5 +- .../wireless/realtek/rtlwifi/rtl8723ae/trx.h | 3 +- .../wireless/realtek/rtlwifi/rtl8723be/hw.c | 1 + .../wireless/realtek/rtlwifi/rtl8723be/trx.c | 5 +- .../wireless/realtek/rtlwifi/rtl8723be/trx.h | 3 +- .../realtek/rtlwifi/rtl8723com/fw_common.c | 3 +- .../wireless/realtek/rtlwifi/rtl8821ae/hw.c | 1 + .../wireless/realtek/rtlwifi/rtl8821ae/trx.c | 5 +- .../wireless/realtek/rtlwifi/rtl8821ae/trx.h | 3 +- drivers/net/wireless/realtek/rtlwifi/wifi.h | 11 +- 27 files changed, 166 insertions(+), 189 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 08dc8919ef60..b9a6d23364be 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -586,7 +586,7 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) skb = __skb_dequeue(&ring->queue); pci_unmap_single(rtlpci->pdev, rtlpriv->cfg->ops-> - get_desc((u8 *)entry, true, + get_desc(hw, (u8 *)entry, true, HW_DESC_TXBUFF_ADDR), skb->len, PCI_DMA_TODEVICE); @@ -691,9 +691,10 @@ remap: return 0; rtlpci->rx_ring[rxring_idx].rx_buf[desc_idx] = skb; if (rtlpriv->use_new_trx_flow) { + /* skb->cb may be 64 bit address */ rtlpriv->cfg->ops->set_desc(hw, (u8 *)entry, false, HW_DESC_RX_PREPARE, - (u8 *)&bufferaddress); + (u8 *)(dma_addr_t *)skb->cb); } else { rtlpriv->cfg->ops->set_desc(hw, (u8 *)entry, false, HW_DESC_RXBUFF_ADDR, @@ -798,7 +799,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) pdesc = &rtlpci->rx_ring[rxring_idx].desc[ rtlpci->rx_ring[rxring_idx].idx]; - own = (u8)rtlpriv->cfg->ops->get_desc((u8 *)pdesc, + own = (u8)rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, false, HW_DESC_OWN); if (own) /* wait data to be filled by hardware */ @@ -825,7 +826,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) (u8 *)buffer_desc, hw_queue); - len = rtlpriv->cfg->ops->get_desc((u8 *)pdesc, false, + len = rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, false, HW_DESC_RXPKT_LEN); if (skb->end - skb->tail > len) { @@ -1122,7 +1123,7 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) if (pskb) { pci_unmap_single(rtlpci->pdev, rtlpriv->cfg->ops->get_desc( - (u8 *)entry, true, HW_DESC_TXBUFF_ADDR), + hw, (u8 *)entry, true, HW_DESC_TXBUFF_ADDR), pskb->len, PCI_DMA_TODEVICE); kfree_skb(pskb); } @@ -1378,7 +1379,8 @@ static void _rtl_pci_free_tx_ring(struct ieee80211_hw 
*hw, pci_unmap_single(rtlpci->pdev, rtlpriv->cfg-> - ops->get_desc((u8 *)entry, true, + ops->get_desc(hw, (u8 *)entry, + true, HW_DESC_TXBUFF_ADDR), skb->len, PCI_DMA_TODEVICE); kfree_skb(skb); @@ -1507,7 +1509,7 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw) for (i = 0; i < rtlpci->rxringcount; i++) { entry = &rtlpci->rx_ring[rxring_idx].desc[i]; bufferaddress = - rtlpriv->cfg->ops->get_desc((u8 *)entry, + rtlpriv->cfg->ops->get_desc(hw, (u8 *)entry, false , HW_DESC_RXBUFF_ADDR); memset((u8 *)entry , 0 , sizeof(*rtlpci->rx_ring @@ -1560,7 +1562,7 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw) pci_unmap_single(rtlpci->pdev, rtlpriv->cfg->ops-> - get_desc((u8 *) + get_desc(hw, (u8 *) entry, true, HW_DESC_TXBUFF_ADDR), @@ -1673,7 +1675,7 @@ static int rtl_pci_tx(struct ieee80211_hw *hw, if (rtlpriv->use_new_trx_flow) { ptx_bd_desc = &ring->buffer_desc[idx]; } else { - own = (u8) rtlpriv->cfg->ops->get_desc((u8 *)pdesc, + own = (u8)rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN); if ((own == 1) && (hw_queue != BEACON_QUEUE)) { @@ -2163,6 +2165,21 @@ static int rtl_pci_intr_mode_decide(struct ieee80211_hw *hw) return ret; } +static void platform_enable_dma64(struct pci_dev *pdev, bool dma64) +{ + u8 value; + + pci_read_config_byte(pdev, 0x719, &value); + + /* 0x719 Bit5 is DMA64 bit fetch. */ + if (dma64) + value |= BIT(5); + else + value &= ~BIT(5); + + pci_write_config_byte(pdev, 0x719, value); +} + int rtl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -2181,13 +2198,25 @@ int rtl_pci_probe(struct pci_dev *pdev, return err; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (((struct rtl_hal_cfg *)id->driver_data)->mod_params->dma64 && + !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { + WARN_ONCE(true, + "Unable to obtain 64bit DMA for consistent allocations\n"); + err = -ENOMEM; + goto fail1; + } + + platform_enable_dma64(pdev, true); + } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { WARN_ONCE(true, "rtlwifi: Unable to obtain 32bit DMA for consistent allocations\n"); err = -ENOMEM; goto fail1; } + + platform_enable_dma64(pdev, false); } pci_set_master(pdev); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h index d9039ea10ba4..1af92b34979d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.h +++ b/drivers/net/wireless/realtek/rtlwifi/pci.h @@ -143,13 +143,7 @@ struct rtl_pci_capabilities_header { * RX wifi info == RX descriptor in old flow */ struct rtl_tx_buffer_desc { -#if (RTL8192EE_SEG_NUM == 2) - u32 dword[2*(DMA_IS_64BIT + 1)*8]; /*seg = 8*/ -#elif (RTL8192EE_SEG_NUM == 1) - u32 dword[2*(DMA_IS_64BIT + 1)*4]; /*seg = 4*/ -#elif (RTL8192EE_SEG_NUM == 0) - u32 dword[2*(DMA_IS_64BIT + 1)*2]; /*seg = 2*/ -#endif + u32 dword[4 * (1 << (BUFDESC_SEG_NUM + 1))]; } __packed; struct rtl_tx_desc { @@ -157,7 +151,7 @@ struct rtl_tx_desc { } __packed; struct rtl_rx_buffer_desc { /*rx buffer desc*/ - u32 dword[2]; + u32 dword[4]; } __packed; struct rtl_rx_desc { /*old: rx desc new: rx wifi info*/ diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 0ba26d27d11c..5b939935eb56 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -99,6 +99,7 @@ static void _rtl88ee_return_beacon_queue_skb(struct ieee80211_hw *hw) 
pci_unmap_single(rtlpci->pdev, rtlpriv->cfg->ops->get_desc( + hw, (u8 *)entry, true, HW_DESC_TXBUFF_ADDR), skb->len, PCI_DMA_TODEVICE); kfree_skb(skb); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c index dd3e12b74447..9670732b2bc6 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c @@ -786,7 +786,8 @@ void rtl88ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, } } -u32 rtl88ee_get_desc(u8 *pdesc, bool istx, u8 desc_name) +u64 rtl88ee_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name) { u32 ret = 0; @@ -828,7 +829,7 @@ bool rtl88ee_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, u16 index) struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; u8 *entry = (u8 *)(&ring->desc[ring->idx]); - u8 own = (u8)rtl88ee_get_desc(entry, true, HW_DESC_OWN); + u8 own = (u8)rtl88ee_get_desc(hw, entry, true, HW_DESC_OWN); /*beacon packet will only use the first *descriptor defautly,and the own may not diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h index 9a1c2087adee..f902d6769aa8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h @@ -782,7 +782,8 @@ bool rtl88ee_rx_query_desc(struct ieee80211_hw *hw, u8 *pdesc, struct sk_buff *skb); void rtl88ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); -u32 rtl88ee_get_desc(u8 *pdesc, bool istx, u8 desc_name); +u64 rtl88ee_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name); bool rtl88ee_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, u16 index); void rtl88ee_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c index 94a4b39437cd..d36e0060cc7a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c @@ -697,7 +697,8 @@ void rtl92ce_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, } } -u32 rtl92ce_get_desc(u8 *p_desc, bool istx, u8 desc_name) +u64 rtl92ce_get_desc(struct ieee80211_hw *hw, u8 *p_desc, + bool istx, u8 desc_name) { u32 ret = 0; @@ -740,7 +741,7 @@ bool rtl92ce_is_tx_desc_closed(struct ieee80211_hw *hw, struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; u8 *entry = (u8 *)(&ring->desc[ring->idx]); - u8 own = (u8)rtl92ce_get_desc(entry, true, HW_DESC_OWN); + u8 own = (u8)rtl92ce_get_desc(hw, entry, true, HW_DESC_OWN); /*beacon packet will only use the first *descriptor defautly,and the own may not diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h index 66291fc341e7..91f0bd6b752f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h @@ -718,7 +718,8 @@ bool rtl92ce_rx_query_desc(struct ieee80211_hw *hw, u8 *pdesc, struct sk_buff *skb); void rtl92ce_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); -u32 rtl92ce_get_desc(u8 *pdesc, bool istx, u8 desc_name); +u64 rtl92ce_get_desc(struct ieee80211_hw *hw, u8 *p_desc, + bool istx, u8 desc_name); bool rtl92ce_is_tx_desc_closed(struct ieee80211_hw *hw, u8 
hw_queue, u16 index); void rtl92ce_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c index f4129cf96e7c..85cedd083d2b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c @@ -490,7 +490,7 @@ static bool _rtl92d_cmd_send_packet(struct ieee80211_hw *hw, spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags); pdesc = &ring->desc[idx]; /* discard output from call below */ - rtlpriv->cfg->ops->get_desc((u8 *) pdesc, true, HW_DESC_OWN); + rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN); rtlpriv->cfg->ops->fill_tx_cmddesc(hw, (u8 *) pdesc, 1, 1, skb); __skb_queue_tail(&ring->queue, skb); spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, flags); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c index 86019f654428..d7b023cf7400 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c @@ -821,7 +821,8 @@ void rtl92de_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, } } -u32 rtl92de_get_desc(u8 *p_desc, bool istx, u8 desc_name) +u64 rtl92de_get_desc(struct ieee80211_hw *hw, + u8 *p_desc, bool istx, u8 desc_name) { u32 ret = 0; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h index 9bb6cc648590..f7f776539438 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h @@ -735,7 +735,8 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw, u8 *pdesc, struct sk_buff *skb); void rtl92de_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); -u32 rtl92de_get_desc(u8 *pdesc, bool istx, u8 desc_name); +u64 rtl92de_get_desc(struct ieee80211_hw *hw, + u8 *p_desc, bool istx, u8 desc_name); void rtl92de_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); void rtl92de_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc, bool b_firstseg, bool b_lastseg, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index ef9394be7016..6b0d42a93971 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -840,6 +840,31 @@ static bool _rtl92ee_init_mac(struct ieee80211_hw *hw) /* Set TCR register */ rtl_write_dword(rtlpriv, REG_TCR, rtlpci->transmit_config); + /* Set TX/RX descriptor physical address -- HI part */ + if (!rtlpriv->cfg->mod_params->dma64) + goto dma64_end; + + rtl_write_dword(rtlpriv, REG_BCNQ_DESA + 4, + ((u64)rtlpci->tx_ring[BEACON_QUEUE].buffer_desc_dma) >> + 32); + rtl_write_dword(rtlpriv, REG_MGQ_DESA + 4, + (u64)rtlpci->tx_ring[MGNT_QUEUE].buffer_desc_dma >> 32); + rtl_write_dword(rtlpriv, REG_VOQ_DESA + 4, + (u64)rtlpci->tx_ring[VO_QUEUE].buffer_desc_dma >> 32); + rtl_write_dword(rtlpriv, REG_VIQ_DESA + 4, + (u64)rtlpci->tx_ring[VI_QUEUE].buffer_desc_dma >> 32); + rtl_write_dword(rtlpriv, REG_BEQ_DESA + 4, + (u64)rtlpci->tx_ring[BE_QUEUE].buffer_desc_dma >> 32); + rtl_write_dword(rtlpriv, REG_BKQ_DESA + 4, + (u64)rtlpci->tx_ring[BK_QUEUE].buffer_desc_dma >> 32); + rtl_write_dword(rtlpriv, REG_HQ0_DESA + 4, + (u64)rtlpci->tx_ring[HIGH_QUEUE].buffer_desc_dma >> 32); + + rtl_write_dword(rtlpriv, REG_RX_DESA + 4, + (u64)rtlpci->rx_ring[RX_MPDU_QUEUE].dma >> 32); + 
+dma64_end: + /* Set TX/RX descriptor physical address(from OS API). */ rtl_write_dword(rtlpriv, REG_BCNQ_DESA, ((u64)rtlpci->tx_ring[BEACON_QUEUE].buffer_desc_dma) & @@ -913,15 +938,9 @@ static bool _rtl92ee_init_mac(struct ieee80211_hw *hw) rtl_write_word(rtlpriv, REG_HI7Q_TXBD_NUM, TX_DESC_NUM_92E | ((RTL8192EE_SEG_NUM << 12) & 0x3000)); /*Rx*/ -#if (DMA_IS_64BIT == 1) rtl_write_word(rtlpriv, REG_RX_RXBD_NUM, RX_DESC_NUM_92E | ((RTL8192EE_SEG_NUM << 13) & 0x6000) | 0x8000); -#else - rtl_write_word(rtlpriv, REG_RX_RXBD_NUM, - RX_DESC_NUM_92E | - ((RTL8192EE_SEG_NUM << 13) & 0x6000) | 0x0000); -#endif rtl_write_dword(rtlpriv, REG_TSFTIMER_HCI, 0XFFFFFFFF); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c index a3490080d066..701493c1f235 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c @@ -259,6 +259,7 @@ static struct rtl_mod_params rtl92ee_mod_params = { .swctrl_lps = false, .fwctrl_lps = true, .msi_support = true, + .dma64 = false, .debug_level = 0, .debug_mask = 0, }; @@ -376,6 +377,7 @@ module_param_named(ips, rtl92ee_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl92ee_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92ee_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl92ee_mod_params.msi_support, bool, 0444); +module_param_named(dma64, rtl92ee_mod_params.dma64, bool, 0444); module_param_named(disable_watchdog, rtl92ee_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); @@ -383,6 +385,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); +MODULE_PARM_DESC(dma64, "Set to 1 to use DMA 64 (default 0)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c index c58393eab6a1..12255682e890 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c @@ -581,13 +581,9 @@ void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw, u8 i = 0; u16 real_desc_size = 0x28; u16 append_early_mode_size = 0; -#if (RTL8192EE_SEG_NUM == 0) - u8 segmentnum = 2; -#elif (RTL8192EE_SEG_NUM == 1) - u8 segmentnum = 4; -#elif (RTL8192EE_SEG_NUM == 2) - u8 segmentnum = 8; -#endif + u8 segmentnum = 1 << (RTL8192EE_SEG_NUM + 1); + dma_addr_t desc_dma_addr; + bool dma64 = rtlpriv->cfg->mod_params->dma64; tx_page_size = 2; current_bd_desc = rtlpci->tx_ring[queue_index].cur_tx_wp; @@ -609,6 +605,10 @@ void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw, psblen += 1; } + /* tx desc addr */ + desc_dma_addr = rtlpci->tx_ring[queue_index].dma + + (current_bd_desc * TX_DESC_SIZE); + /* Reset */ SET_TX_BUFF_DESC_LEN_0(tx_bd_desc, 0); SET_TX_BUFF_DESC_PSB(tx_bd_desc, 0); @@ -618,17 +618,9 @@ void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw, SET_TXBUFFER_DESC_LEN_WITH_OFFSET(tx_bd_desc, i, 0); SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(tx_bd_desc, i, 0); 
SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(tx_bd_desc, i, 0); -#if (DMA_IS_64BIT == 1) - SET_TXBUFFER_DESC_ADD_HIGT_WITH_OFFSET(tx_bd_desc, i, 0); -#endif + SET_TXBUFFER_DESC_ADD_HIGH_WITH_OFFSET(tx_bd_desc, i, 0, dma64); } - SET_TX_BUFF_DESC_LEN_1(tx_bd_desc, 0); - SET_TX_BUFF_DESC_AMSDU_1(tx_bd_desc, 0); - SET_TX_BUFF_DESC_LEN_2(tx_bd_desc, 0); - SET_TX_BUFF_DESC_AMSDU_2(tx_bd_desc, 0); - SET_TX_BUFF_DESC_LEN_3(tx_bd_desc, 0); - SET_TX_BUFF_DESC_AMSDU_3(tx_bd_desc, 0); /* Clear all status */ CLEAR_PCI_TX_DESC_CONTENT(desc, TX_DESC_SIZE); @@ -643,14 +635,16 @@ void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw, SET_TX_BUFF_DESC_LEN_0(tx_bd_desc, desc_size); } SET_TX_BUFF_DESC_PSB(tx_bd_desc, psblen); - SET_TX_BUFF_DESC_ADDR_LOW_0(tx_bd_desc, - rtlpci->tx_ring[queue_index].dma + - (current_bd_desc * TX_DESC_SIZE)); + SET_TX_BUFF_DESC_ADDR_LOW_0(tx_bd_desc, desc_dma_addr); + SET_TX_BUFF_DESC_ADDR_HIGH_0(tx_bd_desc, ((u64)desc_dma_addr >> 32), + dma64); SET_TXBUFFER_DESC_LEN_WITH_OFFSET(tx_bd_desc, 1, pkt_len); /* don't using extendsion mode. */ SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(tx_bd_desc, 1, 0); SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(tx_bd_desc, 1, addr); + SET_TXBUFFER_DESC_ADD_HIGH_WITH_OFFSET(tx_bd_desc, 1, + ((u64)addr >> 32), dma64); SET_TX_DESC_PKT_SIZE(desc, (u16)(pkt_len)); SET_TX_DESC_TX_BUFFER_SIZE(desc, (u16)(pkt_len)); @@ -918,6 +912,7 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, static bool over_run; u32 tmp = 0; u8 q_idx = *val; + bool dma64 = rtlpriv->cfg->mod_params->dma64; if (istx) { switch (desc_name) { @@ -982,7 +977,12 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, MAX_RECEIVE_BUFFER_SIZE + RX_DESC_SIZE); - SET_RX_BUFFER_PHYSICAL_LOW(pdesc, *(u32 *)val); + SET_RX_BUFFER_PHYSICAL_LOW(pdesc, (*(dma_addr_t *)val) & + DMA_BIT_MASK(32)); + SET_RX_BUFFER_PHYSICAL_HIGH(pdesc, + ((u64)(*(dma_addr_t *)val) + >> 32), + dma64); break; case HW_DESC_RXERO: SET_RX_DESC_EOR(pdesc, 1); @@ -996,9 +996,12 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, } } -u32 rtl92ee_get_desc(u8 *pdesc, bool istx, u8 desc_name) +u64 rtl92ee_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name) { - u32 ret = 0; + struct rtl_priv *rtlpriv = rtl_priv(hw); + u64 ret = 0; + bool dma64 = rtlpriv->cfg->mod_params->dma64; if (istx) { switch (desc_name) { @@ -1007,6 +1010,8 @@ u32 rtl92ee_get_desc(u8 *pdesc, bool istx, u8 desc_name) break; case HW_DESC_TXBUFF_ADDR: ret = GET_TXBUFFER_DESC_ADDR_LOW(pdesc, 1); + ret |= (u64)GET_TXBUFFER_DESC_ADDR_HIGH(pdesc, 1, + dma64) << 32; break; default: WARN_ONCE(true, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h index b0105c529010..48c16fff20c6 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h @@ -26,24 +26,6 @@ #ifndef __RTL92E_TRX_H__ #define __RTL92E_TRX_H__ -#if (DMA_IS_64BIT == 1) -#if (RTL8192EE_SEG_NUM == 2) -#define TX_BD_DESC_SIZE 128 -#elif (RTL8192EE_SEG_NUM == 1) -#define TX_BD_DESC_SIZE 64 -#elif (RTL8192EE_SEG_NUM == 0) -#define TX_BD_DESC_SIZE 32 -#endif -#else -#if (RTL8192EE_SEG_NUM == 2) -#define TX_BD_DESC_SIZE 64 -#elif (RTL8192EE_SEG_NUM == 1) -#define TX_BD_DESC_SIZE 32 -#elif (RTL8192EE_SEG_NUM == 0) -#define TX_BD_DESC_SIZE 16 -#endif -#endif - #define TX_DESC_SIZE 64 #define RX_DRV_INFO_SIZE_UNIT 8 @@ -331,111 +313,34 @@ SET_BITS_TO_LE_4BYTE(__pdesc+(__set*16)+8, 0, 32, __val) /* for Txfilldescroptor92ee, 
fill the desc content. */ -#if (DMA_IS_64BIT == 1) -#define SET_TXBUFFER_DESC_LEN_WITH_OFFSET(__pdesc, __offset, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*16), 0, 16, __val) -#define SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(__pdesc, __offset, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*16), 31, 1, __val) -#define SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(__pdesc, __offset, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*16)+4, 0, 32, __val) -#define SET_TXBUFFER_DESC_ADD_HIGT_WITH_OFFSET(__pdesc, __offset, __val)\ - SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*16)+8, 0, 32, __val) -#define GET_TXBUFFER_DESC_ADDR_LOW(__pdesc, __offset) \ - LE_BITS_TO_4BYTE(__pdesc+(__offset*16)+4, 0, 32) -#else -#define SET_TXBUFFER_DESC_LEN_WITH_OFFSET(__pdesc, __offset, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*8), 0, 16, __val) -#define SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(__pdesc, __offset, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*8), 31, 1, __val) -#define SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(__pdesc, __offset, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*8)+4, 0, 32, __val) -#define SET_TXBUFFER_DESC_ADD_HIGT_WITH_OFFSET(__pdesc, __offset, __val) -#define GET_TXBUFFER_DESC_ADDR_LOW(__pdesc, __offset) \ - LE_BITS_TO_4BYTE(__pdesc+(__offset*8)+4, 0, 32) -#endif +#define SET_TXBUFFER_DESC_LEN_WITH_OFFSET(__pdesc, __offset, __val) \ + SET_BITS_TO_LE_4BYTE((__pdesc) + ((__offset) * 16), 0, 16, __val) +#define SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(__pdesc, __offset, __val) \ + SET_BITS_TO_LE_4BYTE((__pdesc) + ((__offset) * 16), 31, 1, __val) +#define SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(__pdesc, __offset, __val) \ + SET_BITS_TO_LE_4BYTE((__pdesc) + ((__offset) * 16) + 4, 0, 32, __val) +#define SET_TXBUFFER_DESC_ADD_HIGH_WITH_OFFSET(pbd, off, val, dma64) \ + (dma64 ? SET_BITS_TO_LE_4BYTE((pbd) + ((off) * 16) + 8, 0, 32, val) : 0) +#define GET_TXBUFFER_DESC_ADDR_LOW(__pdesc, __offset) \ + LE_BITS_TO_4BYTE((__pdesc) + ((__offset) * 16) + 4, 0, 32) +#define GET_TXBUFFER_DESC_ADDR_HIGH(pbd, off, dma64) \ + (dma64 ? 
LE_BITS_TO_4BYTE((pbd) + ((off) * 16) + 8, 0, 32) : 0) /* Dword 0 */ -#define SET_TX_BUFF_DESC_LEN_0(__pdesc, __val) \ +#define SET_TX_BUFF_DESC_LEN_0(__pdesc, __val) \ SET_BITS_TO_LE_4BYTE(__pdesc, 0, 14, __val) -#define SET_TX_BUFF_DESC_PSB(__pdesc, __val) \ +#define SET_TX_BUFF_DESC_PSB(__pdesc, __val) \ SET_BITS_TO_LE_4BYTE(__pdesc, 16, 15, __val) -#define SET_TX_BUFF_DESC_OWN(__pdesc, __val) \ +#define SET_TX_BUFF_DESC_OWN(__pdesc, __val) \ SET_BITS_TO_LE_4BYTE(__pdesc, 31, 1, __val) /* Dword 1 */ -#define SET_TX_BUFF_DESC_ADDR_LOW_0(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+4, 0, 32, __val) -#if (DMA_IS_64BIT == 1) +#define SET_TX_BUFF_DESC_ADDR_LOW_0(__pdesc, __val) \ + SET_BITS_TO_LE_4BYTE((__pdesc) + 4, 0, 32, __val) /* Dword 2 */ -#define SET_TX_BUFF_DESC_ADDR_HIGH_0(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+8, 0, 32, __val) +#define SET_TX_BUFF_DESC_ADDR_HIGH_0(bdesc, val, dma64) \ + SET_TXBUFFER_DESC_ADD_HIGH_WITH_OFFSET(bdesc, 0, val, dma64) /* Dword 3 / RESERVED 0 */ -/* Dword 4 */ -#define SET_TX_BUFF_DESC_LEN_1(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+16, 0, 16, __val) -#define SET_TX_BUFF_DESC_AMSDU_1(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+16, 31, 1, __val) -/* Dword 5 */ -#define SET_TX_BUFF_DESC_ADDR_LOW_1(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+20, 0, 32, __val) -/* Dword 6 */ -#define SET_TX_BUFF_DESC_ADDR_HIGH_1(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+24, 0, 32, __val) -/* Dword 7 / RESERVED 0 */ -/* Dword 8 */ -#define SET_TX_BUFF_DESC_LEN_2(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+32, 0, 16, __val) -#define SET_TX_BUFF_DESC_AMSDU_2(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+32, 31, 1, __val) -/* Dword 9 */ -#define SET_TX_BUFF_DESC_ADDR_LOW_2(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+36, 0, 32, __val) -/* Dword 10 */ -#define SET_TX_BUFF_DESC_ADDR_HIGH_2(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+40, 0, 32, __val) -/* Dword 11 / RESERVED 0 */ -/* Dword 12 */ -#define SET_TX_BUFF_DESC_LEN_3(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+48, 0, 16, __val) -#define SET_TX_BUFF_DESC_AMSDU_3(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+48, 31, 1, __val) -/* Dword 13 */ -#define SET_TX_BUFF_DESC_ADDR_LOW_3(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+52, 0, 32, __val) -/* Dword 14 */ -#define SET_TX_BUFF_DESC_ADDR_HIGH_3(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+56, 0, 32, __val) -/* Dword 15 / RESERVED 0 */ -#else -#define SET_TX_BUFF_DESC_ADDR_HIGH_0(__pdesc, __val) -/* Dword 2 */ -#define SET_TX_BUFF_DESC_LEN_1(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+8, 0, 16, __val) -#define SET_TX_BUFF_DESC_AMSDU_1(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+8, 31, 1, __val) -/* Dword 3 */ -#define SET_TX_BUFF_DESC_ADDR_LOW_1(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+12, 0, 32, __val) -#define SET_TX_BUFF_DESC_ADDR_HIGH_1(__pdesc, __val) -/* Dword 4 */ -#define SET_TX_BUFF_DESC_LEN_2(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+16, 0, 16, __val) -#define SET_TX_BUFF_DESC_AMSDU_2(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+16, 31, 1, __val) -/* Dword 5 */ -#define SET_TX_BUFF_DESC_ADDR_LOW_2(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+20, 0, 32, __val) -#define SET_TX_BUFF_DESC_ADDR_HIGH_2(__pdesc, __val) -/* Dword 6 */ -#define SET_TX_BUFF_DESC_LEN_3(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+24, 0, 16, __val) -#define SET_TX_BUFF_DESC_AMSDU_3(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+24, 31, 1, __val) -/* Dword 7 */ -#define 
SET_TX_BUFF_DESC_ADDR_LOW_3(__pdesc, __val) \ - SET_BITS_TO_LE_4BYTE(__pdesc+28, 0, 32, __val) -#define SET_TX_BUFF_DESC_ADDR_HIGH_3(__pdesc, __val) -#endif /* RX buffer */ @@ -463,8 +368,8 @@ SET_BITS_TO_LE_4BYTE(__status+4, 0, 32, __val) /* DWORD 2 */ -#define SET_RX_BUFFER_PHYSICAL_HIGH(__status, __val) \ - SET_BITS_TO_LE_4BYTE(__status+8, 0, 32, __val) +#define SET_RX_BUFFER_PHYSICAL_HIGH(__rx_status_desc, __val, dma64) \ + (dma64 ? SET_BITS_TO_LE_4BYTE((__rx_status_desc) + 8, 0, 32, __val) : 0) #define GET_RX_DESC_PKT_LEN(__pdesc) \ LE_BITS_TO_4BYTE(__pdesc, 0, 14) @@ -850,7 +755,8 @@ bool rtl92ee_rx_query_desc(struct ieee80211_hw *hw, void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); -u32 rtl92ee_get_desc(u8 *pdesc, bool istx, u8 desc_name); +u64 rtl92ee_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name); bool rtl92ee_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, u16 index); void rtl92ee_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); void rtl92ee_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index d7945b9db493..d715d537ca0a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -240,7 +240,7 @@ static bool rtl92se_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; u8 *entry = (u8 *)(&ring->desc[ring->idx]); - u8 own = (u8)rtl92se_get_desc(entry, true, HW_DESC_OWN); + u8 own = (u8)rtl92se_get_desc(hw, entry, true, HW_DESC_OWN); if (own) return false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c index a01dbd31d1b4..e1904c39f147 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c @@ -610,7 +610,8 @@ void rtl92se_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, } } -u32 rtl92se_get_desc(u8 *desc, bool istx, u8 desc_name) +u64 rtl92se_get_desc(struct ieee80211_hw *hw, + u8 *desc, bool istx, u8 desc_name) { u32 ret = 0; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h index 728589138072..81a5445c04a3 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h @@ -38,7 +38,8 @@ bool rtl92se_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, struct sk_buff *skb); void rtl92se_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); -u32 rtl92se_get_desc(u8 *pdesc, bool istx, u8 desc_name); +u64 rtl92se_get_desc(struct ieee80211_hw *hw, + u8 *desc, bool istx, u8 desc_name); void rtl92se_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); #endif diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c index f713c7249fed..23485602a9a1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c @@ -643,7 +643,8 @@ void rtl8723e_set_desc(struct ieee80211_hw *hw, u8 *pdesc, } } -u32 rtl8723e_get_desc(u8 *pdesc, bool istx, u8 desc_name) +u64 rtl8723e_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name) { u32 ret = 0; @@ -686,7 +687,7 @@ bool 
rtl8723e_is_tx_desc_closed(struct ieee80211_hw *hw, struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; u8 *entry = (u8 *)(&ring->desc[ring->idx]); - u8 own = (u8)rtl8723e_get_desc(entry, true, HW_DESC_OWN); + u8 own = (u8)rtl8723e_get_desc(hw, entry, true, HW_DESC_OWN); /** *beacon packet will only use the first diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h index 43d4c791d563..985ce0b77ea5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h @@ -708,7 +708,8 @@ bool rtl8723e_rx_query_desc(struct ieee80211_hw *hw, u8 *pdesc, struct sk_buff *skb); void rtl8723e_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); -u32 rtl8723e_get_desc(u8 *pdesc, bool istx, u8 desc_name); +u64 rtl8723e_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name); bool rtl8723e_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, u16 index); void rtl8723e_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index 4d47b97adfed..d79130d85ca3 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -60,6 +60,7 @@ static void _rtl8723be_return_beacon_queue_skb(struct ieee80211_hw *hw) pci_unmap_single(rtlpci->pdev, rtlpriv->cfg->ops->get_desc( + hw, (u8 *)entry, true, HW_DESC_TXBUFF_ADDR), skb->len, PCI_DMA_TODEVICE); kfree_skb(skb); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c index 0e8944119652..fd9b38aa08a1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c @@ -695,7 +695,8 @@ void rtl8723be_set_desc(struct ieee80211_hw *hw, u8 *pdesc, } } -u32 rtl8723be_get_desc(u8 *pdesc, bool istx, u8 desc_name) +u64 rtl8723be_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name) { u32 ret = 0; @@ -738,7 +739,7 @@ bool rtl8723be_is_tx_desc_closed(struct ieee80211_hw *hw, struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; u8 *entry = (u8 *)(&ring->desc[ring->idx]); - u8 own = (u8)rtl8723be_get_desc(entry, true, HW_DESC_OWN); + u8 own = (u8)rtl8723be_get_desc(hw, entry, true, HW_DESC_OWN); /*beacon packet will only use the first *descriptor defautly,and the own may not diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h index 0274659f48ed..988bf0586674 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h @@ -624,7 +624,8 @@ bool rtl8723be_rx_query_desc(struct ieee80211_hw *hw, u8 *pdesc, struct sk_buff *skb); void rtl8723be_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); -u32 rtl8723be_get_desc(u8 *pdesc, bool istx, u8 desc_name); +u64 rtl8723be_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name); bool rtl8723be_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, u16 index); void rtl8723be_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c index 
ac573d69f6d6..efa7e1262461 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c @@ -253,7 +253,8 @@ bool rtl8723_cmd_send_packet(struct ieee80211_hw *hw, spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags); pdesc = &ring->desc[0]; - own = (u8) rtlpriv->cfg->ops->get_desc((u8 *)pdesc, true, HW_DESC_OWN); + own = (u8)rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, true, + HW_DESC_OWN); rtlpriv->cfg->ops->fill_tx_cmddesc(hw, (u8 *)pdesc, 1, 1, skb); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 4f73012978e9..dd41bdaf51f7 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -57,6 +57,7 @@ static void _rtl8821ae_return_beacon_queue_skb(struct ieee80211_hw *hw) pci_unmap_single(rtlpci->pdev, rtlpriv->cfg->ops->get_desc( + hw, (u8 *)entry, true, HW_DESC_TXBUFF_ADDR), skb->len, PCI_DMA_TODEVICE); kfree_skb(skb); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c index 749818929e8f..1e1bacf562f3 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c @@ -935,7 +935,8 @@ void rtl8821ae_set_desc(struct ieee80211_hw *hw, u8 *pdesc, } } -u32 rtl8821ae_get_desc(u8 *pdesc, bool istx, u8 desc_name) +u64 rtl8821ae_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name) { u32 ret = 0; @@ -980,7 +981,7 @@ bool rtl8821ae_is_tx_desc_closed(struct ieee80211_hw *hw, struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; u8 *entry = (u8 *)(&ring->desc[ring->idx]); - u8 own = (u8)rtl8821ae_get_desc(entry, true, HW_DESC_OWN); + u8 own = (u8)rtl8821ae_get_desc(hw, entry, true, HW_DESC_OWN); /** *beacon packet will only use the first diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h index 9843a616dcec..221dd2b29d3b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h @@ -620,7 +620,8 @@ bool rtl8821ae_rx_query_desc(struct ieee80211_hw *hw, u8 *pdesc, struct sk_buff *skb); void rtl8821ae_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); -u32 rtl8821ae_get_desc(u8 *pdesc, bool istx, u8 desc_name); +u64 rtl8821ae_get_desc(struct ieee80211_hw *hw, + u8 *pdesc, bool istx, u8 desc_name); bool rtl8821ae_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, u16 index); void rtl8821ae_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 90e875beff66..809648d28f52 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -169,7 +169,7 @@ enum rtl8192c_h2c_cmd { #define MAX_BASE_NUM_IN_PHY_REG_PG_24G 6 #define MAX_BASE_NUM_IN_PHY_REG_PG_5G 5 -#define RTL8192EE_SEG_NUM 1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */ +#define BUFDESC_SEG_NUM 1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */ #define DEL_SW_IDX_SZ 30 #define BAND_NUM 3 @@ -177,8 +177,7 @@ enum rtl8192c_h2c_cmd { /* For now, it's just for 8192ee * but not OK yet, keep it 0 */ -#define DMA_IS_64BIT 0 -#define RTL8192EE_SEG_NUM 1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */ +#define RTL8192EE_SEG_NUM BUFDESC_SEG_NUM enum rf_tx_num { 
RF_1TX = 0, @@ -2162,7 +2161,8 @@ struct rtl_hal_ops { enum led_ctl_mode ledaction); void (*set_desc)(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); - u32 (*get_desc) (u8 *pdesc, bool istx, u8 desc_name); + u64 (*get_desc)(struct ieee80211_hw *hw, u8 *pdesc, bool istx, + u8 desc_name); bool (*is_tx_desc_closed) (struct ieee80211_hw *hw, u8 hw_queue, u16 index); void (*tx_polling) (struct ieee80211_hw *hw, u8 hw_queue); @@ -2261,6 +2261,9 @@ struct rtl_mod_params { */ bool msi_support; + /* default: 0 = dma 32 */ + bool dma64; + /* default 0: 1 means disable */ bool disable_watchdog; From cdc9c7a032aae4dcdac47cf47d095207ce27bb87 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:54 -0500 Subject: [PATCH 0897/2177] rtlwifi: Implement rtl_get_tx_hw_rate to yield correct hw_rate Originally, we get legacy rate only, so we extend to get HT and VHT rate. Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 42 +++++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index fcf6e31d0fb9..e37d4c765f87 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1099,6 +1099,42 @@ int rtlwifi_rate_mapping(struct ieee80211_hw *hw, bool isht, bool isvht, } EXPORT_SYMBOL(rtlwifi_rate_mapping); +static u8 _rtl_get_tx_hw_rate(struct ieee80211_hw *hw, + struct ieee80211_tx_info *info) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + struct ieee80211_tx_rate *r = &info->status.rates[0]; + struct ieee80211_rate *txrate; + u8 hw_value = 0x0; + + if (r->flags & IEEE80211_TX_RC_MCS) { + /* HT MCS0-15 */ + hw_value = rtlpriv->cfg->maps[RTL_RC_HT_RATEMCS15] - 15 + + r->idx; + } else if (r->flags & IEEE80211_TX_RC_VHT_MCS) { + /* VHT MCS0-9, NSS */ + if (ieee80211_rate_get_vht_nss(r) == 2) + hw_value = rtlpriv->cfg->maps[RTL_RC_VHT_RATE_2SS_MCS9]; + else + hw_value = rtlpriv->cfg->maps[RTL_RC_VHT_RATE_1SS_MCS9]; + + hw_value = hw_value - 9 + ieee80211_rate_get_vht_mcs(r); + } else { + /* legacy */ + txrate = ieee80211_get_tx_rate(hw, info); + + if (txrate) + hw_value = txrate->hw_value; + } + + /* check 5G band */ + if (rtlpriv->rtlhal.current_bandtype == BAND_ON_5G && + hw_value < rtlpriv->cfg->maps[RTL_RC_OFDM_RATE6M]) + hw_value = rtlpriv->cfg->maps[RTL_RC_OFDM_RATE6M]; + + return hw_value; +} + void rtl_get_tcb_desc(struct ieee80211_hw *hw, struct ieee80211_tx_info *info, struct ieee80211_sta *sta, @@ -1107,12 +1143,10 @@ void rtl_get_tcb_desc(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *rtlmac = rtl_mac(rtl_priv(hw)); struct ieee80211_hdr *hdr = rtl_get_hdr(skb); - struct ieee80211_rate *txrate; + __le16 fc = rtl_get_fc(skb); - txrate = ieee80211_get_tx_rate(hw, info); - if (txrate) - tcb_desc->hw_rate = txrate->hw_value; + tcb_desc->hw_rate = _rtl_get_tx_hw_rate(hw, info); if (rtl_is_tx_report_skb(hw, skb)) tcb_desc->use_spe_rpt = 1; From c1816f1709e82e773c501c416a47dd84a8f5baf7 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:55 -0500 Subject: [PATCH 0898/2177] rtlwifi: Add rtl_get_hal_edca_param() to generate register's format of EDCA. Convert from the value of ieee80211_tx_queue_params to Realtek's register value. 
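A minimal usage sketch, not from this patch, of how a conf_tx-style handler could consume rtl_get_hal_edca_param(); the wrapper example_conf_tx() and the register name REG_EDCA_BE_PARAM are placeholders here, since the real per-AC EDCA register offset is chip specific.

/* Sketch only: pack WMM parameters and write them to a per-AC EDCA register. */
static void example_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
			    struct ieee80211_tx_queue_params *param)
{
	struct rtl_priv *rtlpriv = rtl_priv(hw);
	u32 reg = rtl_get_hal_edca_param(hw, vif, WIRELESS_MODE_N_24G, param);

	/* For AIFSN=3, CWmin=15, CWmax=1023, TXOP=0 in 11n/2.4G this packs to
	 * 0x0000A42B: AIFS = 3 * 9 + 16 = 0x2B (bits 0-7), ECWmin = fls(15) = 4
	 * (bits 8-11), ECWmax = fls(1023) = 0xA (bits 12-15), TXOP = 0 (bits 16-26).
	 */
	rtl_write_dword(rtlpriv, REG_EDCA_BE_PARAM, reg);
}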
Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 36 +++++++++++++++++++++ drivers/net/wireless/realtek/rtlwifi/base.h | 4 +++ 2 files changed, 40 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index e37d4c765f87..081b9507e02e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1557,6 +1557,42 @@ void rtl_wait_tx_report_acked(struct ieee80211_hw *hw, u32 wait_ms) "Wait 1ms (%d/%d) to disable key.\n", i, wait_ms); } } + +u32 rtl_get_hal_edca_param(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum wireless_mode wirelessmode, + struct ieee80211_tx_queue_params *param) +{ + u32 reg = 0; + u8 sifstime = 10; + u8 slottime = 20; + + /* AIFS = AIFSN * slot time + SIFS */ + switch (wirelessmode) { + case WIRELESS_MODE_A: + case WIRELESS_MODE_N_24G: + case WIRELESS_MODE_N_5G: + case WIRELESS_MODE_AC_5G: + case WIRELESS_MODE_AC_24G: + sifstime = 16; + slottime = 9; + break; + case WIRELESS_MODE_G: + slottime = (vif->bss_conf.use_short_slot ? 9 : 20); + break; + default: + break; + } + + reg |= (param->txop & 0x7FF) << 16; + reg |= (fls(param->cw_max) & 0xF) << 12; + reg |= (fls(param->cw_min) & 0xF) << 8; + reg |= (param->aifs & 0x0F) * slottime + sifstime; + + return reg; +} +EXPORT_SYMBOL_GPL(rtl_get_hal_edca_param); + /********************************************************* * * functions called by core.c diff --git a/drivers/net/wireless/realtek/rtlwifi/base.h b/drivers/net/wireless/realtek/rtlwifi/base.h index b56d1b7f5567..cfea9fc39a9f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.h +++ b/drivers/net/wireless/realtek/rtlwifi/base.h @@ -137,6 +137,10 @@ void rtl_tx_report_handler(struct ieee80211_hw *hw, u8 *tmp_buf, u8 c2h_cmd_len); bool rtl_check_tx_report_acked(struct ieee80211_hw *hw); void rtl_wait_tx_report_acked(struct ieee80211_hw *hw, u32 wait_ms); +u32 rtl_get_hal_edca_param(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum wireless_mode wirelessmode, + struct ieee80211_tx_queue_params *param); void rtl_beacon_statistic(struct ieee80211_hw *hw, struct sk_buff *skb); void rtl_collect_scan_list(struct ieee80211_hw *hw, struct sk_buff *skb); From 74451b935c4285774d46fba864a7a403587c0b97 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:56 -0500 Subject: [PATCH 0899/2177] rtlwifi: Add TX/RX throughput statistics in period The statistic variables use u64 to get higher precision. 
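A small sketch, not from this patch, of the unit conversion behind the new *_inperiod_tp fields; the division by 2 assumes the rtlwifi watchdog fires roughly every 2 seconds, and "Mbit" here means 1024 * 1024 bits.

/* Sketch only: throughput in Mbit/s from the per-period unicast byte delta. */
static u32 example_inperiod_tp(u64 bytes_in_period)
{
	/* e.g. 26,214,400 bytes (25 MiB) in one 2 s period:
	 * 26,214,400 * 8 / 2 / 1024 / 1024 = 100, i.e. about 100 Mbit/s.
	 */
	return (u32)(bytes_in_period * 8 / 2 / 1024 / 1024);
}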
Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 16 ++++++++++++++++ drivers/net/wireless/realtek/rtlwifi/wifi.h | 15 +++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 081b9507e02e..e28261bc2c27 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1998,6 +1998,22 @@ label_lps_done: rtlpriv->link_info.tx_busy_traffic = tx_busy_traffic; rtlpriv->link_info.higher_busyrxtraffic = higher_busyrxtraffic; + rtlpriv->stats.txbytesunicast_inperiod = + rtlpriv->stats.txbytesunicast - + rtlpriv->stats.txbytesunicast_last; + rtlpriv->stats.rxbytesunicast_inperiod = + rtlpriv->stats.rxbytesunicast - + rtlpriv->stats.rxbytesunicast_last; + rtlpriv->stats.txbytesunicast_last = rtlpriv->stats.txbytesunicast; + rtlpriv->stats.rxbytesunicast_last = rtlpriv->stats.rxbytesunicast; + + rtlpriv->stats.txbytesunicast_inperiod_tp = + (u32)(rtlpriv->stats.txbytesunicast_inperiod * 8 / 2 / + 1024 / 1024); + rtlpriv->stats.rxbytesunicast_inperiod_tp = + (u32)(rtlpriv->stats.rxbytesunicast_inperiod * 8 / 2 / + 1024 / 1024); + /* <3> DM */ if (!rtlpriv->cfg->mod_params->disable_watchdog) rtlpriv->cfg->ops->dm_watchdog(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 809648d28f52..ceeca79acc56 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -1013,10 +1013,17 @@ struct init_gain { }; struct wireless_stats { - unsigned long txbytesunicast; - unsigned long txbytesmulticast; - unsigned long txbytesbroadcast; - unsigned long rxbytesunicast; + u64 txbytesunicast; + u64 txbytesmulticast; + u64 txbytesbroadcast; + u64 rxbytesunicast; + + u64 txbytesunicast_inperiod; + u64 rxbytesunicast_inperiod; + u32 txbytesunicast_inperiod_tp; + u32 rxbytesunicast_inperiod_tp; + u64 txbytesunicast_last; + u64 rxbytesunicast_last; long rx_snr_db[4]; /*Correct smoothed ss in Dbm, only used From 08ab7465f36c07d461faa13e7625aea81b70b2f4 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:57 -0500 Subject: [PATCH 0900/2177] rtlwifi: Add RSSI and RF type to wifi.h for phydm These definition will be used by phydm later. 
Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/wifi.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index ceeca79acc56..5843aee5c921 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -560,6 +560,11 @@ enum rf_type { RF_1T2R = 1, RF_2T2R = 2, RF_2T2R_GREEN = 3, + RF_2T3R = 4, + RF_2T4R = 5, + RF_3T3R = 6, + RF_3T4R = 7, + RF_4T4R = 8, }; enum ht_channel_width { @@ -1329,6 +1334,7 @@ struct rtl_sta_info { struct rtl_tid_data tids[MAX_TID_COUNT]; /* just used for ap adhoc or mesh*/ struct rssi_sta rssi_stat; + u8 rssi_level; u16 wireless_mode; u8 ratr_index; u8 mimo_ps; From aa59a1e7c6e82b7ab8f40583829b6670d7d0a21e Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:58 -0500 Subject: [PATCH 0901/2177] rtlwifi: Remove BAND_NUM and related fields These fields are unused, and we will define them in phydm later. Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/wifi.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 5843aee5c921..6705f863f152 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -172,7 +172,6 @@ enum rtl8192c_h2c_cmd { #define BUFDESC_SEG_NUM 1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */ #define DEL_SW_IDX_SZ 30 -#define BAND_NUM 3 /* For now, it's just for 8192ee * but not OK yet, keep it 0 @@ -1748,21 +1747,6 @@ struct rtl_dm { s8 swing_diff_2g; s8 swing_diff_5g; - u8 delta_swing_table_idx_24gccka_p[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24gccka_n[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24gcckb_p[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24gcckb_n[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24ga_p[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24ga_n[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24gb_p[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24gb_n[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_5ga_p[BAND_NUM][DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_5ga_n[BAND_NUM][DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_5gb_p[BAND_NUM][DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_5gb_n[BAND_NUM][DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24ga_p_8188e[DEL_SW_IDX_SZ]; - u8 delta_swing_table_idx_24ga_n_8188e[DEL_SW_IDX_SZ]; - /* DMSP */ bool supp_phymode_switch; From 1d22b17744a317d7cbb2b3cddce0e0ed170b6ff5 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:47:59 -0500 Subject: [PATCH 0902/2177] rtlwifi: Add bw_update parameter for RA mask update. - Add new parameter "is_bw_update" to control if current bandwidth setting is updated to FW RA. - After this commit, we keep the same setting as before. - Later, bandwidth update in watchdog is changed to false for 8822BE. 
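A short sketch, not from this patch, of the encoding this adds to byte 2 of the rate-mask H2C command; the helper ra_mask_byte2() is hypothetical and only restates what the per-chip update_hal_rate_mask() functions now do inline.

/* Sketch only: bit 3 of byte 2 tells the firmware to keep its current
 * bandwidth setting when the caller passed update_bw == false; the low
 * bits still carry the current bandwidth as before.
 */
static u8 ra_mask_byte2(u8 cur_bw, bool update_bw)
{
	return cur_bw | ((!update_bw) << 3);
}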
Signed-off-by: Tsang-Shian Lin Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 2 +- drivers/net/wireless/realtek/rtlwifi/core.c | 5 +++-- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c | 6 +++--- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c | 6 +++--- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h | 5 ++--- drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c | 6 +++--- drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 6 +++--- drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c | 9 +++++---- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c | 6 +++--- drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 7 ++++--- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 8 ++++---- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 8 ++++---- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h | 2 +- drivers/net/wireless/realtek/rtlwifi/wifi.h | 3 ++- 26 files changed, 61 insertions(+), 50 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index e28261bc2c27..3a297c5551ed 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -2286,7 +2286,7 @@ int rtl_send_smps_action(struct ieee80211_hw *hw, struct rtl_sta_info *sta_entry = (struct rtl_sta_info *) sta->drv_priv; sta_entry->mimo_ps = smps; - /* rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); */ + /* rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0, true); */ info->control.rates[0].idx = 0; info->band = hw->conf.chandef.chan->band; diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 294a6b43d1bc..015a8ec36703 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -946,7 +946,7 @@ static int rtl_op_sta_add(struct ieee80211_hw *hw, memcpy(sta_entry->mac_addr, sta->addr, ETH_ALEN); RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG, "Add sta addr is %pM\n", sta->addr); - rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); + rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0, true); } return 0; @@ -1152,7 +1152,8 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, } if (vif->type == NL80211_IFTYPE_STATION) - rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); + rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0, + true); rcu_read_unlock(); /* to avoid AP Disassociation caused by inactivity */ diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c index f936a491371b..207411d1b015 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c @@ -1221,7 +1221,8 @@ static 
void rtl88e_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw) sta = rtl_find_sta(hw, mac->bssid); if (sta) rtlpriv->cfg->ops->update_rate_tbl(hw, sta, - p_ra->ratr_state); + p_ra->ratr_state, + true); rcu_read_unlock(); p_ra->pre_ratr_state = p_ra->ratr_state; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 5b939935eb56..6fbf3df4947e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -2077,7 +2077,7 @@ static void rtl88ee_update_hal_rate_table(struct ieee80211_hw *hw, } static void rtl88ee_update_hal_rate_mask(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &(rtlpriv->phy); @@ -2208,12 +2208,12 @@ static void rtl88ee_update_hal_rate_mask(struct ieee80211_hw *hw, } void rtl88ee_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl88ee_update_hal_rate_mask(hw, sta, rssi_level); + rtl88ee_update_hal_rate_mask(hw, sta, rssi_level, update_bw); else rtl88ee_update_hal_rate_table(hw, sta); } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h index d38dbca3c19e..719b78a3b7db 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h @@ -43,7 +43,8 @@ void rtl88ee_update_interrupt_mask(struct ieee80211_hw *hw, u32 add_msr, u32 rm_msr); void rtl88ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl88ee_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level); + struct ieee80211_sta *sta, u8 rssi_level, + bool update_bw); void rtl88ee_update_channel_access_setting(struct ieee80211_hw *hw); bool rtl88ee_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid); void rtl88ee_enable_hw_security_config(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c index 9956026bae0a..9310fad69cd9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c @@ -1865,7 +1865,7 @@ static void rtl92ce_update_hal_rate_table(struct ieee80211_hw *hw, } static void rtl92ce_update_hal_rate_mask(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &(rtlpriv->phy); @@ -1995,12 +1995,12 @@ static void rtl92ce_update_hal_rate_mask(struct ieee80211_hw *hw, } void rtl92ce_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl92ce_update_hal_rate_mask(hw, sta, rssi_level); + rtl92ce_update_hal_rate_mask(hw, sta, rssi_level, update_bw); else rtl92ce_update_hal_rate_table(hw, sta); } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h index 877f138a0cb9..7683c5dfe851 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h +++ 
b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h @@ -56,9 +56,8 @@ void rtl92ce_update_interrupt_mask(struct ieee80211_hw *hw, u32 add_msr, u32 rm_msr); void rtl92ce_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl92ce_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level); -void rtl92ce_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level); + struct ieee80211_sta *sta, u8 rssi_level, + bool update_bw); void rtl92ce_update_channel_access_setting(struct ieee80211_hw *hw); bool rtl92ce_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid); void rtl92ce_enable_hw_security_config(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c index 530e80f0ef0b..1e60f70481f5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c @@ -2006,7 +2006,7 @@ static void rtl92cu_update_hal_rate_table(struct ieee80211_hw *hw, static void rtl92cu_update_hal_rate_mask(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level) + u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &(rtlpriv->phy); @@ -2153,12 +2153,12 @@ static void rtl92cu_update_hal_rate_mask(struct ieee80211_hw *hw, void rtl92cu_update_hal_rate_tbl(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level) + u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl92cu_update_hal_rate_mask(hw, sta, rssi_level); + rtl92cu_update_hal_rate_mask(hw, sta, rssi_level, update_bw); else rtl92cu_update_hal_rate_table(hw, sta); } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h index 932f056f7ef8..ebd168400d45 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h @@ -104,6 +104,6 @@ void rtl92c_fill_h2c_cmd(struct ieee80211_hw *hw, bool rtl92cu_phy_mac_config(struct ieee80211_hw *hw); void rtl92cu_update_hal_rate_tbl(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level); + u8 rssi_level, bool update_bw); #endif diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index cf28d25c551f..5a67f85fa165 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -1897,7 +1897,7 @@ static void rtl92de_update_hal_rate_table(struct ieee80211_hw *hw, } static void rtl92de_update_hal_rate_mask(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &(rtlpriv->phy); @@ -2033,12 +2033,12 @@ static void rtl92de_update_hal_rate_mask(struct ieee80211_hw *hw, } void rtl92de_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl92de_update_hal_rate_mask(hw, sta, rssi_level); + rtl92de_update_hal_rate_mask(hw, sta, rssi_level, update_bw); else rtl92de_update_hal_rate_table(hw, sta); } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h index 
24b03b9999be..85c565b86ae3 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h @@ -43,7 +43,8 @@ void rtl92de_update_interrupt_mask(struct ieee80211_hw *hw, u32 add_msr, u32 rm_msr); void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl92de_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level); + struct ieee80211_sta *sta, u8 rssi_level, + bool update_bw); void rtl92de_update_channel_access_setting(struct ieee80211_hw *hw); bool rtl92de_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid); void rtl92de_enable_hw_security_config(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c index e6b5786c7d4a..faed6e2dedf6 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c @@ -1039,7 +1039,8 @@ static void rtl92ee_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw) sta = rtl_find_sta(hw, mac->bssid); if (sta) rtlpriv->cfg->ops->update_rate_tbl(hw, sta, - p_ra->ratr_state); + p_ra->ratr_state, + true); rcu_read_unlock(); p_ra->pre_ratr_state = p_ra->ratr_state; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index 6b0d42a93971..6fc3090c4b72 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -2270,7 +2270,7 @@ static u8 _rtl92ee_mrate_idx_to_arfr_id(struct ieee80211_hw *hw, u8 rate_index) static void rtl92ee_update_hal_rate_mask(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level) + u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &rtlpriv->phy; @@ -2389,7 +2389,7 @@ static void rtl92ee_update_hal_rate_mask(struct ieee80211_hw *hw, (ratr_index << 28); rate_mask[0] = macid; rate_mask[1] = ratr_index | (b_shortgi ? 
0x80 : 0x00); - rate_mask[2] = curtxbw_40mhz; + rate_mask[2] = curtxbw_40mhz | ((!update_bw) << 3); rate_mask[3] = (u8)(ratr_bitmap & 0x000000ff); rate_mask[4] = (u8)((ratr_bitmap & 0x0000ff00) >> 8); rate_mask[5] = (u8)((ratr_bitmap & 0x00ff0000) >> 16); @@ -2404,12 +2404,13 @@ static void rtl92ee_update_hal_rate_mask(struct ieee80211_hw *hw, } void rtl92ee_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, + bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl92ee_update_hal_rate_mask(hw, sta, rssi_level); + rtl92ee_update_hal_rate_mask(hw, sta, rssi_level, update_bw); } void rtl92ee_update_channel_access_setting(struct ieee80211_hw *hw) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h index 05413f189685..cd6aeb44b996 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h @@ -43,7 +43,8 @@ void rtl92ee_update_interrupt_mask(struct ieee80211_hw *hw, u32 add_msr, u32 rm_msr); void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl92ee_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level); + struct ieee80211_sta *sta, u8 rssi_level, + bool update_bw); void rtl92ee_update_channel_access_setting(struct ieee80211_hw *hw); bool rtl92ee_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid); void rtl92ee_enable_hw_security_config(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c index 2c073a77b194..44f510a94b09 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c @@ -295,7 +295,8 @@ static void _rtl92s_dm_refresh_rateadaptive_mask(struct ieee80211_hw *hw) sta = rtl_find_sta(hw, mac->bssid); if (sta) rtlpriv->cfg->ops->update_rate_tbl(hw, sta, - ra->ratr_state); + ra->ratr_state, + true); rcu_read_unlock(); ra->pre_ratr_state = ra->ratr_state; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c index ba1bd782238b..66be79ca4247 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c @@ -2129,7 +2129,7 @@ static void rtl92se_update_hal_rate_table(struct ieee80211_hw *hw, static void rtl92se_update_hal_rate_mask(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level) + u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &(rtlpriv->phy); @@ -2288,12 +2288,12 @@ static void rtl92se_update_hal_rate_mask(struct ieee80211_hw *hw, } void rtl92se_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl92se_update_hal_rate_mask(hw, sta, rssi_level); + rtl92se_update_hal_rate_mask(hw, sta, rssi_level, update_bw); else rtl92se_update_hal_rate_table(hw, sta); } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h index 86bce1be83ce..3c93d30fcae7 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h @@ -59,7 +59,7 @@ void 
rtl92se_update_interrupt_mask(struct ieee80211_hw *hw, void rtl92se_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl92se_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level); + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw); void rtl92se_update_channel_access_setting(struct ieee80211_hw *hw); bool rtl92se_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index 5ac7b815648a..4c07f03b4ec1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -1943,7 +1943,7 @@ static void rtl8723e_update_hal_rate_table(struct ieee80211_hw *hw, static void rtl8723e_update_hal_rate_mask(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level) + u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &(rtlpriv->phy); @@ -2074,12 +2074,13 @@ static void rtl8723e_update_hal_rate_mask(struct ieee80211_hw *hw, } void rtl8723e_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, + bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl8723e_update_hal_rate_mask(hw, sta, rssi_level); + rtl8723e_update_hal_rate_mask(hw, sta, rssi_level, update_bw); else rtl8723e_update_hal_rate_table(hw, sta); } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h index 32c1ace97c3f..1e7063105c96 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h @@ -49,7 +49,8 @@ void rtl8723e_update_interrupt_mask(struct ieee80211_hw *hw, u32 add_msr, u32 rm_msr); void rtl8723e_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl8723e_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level); + struct ieee80211_sta *sta, u8 rssi_level, + bool update_bw); void rtl8723e_update_channel_access_setting(struct ieee80211_hw *hw); bool rtl8723e_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid); void rtl8723e_enable_hw_security_config(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c index 15c117e95a99..47e87a21ae27 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c @@ -984,7 +984,8 @@ static void rtl8723be_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw) sta = rtl_find_sta(hw, mac->bssid); if (sta) rtlpriv->cfg->ops->update_rate_tbl(hw, sta, - p_ra->ratr_state); + p_ra->ratr_state, + true); rcu_read_unlock(); p_ra->pre_ratr_state = p_ra->ratr_state; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index d79130d85ca3..6f622b12ba43 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -2325,7 +2325,7 @@ static u8 _rtl8723be_mrate_idx_to_arfr_id(struct ieee80211_hw *hw, static void rtl8723be_update_hal_rate_mask(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level) + u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &(rtlpriv->phy); @@ -2441,7 +2441,7 @@ static 
void rtl8723be_update_hal_rate_mask(struct ieee80211_hw *hw, rate_mask[0] = macid; rate_mask[1] = _rtl8723be_mrate_idx_to_arfr_id(hw, ratr_index) | (shortgi ? 0x80 : 0x00); - rate_mask[2] = curtxbw_40mhz; + rate_mask[2] = curtxbw_40mhz | ((!update_bw) << 3); rate_mask[3] = (u8)(ratr_bitmap & 0x000000ff); rate_mask[4] = (u8)((ratr_bitmap & 0x0000ff00) >> 8); @@ -2461,11 +2461,11 @@ static void rtl8723be_update_hal_rate_mask(struct ieee80211_hw *hw, void rtl8723be_update_hal_rate_tbl(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level) + u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl8723be_update_hal_rate_mask(hw, sta, rssi_level); + rtl8723be_update_hal_rate_mask(hw, sta, rssi_level, update_bw); } void rtl8723be_update_channel_access_setting(struct ieee80211_hw *hw) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h index eae863d08de8..54d7afa7297e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h @@ -46,7 +46,7 @@ void rtl8723be_update_interrupt_mask(struct ieee80211_hw *hw, void rtl8723be_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl8723be_update_hal_rate_tbl(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level); + u8 rssi_level, bool update_bw); void rtl8723be_update_channel_access_setting(struct ieee80211_hw *hw); bool rtl8723be_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid); void rtl8723be_enable_hw_security_config(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c index 32900c51f024..b11365a5ee1f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c @@ -2592,7 +2592,7 @@ static void rtl8821ae_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw) sta = rtl_find_sta(hw, mac->bssid); if (sta) rtlpriv->cfg->ops->update_rate_tbl(hw, - sta, p_ra->ratr_state); + sta, p_ra->ratr_state, true); rcu_read_unlock(); p_ra->pre_ratr_state = p_ra->ratr_state; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index dd41bdaf51f7..bffd7553c5e6 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -3599,7 +3599,7 @@ static bool _rtl8821ae_get_ra_shortgi(struct ieee80211_hw *hw, struct ieee80211_ } static void rtl8821ae_update_hal_rate_mask(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &rtlpriv->phy; @@ -3778,7 +3778,7 @@ static void rtl8821ae_update_hal_rate_mask(struct ieee80211_hw *hw, rate_mask[0] = macid; rate_mask[1] = ratr_index | (b_shortgi ? 
0x80 : 0x00); - rate_mask[2] = rtlphy->current_chan_bw + rate_mask[2] = rtlphy->current_chan_bw | ((!update_bw) << 3) | _rtl8821ae_get_vht_eni(wirelessmode, ratr_bitmap) | _rtl8821ae_get_ra_ldpc(hw, macid, sta_entry, wirelessmode); @@ -3799,11 +3799,11 @@ static void rtl8821ae_update_hal_rate_mask(struct ieee80211_hw *hw, } void rtl8821ae_update_hal_rate_tbl(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level) + struct ieee80211_sta *sta, u8 rssi_level, bool update_bw) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->dm.useramask) - rtl8821ae_update_hal_rate_mask(hw, sta, rssi_level); + rtl8821ae_update_hal_rate_mask(hw, sta, rssi_level, update_bw); else /*RT_TRACE(rtlpriv, COMP_RATR,DBG_LOUD, "rtl8821ae_update_hal_rate_tbl() Error! 8821ae FW RA Only\n");*/ diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h index a3553e3abaa1..50fa9c718189 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h @@ -46,7 +46,7 @@ void rtl8821ae_update_interrupt_mask(struct ieee80211_hw *hw, void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl8821ae_update_hal_rate_tbl(struct ieee80211_hw *hw, struct ieee80211_sta *sta, - u8 rssi_level); + u8 rssi_level, bool update_bw); void rtl8821ae_update_channel_access_setting(struct ieee80211_hw *hw); bool rtl8821ae_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid); void rtl8821ae_enable_hw_security_config(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 6705f863f152..4decf1e71ba8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2123,7 +2123,8 @@ struct rtl_hal_ops { void (*get_hw_reg) (struct ieee80211_hw *hw, u8 variable, u8 *val); void (*set_hw_reg) (struct ieee80211_hw *hw, u8 variable, u8 *val); void (*update_rate_tbl) (struct ieee80211_hw *hw, - struct ieee80211_sta *sta, u8 rssi_level); + struct ieee80211_sta *sta, u8 rssi_leve, + bool update_bw); void (*pre_fill_tx_bd_desc)(struct ieee80211_hw *hw, u8 *tx_bd_desc, u8 *desc, u8 queue_index, struct sk_buff *skb, dma_addr_t addr); From 84efbad4f867507b1067d3277c238885f182068d Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 29 Sep 2017 14:48:00 -0500 Subject: [PATCH 0903/2177] rtlwifi: Add module parameter ASPM On some platforms, enable ASPM will cause AER error to be logged, thus we use a parameter to selectively turn on ASPM. 
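A brief sketch, not from this patch, of what the three values of the new aspm parameter mean once they land in rtlpci->const_support_pciaspm; the meanings follow the comment kept in each *_init_aspm_vars(), rtl_aspm_allowed() is a hypothetical helper, and the modprobe example assumes the PCIe module name, e.g. rtl8723be.

/* Sketch only: interpreting mod_params->aspm_support.
 *   0 - do not support ASPM (e.g. "modprobe rtl8723be aspm=0" on a
 *       platform that logs AER errors with ASPM enabled)
 *   1 - support ASPM (the default for most chips in this patch)
 *   2 - decide according to the chipset (the rtl8192se default)
 */
static bool rtl_aspm_allowed(const struct rtl_pci *rtlpci, bool chipset_ok)
{
	switch (rtlpci->const_support_pciaspm) {
	case 0:
		return false;
	case 1:
		return true;
	default:	/* 2: follow the chipset-specific decision */
		return chipset_ok;
	}
}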
Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c | 6 +++++- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c | 6 +++++- drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c | 6 +++++- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c | 6 +++++- drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c | 6 +++++- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c | 6 +++++- drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c | 6 +++++- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c | 6 +++++- drivers/net/wireless/realtek/rtlwifi/wifi.h | 3 +++ 9 files changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index 57e5d5c1d24b..35de3aeafcc9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -41,6 +41,7 @@ static void rtl88e_init_aspm_vars(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); /*close ASPM for AMD defaultly */ @@ -77,7 +78,7 @@ static void rtl88e_init_aspm_vars(struct ieee80211_hw *hw) * 1 - Support ASPM, * 2 - According to chipset. */ - rtlpci->const_support_pciaspm = 1; + rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support; } int rtl88e_init_sw_vars(struct ieee80211_hw *hw) @@ -276,6 +277,7 @@ static struct rtl_mod_params rtl88ee_mod_params = { .swctrl_lps = false, .fwctrl_lps = false, .msi_support = true, + .aspm_support = 1, .debug_level = 0, .debug_mask = 0, }; @@ -399,6 +401,7 @@ module_param_named(ips, rtl88ee_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl88ee_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl88ee_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl88ee_mod_params.msi_support, bool, 0444); +module_param_named(aspm, rtl88ee_mod_params.aspm_support, int, 0444); module_param_named(disable_watchdog, rtl88ee_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); @@ -406,6 +409,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); +MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c index 38f85bfdf0c7..71a6761d3648 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c @@ -44,6 +44,7 @@ static void rtl92c_init_aspm_vars(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); /*close ASPM for AMD defaultly */ @@ -83,7 +84,7 @@ static void rtl92c_init_aspm_vars(struct ieee80211_hw *hw) * 1 - Support ASPM, * 2 - According to chipset. 
*/ - rtlpci->const_support_pciaspm = 1; + rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support; } int rtl92c_init_sw_vars(struct ieee80211_hw *hw) @@ -252,6 +253,7 @@ static struct rtl_mod_params rtl92ce_mod_params = { .inactiveps = true, .swctrl_lps = false, .fwctrl_lps = true, + .aspm_support = 1, .debug_level = 0, .debug_mask = 0, }; @@ -375,10 +377,12 @@ module_param_named(debug_mask, rtl92ce_mod_params.debug_mask, ullong, 0644); module_param_named(ips, rtl92ce_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl92ce_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92ce_mod_params.fwctrl_lps, bool, 0444); +module_param_named(aspm, rtl92ce_mod_params.aspm_support, int, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index a6549f5f6c59..d5ba2bace79b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -40,6 +40,7 @@ static void rtl92d_init_aspm_vars(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); /*close ASPM for AMD defaultly */ @@ -79,7 +80,7 @@ static void rtl92d_init_aspm_vars(struct ieee80211_hw *hw) * 1 - Support ASPM, * 2 - According to chipset. 
*/ - rtlpci->const_support_pciaspm = 1; + rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support; } static int rtl92d_init_sw_vars(struct ieee80211_hw *hw) @@ -254,6 +255,7 @@ static struct rtl_mod_params rtl92de_mod_params = { .inactiveps = true, .swctrl_lps = true, .fwctrl_lps = false, + .aspm_support = 1, .debug_level = 0, .debug_mask = 0, }; @@ -369,11 +371,13 @@ module_param_named(debug_level, rtl92de_mod_params.debug_level, int, 0644); module_param_named(ips, rtl92de_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl92de_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92de_mod_params.fwctrl_lps, bool, 0444); +module_param_named(aspm, rtl92de_mod_params.aspm_support, int, 0444); module_param_named(debug_mask, rtl92de_mod_params.debug_mask, ullong, 0644); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); +MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c index 701493c1f235..ef92a789871d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c @@ -44,6 +44,7 @@ static void rtl92ee_init_aspm_vars(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); /*close ASPM for AMD defaultly */ @@ -83,7 +84,7 @@ static void rtl92ee_init_aspm_vars(struct ieee80211_hw *hw) * 1 - Support ASPM, * 2 - According to chipset. 
*/ - rtlpci->const_support_pciaspm = 1; + rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support; } int rtl92ee_init_sw_vars(struct ieee80211_hw *hw) @@ -260,6 +261,7 @@ static struct rtl_mod_params rtl92ee_mod_params = { .fwctrl_lps = true, .msi_support = true, .dma64 = false, + .aspm_support = 1, .debug_level = 0, .debug_mask = 0, }; @@ -378,6 +380,7 @@ module_param_named(swlps, rtl92ee_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92ee_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl92ee_mod_params.msi_support, bool, 0444); module_param_named(dma64, rtl92ee_mod_params.dma64, bool, 0444); +module_param_named(aspm, rtl92ee_mod_params.aspm_support, int, 0444); module_param_named(disable_watchdog, rtl92ee_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); @@ -386,6 +389,7 @@ MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); MODULE_PARM_DESC(dma64, "Set to 1 to use DMA 64 (default 0)\n"); +MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index d715d537ca0a..d55554b7fa9a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -41,6 +41,7 @@ static void rtl92s_init_aspm_vars(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); /*close ASPM for AMD defaultly */ @@ -77,7 +78,7 @@ static void rtl92s_init_aspm_vars(struct ieee80211_hw *hw) * 1 - Support ASPM, * 2 - According to chipset. 
*/ - rtlpci->const_support_pciaspm = 2; + rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support; } static void rtl92se_fw_cb(const struct firmware *firmware, void *context) @@ -297,6 +298,7 @@ static struct rtl_mod_params rtl92se_mod_params = { .inactiveps = true, .swctrl_lps = true, .fwctrl_lps = false, + .aspm_support = 2, .debug_level = 0, .debug_mask = 0, }; @@ -422,10 +424,12 @@ module_param_named(debug_mask, rtl92se_mod_params.debug_mask, ullong, 0644); module_param_named(ips, rtl92se_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl92se_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92se_mod_params.fwctrl_lps, bool, 0444); +module_param_named(aspm, rtl92se_mod_params.aspm_support, int, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); +MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c index 97b8bd294aa8..a545ea317323 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c @@ -46,6 +46,7 @@ static void rtl8723e_init_aspm_vars(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); /*close ASPM for AMD defaultly */ @@ -85,7 +86,7 @@ static void rtl8723e_init_aspm_vars(struct ieee80211_hw *hw) * 1 - Support ASPM, * 2 - According to chipset. 
*/ - rtlpci->const_support_pciaspm = 1; + rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support; } int rtl8723e_init_sw_vars(struct ieee80211_hw *hw) @@ -268,6 +269,7 @@ static struct rtl_mod_params rtl8723e_mod_params = { .inactiveps = true, .swctrl_lps = false, .fwctrl_lps = true, + .aspm_support = 1, .debug_level = 0, .debug_mask = 0, .msi_support = false, @@ -389,6 +391,7 @@ module_param_named(ips, rtl8723e_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl8723e_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl8723e_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl8723e_mod_params.msi_support, bool, 0444); +module_param_named(aspm, rtl8723e_mod_params.aspm_support, int, 0444); module_param_named(disable_watchdog, rtl8723e_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); @@ -396,6 +399,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n"); +MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c index 2b16a1467e78..6a42988aad65 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c @@ -46,6 +46,7 @@ static void rtl8723be_init_aspm_vars(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); /*close ASPM for AMD defaultly */ @@ -82,7 +83,7 @@ static void rtl8723be_init_aspm_vars(struct ieee80211_hw *hw) * 1 - Support ASPM, * 2 - According to chipset. 
*/ - rtlpci->const_support_pciaspm = 1; + rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support; } int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) @@ -271,6 +272,7 @@ static struct rtl_mod_params rtl8723be_mod_params = { .swctrl_lps = false, .fwctrl_lps = true, .msi_support = false, + .aspm_support = 1, .disable_watchdog = false, .debug_level = 0, .debug_mask = 0, @@ -396,6 +398,7 @@ module_param_named(ips, rtl8723be_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl8723be_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl8723be_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl8723be_mod_params.msi_support, bool, 0444); +module_param_named(aspm, rtl8723be_mod_params.aspm_support, int, 0444); module_param_named(disable_watchdog, rtl8723be_mod_params.disable_watchdog, bool, 0444); module_param_named(ant_sel, rtl8723be_mod_params.ant_sel, int, 0444); @@ -404,6 +407,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n"); +MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); MODULE_PARM_DESC(disable_watchdog, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c index 0894ef48ab87..ab5d462b1a3a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -43,6 +43,7 @@ static void rtl8821ae_init_aspm_vars(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); /*close ASPM for AMD defaultly */ @@ -82,7 +83,7 @@ static void rtl8821ae_init_aspm_vars(struct ieee80211_hw *hw) * 1 - Support ASPM, * 2 - According to chipset. 
*/ - rtlpci->const_support_pciaspm = 1; + rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support; } /*InitializeVariables8812E*/ @@ -313,6 +314,7 @@ static struct rtl_mod_params rtl8821ae_mod_params = { .swctrl_lps = false, .fwctrl_lps = true, .msi_support = true, + .aspm_support = 1, .int_clear = true, .debug_level = 0, .debug_mask = 0, @@ -444,6 +446,7 @@ module_param_named(ips, rtl8821ae_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl8821ae_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl8821ae_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl8821ae_mod_params.msi_support, bool, 0444); +module_param_named(aspm, rtl8821ae_mod_params.aspm_support, int, 0444); module_param_named(disable_watchdog, rtl8821ae_mod_params.disable_watchdog, bool, 0444); module_param_named(int_clear, rtl8821ae_mod_params.int_clear, bool, 0444); @@ -452,6 +455,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); +MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 4decf1e71ba8..7dfc73b554f1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2262,6 +2262,9 @@ struct rtl_mod_params { /* default: 0 = dma 32 */ bool dma64; + /* default: 1 = enable aspm */ + int aspm_support; + /* default 0: 1 means disable */ bool disable_watchdog; From 76d7b12cbbe256bc390689273d1ffae2626f2f92 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Wed, 11 Oct 2017 22:15:15 +0100 Subject: [PATCH 0904/2177] rtlwifi: Remove unused cur_rfstate variables Clean up unused cur_rfstate variables in rtl8188ee, rtl8723ae, rtl8723be and rtl8821ae. Signed-off-by: Christos Gkekas Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c | 4 +--- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 4 +--- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 4 +--- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 4 +--- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 6fbf3df4947e..d31117d52381 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -2236,7 +2236,7 @@ bool rtl88ee_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); - enum rf_pwrstate e_rfpowerstate_toset, cur_rfstate; + enum rf_pwrstate e_rfpowerstate_toset; u32 u4tmp; bool b_actuallyset = false; @@ -2255,8 +2255,6 @@ bool rtl88ee_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid) spin_unlock(&rtlpriv->locks.rf_ps_lock); } - cur_rfstate = ppsc->rfpwr_state; - u4tmp = rtl_read_dword(rtlpriv, REG_GPIO_OUTPUT); e_rfpowerstate_toset = (u4tmp & BIT(31)) ? 
ERFON : ERFOFF; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index 4c07f03b4ec1..8cfd4993c90a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -2104,7 +2104,7 @@ bool rtl8723e_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); struct rtl_phy *rtlphy = &(rtlpriv->phy); - enum rf_pwrstate e_rfpowerstate_toset, cur_rfstate; + enum rf_pwrstate e_rfpowerstate_toset; u8 u1tmp; bool b_actuallyset = false; @@ -2123,8 +2123,6 @@ bool rtl8723e_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid) spin_unlock(&rtlpriv->locks.rf_ps_lock); } - cur_rfstate = ppsc->rfpwr_state; - rtl_write_byte(rtlpriv, REG_GPIO_IO_SEL_2, rtl_read_byte(rtlpriv, REG_GPIO_IO_SEL_2)&~(BIT(1))); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index 6f622b12ba43..239518bd31f1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -2487,7 +2487,7 @@ bool rtl8723be_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); struct rtl_phy *rtlphy = &(rtlpriv->phy); - enum rf_pwrstate e_rfpowerstate_toset, cur_rfstate; + enum rf_pwrstate e_rfpowerstate_toset; u8 u1tmp; bool b_actuallyset = false; @@ -2506,8 +2506,6 @@ bool rtl8723be_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid) spin_unlock(&rtlpriv->locks.rf_ps_lock); } - cur_rfstate = ppsc->rfpwr_state; - rtl_write_byte(rtlpriv, REG_GPIO_IO_SEL_2, rtl_read_byte(rtlpriv, REG_GPIO_IO_SEL_2) & ~(BIT(1))); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index bffd7553c5e6..6e683458a59d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -3846,7 +3846,7 @@ bool rtl8821ae_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); struct rtl_phy *rtlphy = &rtlpriv->phy; - enum rf_pwrstate e_rfpowerstate_toset, cur_rfstate; + enum rf_pwrstate e_rfpowerstate_toset; u8 u1tmp = 0; bool b_actuallyset = false; @@ -3865,8 +3865,6 @@ bool rtl8821ae_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid) spin_unlock(&rtlpriv->locks.rf_ps_lock); } - cur_rfstate = ppsc->rfpwr_state; - rtl_write_byte(rtlpriv, REG_GPIO_IO_SEL_2, rtl_read_byte(rtlpriv, REG_GPIO_IO_SEL_2) & ~(BIT(1))); From 9b692df1e66ff6e6340eaaab20f8f0333daaeb62 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Wed, 4 Oct 2017 18:38:06 -0700 Subject: [PATCH 0905/2177] qtnfmac: do not cache AP settings in driver structures Cached AP setings are passed to WiFi card right after they are initialized and are never used for anything else. There is no point in keeping them in driver state. 
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 17 +------ .../net/wireless/quantenna/qtnfmac/commands.c | 46 +++++++++---------- .../net/wireless/quantenna/qtnfmac/commands.h | 3 +- 3 files changed, 25 insertions(+), 41 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 262e8cfd8f8d..fe157f54b69f 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -267,7 +267,6 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); struct qtnf_wmac *mac = wiphy_priv(wiphy); - struct qtnf_bss_config *bss_cfg; int ret; if (!cfg80211_chandef_identical(&mac->chandef, &settings->chandef)) { @@ -278,21 +277,7 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, settings->chandef.chan->center_freq); } - bss_cfg = &vif->bss_cfg; - memset(bss_cfg, 0, sizeof(*bss_cfg)); - - bss_cfg->bcn_period = settings->beacon_interval; - bss_cfg->dtim = settings->dtim_period; - bss_cfg->auth_type = settings->auth_type; - bss_cfg->privacy = settings->privacy; - - bss_cfg->ssid_len = settings->ssid_len; - memcpy(&bss_cfg->ssid, settings->ssid, bss_cfg->ssid_len); - - memcpy(&bss_cfg->crypto, &settings->crypto, - sizeof(struct cfg80211_crypto_settings)); - - ret = qtnf_cmd_send_config_ap(vif); + ret = qtnf_cmd_send_config_ap(vif, settings); if (ret) { pr_err("VIF%u.%u: failed to push config to FW\n", vif->mac->macid, vif->vifid); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 8f95f9842f49..88fdf7daac92 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -181,10 +181,10 @@ out: return ret; } -int qtnf_cmd_send_config_ap(struct qtnf_vif *vif) +int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, + const struct cfg80211_ap_settings *s) { struct sk_buff *cmd_skb; - struct qtnf_bss_config *bss_cfg = &vif->bss_cfg; struct cfg80211_chan_def *chandef = &vif->mac->chandef; struct qlink_tlv_channel *qchan; struct qlink_auth_encr aen; @@ -200,11 +200,13 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif) qtnf_bus_lock(vif->mac->bus); - qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, bss_cfg->ssid, - bss_cfg->ssid_len); + if (s->ssid && s->ssid_len > 0 && s->ssid_len <= IEEE80211_MAX_SSID_LEN) + qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, s->ssid, + s->ssid_len); + qtnf_cmd_skb_put_tlv_u16(cmd_skb, QTN_TLV_ID_BCN_PERIOD, - bss_cfg->bcn_period); - qtnf_cmd_skb_put_tlv_u8(cmd_skb, QTN_TLV_ID_DTIM, bss_cfg->dtim); + s->beacon_interval); + qtnf_cmd_skb_put_tlv_u8(cmd_skb, QTN_TLV_ID_DTIM, s->dtim_period); qchan = skb_put_zero(cmd_skb, sizeof(*qchan)); qchan->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANNEL); @@ -214,26 +216,22 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif) ieee80211_frequency_to_channel(chandef->chan->center_freq)); memset(&aen, 0, sizeof(aen)); - aen.auth_type = bss_cfg->auth_type; - aen.privacy = !!bss_cfg->privacy; - aen.mfp = bss_cfg->mfp; - aen.wpa_versions = cpu_to_le32(bss_cfg->crypto.wpa_versions); - aen.cipher_group = cpu_to_le32(bss_cfg->crypto.cipher_group); - aen.n_ciphers_pairwise = cpu_to_le32( - bss_cfg->crypto.n_ciphers_pairwise); + aen.auth_type = s->auth_type; + aen.privacy = !!s->privacy; + aen.mfp = 0; + aen.wpa_versions = cpu_to_le32(s->crypto.wpa_versions); + 
aen.cipher_group = cpu_to_le32(s->crypto.cipher_group); + aen.n_ciphers_pairwise = cpu_to_le32(s->crypto.n_ciphers_pairwise); for (i = 0; i < QLINK_MAX_NR_CIPHER_SUITES; i++) - aen.ciphers_pairwise[i] = cpu_to_le32( - bss_cfg->crypto.ciphers_pairwise[i]); - aen.n_akm_suites = cpu_to_le32( - bss_cfg->crypto.n_akm_suites); + aen.ciphers_pairwise[i] = + cpu_to_le32(s->crypto.ciphers_pairwise[i]); + aen.n_akm_suites = cpu_to_le32(s->crypto.n_akm_suites); for (i = 0; i < QLINK_MAX_NR_AKM_SUITES; i++) - aen.akm_suites[i] = cpu_to_le32( - bss_cfg->crypto.akm_suites[i]); - aen.control_port = bss_cfg->crypto.control_port; - aen.control_port_no_encrypt = - bss_cfg->crypto.control_port_no_encrypt; - aen.control_port_ethertype = cpu_to_le16(be16_to_cpu( - bss_cfg->crypto.control_port_ethertype)); + aen.akm_suites[i] = cpu_to_le32(s->crypto.akm_suites[i]); + aen.control_port = s->crypto.control_port; + aen.control_port_no_encrypt =s->crypto.control_port_no_encrypt; + aen.control_port_ethertype = + cpu_to_le16(be16_to_cpu(s->crypto.control_port_ethertype)); qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_CRYPTO, (u8 *)&aen, sizeof(aen)); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index 8a5a82ce82cd..e87c4a484dd4 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -33,7 +33,8 @@ int qtnf_cmd_send_del_intf(struct qtnf_vif *vif); int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac, struct ieee80211_supported_band *band); int qtnf_cmd_send_regulatory_config(struct qtnf_wmac *mac, const char *alpha2); -int qtnf_cmd_send_config_ap(struct qtnf_vif *vif); +int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, + const struct cfg80211_ap_settings *s); int qtnf_cmd_send_start_ap(struct qtnf_vif *vif); int qtnf_cmd_send_stop_ap(struct qtnf_vif *vif); int qtnf_cmd_send_register_mgmt(struct qtnf_vif *vif, u16 frame_type, bool reg); From 8b5f4aa7340a4ebfd8dc11159f5259f51a4d9229 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Wed, 4 Oct 2017 18:38:07 -0700 Subject: [PATCH 0906/2177] qtnfmac: pass all AP settings to wireless card for processing Modify QLINK START_AP command payload to pass all AP settings contained within struct cfg80211_ap_settings. Make most of the settings a constant part of the "config AP" command instead of passing them as optional TLVs.
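The layout trade-off made here can be sketched stand-alone: mandatory fields move into a fixed, packed command header that the receiver parses by offset, while genuinely optional data keeps the type/length/value framing appended after it. Structure and field names below are illustrative, not the actual qlink definitions.

#include <stdint.h>
#include <string.h>

/* Fixed part: always present, parsed by offset on the receiving side. */
struct cfg_ap_cmd {
	uint16_t beacon_interval;
	uint16_t inactivity_timeout;
	uint8_t dtim_period;
	uint8_t hidden_ssid;
	uint8_t info[];			/* optional TLVs follow */
} __attribute__((packed));

/* Optional part: classic type/length/value records. */
struct tlv_hdr {
	uint16_t type;
	uint16_t len;
	uint8_t val[];
} __attribute__((packed));

/* Append one optional TLV after the fixed header; returns bytes used. */
static size_t put_tlv(uint8_t *dst, uint16_t type,
		      const void *val, uint16_t len)
{
	struct tlv_hdr *tlv = (struct tlv_hdr *)dst;

	tlv->type = type;
	tlv->len = len;
	memcpy(tlv->val, val, len);
	return sizeof(*tlv) + len;
}

The benefit is that the firmware no longer has to walk TLVs to discover settings that every START_AP request carries anyway.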
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/commands.c | 52 +++++++------ .../net/wireless/quantenna/qtnfmac/qlink.h | 78 ++++++++++++++----- .../wireless/quantenna/qtnfmac/qlink_util.c | 13 ++++ .../wireless/quantenna/qtnfmac/qlink_util.h | 1 + 4 files changed, 101 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 88fdf7daac92..493c3f86f767 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -187,27 +187,34 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, struct sk_buff *cmd_skb; struct cfg80211_chan_def *chandef = &vif->mac->chandef; struct qlink_tlv_channel *qchan; - struct qlink_auth_encr aen; + struct qlink_cmd_config_ap *cmd; + struct qlink_auth_encr *aen; u16 res_code = QLINK_CMD_RESULT_OK; int ret; int i; cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid, QLINK_CMD_CONFIG_AP, - sizeof(struct qlink_cmd)); + sizeof(*cmd)); if (unlikely(!cmd_skb)) return -ENOMEM; - qtnf_bus_lock(vif->mac->bus); + cmd = (struct qlink_cmd_config_ap *)cmd_skb->data; + cmd->dtim_period = s->dtim_period; + cmd->beacon_interval = cpu_to_le16(s->beacon_interval); + cmd->hidden_ssid = qlink_hidden_ssid_nl2q(s->hidden_ssid); + cmd->inactivity_timeout = cpu_to_le16(s->inactivity_timeout); + cmd->smps_mode = s->smps_mode; + cmd->p2p_ctwindow = s->p2p_ctwindow; + cmd->p2p_opp_ps = s->p2p_opp_ps; + cmd->pbss = s->pbss; + cmd->ht_required = s->ht_required; + cmd->vht_required = s->vht_required; if (s->ssid && s->ssid_len > 0 && s->ssid_len <= IEEE80211_MAX_SSID_LEN) qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, s->ssid, s->ssid_len); - qtnf_cmd_skb_put_tlv_u16(cmd_skb, QTN_TLV_ID_BCN_PERIOD, - s->beacon_interval); - qtnf_cmd_skb_put_tlv_u8(cmd_skb, QTN_TLV_ID_DTIM, s->dtim_period); - qchan = skb_put_zero(cmd_skb, sizeof(*qchan)); qchan->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANNEL); qchan->hdr.len = cpu_to_le16(sizeof(*qchan) - @@ -215,26 +222,25 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, qchan->hw_value = cpu_to_le16( ieee80211_frequency_to_channel(chandef->chan->center_freq)); - memset(&aen, 0, sizeof(aen)); - aen.auth_type = s->auth_type; - aen.privacy = !!s->privacy; - aen.mfp = 0; - aen.wpa_versions = cpu_to_le32(s->crypto.wpa_versions); - aen.cipher_group = cpu_to_le32(s->crypto.cipher_group); - aen.n_ciphers_pairwise = cpu_to_le32(s->crypto.n_ciphers_pairwise); + aen = &cmd->aen; + aen->auth_type = s->auth_type; + aen->privacy = !!s->privacy; + aen->mfp = 0; + aen->wpa_versions = cpu_to_le32(s->crypto.wpa_versions); + aen->cipher_group = cpu_to_le32(s->crypto.cipher_group); + aen->n_ciphers_pairwise = cpu_to_le32(s->crypto.n_ciphers_pairwise); for (i = 0; i < QLINK_MAX_NR_CIPHER_SUITES; i++) - aen.ciphers_pairwise[i] = - cpu_to_le32(s->crypto.ciphers_pairwise[i]); - aen.n_akm_suites = cpu_to_le32(s->crypto.n_akm_suites); + aen->ciphers_pairwise[i] = + cpu_to_le32(s->crypto.ciphers_pairwise[i]); + aen->n_akm_suites = cpu_to_le32(s->crypto.n_akm_suites); for (i = 0; i < QLINK_MAX_NR_AKM_SUITES; i++) - aen.akm_suites[i] = cpu_to_le32(s->crypto.akm_suites[i]); - aen.control_port = s->crypto.control_port; - aen.control_port_no_encrypt =s->crypto.control_port_no_encrypt; - aen.control_port_ethertype = + aen->akm_suites[i] = cpu_to_le32(s->crypto.akm_suites[i]); + aen->control_port = s->crypto.control_port; + aen->control_port_no_encrypt = 
s->crypto.control_port_no_encrypt; + aen->control_port_ethertype = cpu_to_le16(be16_to_cpu(s->crypto.control_port_ethertype)); - qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_CRYPTO, (u8 *)&aen, - sizeof(aen)); + qtnf_bus_lock(vif->mac->bus); ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code); diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index fb88f3ebf083..68142543fdda 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -132,6 +132,24 @@ struct qlink_chandef { u8 rsvd[3]; } __packed; +#define QLINK_MAX_NR_CIPHER_SUITES 5 +#define QLINK_MAX_NR_AKM_SUITES 2 + +struct qlink_auth_encr { + __le32 wpa_versions; + __le32 cipher_group; + __le32 n_ciphers_pairwise; + __le32 ciphers_pairwise[QLINK_MAX_NR_CIPHER_SUITES]; + __le32 n_akm_suites; + __le32 akm_suites[QLINK_MAX_NR_AKM_SUITES]; + __le16 control_port_ethertype; + u8 auth_type; + u8 privacy; + u8 mfp; + u8 control_port; + u8 control_port_no_encrypt; +} __packed; + /* QLINK Command messages related definitions */ @@ -521,6 +539,46 @@ struct qlink_cmd_chan_switch { u8 beacon_count; } __packed; +/** + * enum qlink_hidden_ssid - values for %NL80211_ATTR_HIDDEN_SSID + * + * Refer to &enum nl80211_hidden_ssid + */ +enum qlink_hidden_ssid { + QLINK_HIDDEN_SSID_NOT_IN_USE, + QLINK_HIDDEN_SSID_ZERO_LEN, + QLINK_HIDDEN_SSID_ZERO_CONTENTS +}; + +/** + * struct qlink_cmd_config_ap - data for QLINK_CMD_CONFIG_AP command + * + * @beacon_interval: beacon interval + * @inactivity_timeout: station's inactivity period in seconds + * @dtim_period: DTIM period + * @hidden_ssid: whether to hide the SSID, one of &enum qlink_hidden_ssid + * @smps_mode: SMPS mode + * @ht_required: stations must support HT + * @vht_required: stations must support VHT + * @aen: encryption info + * @info: variable configurations + */ +struct qlink_cmd_config_ap { + struct qlink_cmd chdr; + __le16 beacon_interval; + __le16 inactivity_timeout; + u8 dtim_period; + u8 hidden_ssid; + u8 smps_mode; + u8 p2p_ctwindow; + u8 p2p_opp_ps; + u8 pbss; + u8 ht_required; + u8 vht_required; + struct qlink_auth_encr aen; + u8 info[0]; +} __packed; + /* QLINK Command Responses messages related definitions */ @@ -881,8 +939,6 @@ enum qlink_tlv_id { QTN_TLV_ID_RTS_THRESH = 0x0202, QTN_TLV_ID_SRETRY_LIMIT = 0x0203, QTN_TLV_ID_LRETRY_LIMIT = 0x0204, - QTN_TLV_ID_BCN_PERIOD = 0x0205, - QTN_TLV_ID_DTIM = 0x0206, QTN_TLV_ID_REG_RULE = 0x0207, QTN_TLV_ID_CHANNEL = 0x020F, QTN_TLV_ID_COVERAGE_CLASS = 0x0213, @@ -1072,24 +1128,6 @@ struct qlink_tlv_channel { u8 rsvd[2]; } __packed; -#define QLINK_MAX_NR_CIPHER_SUITES 5 -#define QLINK_MAX_NR_AKM_SUITES 2 - -struct qlink_auth_encr { - __le32 wpa_versions; - __le32 cipher_group; - __le32 n_ciphers_pairwise; - __le32 ciphers_pairwise[QLINK_MAX_NR_CIPHER_SUITES]; - __le32 n_akm_suites; - __le32 akm_suites[QLINK_MAX_NR_AKM_SUITES]; - __le16 control_port_ethertype; - u8 auth_type; - u8 privacy; - u8 mfp; - u8 control_port; - u8 control_port_no_encrypt; -} __packed; - struct qlink_chan_stats { __le32 chan_num; __le32 cca_tx; diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c index 3c1db5bd6393..63a74b2aa4a3 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c @@ -127,3 +127,16 @@ void qlink_chandef_q2cfg(struct wiphy *wiphy, break; } } + +enum qlink_hidden_ssid qlink_hidden_ssid_nl2q(enum 
nl80211_hidden_ssid nl_val) +{ + switch (nl_val) { + case NL80211_HIDDEN_SSID_ZERO_LEN: + return QLINK_HIDDEN_SSID_ZERO_LEN; + case NL80211_HIDDEN_SSID_ZERO_CONTENTS: + return QLINK_HIDDEN_SSID_ZERO_CONTENTS; + case NL80211_HIDDEN_SSID_NOT_IN_USE: + default: + return QLINK_HIDDEN_SSID_NOT_IN_USE; + } +} diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h index 5e49a8a09977..416f11de211d 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h @@ -66,5 +66,6 @@ u8 qlink_chan_width_mask_to_nl(u16 qlink_mask); void qlink_chandef_q2cfg(struct wiphy *wiphy, const struct qlink_chandef *qch, struct cfg80211_chan_def *chdef); +enum qlink_hidden_ssid qlink_hidden_ssid_nl2q(enum nl80211_hidden_ssid nl_val); #endif /* _QTN_FMAC_QLINK_UTIL_H_ */ From f99201cb084df11cf807ef151d14715467c9ccc2 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Wed, 4 Oct 2017 18:38:08 -0700 Subject: [PATCH 0907/2177] qtnfmac: pass channel definition to WiFi card on START_AP command Introduce "channel definition" TLV containing full channel description (center frequency for both segments + BW) and pass it to the wireless card in the payload of the START_AP command. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 9 ------ .../net/wireless/quantenna/qtnfmac/commands.c | 28 ++++++++-------- .../net/wireless/quantenna/qtnfmac/qlink.h | 13 ++++++++ .../wireless/quantenna/qtnfmac/qlink_util.c | 32 +++++++++++++++++++ .../wireless/quantenna/qtnfmac/qlink_util.h | 2 ++ 5 files changed, 62 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index fe157f54b69f..056018ecccbf 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -266,17 +266,8 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); - struct qtnf_wmac *mac = wiphy_priv(wiphy); int ret; - if (!cfg80211_chandef_identical(&mac->chandef, &settings->chandef)) { - memcpy(&mac->chandef, &settings->chandef, sizeof(mac->chandef)); - if (vif->vifid != 0) - pr_warn("%s: unexpected chan %u (%u MHz)\n", dev->name, - settings->chandef.chan->hw_value, - settings->chandef.chan->center_freq); - } - ret = qtnf_cmd_send_config_ap(vif, settings); if (ret) { pr_err("VIF%u.%u: failed to push config to FW\n", diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 493c3f86f767..f5bc43b830b1 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -185,8 +185,6 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, const struct cfg80211_ap_settings *s) { struct sk_buff *cmd_skb; - struct cfg80211_chan_def *chandef = &vif->mac->chandef; - struct qlink_tlv_channel *qchan; struct qlink_cmd_config_ap *cmd; struct qlink_auth_encr *aen; u16 res_code = QLINK_CMD_RESULT_OK; @@ -211,17 +209,6 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, cmd->ht_required = s->ht_required; cmd->vht_required = s->vht_required; - if (s->ssid && s->ssid_len > 0 && s->ssid_len <= IEEE80211_MAX_SSID_LEN) - qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, s->ssid, - s->ssid_len); - - qchan = skb_put_zero(cmd_skb, sizeof(*qchan)); - 
qchan->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANNEL); - qchan->hdr.len = cpu_to_le16(sizeof(*qchan) - - sizeof(struct qlink_tlv_hdr)); - qchan->hw_value = cpu_to_le16( - ieee80211_frequency_to_channel(chandef->chan->center_freq)); - aen = &cmd->aen; aen->auth_type = s->auth_type; aen->privacy = !!s->privacy; @@ -240,6 +227,21 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, aen->control_port_ethertype = cpu_to_le16(be16_to_cpu(s->crypto.control_port_ethertype)); + if (s->ssid && s->ssid_len > 0 && s->ssid_len <= IEEE80211_MAX_SSID_LEN) + qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, s->ssid, + s->ssid_len); + + if (cfg80211_chandef_valid(&s->chandef)) { + struct qlink_tlv_chandef *chtlv = + (struct qlink_tlv_chandef *)skb_put(cmd_skb, + sizeof(*chtlv)); + + chtlv->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANDEF); + chtlv->hdr.len = cpu_to_le16(sizeof(*chtlv) - + sizeof(chtlv->hdr)); + qlink_chandef_cfg2q(&s->chandef, &chtlv->chan); + } + qtnf_bus_lock(vif->mac->bus); ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code); diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 68142543fdda..641d2524b1e4 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -941,6 +941,7 @@ enum qlink_tlv_id { QTN_TLV_ID_LRETRY_LIMIT = 0x0204, QTN_TLV_ID_REG_RULE = 0x0207, QTN_TLV_ID_CHANNEL = 0x020F, + QTN_TLV_ID_CHANDEF = 0x0210, QTN_TLV_ID_COVERAGE_CLASS = 0x0213, QTN_TLV_ID_IFACE_LIMIT = 0x0214, QTN_TLV_ID_NUM_IFACE_COMB = 0x0215, @@ -1128,6 +1129,18 @@ struct qlink_tlv_channel { u8 rsvd[2]; } __packed; +/** + * struct qlink_tlv_chandef - data for QTN_TLV_ID_CHANDEF TLV + * + * Channel definition. + * + * @chan: channel definition data. + */ +struct qlink_tlv_chandef { + struct qlink_tlv_hdr hdr; + struct qlink_chandef chan; +} __packed; + struct qlink_chan_stats { __le32 chan_num; __le32 cca_tx; diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c index 63a74b2aa4a3..61d999affb09 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c @@ -128,6 +128,38 @@ void qlink_chandef_q2cfg(struct wiphy *wiphy, } } +static u8 qlink_chanwidth_nl_to_qlink(enum nl80211_chan_width nlwidth) +{ + switch (nlwidth) { + case NL80211_CHAN_WIDTH_20_NOHT: + return QLINK_CHAN_WIDTH_20_NOHT; + case NL80211_CHAN_WIDTH_20: + return QLINK_CHAN_WIDTH_20; + case NL80211_CHAN_WIDTH_40: + return QLINK_CHAN_WIDTH_40; + case NL80211_CHAN_WIDTH_80: + return QLINK_CHAN_WIDTH_80; + case NL80211_CHAN_WIDTH_80P80: + return QLINK_CHAN_WIDTH_80P80; + case NL80211_CHAN_WIDTH_160: + return QLINK_CHAN_WIDTH_160; + case NL80211_CHAN_WIDTH_5: + return QLINK_CHAN_WIDTH_5; + case NL80211_CHAN_WIDTH_10: + return QLINK_CHAN_WIDTH_10; + default: + return -1; + } +} + +void qlink_chandef_cfg2q(const struct cfg80211_chan_def *chdef, + struct qlink_chandef *qch) +{ + qch->center_freq1 = cpu_to_le16(chdef->center_freq1); + qch->center_freq2 = cpu_to_le16(chdef->center_freq2); + qch->width = qlink_chanwidth_nl_to_qlink(chdef->width); +} + enum qlink_hidden_ssid qlink_hidden_ssid_nl2q(enum nl80211_hidden_ssid nl_val) { switch (nl_val) { diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h index 416f11de211d..260383d6d109 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h @@ 
-66,6 +66,8 @@ u8 qlink_chan_width_mask_to_nl(u16 qlink_mask); void qlink_chandef_q2cfg(struct wiphy *wiphy, const struct qlink_chandef *qch, struct cfg80211_chan_def *chdef); +void qlink_chandef_cfg2q(const struct cfg80211_chan_def *chdef, + struct qlink_chandef *qch); enum qlink_hidden_ssid qlink_hidden_ssid_nl2q(enum nl80211_hidden_ssid nl_val); #endif /* _QTN_FMAC_QLINK_UTIL_H_ */ From 524522c445e114210c198e544133a7ee3a4e141e Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Wed, 4 Oct 2017 18:38:09 -0700 Subject: [PATCH 0908/2177] qtnfmac: get rid of QTNF_STATE_AP_CONFIG QTNF_STATE_AP_CONFIG is redundant and its usage can be safely removed. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 8 -------- drivers/net/wireless/quantenna/qtnfmac/commands.c | 3 --- drivers/net/wireless/quantenna/qtnfmac/core.h | 1 - 3 files changed, 12 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 056018ecccbf..c6608462e6c2 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -275,13 +275,6 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, goto out; } - if (!(vif->bss_status & QTNF_STATE_AP_CONFIG)) { - pr_err("VIF%u.%u: AP config failed in FW\n", vif->mac->macid, - vif->vifid); - ret = -EFAULT; - goto out; - } - ret = qtnf_mgmt_set_appie(vif, &settings->beacon); if (ret) { pr_err("VIF%u.%u: failed to add IEs to beacon\n", @@ -316,7 +309,6 @@ static int qtnf_stop_ap(struct wiphy *wiphy, struct net_device *dev) pr_err("VIF%u.%u: failed to stop AP operation in FW\n", vif->mac->macid, vif->vifid); vif->bss_status &= ~QTNF_STATE_AP_START; - vif->bss_status &= ~QTNF_STATE_AP_CONFIG; netif_carrier_off(vif->netdev); } diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index f5bc43b830b1..59ca6ca0d20c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -256,8 +256,6 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, goto out; } - vif->bss_status |= QTNF_STATE_AP_CONFIG; - out: qtnf_bus_unlock(vif->mac->bus); return ret; @@ -290,7 +288,6 @@ int qtnf_cmd_send_stop_ap(struct qtnf_vif *vif) } vif->bss_status &= ~QTNF_STATE_AP_START; - vif->bss_status &= ~QTNF_STATE_AP_CONFIG; netif_carrier_off(vif->netdev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 2cd015048598..f8165a7e93c8 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -52,7 +52,6 @@ #define QTNF_DEF_WDOG_TIMEOUT 5 #define QTNF_TX_TIMEOUT_TRSHLD 100 -#define QTNF_STATE_AP_CONFIG BIT(2) #define QTNF_STATE_AP_START BIT(1) extern const struct net_device_ops qtnf_netdev_ops; From d7b80052fa915ff4bbcace75c92335e793af2fb9 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Wed, 4 Oct 2017 18:38:10 -0700 Subject: [PATCH 0909/2177] qtnfmac: get rid of QTNF_STATE_AP_START flag QTNF_STATE_AP_START usage is redundant and imposes additional state synchronization maintenance. We may as well leave state checking to network card and upper layers (cfg80211, nl80211 and userspace). 
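The simplification can be shown with a tiny stand-alone sketch; the names are invented, not the driver's real symbols. Once the driver stops mirroring AP state in its own flag, the start handler simply returns the firmware's verdict, and cfg80211/nl80211 already track whether the AP is up.

#include <errno.h>

struct vif { unsigned long status; };
#define STATE_AP_START (1UL << 1)

static int fw_start_ap(struct vif *v) { (void)v; return 0; }	/* stub firmware call */

/* Before: a duplicated state bit, checked right after being set elsewhere. */
static int start_ap_old(struct vif *v)
{
	int ret = fw_start_ap(v);

	if (ret)
		return ret;
	if (!(v->status & STATE_AP_START))	/* easy to get stale or racy */
		return -EFAULT;
	return 0;
}

/* After: report the firmware result and let the upper layers keep state. */
static int start_ap_new(struct vif *v)
{
	return fw_start_ap(v);
}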
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 29 +------------------ .../net/wireless/quantenna/qtnfmac/commands.c | 3 -- drivers/net/wireless/quantenna/qtnfmac/core.h | 3 +- .../net/wireless/quantenna/qtnfmac/event.c | 12 -------- 4 files changed, 2 insertions(+), 45 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index c6608462e6c2..08f1f54af64a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -254,11 +254,6 @@ static int qtnf_change_beacon(struct wiphy *wiphy, struct net_device *dev, { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); - if (!(vif->bss_status & QTNF_STATE_AP_START)) { - pr_err("VIF%u.%u: not started\n", vif->mac->macid, vif->vifid); - return -EFAULT; - } - return qtnf_mgmt_set_appie(vif, info); } @@ -283,17 +278,9 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, } ret = qtnf_cmd_send_start_ap(vif); - if (ret) { + if (ret) pr_err("VIF%u.%u: failed to start AP\n", vif->mac->macid, vif->vifid); - goto out; - } - - if (!(vif->bss_status & QTNF_STATE_AP_START)) { - pr_err("VIF%u.%u: FW failed to start AP operation\n", - vif->mac->macid, vif->vifid); - ret = -EFAULT; - } out: return ret; @@ -308,7 +295,6 @@ static int qtnf_stop_ap(struct wiphy *wiphy, struct net_device *dev) if (ret) { pr_err("VIF%u.%u: failed to stop AP operation in FW\n", vif->mac->macid, vif->vifid); - vif->bss_status &= ~QTNF_STATE_AP_START; netif_carrier_off(vif->netdev); } @@ -784,19 +770,6 @@ static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, params->chandef.chan->hw_value, params->count, params->radar_required, params->block_tx); - switch (vif->wdev.iftype) { - case NL80211_IFTYPE_AP: - if (!(vif->bss_status & QTNF_STATE_AP_START)) { - pr_warn("AP not started on %s\n", dev->name); - return -ENOTCONN; - } - break; - default: - pr_err("unsupported vif type (%d) on %s\n", - vif->wdev.iftype, dev->name); - return -EOPNOTSUPP; - } - if (!cfg80211_chandef_valid(¶ms->chandef)) { pr_err("%s: invalid channel\n", dev->name); return -EINVAL; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 59ca6ca0d20c..60d65df76f66 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -173,7 +173,6 @@ int qtnf_cmd_send_start_ap(struct qtnf_vif *vif) goto out; } - vif->bss_status |= QTNF_STATE_AP_START; netif_carrier_on(vif->netdev); out: @@ -287,8 +286,6 @@ int qtnf_cmd_send_stop_ap(struct qtnf_vif *vif) goto out; } - vif->bss_status &= ~QTNF_STATE_AP_START; - netif_carrier_off(vif->netdev); out: diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index f8165a7e93c8..b35200d05f24 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -52,9 +52,8 @@ #define QTNF_DEF_WDOG_TIMEOUT 5 #define QTNF_TX_TIMEOUT_TRSHLD 100 -#define QTNF_STATE_AP_START BIT(1) - extern const struct net_device_ops qtnf_netdev_ops; + struct qtnf_bus; struct qtnf_vif; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 7481d5bdf647..d7fb076350dd 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -53,12 +53,6 @@ 
qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif, return -EPROTO; } - if (!(vif->bss_status & QTNF_STATE_AP_START)) { - pr_err("VIF%u.%u: STA_ASSOC event when AP is not started\n", - mac->macid, vif->vifid); - return -EPROTO; - } - sta_addr = sta_assoc->sta_addr; frame_control = le16_to_cpu(sta_assoc->frame_control); @@ -127,12 +121,6 @@ qtnf_event_handle_sta_deauth(struct qtnf_wmac *mac, struct qtnf_vif *vif, return -EPROTO; } - if (!(vif->bss_status & QTNF_STATE_AP_START)) { - pr_err("VIF%u.%u: STA_DEAUTH event when AP is not started\n", - mac->macid, vif->vifid); - return -EPROTO; - } - sta_addr = sta_deauth->sta_addr; reason = le16_to_cpu(sta_deauth->reason); From 9766d1dd52ec92d78ad800fefa1d7806d73e6fdb Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Wed, 4 Oct 2017 18:38:11 -0700 Subject: [PATCH 0910/2177] qtnfmac: do not cache BSS state in per-VIF structure This cached state is used only once immediately after it is initialized, except for the BSSID value, which is used for event processing. There is no reason to keep unused data in the driver's state. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 40 +++------------ .../net/wireless/quantenna/qtnfmac/commands.c | 49 ++++++++++++------- drivers/net/wireless/quantenna/qtnfmac/core.h | 19 ++----- 3 files changed, 42 insertions(+), 66 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 08f1f54af64a..cf0f19effd20 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -131,6 +131,7 @@ int qtnf_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) vif->netdev = NULL; vif->wdev.iftype = NL80211_IFTYPE_UNSPECIFIED; eth_zero_addr(vif->mac_addr); + eth_zero_addr(vif->bssid); return 0; } @@ -199,6 +200,8 @@ err_mac: qtnf_cmd_send_del_intf(vif); err_cmd: vif->wdev.iftype = NL80211_IFTYPE_UNSPECIFIED; + eth_zero_addr(vif->mac_addr); + eth_zero_addr(vif->bssid); return ERR_PTR(-EFAULT); } @@ -567,7 +570,6 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); - struct qtnf_bss_config *bss_cfg; int ret; if (vif->wdev.iftype != NL80211_IFTYPE_STATION) @@ -576,38 +578,10 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev, if (vif->sta_state != QTNF_STA_DISCONNECTED) return -EBUSY; - bss_cfg = &vif->bss_cfg; - memset(bss_cfg, 0, sizeof(*bss_cfg)); - - bss_cfg->ssid_len = sme->ssid_len; - memcpy(&bss_cfg->ssid, sme->ssid, bss_cfg->ssid_len); - bss_cfg->auth_type = sme->auth_type; - bss_cfg->privacy = sme->privacy; - bss_cfg->mfp = sme->mfp; - - if ((sme->bg_scan_period > 0) && - (sme->bg_scan_period <= QTNF_MAX_BG_SCAN_PERIOD)) - bss_cfg->bg_scan_period = sme->bg_scan_period; - else if (sme->bg_scan_period == -1) - bss_cfg->bg_scan_period = QTNF_DEFAULT_BG_SCAN_PERIOD; - else - bss_cfg->bg_scan_period = 0; /* disabled */ - - bss_cfg->connect_flags = 0; - - if (sme->flags & ASSOC_REQ_DISABLE_HT) - bss_cfg->connect_flags |= QLINK_STA_CONNECT_DISABLE_HT; - if (sme->flags & ASSOC_REQ_DISABLE_VHT) - bss_cfg->connect_flags |= QLINK_STA_CONNECT_DISABLE_VHT; - if (sme->flags & ASSOC_REQ_USE_RRM) - bss_cfg->connect_flags |= QLINK_STA_CONNECT_USE_RRM; - - memcpy(&bss_cfg->crypto, &sme->crypto, sizeof(bss_cfg->crypto)); - if (sme->bssid) - ether_addr_copy(bss_cfg->bssid, sme->bssid); + ether_addr_copy(vif->bssid, 
sme->bssid); else - eth_zero_addr(bss_cfg->bssid); + eth_zero_addr(vif->bssid); ret = qtnf_cmd_send_connect(vif, sme); if (ret) { @@ -1021,7 +995,7 @@ void qtnf_virtual_intf_cleanup(struct net_device *ndev) break; case QTNF_STA_CONNECTING: cfg80211_connect_result(vif->netdev, - vif->bss_cfg.bssid, NULL, 0, + vif->bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, GFP_KERNEL); @@ -1048,7 +1022,7 @@ void qtnf_cfg80211_vif_reset(struct qtnf_vif *vif) switch (vif->sta_state) { case QTNF_STA_CONNECTING: cfg80211_connect_result(vif->netdev, - vif->bss_cfg.bssid, NULL, 0, + vif->bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, GFP_KERNEL); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 60d65df76f66..b65d7059087a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2037,11 +2037,11 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, { struct sk_buff *cmd_skb; struct qlink_cmd_connect *cmd; - struct qtnf_bss_config *bss_cfg = &vif->bss_cfg; struct qlink_auth_encr aen; u16 res_code = QLINK_CMD_RESULT_OK; int ret; int i; + u32 connect_flags = 0; cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid, QLINK_CMD_CONNECT, @@ -2053,42 +2053,57 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, cmd = (struct qlink_cmd_connect *)cmd_skb->data; - ether_addr_copy(cmd->bssid, bss_cfg->bssid); + ether_addr_copy(cmd->bssid, vif->bssid); if (sme->channel) cmd->channel = cpu_to_le16(sme->channel->hw_value); else cmd->channel = 0; - cmd->bg_scan_period = cpu_to_le16(bss_cfg->bg_scan_period); + if ((sme->bg_scan_period > 0) && + (sme->bg_scan_period <= QTNF_MAX_BG_SCAN_PERIOD)) + cmd->bg_scan_period = cpu_to_le16(sme->bg_scan_period); + else if (sme->bg_scan_period == -1) + cmd->bg_scan_period = cpu_to_le16(QTNF_DEFAULT_BG_SCAN_PERIOD); + else + cmd->bg_scan_period = 0; /* disabled */ + + if (sme->flags & ASSOC_REQ_DISABLE_HT) + connect_flags |= QLINK_STA_CONNECT_DISABLE_HT; + if (sme->flags & ASSOC_REQ_DISABLE_VHT) + connect_flags |= QLINK_STA_CONNECT_DISABLE_VHT; + if (sme->flags & ASSOC_REQ_USE_RRM) + connect_flags |= QLINK_STA_CONNECT_USE_RRM; + + cmd->flags = cpu_to_le32(connect_flags); memset(&aen, 0, sizeof(aen)); - aen.auth_type = bss_cfg->auth_type; - aen.privacy = !!bss_cfg->privacy; - aen.mfp = bss_cfg->mfp; - aen.wpa_versions = cpu_to_le32(bss_cfg->crypto.wpa_versions); - aen.cipher_group = cpu_to_le32(bss_cfg->crypto.cipher_group); + aen.auth_type = sme->auth_type; + aen.privacy = !!sme->privacy; + aen.mfp = sme->mfp; + aen.wpa_versions = cpu_to_le32(sme->crypto.wpa_versions); + aen.cipher_group = cpu_to_le32(sme->crypto.cipher_group); aen.n_ciphers_pairwise = cpu_to_le32( - bss_cfg->crypto.n_ciphers_pairwise); + sme->crypto.n_ciphers_pairwise); for (i = 0; i < QLINK_MAX_NR_CIPHER_SUITES; i++) aen.ciphers_pairwise[i] = cpu_to_le32( - bss_cfg->crypto.ciphers_pairwise[i]); + sme->crypto.ciphers_pairwise[i]); - aen.n_akm_suites = cpu_to_le32(bss_cfg->crypto.n_akm_suites); + aen.n_akm_suites = cpu_to_le32(sme->crypto.n_akm_suites); for (i = 0; i < QLINK_MAX_NR_AKM_SUITES; i++) aen.akm_suites[i] = cpu_to_le32( - bss_cfg->crypto.akm_suites[i]); + sme->crypto.akm_suites[i]); - aen.control_port = bss_cfg->crypto.control_port; + aen.control_port = sme->crypto.control_port; aen.control_port_no_encrypt = - bss_cfg->crypto.control_port_no_encrypt; + sme->crypto.control_port_no_encrypt; aen.control_port_ethertype = cpu_to_le16(be16_to_cpu( - 
bss_cfg->crypto.control_port_ethertype)); + sme->crypto.control_port_ethertype)); - qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, bss_cfg->ssid, - bss_cfg->ssid_len); + qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, sme->ssid, + sme->ssid_len); qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_CRYPTO, (u8 *)&aen, sizeof(aen)); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index b35200d05f24..5234a9e3d746 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -57,20 +57,6 @@ extern const struct net_device_ops qtnf_netdev_ops; struct qtnf_bus; struct qtnf_vif; -struct qtnf_bss_config { - u8 ssid[IEEE80211_MAX_SSID_LEN]; - u8 bssid[ETH_ALEN]; - size_t ssid_len; - u8 dtim; - u16 bcn_period; - u16 auth_type; - bool privacy; - enum nl80211_mfp mfp; - struct cfg80211_crypto_settings crypto; - u16 bg_scan_period; - u32 connect_flags; -}; - struct qtnf_sta_node { struct list_head list; u8 mac_addr[ETH_ALEN]; @@ -89,6 +75,8 @@ enum qtnf_sta_state { struct qtnf_vif { struct wireless_dev wdev; + u8 bssid[ETH_ALEN]; + u8 mac_addr[ETH_ALEN]; u8 vifid; u8 bss_priority; u8 bss_status; @@ -96,9 +84,8 @@ struct qtnf_vif { u16 mgmt_frames_bitmask; struct net_device *netdev; struct qtnf_wmac *mac; - u8 mac_addr[ETH_ALEN]; + struct work_struct reset_work; - struct qtnf_bss_config bss_cfg; struct qtnf_sta_list sta_list; unsigned long cons_tx_timeout_cnt; }; From d23d1361316219621d146ba4b396a2ec4ddebfbf Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Wed, 4 Oct 2017 18:38:12 -0700 Subject: [PATCH 0911/2177] qtnfmac: make encryption info a part of CONNECT command. Encryption info is a constant part of STA settings, no point to pass it as an optional TLV. Remove QTN_TLV_ID_CRYPTO type as it's not used anymore. 
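Because the encryption block is now a fixed member of the command rather than an appended TLV, its wire layout has to stay stable on both ends. A stand-alone sketch of that idea (illustrative names and sizes, not the qlink definitions):

#include <stdint.h>

#define MAX_PAIRWISE 5

/* Always-present block, embedded directly in the command... */
struct auth_encr {
	uint32_t wpa_versions;
	uint32_t cipher_group;
	uint32_t n_ciphers_pairwise;
	uint32_t ciphers_pairwise[MAX_PAIRWISE];
	uint8_t auth_type;
	uint8_t privacy;
} __attribute__((packed));

/* ...so the receiver finds it at a fixed offset, with no TLV walk. */
struct connect_cmd {
	uint32_t flags;
	uint16_t channel;
	uint16_t bg_scan_period;
	struct auth_encr aen;
	uint8_t bssid[6];
	uint8_t payload[];	/* SSID and IEs still travel as TLVs */
} __attribute__((packed));

/* A compile-time check keeps host and firmware wire formats in step. */
_Static_assert(sizeof(struct auth_encr) == 14 + 4 * MAX_PAIRWISE,
	       "auth_encr layout changed");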
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/commands.c | 42 +++++++++---------- .../net/wireless/quantenna/qtnfmac/qlink.h | 5 ++- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index b65d7059087a..babdc600c193 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2037,7 +2037,7 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, { struct sk_buff *cmd_skb; struct qlink_cmd_connect *cmd; - struct qlink_auth_encr aen; + struct qlink_auth_encr *aen; u16 res_code = QLINK_CMD_RESULT_OK; int ret; int i; @@ -2049,8 +2049,6 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, if (unlikely(!cmd_skb)) return -ENOMEM; - qtnf_bus_lock(vif->mac->bus); - cmd = (struct qlink_cmd_connect *)cmd_skb->data; ether_addr_copy(cmd->bssid, vif->bssid); @@ -2077,41 +2075,39 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, cmd->flags = cpu_to_le32(connect_flags); - memset(&aen, 0, sizeof(aen)); - aen.auth_type = sme->auth_type; - aen.privacy = !!sme->privacy; - aen.mfp = sme->mfp; - aen.wpa_versions = cpu_to_le32(sme->crypto.wpa_versions); - aen.cipher_group = cpu_to_le32(sme->crypto.cipher_group); - aen.n_ciphers_pairwise = cpu_to_le32( - sme->crypto.n_ciphers_pairwise); + aen = &cmd->aen; + aen->auth_type = sme->auth_type; + aen->privacy = !!sme->privacy; + aen->mfp = sme->mfp; + aen->wpa_versions = cpu_to_le32(sme->crypto.wpa_versions); + aen->cipher_group = cpu_to_le32(sme->crypto.cipher_group); + aen->n_ciphers_pairwise = cpu_to_le32(sme->crypto.n_ciphers_pairwise); for (i = 0; i < QLINK_MAX_NR_CIPHER_SUITES; i++) - aen.ciphers_pairwise[i] = cpu_to_le32( - sme->crypto.ciphers_pairwise[i]); + aen->ciphers_pairwise[i] = + cpu_to_le32(sme->crypto.ciphers_pairwise[i]); - aen.n_akm_suites = cpu_to_le32(sme->crypto.n_akm_suites); + aen->n_akm_suites = cpu_to_le32(sme->crypto.n_akm_suites); for (i = 0; i < QLINK_MAX_NR_AKM_SUITES; i++) - aen.akm_suites[i] = cpu_to_le32( - sme->crypto.akm_suites[i]); + aen->akm_suites[i] = cpu_to_le32(sme->crypto.akm_suites[i]); - aen.control_port = sme->crypto.control_port; - aen.control_port_no_encrypt = + aen->control_port = sme->crypto.control_port; + aen->control_port_no_encrypt = sme->crypto.control_port_no_encrypt; - aen.control_port_ethertype = cpu_to_le16(be16_to_cpu( - sme->crypto.control_port_ethertype)); + aen->control_port_ethertype = + cpu_to_le16(be16_to_cpu(sme->crypto.control_port_ethertype)); qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, sme->ssid, - sme->ssid_len); - qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_CRYPTO, (u8 *)&aen, - sizeof(aen)); + sme->ssid_len); if (sme->ie_len != 0) qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_IE_SET, sme->ie, sme->ie_len); + qtnf_bus_lock(vif->mac->bus); + ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code); if (unlikely(ret)) diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 641d2524b1e4..7b313d38c30b 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -417,8 +417,9 @@ enum qlink_sta_connect_flags { * struct qlink_cmd_connect - data for QLINK_CMD_CONNECT command * * @flags: for future use. - * @freq: center frequence of a channel which should be used to connect. + * @channel: channel which should be used to connect. * @bg_scan_period: period of background scan. 
+ * @aen: authentication information. * @bssid: BSSID of the BSS to connect to. * @payload: variable portion of connection request. */ @@ -427,6 +428,7 @@ struct qlink_cmd_connect { __le32 flags; __le16 channel; __le16 bg_scan_period; + struct qlink_auth_encr aen; u8 bssid[ETH_ALEN]; u8 payload[0]; } __packed; @@ -950,7 +952,6 @@ enum qlink_tlv_id { QTN_TLV_ID_STA_GENERIC_INFO = 0x0301, QTN_TLV_ID_KEY = 0x0302, QTN_TLV_ID_SEQ = 0x0303, - QTN_TLV_ID_CRYPTO = 0x0304, QTN_TLV_ID_IE_SET = 0x0305, }; From ef81e8e9dbbb7f8348f3a62a6ab05e7aea33ea35 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Wed, 4 Oct 2017 18:38:13 -0700 Subject: [PATCH 0912/2177] qtnfmac: do not cache current channel info in driver's state Linux Wireless device structure already has current channel information that can be used when needed. Start using it. Since driver's channel info is not used anymore, remove it. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 7 ++----- drivers/net/wireless/quantenna/qtnfmac/core.h | 1 - drivers/net/wireless/quantenna/qtnfmac/event.c | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index cf0f19effd20..028bed1acd82 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -630,15 +630,15 @@ qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev, int idx, struct survey_info *survey) { struct qtnf_wmac *mac = wiphy_priv(wiphy); + struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_supported_band *sband; - struct cfg80211_chan_def *chandef; + const struct cfg80211_chan_def *chandef = &wdev->chandef; struct ieee80211_channel *chan; struct qtnf_chan_stats stats; struct qtnf_vif *vif; int ret; vif = qtnf_netdev_get_priv(dev); - chandef = &mac->chandef; sband = wiphy->bands[NL80211_BAND_2GHZ]; if (sband && idx >= sband->n_channels) { @@ -705,7 +705,6 @@ static int qtnf_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef) { - struct qtnf_wmac *mac = wiphy_priv(wiphy); struct net_device *ndev = wdev->netdev; struct qtnf_vif *vif; int ret; @@ -728,8 +727,6 @@ qtnf_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, ret = -ENODATA; } - memcpy(&mac->chandef, chandef, sizeof(mac->chandef)); - out: return ret; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 5234a9e3d746..44a2cbb19310 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -126,7 +126,6 @@ struct qtnf_wmac { struct qtnf_mac_info macinfo; struct qtnf_vif iflist[QTNF_MAX_INTF]; struct cfg80211_scan_request *scan_req; - struct cfg80211_chan_def chandef; struct mutex mac_lock; /* lock during wmac speicific ops */ struct timer_list scan_timeout; }; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index d7fb076350dd..f639ea39c682 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -372,8 +372,6 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, mac->macid, chandef.chan->hw_value, chandef.center_freq1, chandef.center_freq2, chandef.width); - memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); - for (i = 0; i < QTNF_MAX_INTF; i++) { vif = &mac->iflist[i]; if (vif->wdev.iftype == 
NL80211_IFTYPE_UNSPECIFIED) From a39644b235c1a45f84d6e16603cacb93740ed2d4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Oct 2017 14:05:24 +0200 Subject: [PATCH 0913/2177] rsi: fix integer overflow warning gcc produces a harmless warning about a recently introduced signed integer overflow: drivers/net/wireless/rsi/rsi_91x_hal.c: In function 'rsi_prepare_mgmt_desc': include/uapi/linux/swab.h:13:15: error: integer overflow in expression [-Werror=overflow] (((__u16)(x) & (__u16)0x00ffU) << 8) | \ ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~ include/uapi/linux/swab.h:104:2: note: in expansion of macro '___constant_swab16' ___constant_swab16(x) : \ ^~~~~~~~~~~~~~~~~~ include/uapi/linux/byteorder/big_endian.h:34:43: note: in expansion of macro '__swab16' #define __cpu_to_le16(x) ((__force __le16)__swab16((x))) ^~~~~~~~ include/linux/byteorder/generic.h:89:21: note: in expansion of macro '__cpu_to_le16' #define cpu_to_le16 __cpu_to_le16 ^~~~~~~~~~~~~ drivers/net/wireless/rsi/rsi_91x_hal.c:136:3: note: in expansion of macro 'cpu_to_le16' cpu_to_le16((tx_params->vap_id << RSI_DESC_VAP_ID_OFST) & ^~~~~~~~~~~ The problem is that the 'mask' value is a signed integer that gets turned into a negative number when truncated to 16 bits. Making it an unsigned constant avoids this. Fixes: eac4eed3224b ("rsi: tx and rx path enhancements for p2p mode") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_mgmt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index b9d0802c1b0f..e21723013f8d 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -189,7 +189,7 @@ IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \ IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) -#define RSI_DESC_VAP_ID_MASK 0xC000 +#define RSI_DESC_VAP_ID_MASK 0xC000u #define RSI_DESC_VAP_ID_OFST 14 #define RSI_DATA_DESC_MAC_BBP_INFO BIT(0) #define RSI_DATA_DESC_NO_ACK_IND BIT(9) From 96378bd2c6cda5f04d0f6da2cd35d4670a982c38 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 4 Oct 2017 12:22:55 +0300 Subject: [PATCH 0914/2177] ath10k: fix core PCI suspend when WoWLAN is supported but disabled For devices where the FW supports WoWLAN but user-space has not configured it, we don't do any PCI-specific suspend/resume operations, because mac80211 doesn't call drv_suspend() when !wowlan. This has particularly bad effects for some platforms, because we don't stop the power-save timer, and if this timer goes off after the PCI controller has suspended the link, Bad Things will happen. Commit 32faa3f0ee50 ("ath10k: add the PCI PM core suspend/resume ops") got some of this right, in that it understood there was a problem on non-WoWLAN firmware. But it forgot the $subject case. Fix this by moving all the PCI driver suspend/resume logic exclusively into the driver PM hooks. This shouldn't affect WoWLAN support much (this just gets executed later on). I would just as well kill the entirety of ath10k_hif_suspend(), as it's not even implemented on the USB or SDIO drivers. I expect that we don't need the callback, except to return "supported" (i.e., 0) or "not supported" (i.e., -EOPNOTSUPP). 
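Reduced to a stand-alone sketch (invented names; the real driver routes this through dev_pm_ops and the HIF layer, as the diff below shows), the shape of the fix is: the bus-level quiescing always runs from the PCI PM hook, and the WoWLAN-path callback becomes a no-op, so correctness no longer depends on whether user space configured WoWLAN.

/* Stand-in for the driver's private state. */
struct dev_state { int ps_timer_armed; };

static void quiesce_bus(struct dev_state *d)
{
	d->ps_timer_armed = 0;	/* e.g. stop the power-save grace timer */
}

/* WoWLAN/HIF-level suspend callback: now only reports "supported". */
static int hif_suspend(struct dev_state *d)
{
	(void)d;
	return 0;
}

/* PM-core suspend callback: always quiesce, whether or not WoWLAN was
 * configured, so no timer can fire after the PCI link has gone down.
 */
static int pm_suspend_hook(struct dev_state *d)
{
	quiesce_bus(d);
	return 0;
}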
Fixes: 32faa3f0ee50 ("ath10k: add the PCI PM core suspend/resume ops") Fixes: 77258d409ce4 ("ath10k: enable pci soc powersaving") Signed-off-by: Brian Norris Cc: Ryan Hsu Cc: Kalle Valo Cc: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index bc1633945a56..4655c944e3fd 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -2580,6 +2580,12 @@ void ath10k_pci_hif_power_down(struct ath10k *ar) #ifdef CONFIG_PM static int ath10k_pci_hif_suspend(struct ath10k *ar) +{ + /* Nothing to do; the important stuff is in the driver suspend. */ + return 0; +} + +static int ath10k_pci_suspend(struct ath10k *ar) { /* The grace timer can still be counting down and ar->ps_awake be true. * It is known that the device may be asleep after resuming regardless @@ -2592,6 +2598,12 @@ static int ath10k_pci_hif_suspend(struct ath10k *ar) } static int ath10k_pci_hif_resume(struct ath10k *ar) +{ + /* Nothing to do; the important stuff is in the driver resume. */ + return 0; +} + +static int ath10k_pci_resume(struct ath10k *ar) { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); struct pci_dev *pdev = ar_pci->pdev; @@ -3403,11 +3415,7 @@ static int ath10k_pci_pm_suspend(struct device *dev) struct ath10k *ar = dev_get_drvdata(dev); int ret; - if (test_bit(ATH10K_FW_FEATURE_WOWLAN_SUPPORT, - ar->running_fw->fw_file.fw_features)) - return 0; - - ret = ath10k_hif_suspend(ar); + ret = ath10k_pci_suspend(ar); if (ret) ath10k_warn(ar, "failed to suspend hif: %d\n", ret); @@ -3419,11 +3427,7 @@ static int ath10k_pci_pm_resume(struct device *dev) struct ath10k *ar = dev_get_drvdata(dev); int ret; - if (test_bit(ATH10K_FW_FEATURE_WOWLAN_SUPPORT, - ar->running_fw->fw_file.fw_features)) - return 0; - - ret = ath10k_hif_resume(ar); + ret = ath10k_pci_resume(ar); if (ret) ath10k_warn(ar, "failed to resume hif: %d\n", ret); From 36d9cdb6fb4a8b97af51bdd74683dd3e46b97b20 Mon Sep 17 00:00:00 2001 From: Venkateswara Naralasetty Date: Wed, 4 Oct 2017 12:22:57 +0300 Subject: [PATCH 0915/2177] ath10k: check power save support in STA mode through FW IE Currently the ath10k host enables power save support in station mode by default for all firmware, but power save in station mode is still not supported by some firmware versions, which results in firmware crashes while issuing multiple scan commands. Fix this problem by introducing a new FW feature flag to check power save support in the firmware, so that the firmware image can tell ath10k that power save mode is not supported in station mode.
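The gating pattern itself is small enough to show stand-alone; the flag name and bit position below are illustrative (in the driver the bit comes from a firmware IE feature string and the capability flags are set on the ieee80211_hw structure).

#include <stdint.h>

#define FW_FEATURE_NO_PS	(1u << 17)	/* illustrative bit */
#define HW_SUPPORTS_PS		(1u << 0)
#define HW_SUPPORTS_DYNAMIC_PS	(1u << 1)

/* Advertise power save to the stack only when the firmware image did
 * not declare the "no-ps" feature.
 */
static uint32_t advertised_hw_flags(uint32_t fw_features)
{
	uint32_t flags = 0;

	if (!(fw_features & FW_FEATURE_NO_PS))
		flags |= HW_SUPPORTS_PS | HW_SUPPORTS_DYNAMIC_PS;

	return flags;
}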
Signed-off-by: Venkateswara Naralasetty Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 1 + drivers/net/wireless/ath/ath10k/core.h | 3 +++ drivers/net/wireless/ath/ath10k/mac.c | 9 +++++++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index a4f635820f35..7cc426c41817 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -377,6 +377,7 @@ static const char *const ath10k_core_fw_feature_str[] = { [ATH10K_FW_FEATURE_BTCOEX_PARAM] = "btcoex-param", [ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR] = "skip-null-func-war", [ATH10K_FW_FEATURE_ALLOWS_MESH_BCAST] = "allows-mesh-bcast", + [ATH10K_FW_FEATURE_NO_PS] = "no-ps", }; static unsigned int ath10k_core_get_fw_feature_str(char *buf, diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 949ebb3e967b..643041ef3271 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -612,6 +612,9 @@ enum ath10k_fw_features { */ ATH10K_FW_FEATURE_ALLOWS_MESH_BCAST = 16, + /* Firmware does not support power save in station mode. */ + ATH10K_FW_FEATURE_NO_PS = 17, + /* keep last */ ATH10K_FW_FEATURE_COUNT, }; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 5683f1a5330e..10fb83784832 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -8146,8 +8146,13 @@ int ath10k_mac_register(struct ath10k *ar) BIT(NL80211_IFTYPE_P2P_GO); ieee80211_hw_set(ar->hw, SIGNAL_DBM); - ieee80211_hw_set(ar->hw, SUPPORTS_PS); - ieee80211_hw_set(ar->hw, SUPPORTS_DYNAMIC_PS); + + if (!test_bit(ATH10K_FW_FEATURE_NO_PS, + ar->running_fw->fw_file.fw_features)) { + ieee80211_hw_set(ar->hw, SUPPORTS_PS); + ieee80211_hw_set(ar->hw, SUPPORTS_DYNAMIC_PS); + } + ieee80211_hw_set(ar->hw, MFP_CAPABLE); ieee80211_hw_set(ar->hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(ar->hw, HAS_RATE_CONTROL); From 2ea9f12cefe4b6bf291e1717512b0ccb04bf71e9 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Mon, 25 Sep 2017 15:29:41 -0700 Subject: [PATCH 0916/2177] ath10k: add new cipher suite support QCA99x0 and QCA4019 family chips support CCMP-256, GCMP-128, and GCMP-256 ciphers in hardware, so advertise support for these. As firmware does not support group management frame ciphers (BIP), handle them in software (mac80211). 
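The convention added here — one shared, ordered cipher list in which hardware-capable suites come first and each chip advertises only its leading N entries, with a fallback if the per-chip count looks bogus — can be sketched stand-alone as follows (the array contents mirror the standard RSN suite selectors, but the helper and its default are illustrative, not the ath10k code).

#include <stdio.h>

static const unsigned int cipher_suites[] = {
	/* Advertised by every supported chip (hardware crypto or
	 * mac80211 software crypto).
	 */
	0x000FAC01,	/* WEP-40 */
	0x000FAC05,	/* WEP-104 */
	0x000FAC02,	/* TKIP */
	0x000FAC04,	/* CCMP-128 */
	0x000FAC06,	/* AES-CMAC, handled in software */
	/* Advertised only by chips whose hardware supports them. */
	0x000FAC08,	/* GCMP-128 */
	0x000FAC09,	/* GCMP-256 */
	0x000FAC0A,	/* CCMP-256 */
};

#define N_SUITES (sizeof(cipher_suites) / sizeof(cipher_suites[0]))

/* Each chip supplies how many leading entries it supports; reject
 * bogus values instead of over-claiming capabilities.
 */
static unsigned int n_advertised(unsigned int per_chip_count)
{
	if (per_chip_count == 0 || per_chip_count > N_SUITES)
		return 5;	/* conservative default */
	return per_chip_count;
}

int main(void)
{
	printf("advertising %u of %zu suites\n",
	       n_advertised(8), (size_t)N_SUITES);
	return 0;
}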
Reviewed-by: Sebastian Gottschall Cc: Jouni Malinen Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 12 +++++++ drivers/net/wireless/ath/ath10k/hw.h | 3 ++ drivers/net/wireless/ath/ath10k/mac.c | 43 ++++++++++++++++++++++++-- drivers/net/wireless/ath/ath10k/wmi.h | 1 + 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 7cc426c41817..b29fdbd21ead 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -74,6 +74,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 8, }, { .id = QCA9887_HW_1_0_VERSION, @@ -97,6 +98,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 8, }, { .id = QCA6174_HW_2_1_VERSION, @@ -119,6 +121,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 8, }, { .id = QCA6174_HW_2_1_VERSION, @@ -141,6 +144,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 8, }, { .id = QCA6174_HW_3_0_VERSION, @@ -163,6 +167,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 8, }, { .id = QCA6174_HW_3_2_VERSION, @@ -188,6 +193,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 8, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, @@ -216,6 +222,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 4, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 11, }, { .id = QCA9984_HW_1_0_DEV_VERSION, @@ -249,6 +256,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { */ .vht160_mcs_rx_highest = 1560, .vht160_mcs_tx_highest = 1560, + .n_cipher_suites = 11, }, { .id = QCA9888_HW_2_0_DEV_VERSION, @@ -281,6 +289,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { */ .vht160_mcs_rx_highest = 780, .vht160_mcs_tx_highest = 780, + .n_cipher_suites = 11, }, { .id = QCA9377_HW_1_0_DEV_VERSION, @@ -303,6 +312,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 8, }, { .id = QCA9377_HW_1_1_DEV_VERSION, @@ -327,6 +337,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 8, }, { .id = QCA4019_HW_1_0_DEV_VERSION, @@ -356,6 +367,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .spectral_bin_discard = 4, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, + .n_cipher_suites = 11, }, }; diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index f80840fae517..05f26e5858ad 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -550,6 +550,9 @@ struct ath10k_hw_params { */ int 
vht160_mcs_rx_highest; int vht160_mcs_tx_highest; + + /* Number of ciphers supported (i.e First N) in cipher_suites array */ + int n_cipher_suites; }; struct htt_rx_desc; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 10fb83784832..6dbf8a2453ba 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -242,6 +242,16 @@ static int ath10k_send_key(struct ath10k_vif *arvif, case WLAN_CIPHER_SUITE_WEP104: arg.key_cipher = WMI_CIPHER_WEP; break; + case WLAN_CIPHER_SUITE_CCMP_256: + arg.key_cipher = WMI_CIPHER_AES_CCM; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + arg.key_cipher = WMI_CIPHER_AES_GCM; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: case WLAN_CIPHER_SUITE_AES_CMAC: WARN_ON(1); return -EINVAL; @@ -5723,7 +5733,10 @@ static int ath10k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, u32 flags2; /* this one needs to be done in software */ - if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) + if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256 || + key->cipher == WLAN_CIPHER_SUITE_BIP_CMAC_256) return 1; if (arvif->nohwcrypt) @@ -8074,7 +8087,22 @@ int ath10k_mac_register(struct ath10k *ar) WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, + + /* Do not add hardware supported ciphers before this line. + * Allow software encryption for all chips. Don't forget to + * update n_cipher_suites below. + */ WLAN_CIPHER_SUITE_AES_CMAC, + WLAN_CIPHER_SUITE_BIP_CMAC_256, + WLAN_CIPHER_SUITE_BIP_GMAC_128, + WLAN_CIPHER_SUITE_BIP_GMAC_256, + + /* Only QCA99x0 and QCA4019 varients support GCMP-128, GCMP-256 + * and CCMP-256 in hardware. + */ + WLAN_CIPHER_SUITE_GCMP, + WLAN_CIPHER_SUITE_GCMP_256, + WLAN_CIPHER_SUITE_CCMP_256, }; struct ieee80211_supported_band *band; void *channels; @@ -8318,7 +8346,18 @@ int ath10k_mac_register(struct ath10k *ar) } ar->hw->wiphy->cipher_suites = cipher_suites; - ar->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); + + /* QCA988x and QCA6174 family chips do not support CCMP-256, GCMP-128 + * and GCMP-256 ciphers in hardware. Fetch number of ciphers supported + * from chip specific hw_param table. 
+ */ + if (!ar->hw_params.n_cipher_suites || + ar->hw_params.n_cipher_suites > ARRAY_SIZE(cipher_suites)) { + ath10k_err(ar, "invalid hw_params.n_cipher_suites %d\n", + ar->hw_params.n_cipher_suites); + ar->hw_params.n_cipher_suites = 8; + } + ar->hw->wiphy->n_cipher_suites = ar->hw_params.n_cipher_suites; wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 7a3606dde227..c02b21cff38d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -4751,6 +4751,7 @@ struct wmi_key_seq_counter { #define WMI_CIPHER_WAPI 0x5 #define WMI_CIPHER_CKIP 0x6 #define WMI_CIPHER_AES_CMAC 0x7 +#define WMI_CIPHER_AES_GCM 0x8 struct wmi_vdev_install_key_cmd { __le32 vdev_id; From ee0a47186e2fa9aa1c56cadcea470ca0ba8c8692 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Wed, 27 Sep 2017 09:13:34 +0800 Subject: [PATCH 0917/2177] ath9k: fix tx99 potential info leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the user sets count to zero the string buffer would remain completely uninitialized which causes the kernel to parse its own stack data, potentially leading to an info leak. In addition to that, the string might be not terminated properly when the user data does not contain a 0-terminator. Signed-off-by: Miaoqing Pan Reviewed-by: Christoph Böhmwalder Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/tx99.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c index 49ed1afb913c..fe3a8263b224 100644 --- a/drivers/net/wireless/ath/ath9k/tx99.c +++ b/drivers/net/wireless/ath/ath9k/tx99.c @@ -179,6 +179,9 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf, ssize_t len; int r; + if (count < 1) + return -EINVAL; + if (sc->cur_chan->nvifs > 1) return -EOPNOTSUPP; @@ -186,6 +189,8 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf, if (copy_from_user(buf, user_buf, len)) return -EFAULT; + buf[len] = '\0'; + if (strtobool(buf, &start)) return -EINVAL; From 0a590a38778dd77571a5309cc0d80a17b7d3427c Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 13 Oct 2017 14:55:48 +0300 Subject: [PATCH 0918/2177] ath10k: store coverage-class in case firmware is not booted This way, we can apply the values when the NIC does come up. Signed-off-by: Ben Greear Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/hw.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index 07df7c6bc05b..88955bbe20bd 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -604,8 +604,13 @@ static void ath10k_hw_qca988x_set_coverage_class(struct ath10k *ar, /* Only modify registers if the core is started. */ if ((ar->state != ATH10K_STATE_ON) && - (ar->state != ATH10K_STATE_RESTARTED)) + (ar->state != ATH10K_STATE_RESTARTED)) { + spin_lock_bh(&ar->data_lock); + /* Store config value for when radio boots up */ + ar->fw_coverage.coverage_class = value; + spin_unlock_bh(&ar->data_lock); goto unlock; + } /* Retrieve the current values of the two registers that need to be * adjusted. 
@@ -637,7 +642,7 @@ static void ath10k_hw_qca988x_set_coverage_class(struct ath10k *ar, ar->fw_coverage.reg_ack_cts_timeout_orig = timeout_reg; ar->fw_coverage.reg_phyclk = phyclk_reg; - /* Calculat new value based on the (original) firmware calculation. */ + /* Calculate new value based on the (original) firmware calculation. */ slottime_reg = ar->fw_coverage.reg_slottime_orig; timeout_reg = ar->fw_coverage.reg_ack_cts_timeout_orig; From 1188e2a9ef223f5c670301d54a6f65d223a87582 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 14:04:30 +0200 Subject: [PATCH 0919/2177] cfg80211: don't print log output for building shipped-certs Building an allmodconfig kernel with 'make -s' now prints a single line: GEN net/wireless/shipped-certs.c Using '$(kecho)' here will skip the output with 'make -s' but otherwise keeps printing it, which is consistent with how we handle all the other output. Fixes: 90a53e4432b1 ("cfg80211: implement regdb signature checking") Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/wireless/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 219baea57e4e..e585f3f71f77 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -23,7 +23,7 @@ cfg80211-y += extra-certs.o endif $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) - @echo " GEN $@" + @$(kecho) " GEN $@" @echo '#include "reg.h"' > $@ @echo 'const u8 shipped_regdb_certs[] = {' >> $@ @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done @@ -32,7 +32,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) - @echo " GEN $@" + @$(kecho) " GEN $@" @echo '#include "reg.h"' > $@ @echo 'const u8 extra_regdb_certs[] = {' >> $@ @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done From 88230ef1f31bf2d8fcf42c20e5743ff4b3618a29 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 14:04:31 +0200 Subject: [PATCH 0920/2177] cfg80211: fix CFG80211_EXTRA_REGDB_KEYDIR typo The missing CONFIG_ prefix means this macro is never defined, leading to a possible Kbuild warning: net/wireless/reg.c:666:20: error: 'load_keys_from_buffer' defined but not used [-Werror=unused-function] static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen) When we use the correct symbol, the warning also goes away. 
Fixes: 90a53e4432b1 ("cfg80211: implement regdb signature checking") Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 58319c82ecb3..3871998059de 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -723,7 +723,7 @@ static int __init load_builtin_regdb_keys(void) #ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len); #endif -#ifdef CFG80211_EXTRA_REGDB_KEYDIR +#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0') load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len); #endif From b1b1ae2c1c150f8db5d3523c74e81eaf8cae5cbb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Oct 2017 15:26:01 +0300 Subject: [PATCH 0921/2177] mac80211: don't track HT capability changes The code here (more or less accidentally) tracks the HT capability of the AP when connected, and we found at least one AP that erroneously toggles its 20/40 capability bit when changing between 20/40 MHz. The connection to the AP is then broken because we set the 40 MHz disable flag based on this, as soon as it switches to 20 MHz, but because the flag then changed, we disconnect. I'd be inclined to just ignore this issue, since we then reconnect while the AP is in 20 MHz mode and never use 40 MHz with it again, but this code is a bit strange anyway - we don't use the capabilities for anything else. Change the code to simply not track the HT capabilities at all, which assumes that the AP at least sets 20/40 capability when operating in 40 MHz (or higher). If not, rate scaling might end up using only the narrower bandwidth. The new behaviour also mirrors what VHT does, where we only check the VHT operation. 
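[Editor's note: the sketch below is not mac80211 code; it is a simplified, hypothetical rendering of the resulting logic. After this change the usable width follows the HT operation element alone: no HT operation IE means HT/VHT stay disabled, no secondary channel offset means 20 MHz, otherwise 40 MHz is allowed, regardless of what the AP advertised in its HT capabilities.]

enum chan_width { WIDTH_20, WIDTH_40 };

struct ht_oper_ie {			/* stand-in for the parsed HT operation IE */
	int sec_chan_offset;		/* 0: none, 1: above, 3: below */
};

static enum chan_width pick_width(const struct ht_oper_ie *ht_oper)
{
	if (!ht_oper)			/* no HT operation IE: 20 MHz, HT disabled */
		return WIDTH_20;
	if (!ht_oper->sec_chan_offset)	/* AP currently operating at 20 MHz */
		return WIDTH_20;
	return WIDTH_40;		/* secondary channel present: 40 MHz usable */
}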
Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ee5ca1bc5a20..e4ededa1909d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -145,7 +145,6 @@ static u32 ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, struct ieee80211_channel *channel, - const struct ieee80211_ht_cap *ht_cap, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, struct cfg80211_chan_def *chandef, bool tracking) @@ -163,20 +162,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, chandef->center_freq1 = channel->center_freq; chandef->center_freq2 = 0; - if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) { + if (!ht_oper || !sta_ht_cap.ht_supported) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } chandef->width = NL80211_CHAN_WIDTH_20; - if (!(ht_cap->cap_info & - cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) { - ret = IEEE80211_STA_DISABLE_40MHZ; - vht_chandef = *chandef; - goto out; - } - ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, channel->band); /* check that channel matches the right operating channel */ @@ -344,7 +336,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, /* calculate new channel (type) based on HT/VHT operation IEs */ flags = ieee80211_determine_chantype(sdata, sband, chan, - ht_cap, ht_oper, vht_oper, + ht_oper, vht_oper, &chandef, true); /* @@ -4312,7 +4304,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, - ht_cap, ht_oper, vht_oper, + ht_oper, vht_oper, &chandef, false); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), From 14c04493cb77bc38404dbcb39d5ccbb667831ad7 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:17 +0200 Subject: [PATCH 0922/2177] tipc: add ability to order and receive topology events in driver As preparation for introducing communication groups, we add the ability to issue topology subscriptions and receive topology events from kernel space. This will make it possible for group member sockets to keep track of other group members. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/core.h | 5 ++ net/tipc/msg.h | 1 + net/tipc/server.c | 121 +++++++++++++++++++++++++++++++++++----------- net/tipc/server.h | 5 +- net/tipc/socket.c | 32 +++++++----- 5 files changed, 124 insertions(+), 40 deletions(-) diff --git a/net/tipc/core.h b/net/tipc/core.h index 5cc5398be722..964342689f2c 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -132,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } +static inline struct tipc_server *tipc_topsrv(struct net *net) +{ + return tipc_net(net)->topsrv; +} + static inline unsigned int tipc_hashfn(u32 addr) { return addr & (NODE_HTABLE_SIZE - 1); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c843fd2bc48d..d058b1c464e9 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -78,6 +78,7 @@ struct plist; #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 #define SOCK_WAKEUP 14 /* pseudo user */ +#define TOP_SRV 15 /* pseudo user */ /* * Message header sizes diff --git a/net/tipc/server.c b/net/tipc/server.c index 3cd6402e812c..713077536d0c 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -36,6 +36,8 @@ #include "server.h" #include "core.h" #include "socket.h" +#include "addr.h" +#include "msg.h" #include #include @@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); sock_release(sock); con->sock = NULL; - - spin_lock_bh(&s->idr_lock); - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(&s->idr_lock); } - + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); tipc_clean_outqueues(con); kfree(con); } @@ -197,7 +197,8 @@ static void tipc_close_conn(struct tipc_conn *con) struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - tipc_unregister_callbacks(con); + if (con->sock) + tipc_unregister_callbacks(con); if (con->conid) s->tipc_conn_release(con->conid, con->usr_data); @@ -207,8 +208,8 @@ static void tipc_close_conn(struct tipc_conn *con) * are harmless for us here as we have already deleted this * connection from server connection list. 
*/ - kernel_sock_shutdown(con->sock, SHUT_RDWR); - + if (con->sock) + kernel_sock_shutdown(con->sock, SHUT_RDWR); conn_put(con); } } @@ -487,38 +488,104 @@ void tipc_conn_terminate(struct tipc_server *s, int conid) } } +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, + u32 lower, u32 upper, int *conid) +{ + struct tipc_subscriber *scbr; + struct tipc_subscr sub; + struct tipc_server *s; + struct tipc_conn *con; + + sub.seq.type = type; + sub.seq.lower = lower; + sub.seq.upper = upper; + sub.timeout = TIPC_WAIT_FOREVER; + sub.filter = TIPC_SUB_PORTS; + *(u32 *)&sub.usr_handle = port; + + con = tipc_alloc_conn(tipc_topsrv(net)); + if (!con) + return false; + + *conid = con->conid; + s = con->server; + scbr = s->tipc_conn_new(*conid); + if (!scbr) { + tipc_close_conn(con); + return false; + } + + con->usr_data = scbr; + con->sock = NULL; + s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub)); + return true; +} + +void tipc_topsrv_kern_unsubscr(struct net *net, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(tipc_topsrv(net), conid); + if (!con) + return; + tipc_close_conn(con); + conn_put(con); +} + +static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt) +{ + u32 port = *(u32 *)&evt->s.usr_handle; + u32 self = tipc_own_addr(net); + struct sk_buff_head evtq; + struct sk_buff *skb; + + skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), + self, self, port, port, 0); + if (!skb) + return; + msg_set_dest_droppable(buf_msg(skb), true); + memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); + skb_queue_head_init(&evtq); + __skb_queue_tail(&evtq, skb); + tipc_sk_rcv(net, &evtq); +} + static void tipc_send_to_sock(struct tipc_conn *con) { - int count = 0; struct tipc_server *s = con->server; struct outqueue_entry *e; + struct tipc_event *evt; struct msghdr msg; + int count = 0; int ret; spin_lock_bh(&con->outqueue_lock); while (test_bit(CF_CONNECTED, &con->flags)) { - e = list_entry(con->outqueue.next, struct outqueue_entry, - list); + e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == &con->outqueue) break; + spin_unlock_bh(&con->outqueue_lock); - memset(&msg, 0, sizeof(msg)); - msg.msg_flags = MSG_DONTWAIT; - - if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { - msg.msg_name = &e->dest; - msg.msg_namelen = sizeof(struct sockaddr_tipc); + if (con->sock) { + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { + msg.msg_name = &e->dest; + msg.msg_namelen = sizeof(struct sockaddr_tipc); + } + ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, + e->iov.iov_len); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + goto out; + } else if (ret < 0) { + goto send_err; + } + } else { + evt = e->iov.iov_base; + tipc_send_kern_top_evt(s->net, evt); } - ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, - e->iov.iov_len); - if (ret == -EWOULDBLOCK || ret == 0) { - cond_resched(); - goto out; - } else if (ret < 0) { - goto send_err; - } - /* Don't starve users filling buffers */ if (++count >= MAX_SEND_MSG_COUNT) { cond_resched(); diff --git a/net/tipc/server.h b/net/tipc/server.h index 34f8055afa3b..2113c9192633 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -83,13 +83,16 @@ struct tipc_server { int tipc_conn_sendmsg(struct tipc_server *s, int conid, struct sockaddr_tipc *addr, void *data, size_t len); +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, + u32 lower, u32 upper, int *conid); +void 
tipc_topsrv_kern_unsubscr(struct net *net, int conid); + /** * tipc_conn_terminate - terminate connection with server * * Note: Must call it in process context since it might sleep */ void tipc_conn_terminate(struct tipc_server *s, int conid); - int tipc_server_start(struct tipc_server *s); void tipc_server_stop(struct tipc_server *s); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d50edd6e0019..9a7e7b5cf23f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -896,6 +896,10 @@ exit: kfree_skb(skb); } +static void tipc_sk_top_evt(struct tipc_sock *tsk, struct tipc_event *evt) +{ +} + /** * tipc_sendmsg - send message in connectionless manner * @sock: socket structure @@ -1671,20 +1675,24 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); unsigned int limit = rcvbuf_limit(sk, skb); int err = TIPC_OK; - int usr = msg_user(hdr); - u32 onode; - if (unlikely(msg_user(hdr) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb, xmitq); - return false; - } - - if (unlikely(usr == SOCK_WAKEUP)) { - onode = msg_orignode(hdr); + if (unlikely(!msg_isdata(hdr))) { + switch (msg_user(hdr)) { + case CONN_MANAGER: + tipc_sk_proto_rcv(tsk, skb, xmitq); + return false; + case SOCK_WAKEUP: + u32_del(&tsk->cong_links, msg_orignode(hdr)); + tsk->cong_link_cnt--; + sk->sk_write_space(sk); + break; + case TOP_SRV: + tipc_sk_top_evt(tsk, (void *)msg_data(hdr)); + break; + default: + break; + } kfree_skb(skb); - u32_del(&tsk->cong_links, onode); - tsk->cong_link_cnt--; - sk->sk_write_space(sk); return false; } From 23998835be98a6842e5698fa1824f404c7de850d Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:18 +0200 Subject: [PATCH 0923/2177] tipc: improve address sanity check in tipc_connect() The address given to tipc_connect() is not completely sanity checked, under the assumption that this will be done later in the function __tipc_sendmsg() when the address is used there. However, the latter function will in the next commits serve as a caller to several other send functions, so we want to move the corresponding sanity check there, to the beginning of that function, before we possibly need to grab the address stored by tipc_connect(). We must therefore be able to trust that this address already has been thoroughly checked. We do this in this commit. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9a7e7b5cf23f..7659e792ecdb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1911,28 +1911,27 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, int previous; int res = 0; + if (destlen != sizeof(struct sockaddr_tipc)) + return -EINVAL; + lock_sock(sk); + if (dst->family == AF_UNSPEC) { + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); + if (!tipc_sk_type_connectionless(sk)) + res = -EINVAL; + goto exit; + } else if (dst->family != AF_TIPC) { + res = -EINVAL; + } + if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME) + res = -EINVAL; + if (res) + goto exit; + /* DGRAM/RDM connect(), just save the destaddr */ if (tipc_sk_type_connectionless(sk)) { - if (dst->family == AF_UNSPEC) { - memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); - } else if (destlen != sizeof(struct sockaddr_tipc)) { - res = -EINVAL; - } else { - memcpy(&tsk->peer, dest, destlen); - } - goto exit; - } - - /* - * Reject connection attempt using multicast address - * - * Note: send_msg() validates the rest of the address fields, - * so there's no need to do it here - */ - if (dst->addrtype == TIPC_ADDR_MCAST) { - res = -EINVAL; + memcpy(&tsk->peer, dest, destlen); goto exit; } From 38077b8ef831daba55913f7e24732b062d0bdebb Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:19 +0200 Subject: [PATCH 0924/2177] tipc: add ability to obtain node availability status from other files In the coming commits, functions at the socket level will need the ability to read the availability status of a given node. We therefore introduce a new function for this purpose, while renaming the existing static function currently having the wanted name. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/node.c | 26 +++++++++++++++++++++----- net/tipc/node.h | 1 + 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 198dbc7adbe1..6cc1ae600820 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -157,7 +157,7 @@ static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); static struct tipc_node *tipc_node_find(struct net *net, u32 addr); static void tipc_node_put(struct tipc_node *node); -static bool tipc_node_is_up(struct tipc_node *n); +static bool node_is_up(struct tipc_node *n); struct tipc_sock_conn { u32 port; @@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, *slot1 = i; } - if (!tipc_node_is_up(n)) { + if (!node_is_up(n)) { if (tipc_link_peer_is_down(l)) tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); @@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_sk_rcv(n->net, &le->inputq); } -static bool tipc_node_is_up(struct tipc_node *n) +static bool node_is_up(struct tipc_node *n) { return n->active_links[0] != INVALID_BEARER_ID; } +bool tipc_node_is_up(struct net *net, u32 addr) +{ + struct tipc_node *n; + bool retval = false; + + if (in_own_node(net, addr)) + return true; + + n = tipc_node_find(net, addr); + if (!n) + return false; + retval = node_is_up(n); + tipc_node_put(n); + return retval; +} + void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_bearer *b, u16 capabilities, u32 signature, @@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) goto attr_msg_full; - if (tipc_node_is_up(node)) + if (node_is_up(node)) if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) goto attr_msg_full; @@ -1249,7 +1265,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) dst = n->addr; if (in_own_node(net, dst)) continue; - if (!tipc_node_is_up(n)) + if (!node_is_up(n)) continue; txskb = pskb_copy(skb, GFP_ATOMIC); if (!txskb) diff --git a/net/tipc/node.h b/net/tipc/node.h index 898c22916984..8db59feb122f 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -76,6 +76,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); +bool tipc_node_is_up(struct net *net, u32 addr); u16 tipc_node_get_capabilities(struct net *net, u32 addr); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); From 64ac5f5977df5b276374fb2f051082129f5cdb22 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:20 +0200 Subject: [PATCH 0925/2177] tipc: refactor function filter_rcv() In the following commits we will need to handle multiple incoming and rejected/returned buffers in the function socket.c::filter_rcv(). As a preparation for this, we generalize the function by handling buffer queues instead of individual buffers. We also introduce a help function tipc_skb_reject(), and rename filter_rcv() to tipc_sk_filter_rcv() in line with other functions in socket.c. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/msg.c | 7 ++ net/tipc/msg.h | 2 + net/tipc/socket.c | 165 +++++++++++++++++++++++----------------------- 3 files changed, 91 insertions(+), 83 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 17146c16ee2d..1649d456e22d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -666,3 +666,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, } kfree_skb(skb); } + +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) + __skb_queue_tail(xmitq, skb); +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d058b1c464e9..be3e38aa9dd2 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -819,6 +819,8 @@ static inline bool msg_is_reset(struct tipc_msg *hdr) struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7659e792ecdb..bc226f5a1be3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -111,7 +111,7 @@ struct tipc_sock { struct rcu_head rcu; }; -static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void tipc_sock_destruct(struct sock *sk); @@ -453,7 +453,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, msg_set_origport(msg, tsk->portid); setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_shutdown = 0; - sk->sk_backlog_rcv = tipc_backlog_rcv; + sk->sk_backlog_rcv = tipc_sk_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; @@ -850,16 +850,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, } /** - * tipc_sk_proto_rcv - receive a connection mng protocol message + * tipc_sk_conn_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket * @skb: pointer to message buffer. 
*/ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, - struct sk_buff_head *xmitq) +static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { - struct sock *sk = &tsk->sk; - u32 onode = tsk_own_node(tsk); struct tipc_msg *hdr = buf_msg(skb); + u32 onode = tsk_own_node(tsk); + struct sock *sk = &tsk->sk; int mtyp = msg_type(hdr); bool conn_cong; @@ -1536,14 +1536,41 @@ static void tipc_sock_destruct(struct sock *sk) __skb_queue_purge(&sk->sk_receive_queue); } +static void tipc_sk_proto_rcv(struct sock *sk, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); + + switch (msg_user(hdr)) { + case CONN_MANAGER: + tipc_sk_conn_proto_rcv(tsk, skb, xmitq); + return; + case SOCK_WAKEUP: + u32_del(&tsk->cong_links, msg_orignode(hdr)); + tsk->cong_link_cnt--; + sk->sk_write_space(sk); + break; + case TOP_SRV: + tipc_sk_top_evt(tsk, (void *)msg_data(hdr)); + break; + default: + break; + } + + kfree_skb(skb); +} + /** - * filter_connect - Handle all incoming messages for a connection-based socket + * tipc_filter_connect - Handle incoming message for a connection-based socket * @tsk: TIPC socket * @skb: pointer to message buffer. Set to NULL if buffer is consumed * * Returns true if everything ok, false otherwise */ -static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) +static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); @@ -1657,7 +1684,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) } /** - * filter_rcv - validate incoming message + * tipc_sk_filter_rcv - validate incoming message * @sk: socket * @skb: pointer to message. 
* @@ -1666,75 +1693,49 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * * Called with socket lock already taken * - * Returns true if message was added to socket receive queue, otherwise false */ -static bool filter_rcv(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *xmitq) +static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { + bool sk_conn = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); - unsigned int limit = rcvbuf_limit(sk, skb); - int err = TIPC_OK; + struct net *net = sock_net(sk); + struct sk_buff_head inputq; + int limit, err = TIPC_OK; - if (unlikely(!msg_isdata(hdr))) { - switch (msg_user(hdr)) { - case CONN_MANAGER: - tipc_sk_proto_rcv(tsk, skb, xmitq); - return false; - case SOCK_WAKEUP: - u32_del(&tsk->cong_links, msg_orignode(hdr)); - tsk->cong_link_cnt--; - sk->sk_write_space(sk); - break; - case TOP_SRV: - tipc_sk_top_evt(tsk, (void *)msg_data(hdr)); - break; - default: - break; - } - kfree_skb(skb); - return false; - } - - /* Drop if illegal message type */ - if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) { - kfree_skb(skb); - return false; - } - - /* Reject if wrong message type for current socket state */ - if (tipc_sk_type_connectionless(sk)) { - if (msg_connected(hdr)) { - err = TIPC_ERR_NO_PORT; - goto reject; - } - } else if (unlikely(!filter_connect(tsk, skb))) { - err = TIPC_ERR_NO_PORT; - goto reject; - } - - /* Reject message if there isn't room to queue it */ - if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) { - err = TIPC_ERR_OVERLOAD; - goto reject; - } - - /* Enqueue message */ TIPC_SKB_CB(skb)->bytes_read = 0; - __skb_queue_tail(&sk->sk_receive_queue, skb); - skb_set_owner_r(skb, sk); + __skb_queue_head_init(&inputq); + __skb_queue_tail(&inputq, skb); - sk->sk_data_ready(sk); - return true; + if (unlikely(!msg_isdata(hdr))) + tipc_sk_proto_rcv(sk, &inputq, xmitq); + else if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) + return kfree_skb(skb); -reject: - if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err)) - __skb_queue_tail(xmitq, skb); - return false; + /* Validate and add to receive buffer if there is space */ + while ((skb = __skb_dequeue(&inputq))) { + hdr = buf_msg(skb); + limit = rcvbuf_limit(sk, skb); + if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) || + (!sk_conn && msg_connected(hdr))) + err = TIPC_ERR_NO_PORT; + else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) + err = TIPC_ERR_OVERLOAD; + + if (unlikely(err)) { + tipc_skb_reject(net, err, skb, xmitq); + err = TIPC_OK; + continue; + } + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk); + } } /** - * tipc_backlog_rcv - handle incoming message from backlog queue + * tipc_sk_backlog_rcv - handle incoming message from backlog queue * @sk: socket * @skb: message * @@ -1742,27 +1743,25 @@ reject: * * Returns 0 */ -static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - unsigned int truesize = skb->truesize; + unsigned int before = sk_rmem_alloc_get(sk); struct sk_buff_head xmitq; u32 dnode, selector; + unsigned int added; __skb_queue_head_init(&xmitq); - if (likely(filter_rcv(sk, skb, &xmitq))) { - atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); - return 0; + tipc_sk_filter_rcv(sk, skb, &xmitq); + added = sk_rmem_alloc_get(sk) - before; + atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt); + + /* Send pending 
response/rejected messages, if any */ + while ((skb = __skb_dequeue(&xmitq))) { + selector = msg_origport(buf_msg(skb)); + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); } - - if (skb_queue_empty(&xmitq)) - return 0; - - /* Send response/rejected message */ - skb = __skb_dequeue(&xmitq); - dnode = msg_destnode(buf_msg(skb)); - selector = msg_origport(buf_msg(skb)); - tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); return 0; } @@ -1794,7 +1793,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - filter_rcv(sk, skb, xmitq); + tipc_sk_filter_rcv(sk, skb, xmitq); continue; } From f70d37b796241f617107d5585ee96a7e1b660b63 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:21 +0200 Subject: [PATCH 0926/2177] tipc: add new function for sending multiple small messages We see an increasing need to send multiple single-buffer messages of TIPC_SYSTEM_IMPORTANCE to different individual destination nodes. Instead of looping over the send queue and sending each buffer individually, as we do now, we add a new help function tipc_node_distr_xmit() to do this. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 16 ++++++++++++++++ net/tipc/node.h | 1 + net/tipc/socket.c | 14 ++------------ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 6cc1ae600820..89f8ac73bf65 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1254,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations + * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected + */ +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + u32 selector, dnode; + + while ((skb = __skb_dequeue(xmitq))) { + selector = msg_origport(buf_msg(skb)); + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, selector); + } + return 0; +} + void tipc_node_broadcast(struct net *net, struct sk_buff *skb) { struct sk_buff *txskb; diff --git a/net/tipc/node.h b/net/tipc/node.h index 8db59feb122f..df2f2197c4ad 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -68,6 +68,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index bc226f5a1be3..c7c674934474 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1740,14 +1740,11 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, * @skb: message * * Caller must hold socket lock - * - * Returns 0 */ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { unsigned int before = sk_rmem_alloc_get(sk); struct sk_buff_head xmitq; - u32 dnode, selector; unsigned int added; __skb_queue_head_init(&xmitq); @@ -1757,11 +1754,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt); /* Send pending response/rejected messages, if 
any */ - while ((skb = __skb_dequeue(&xmitq))) { - selector = msg_origport(buf_msg(skb)); - dnode = msg_destnode(buf_msg(skb)); - tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); - } + tipc_node_distr_xmit(sock_net(sk), &xmitq); return 0; } @@ -1840,10 +1833,7 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) spin_unlock_bh(&sk->sk_lock.slock); } /* Send pending response/rejected messages, if any */ - while ((skb = __skb_dequeue(&xmitq))) { - dnode = msg_destnode(buf_msg(skb)); - tipc_node_xmit_skb(net, skb, dnode, dport); - } + tipc_node_distr_xmit(sock_net(sk), &xmitq); sock_put(sk); continue; } From a80ae5306a7346d4e52f59462878beb8362f4bbd Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:22 +0200 Subject: [PATCH 0927/2177] tipc: improve destination linked list We often see a need for a linked list of destination identities, sometimes containing a port number, sometimes a node identity, and sometimes both. The currently defined struct u32_list is not generic enough to cover all cases, so we extend it to contain two u32 integers and rename it to struct tipc_dest_list. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 18 ++++----- net/tipc/name_table.c | 89 ++++++++++++++++++++++--------------------- net/tipc/name_table.h | 22 +++++++---- net/tipc/socket.c | 12 +++--- 4 files changed, 74 insertions(+), 67 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a140dd4a84af..329325bd553e 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -258,20 +258,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_nlist *dests, u16 *cong_link_cnt) { + struct tipc_dest *dst, *tmp; struct sk_buff_head _pkts; - struct u32_item *n, *tmp; - u32 dst, selector; + u32 dnode, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); skb_queue_head_init(&_pkts); - list_for_each_entry_safe(n, tmp, &dests->list, list) { - dst = n->value; - if (!tipc_msg_pskb_copy(dst, pkts, &_pkts)) + list_for_each_entry_safe(dst, tmp, &dests->list, list) { + dnode = dst->node; + if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts)) return -ENOMEM; /* Any other return value than -ELINKCONG is ignored */ - if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG) + if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG) (*cong_link_cnt)++; } return 0; @@ -554,7 +554,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = true; - else if (u32_push(&nl->list, node)) + else if (tipc_dest_push(&nl->list, node, 0)) nl->remote++; } @@ -562,13 +562,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = false; - else if (u32_del(&nl->list, node)) + else if (tipc_dest_del(&nl->list, node, 0)) nl->remote--; } void tipc_nlist_purge(struct tipc_nlist *nl) { - u32_list_purge(&nl->list); + tipc_dest_list_purge(&nl->list); nl->remote = 0; nl->local = 0; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bd0aac87b41a..76bd2777baaf 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -634,7 +634,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, info = sseq->info; list_for_each_entry(publ, &info->node_list, node_list) { if (publ->scope <= limit) - u32_push(dports, publ->ref); + tipc_dest_push(dports, 0, publ->ref); } if (info->cluster_list_size != info->node_list_size) @@ -1057,78 
+1057,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -bool u32_find(struct list_head *l, u32 value) +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port) { - struct u32_item *item; + u64 value = (u64)node << 32 | port; + struct tipc_dest *dst; - list_for_each_entry(item, l, list) { - if (item->value == value) - return true; + list_for_each_entry(dst, l, list) { + if (dst->value != value) + continue; + return dst; } - return false; + return NULL; } -bool u32_push(struct list_head *l, u32 value) +bool tipc_dest_push(struct list_head *l, u32 node, u32 port) { - struct u32_item *item; + u64 value = (u64)node << 32 | port; + struct tipc_dest *dst; - list_for_each_entry(item, l, list) { - if (item->value == value) - return false; - } - item = kmalloc(sizeof(*item), GFP_ATOMIC); - if (unlikely(!item)) + if (tipc_dest_find(l, node, port)) return false; - item->value = value; - list_add(&item->list, l); + dst = kmalloc(sizeof(*dst), GFP_ATOMIC); + if (unlikely(!dst)) + return false; + dst->value = value; + list_add(&dst->list, l); return true; } -u32 u32_pop(struct list_head *l) +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port) { - struct u32_item *item; - u32 value = 0; + struct tipc_dest *dst; if (list_empty(l)) - return 0; - item = list_first_entry(l, typeof(*item), list); - value = item->value; - list_del(&item->list); - kfree(item); - return value; + return false; + dst = list_first_entry(l, typeof(*dst), list); + if (port) + *port = dst->port; + if (node) + *node = dst->node; + list_del(&dst->list); + kfree(dst); + return true; } -bool u32_del(struct list_head *l, u32 value) +bool tipc_dest_del(struct list_head *l, u32 node, u32 port) { - struct u32_item *item, *tmp; + struct tipc_dest *dst; - list_for_each_entry_safe(item, tmp, l, list) { - if (item->value != value) - continue; - list_del(&item->list); - kfree(item); - return true; - } - return false; + dst = tipc_dest_find(l, node, port); + if (!dst) + return false; + list_del(&dst->list); + kfree(dst); + return true; } -void u32_list_purge(struct list_head *l) +void tipc_dest_list_purge(struct list_head *l) { - struct u32_item *item, *tmp; + struct tipc_dest *dst, *tmp; - list_for_each_entry_safe(item, tmp, l, list) { - list_del(&item->list); - kfree(item); + list_for_each_entry_safe(dst, tmp, l, list) { + list_del(&dst->list); + kfree(dst); } } -int u32_list_len(struct list_head *l) +int tipc_dest_list_len(struct list_head *l) { - struct u32_item *item; + struct tipc_dest *dst; int i = 0; - list_for_each_entry(item, l, list) { + list_for_each_entry(dst, l, list) { i++; } return i; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 6ebdeb1d84a5..d121175a92b5 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -120,16 +120,22 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); -struct u32_item { +struct tipc_dest { struct list_head list; - u32 value; + union { + struct { + u32 port; + u32 node; + }; + u64 value; + }; }; -bool u32_push(struct list_head *l, u32 value); -u32 u32_pop(struct list_head *l); -bool u32_find(struct list_head *l, u32 value); -bool u32_del(struct list_head *l, u32 value); -void u32_list_purge(struct list_head *l); -int u32_list_len(struct list_head *l); +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port); +bool tipc_dest_push(struct list_head *l, u32 node, u32 port); +bool 
tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); +bool tipc_dest_del(struct list_head *l, u32 node, u32 port); +void tipc_dest_list_purge(struct list_head *l); +int tipc_dest_list_len(struct list_head *l); #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c7c674934474..daf7c4df4531 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -565,7 +565,7 @@ static int tipc_release(struct socket *sock) /* Reject any messages that accumulated in backlog queue */ release_sock(sk); - u32_list_purge(&tsk->cong_links); + tipc_dest_list_purge(&tsk->cong_links); tsk->cong_link_cnt = 0; call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; @@ -826,8 +826,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), msg_nameupper(msg), scope, &dports); - portid = u32_pop(&dports); - for (; portid; portid = u32_pop(&dports)) { + while (tipc_dest_pop(&dports, NULL, &portid)) { _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); @@ -1000,7 +999,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) } /* Block or return if destination link is congested */ - rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode)); + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(clinks, dnode, 0)); if (unlikely(rc)) return rc; @@ -1012,7 +1012,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); if (unlikely(rc == -ELINKCONG)) { - u32_push(clinks, dnode); + tipc_dest_push(clinks, dnode, 0); tsk->cong_link_cnt++; rc = 0; } @@ -1549,7 +1549,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, tipc_sk_conn_proto_rcv(tsk, skb, xmitq); return; case SOCK_WAKEUP: - u32_del(&tsk->cong_links, msg_orignode(hdr)); + tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); tsk->cong_link_cnt--; sk->sk_write_space(sk); break; From 75da2163dbb6af9f2dce1d80056d11d290dd19a5 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:23 +0200 Subject: [PATCH 0928/2177] tipc: introduce communication groups As a preparation for introducing flow control for multicast and datagram messaging, we need a more strictly defined framework than we have now. A socket must be able to keep track of exactly how many and which other sockets it is allowed to communicate with at any moment, and keep the necessary state for those. We therefore introduce a new concept we have named Communication Group. Sockets can join a group via a new setsockopt() call TIPC_GROUP_JOIN. The call takes four parameters: 'type' serves as group identifier, 'instance' serves as a logical member identifier, and 'scope' indicates the visibility of the group (node/cluster/zone). Finally, 'flags' makes it possible to set certain properties for the member. For now, there is only one flag, indicating if the creator of the socket wants to receive a copy of broadcast or multicast messages it is sending via the socket, and if it wants to be eligible as destination for its own anycasts. A group is closed, i.e., sockets which have not joined a group will not be able to send messages to or receive messages from members of the group, and vice versa. Any member of a group can send multicast ('group broadcast') messages to all group members, optionally including itself, using the primitive send(). The messages are received via the recvmsg() primitive. A socket can only be a member of one group at a time. 
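[Editor's note: the fragment below is not part of the patch; it is a minimal user-space sketch of the join sequence described above, with arbitrary example values for the group type and member instance and with error handling trimmed.]

#include <string.h>
#include <sys/socket.h>
#include <linux/tipc.h>

static int join_group(void)
{
	struct tipc_group_req req;
	int sd = socket(AF_TIPC, SOCK_RDM, 0);

	if (sd < 0)
		return -1;
	memset(&req, 0, sizeof(req));
	req.type = 4711;			/* group identifier */
	req.instance = 17;			/* this member's logical identifier */
	req.scope = TIPC_CLUSTER_SCOPE;		/* group visibility */
	req.flags = TIPC_GROUP_LOOPBACK;	/* also receive own broadcasts */

	if (setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &req, sizeof(req)) < 0)
		return -1;
	/* send()/recvmsg() now address the group; TIPC_GROUP_LEAVE (no
	 * argument) takes the socket out again.
	 */
	return sd;
}

A peer that joins the same 'type' with its own 'instance' becomes a visible member and can exchange group messages with this socket.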
Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 14 ++ net/tipc/Makefile | 2 +- net/tipc/group.c | 404 ++++++++++++++++++++++++++++++++++++++ net/tipc/group.h | 64 ++++++ net/tipc/link.c | 3 +- net/tipc/msg.h | 50 ++++- net/tipc/name_table.c | 44 +++-- net/tipc/name_table.h | 3 + net/tipc/node.h | 3 +- net/tipc/socket.c | 209 +++++++++++++++++--- 10 files changed, 748 insertions(+), 48 deletions(-) create mode 100644 net/tipc/group.c create mode 100644 net/tipc/group.h diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 5351b08c897a..5f7b2c4a09ab 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -231,6 +231,20 @@ struct sockaddr_tipc { #define TIPC_SOCK_RECVQ_DEPTH 132 /* Default: none (read only) */ #define TIPC_MCAST_BROADCAST 133 /* Default: TIPC selects. No arg */ #define TIPC_MCAST_REPLICAST 134 /* Default: TIPC selects. No arg */ +#define TIPC_GROUP_JOIN 135 /* Takes struct tipc_group_req* */ +#define TIPC_GROUP_LEAVE 136 /* No argument */ + +/* + * Flag values + */ +#define TIPC_GROUP_LOOPBACK 0x1 /* Receive copy of sent msg when match */ + +struct tipc_group_req { + __u32 type; /* group id */ + __u32 instance; /* member id */ + __u32 scope; /* zone/cluster/node */ + __u32 flags; +}; /* * Maximum sizes of TIPC bearer-related names (including terminating NULL) diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 31b9f9c52974..a3af73ec0b78 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -8,7 +8,7 @@ tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ - server.o socket.o + server.o socket.o group.o tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o diff --git a/net/tipc/group.c b/net/tipc/group.c new file mode 100644 index 000000000000..3f0e1ce1e3b9 --- /dev/null +++ b/net/tipc/group.c @@ -0,0 +1,404 @@ +/* + * net/tipc/group.c: TIPC group messaging code + * + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "addr.h" +#include "group.h" +#include "bcast.h" +#include "server.h" +#include "msg.h" +#include "socket.h" +#include "node.h" +#include "name_table.h" +#include "subscr.h" + +#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1) +#define ADV_IDLE ADV_UNIT + +enum mbr_state { + MBR_QUARANTINED, + MBR_DISCOVERED, + MBR_JOINING, + MBR_PUBLISHED, + MBR_JOINED, + MBR_LEAVING +}; + +struct tipc_member { + struct rb_node tree_node; + struct list_head list; + u32 node; + u32 port; + enum mbr_state state; + u16 bc_rcv_nxt; +}; + +struct tipc_group { + struct rb_root members; + struct tipc_nlist dests; + struct net *net; + int subid; + u32 type; + u32 instance; + u32 domain; + u32 scope; + u32 portid; + u16 member_cnt; + u16 bc_snd_nxt; + bool loopback; +}; + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq); + +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) +{ + return grp->bc_snd_nxt; +} + +static bool tipc_group_is_receiver(struct tipc_member *m) +{ + return m && m->state >= MBR_JOINED; +} + +int tipc_group_size(struct tipc_group *grp) +{ + return grp->member_cnt; +} + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq) +{ + struct tipc_group *grp; + u32 type = mreq->type; + + grp = kzalloc(sizeof(*grp), GFP_ATOMIC); + if (!grp) + return NULL; + tipc_nlist_init(&grp->dests, tipc_own_addr(net)); + grp->members = RB_ROOT; + grp->net = net; + grp->portid = portid; + grp->domain = addr_domain(net, mreq->scope); + grp->type = type; + grp->instance = mreq->instance; + grp->scope = mreq->scope; + grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; + if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid)) + return grp; + kfree(grp); + return NULL; +} + +void tipc_group_delete(struct net *net, struct tipc_group *grp) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq); + list_del(&m->list); + kfree(m); + } + tipc_node_distr_xmit(net, &xmitq); + tipc_nlist_purge(&grp->dests); + tipc_topsrv_kern_unsubscr(net, grp->subid); + kfree(grp); +} + +struct tipc_member *tipc_group_find_member(struct tipc_group *grp, + u32 node, u32 port) +{ + struct rb_node *n = grp->members.rb_node; + u64 nkey, key = (u64)node << 32 | port; + struct tipc_member *m; + + while (n) { + m = container_of(n, struct tipc_member, tree_node); + nkey = (u64)m->node << 32 | m->port; + if (key < nkey) + n = n->rb_left; + else if (key > nkey) + n = n->rb_right; + else + return m; + } + return NULL; +} + +static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, + u32 node) +{ + struct tipc_member *m; + struct rb_node *n; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct 
tipc_member, tree_node); + if (m->node == node) + return m; + } + return NULL; +} + +static void tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) +{ + u64 nkey, key = (u64)m->node << 32 | m->port; + struct rb_node **n, *parent = NULL; + struct tipc_member *tmp; + + n = &grp->members.rb_node; + while (*n) { + tmp = container_of(*n, struct tipc_member, tree_node); + parent = *n; + tmp = container_of(parent, struct tipc_member, tree_node); + nkey = (u64)tmp->node << 32 | tmp->port; + if (key < nkey) + n = &(*n)->rb_left; + else if (key > nkey) + n = &(*n)->rb_right; + else + return; + } + rb_link_node(&m->tree_node, parent, n); + rb_insert_color(&m->tree_node, &grp->members); +} + +static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, + u32 node, u32 port, + int state) +{ + struct tipc_member *m; + + m = kzalloc(sizeof(*m), GFP_ATOMIC); + if (!m) + return NULL; + INIT_LIST_HEAD(&m->list); + m->node = node; + m->port = port; + grp->member_cnt++; + tipc_group_add_to_tree(grp, m); + tipc_nlist_add(&grp->dests, m->node); + m->state = state; + return m; +} + +void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port) +{ + tipc_group_create_member(grp, node, port, MBR_DISCOVERED); +} + +static void tipc_group_delete_member(struct tipc_group *grp, + struct tipc_member *m) +{ + rb_erase(&m->tree_node, &grp->members); + grp->member_cnt--; + list_del_init(&m->list); + + /* If last member on a node, remove node from dest list */ + if (!tipc_group_find_node(grp, m->node)) + tipc_nlist_del(&grp->dests, m->node); + + kfree(m); +} + +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp) +{ + return &grp->dests; +} + +void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, + int *scope) +{ + seq->type = grp->type; + seq->lower = grp->instance; + seq->upper = grp->instance; + *scope = grp->scope; +} + +void tipc_group_update_bc_members(struct tipc_group *grp) +{ + grp->bc_snd_nxt++; +} + +/* tipc_group_filter_msg() - determine if we should accept arriving message + */ +void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + struct tipc_member *m; + struct tipc_msg *hdr; + u32 node, port; + int mtyp; + + if (!skb) + return; + + hdr = buf_msg(skb); + mtyp = msg_type(hdr); + node = msg_orignode(hdr); + port = msg_origport(hdr); + + if (!msg_in_group(hdr)) + goto drop; + + m = tipc_group_find_member(grp, node, port); + if (!tipc_group_is_receiver(m)) + goto drop; + + __skb_queue_tail(inputq, skb); + + m->bc_rcv_nxt = msg_grp_bc_seqno(hdr) + 1; + return; +drop: + kfree_skb(skb); +} + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr; + struct sk_buff *skb; + + skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0, + m->node, tipc_own_addr(grp->net), + m->port, grp->portid, 0); + if (!skb) + return; + + hdr = buf_msg(skb); + if (mtyp == GRP_JOIN_MSG) + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + __skb_queue_tail(xmitq, skb); +} + +void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) +{ + u32 node = msg_orignode(hdr); + u32 port = msg_origport(hdr); + struct tipc_member *m; + + if (!grp) + return; + + m = tipc_group_find_member(grp, node, port); + + switch (msg_type(hdr)) { + case GRP_JOIN_MSG: + if (!m) + m = tipc_group_create_member(grp, node, port, + MBR_QUARANTINED); + if (!m) + 
return; + m->bc_rcv_nxt = msg_grp_bc_syncpt(hdr); + + /* Wait until PUBLISH event is received */ + if (m->state == MBR_DISCOVERED) + m->state = MBR_JOINING; + else if (m->state == MBR_PUBLISHED) + m->state = MBR_JOINED; + return; + case GRP_LEAVE_MSG: + if (!m) + return; + + /* Wait until WITHDRAW event is received */ + if (m->state != MBR_LEAVING) { + m->state = MBR_LEAVING; + return; + } + /* Otherwise deliver already received WITHDRAW event */ + tipc_group_delete_member(grp, m); + return; + default: + pr_warn("Received unknown GROUP_PROTO message\n"); + } +} + +/* tipc_group_member_evt() - receive and handle a member up/down event + */ +void tipc_group_member_evt(struct tipc_group *grp, + struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_event *evt = (void *)msg_data(hdr); + u32 node = evt->port.node; + u32 port = evt->port.ref; + struct tipc_member *m; + struct net *net; + u32 self; + + if (!grp) + goto drop; + + net = grp->net; + self = tipc_own_addr(net); + if (!grp->loopback && node == self && port == grp->portid) + goto drop; + + m = tipc_group_find_member(grp, node, port); + + if (evt->event == TIPC_PUBLISHED) { + if (!m) + m = tipc_group_create_member(grp, node, port, + MBR_DISCOVERED); + if (!m) + goto drop; + + /* Wait if JOIN message not yet received */ + if (m->state == MBR_DISCOVERED) + m->state = MBR_PUBLISHED; + else + m->state = MBR_JOINED; + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + } else if (evt->event == TIPC_WITHDRAWN) { + if (!m) + goto drop; + + /* Keep back event if more messages might be expected */ + if (m->state != MBR_LEAVING && tipc_node_is_up(net, node)) + m->state = MBR_LEAVING; + else + tipc_group_delete_member(grp, m); + } +drop: + kfree_skb(skb); +} diff --git a/net/tipc/group.h b/net/tipc/group.h new file mode 100644 index 000000000000..9bdf4479fc03 --- /dev/null +++ b/net/tipc/group.h @@ -0,0 +1,64 @@ +/* + * net/tipc/group.h: Include file for TIPC group unicast/multicast functions + * + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_GROUP_H +#define _TIPC_GROUP_H + +#include "core.h" + +struct tipc_group; +struct tipc_member; +struct tipc_msg; + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq); +void tipc_group_delete(struct net *net, struct tipc_group *grp); +void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port); +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); +void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, + int *scope); +void tipc_group_filter_msg(struct tipc_group *grp, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_member_evt(struct tipc_group *grp, + struct sk_buff *skb, + struct sk_buff_head *xmitq); +void tipc_group_proto_rcv(struct tipc_group *grp, + struct tipc_msg *hdr, + struct sk_buff_head *xmitq); +void tipc_group_update_bc_members(struct tipc_group *grp); +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); +int tipc_group_size(struct tipc_group *grp); +#endif diff --git a/net/tipc/link.c b/net/tipc/link.c index ac0144f532aa..bd25bff63925 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1046,11 +1046,12 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: - if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) { + if (unlikely(msg_mcast(hdr))) { skb_queue_tail(l->bc_rcvlink->inputq, skb); return true; } case CONN_MANAGER: + case GROUP_PROTOCOL: skb_queue_tail(inputq, skb); return true; case NAME_DISTRIBUTOR: diff --git a/net/tipc/msg.h b/net/tipc/msg.h index be3e38aa9dd2..dad400935405 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, 2014-2015 Ericsson AB + * Copyright (c) 2000-2007, 2014-2017 Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. 
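The GRP_JOIN/PUBLISH handling in tipc_group_proto_rcv() and tipc_group_member_evt() above amounts to a small two-input state machine: a peer only becomes a full member once both the topology server's PUBLISH event and the peer's own GRP_JOIN message have been seen, in either arrival order. A condensed stand-alone model of that ordering (illustrative only, not part of the patch):

#include <stdio.h>

enum mbr_state { MBR_DISCOVERED, MBR_PUBLISHED, MBR_JOINING, MBR_JOINED };

/* Topology server reported the member (PUBLISH event) */
static enum mbr_state on_publish_evt(enum mbr_state s)
{
	return (s == MBR_DISCOVERED) ? MBR_PUBLISHED : MBR_JOINED;
}

/* The member's own GRP_JOIN_MSG arrived */
static enum mbr_state on_join_msg(enum mbr_state s)
{
	return (s == MBR_DISCOVERED) ? MBR_JOINING : MBR_JOINED;
}

int main(void)
{
	/* Both arrival orders converge on MBR_JOINED */
	enum mbr_state a = on_join_msg(on_publish_evt(MBR_DISCOVERED));
	enum mbr_state b = on_publish_evt(on_join_msg(MBR_DISCOVERED));

	printf("publish->join: %d, join->publish: %d (MBR_JOINED=%d)\n",
	       a, b, MBR_JOINED);
	return 0;
}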
* @@ -61,10 +61,11 @@ struct plist; /* * Payload message types */ -#define TIPC_CONN_MSG 0 -#define TIPC_MCAST_MSG 1 -#define TIPC_NAMED_MSG 2 -#define TIPC_DIRECT_MSG 3 +#define TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 +#define TIPC_GRP_BCAST_MSG 4 /* * Internal message users @@ -73,6 +74,7 @@ struct plist; #define MSG_BUNDLER 6 #define LINK_PROTOCOL 7 #define CONN_MANAGER 8 +#define GROUP_PROTOCOL 9 #define TUNNEL_PROTOCOL 10 #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 @@ -87,6 +89,7 @@ struct plist; #define BASIC_H_SIZE 32 /* Basic payload message */ #define NAMED_H_SIZE 40 /* Named payload message */ #define MCAST_H_SIZE 44 /* Multicast payload message */ +#define GROUP_H_SIZE 44 /* Group payload message */ #define INT_H_SIZE 40 /* Internal messages */ #define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ @@ -252,6 +255,11 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 29, 0x7, n); } +static inline int msg_in_group(struct tipc_msg *m) +{ + return (msg_type(m) == TIPC_GRP_BCAST_MSG); +} + static inline u32 msg_named(struct tipc_msg *m) { return msg_type(m) == TIPC_NAMED_MSG; @@ -259,7 +267,9 @@ static inline u32 msg_named(struct tipc_msg *m) static inline u32 msg_mcast(struct tipc_msg *m) { - return msg_type(m) == TIPC_MCAST_MSG; + int mtyp = msg_type(m); + + return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG)); } static inline u32 msg_connected(struct tipc_msg *m) @@ -514,6 +524,12 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 +/* + * Group protocol message types + */ +#define GRP_JOIN_MSG 0 +#define GRP_LEAVE_MSG 1 + /* * Word 1 */ @@ -795,6 +811,28 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } +static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +/* Word 10 + */ +static inline u16 msg_grp_bc_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 10, 16, 0xffff); +} + +static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 10, 16, 0xffff, n); +} + static inline bool msg_peer_link_is_up(struct tipc_msg *m) { if (likely(msg_user(m) != LINK_PROTOCOL)) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 76bd2777baaf..114d72bab827 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -43,6 +43,7 @@ #include "bcast.h" #include "addr.h" #include "node.h" +#include "group.h" #include #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -596,18 +597,6 @@ not_found: return ref; } -/** - * tipc_nametbl_mc_translate - find multicast destinations - * - * Creates list of all local ports that overlap the given multicast address; - * also determines if any off-node ports overlap. - * - * Note: Publications with a scope narrower than 'limit' are ignored. - * (i.e. 
local node-scope publications mustn't receive messages arriving - * from another node, even if the multcast link brought it here) - * - * Returns non-zero if any off-node ports overlap - */ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, u32 limit, struct list_head *dports) { @@ -679,6 +668,37 @@ exit: rcu_read_unlock(); } +/* tipc_nametbl_build_group - build list of communication group members + */ +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + u32 type, u32 domain) +{ + struct sub_seq *sseq, *stop; + struct name_info *info; + struct publication *p; + struct name_seq *seq; + + rcu_read_lock(); + seq = nametbl_find_seq(net, type); + if (!seq) + goto exit; + + spin_lock_bh(&seq->lock); + sseq = seq->sseqs; + stop = seq->sseqs + seq->first_free; + for (; sseq != stop; sseq++) { + info = sseq->info; + list_for_each_entry(p, &info->zone_list, zone_list) { + if (!tipc_in_scope(domain, p->node)) + continue; + tipc_group_add_member(grp, p->node, p->ref); + } + } + spin_unlock_bh(&seq->lock); +exit: + rcu_read_unlock(); +} + /* * tipc_nametbl_publish - add name publication to network name tables */ diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index d121175a92b5..97646b17a4a2 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -40,6 +40,7 @@ struct tipc_subscription; struct tipc_plist; struct tipc_nlist; +struct tipc_group; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -101,6 +102,8 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, u32 limit, struct list_head *dports); +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, u32 upper, u32 domain, struct tipc_nlist *nodes); diff --git a/net/tipc/node.h b/net/tipc/node.h index df2f2197c4ad..acd58d23a70e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -48,7 +48,8 @@ enum { TIPC_BCAST_SYNCH = (1 << 1), TIPC_BCAST_STATE_NACK = (1 << 2), TIPC_BLOCK_FLOWCTL = (1 << 3), - TIPC_BCAST_RCAST = (1 << 4) + TIPC_BCAST_RCAST = (1 << 4), + TIPC_MCAST_GROUPS = (1 << 5) }; #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ diff --git a/net/tipc/socket.c b/net/tipc/socket.c index daf7c4df4531..64bbf9d03629 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2016, Ericsson AB + * Copyright (c) 2001-2007, 2012-2017, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. 
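The new msg_grp_bc_syncpt()/msg_grp_bc_seqno() accessors above pack 16-bit values into the upper halves of header words 9 and 10 using the usual msg_bits()/msg_set_bits() pattern, with 32-bit header words kept in network byte order. A stand-alone sketch of that pattern, using a bare u32 array in place of struct tipc_msg (illustrative only):

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

static uint32_t get_bits(const uint32_t *hdr, int w, int pos, uint32_t mask)
{
	return (ntohl(hdr[w]) >> pos) & mask;
}

static void set_bits(uint32_t *hdr, int w, int pos, uint32_t mask, uint32_t val)
{
	uint32_t word = ntohl(hdr[w]);

	word &= ~(mask << pos);		/* clear the field      */
	word |= (val & mask) << pos;	/* insert the new value */
	hdr[w] = htonl(word);
}

int main(void)
{
	uint32_t hdr[15] = { 0 };	/* MAX_H_SIZE (60) / 4 words */

	set_bits(hdr, 9, 16, 0xffff, 42);	/* grp_bc_syncpt */
	set_bits(hdr, 10, 16, 0xffff, 7);	/* grp_bc_seqno  */
	assert(get_bits(hdr, 9, 16, 0xffff) == 42);
	assert(get_bits(hdr, 10, 16, 0xffff) == 7);
	return 0;
}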
* @@ -45,6 +45,7 @@ #include "socket.h" #include "bcast.h" #include "netlink.h" +#include "group.h" #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ @@ -78,7 +79,7 @@ enum { * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @cong_link_cnt: number of congested links - * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @snt_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer * @peer: 'connected' peer for dgram/rdm * @node: hash table node @@ -109,6 +110,7 @@ struct tipc_sock { struct rhash_head node; struct tipc_mc_method mc_method; struct rcu_head rcu; + struct tipc_group *group; }; static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -123,6 +125,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); +static int tipc_sk_leave(struct tipc_sock *tsk); static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); @@ -559,6 +562,7 @@ static int tipc_release(struct socket *sock) __tipc_shutdown(sock, TIPC_ERR_NO_PORT); sk->sk_shutdown = SHUTDOWN_MASK; + tipc_sk_leave(tsk); tipc_sk_withdraw(tsk, 0, NULL); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); @@ -601,7 +605,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, res = tipc_sk_withdraw(tsk, 0, NULL); goto exit; } - + if (tsk->group) { + res = -EACCES; + goto exit; + } if (uaddr_len < sizeof(struct sockaddr_tipc)) { res = -EINVAL; goto exit; @@ -698,6 +705,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; u32 mask = 0; sock_poll_wait(file, sk_sleep(sk), wait); @@ -718,8 +726,9 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, mask |= (POLLIN | POLLRDNORM); break; case TIPC_OPEN: - if (!tsk->cong_link_cnt) - mask |= POLLOUT; + if (!grp || tipc_group_size(grp)) + if (!tsk->cong_link_cnt) + mask |= POLLOUT; if (tipc_sk_type_connectionless(sk) && (!skb_queue_empty(&sk->sk_receive_queue))) mask |= (POLLIN | POLLRDNORM); @@ -757,6 +766,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_nlist dsts; int rc; + if (tsk->group) + return -EACCES; + /* Block or return if any destination link is congested */ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); if (unlikely(rc)) @@ -793,6 +805,64 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, return rc ? 
rc : dlen; } +/** + * tipc_send_group_bcast - send message to all members in communication group + * @sk: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct tipc_nlist *dsts = tipc_group_dests(grp); + struct tipc_mc_method *method = &tsk->mc_method; + struct tipc_msg *hdr = &tsk->phdr; + int mtu = tipc_bcast_get_mtu(net); + struct sk_buff_head pkts; + int rc = -EHOSTUNREACH; + + if (!dsts->local && !dsts->remote) + return -EHOSTUNREACH; + + /* Block or return if any destination link is congested */ + rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); + if (unlikely(rc)) + return rc; + + /* Complete message header */ + msg_set_type(hdr, TIPC_GRP_BCAST_MSG); + msg_set_hdr_sz(hdr, MCAST_H_SIZE); + msg_set_destport(hdr, 0); + msg_set_destnode(hdr, 0); + msg_set_nameinst(hdr, 0); + msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp)); + + /* Build message as chain of buffers */ + skb_queue_head_init(&pkts); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_mcast_xmit(net, &pkts, method, dsts, + &tsk->cong_link_cnt); + if (unlikely(rc)) + return rc; + + /* Update broadcast sequence number */ + tipc_group_update_bc_members(tsk->group); + + return dlen; +} + /** * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets * @arrvq: queue with arriving messages, to be cloned after destination lookup @@ -803,13 +873,15 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { - struct tipc_msg *msg; - struct list_head dports; - u32 portid; u32 scope = TIPC_CLUSTER_SCOPE; - struct sk_buff_head tmpq; - uint hsz; + u32 self = tipc_own_addr(net); struct sk_buff *skb, *_skb; + u32 lower = 0, upper = ~0; + struct sk_buff_head tmpq; + u32 portid, oport, onode; + struct list_head dports; + struct tipc_msg *msg; + int hsz; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); @@ -818,14 +890,18 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { msg = buf_msg(skb); hsz = skb_headroom(skb) + msg_hdr_sz(msg); - - if (in_own_node(net, msg_orignode(msg))) + oport = msg_origport(msg); + onode = msg_orignode(msg); + if (onode == self) scope = TIPC_NODE_SCOPE; /* Create destination port list and message clones: */ - tipc_nametbl_mc_translate(net, - msg_nametype(msg), msg_namelower(msg), - msg_nameupper(msg), scope, &dports); + if (!msg_in_group(msg)) { + lower = msg_namelower(msg); + upper = msg_nameupper(msg); + } + tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper, + scope, &dports); while (tipc_dest_pop(&dports, NULL, &portid)) { _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); if (_skb) { @@ -895,10 +971,6 @@ exit: kfree_skb(skb); } -static void tipc_sk_top_evt(struct tipc_sock *tsk, struct tipc_event *evt) -{ -} - /** * tipc_sendmsg - send message in connectionless manner * @sock: socket structure @@ -934,6 +1006,7 @@ static int __tipc_sendmsg(struct 
socket *sock, struct msghdr *m, size_t dlen) long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct list_head *clinks = &tsk->cong_links; bool syn = !tipc_sk_type_connectionless(sk); + struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq *seq; struct sk_buff_head pkts; @@ -944,6 +1017,9 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; + if (unlikely(grp)) + return tipc_send_group_bcast(sock, m, dlen, timeout); + if (unlikely(!dest)) { dest = &tsk->peer; if (!syn || dest->family != AF_TIPC) @@ -1543,6 +1619,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, struct sk_buff *skb = __skb_dequeue(inputq); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); + struct tipc_group *grp = tsk->group; switch (msg_user(hdr)) { case CONN_MANAGER: @@ -1553,8 +1630,12 @@ static void tipc_sk_proto_rcv(struct sock *sk, tsk->cong_link_cnt--; sk->sk_write_space(sk); break; + case GROUP_PROTOCOL: + tipc_group_proto_rcv(grp, hdr, xmitq); + break; case TOP_SRV: - tipc_sk_top_evt(tsk, (void *)msg_data(hdr)); + tipc_group_member_evt(tsk->group, skb, xmitq); + skb = NULL; break; default: break; @@ -1699,6 +1780,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, { bool sk_conn = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = buf_msg(skb); struct net *net = sock_net(sk); struct sk_buff_head inputq; @@ -1710,15 +1792,19 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(!msg_isdata(hdr))) tipc_sk_proto_rcv(sk, &inputq, xmitq); - else if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) + else if (unlikely(msg_type(hdr) > TIPC_GRP_BCAST_MSG)) return kfree_skb(skb); + if (unlikely(grp)) + tipc_group_filter_msg(grp, &inputq, xmitq); + /* Validate and add to receive buffer if there is space */ while ((skb = __skb_dequeue(&inputq))) { hdr = buf_msg(skb); limit = rcvbuf_limit(sk, skb); if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) || - (!sk_conn && msg_connected(hdr))) + (!sk_conn && msg_connected(hdr)) || + (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) err = TIPC_ERR_OVERLOAD; @@ -1837,7 +1923,6 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) sock_put(sk); continue; } - /* No destination socket => dequeue skb if still there */ skb = tipc_skb_dequeue(inputq, dport); if (!skb) @@ -1905,6 +1990,11 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, lock_sock(sk); + if (tsk->group) { + res = -EINVAL; + goto exit; + } + if (dst->family == AF_UNSPEC) { memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); if (!tipc_sk_type_connectionless(sk)) @@ -2341,6 +2431,52 @@ void tipc_sk_rht_destroy(struct net *net) rhashtable_destroy(&tn->sk_rht); } +static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) +{ + struct net *net = sock_net(&tsk->sk); + u32 domain = addr_domain(net, mreq->scope); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_name_seq seq; + int rc; + + if (mreq->type < TIPC_RESERVED_TYPES) + return -EACCES; + if (grp) + return -EACCES; + grp = tipc_group_create(net, tsk->portid, mreq); + if (!grp) + return -ENOMEM; + tsk->group = grp; + msg_set_lookup_scope(hdr, mreq->scope); + msg_set_nametype(hdr, mreq->type); + msg_set_dest_droppable(hdr, true); + 
seq.type = mreq->type; + seq.lower = mreq->instance; + seq.upper = seq.lower; + tipc_nametbl_build_group(net, grp, mreq->type, domain); + rc = tipc_sk_publish(tsk, mreq->scope, &seq); + if (rc) + tipc_group_delete(net, grp); + return rc; +} + +static int tipc_sk_leave(struct tipc_sock *tsk) +{ + struct net *net = sock_net(&tsk->sk); + struct tipc_group *grp = tsk->group; + struct tipc_name_seq seq; + int scope; + + if (!grp) + return -EINVAL; + tipc_group_self(grp, &seq, &scope); + tipc_group_delete(net, grp); + tsk->group = NULL; + tipc_sk_withdraw(tsk, scope, &seq); + return 0; +} + /** * tipc_setsockopt - set socket option * @sock: socket structure @@ -2359,6 +2495,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group_req mreq; u32 value = 0; int res = 0; @@ -2374,9 +2511,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, case TIPC_CONN_TIMEOUT: if (ol < sizeof(value)) return -EINVAL; - res = get_user(value, (u32 __user *)ov); - if (res) - return res; + if (get_user(value, (u32 __user *)ov)) + return -EFAULT; + break; + case TIPC_GROUP_JOIN: + if (ol < sizeof(mreq)) + return -EINVAL; + if (copy_from_user(&mreq, ov, sizeof(mreq))) + return -EFAULT; break; default: if (ov || ol) @@ -2409,6 +2551,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; break; + case TIPC_GROUP_JOIN: + res = tipc_sk_join(tsk, &mreq); + break; + case TIPC_GROUP_LEAVE: + res = tipc_sk_leave(tsk); + break; default: res = -EINVAL; } @@ -2436,7 +2584,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - int len; + struct tipc_name_seq seq; + int len, scope; u32 value; int res; @@ -2470,6 +2619,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, case TIPC_SOCK_RECVQ_DEPTH: value = skb_queue_len(&sk->sk_receive_queue); break; + case TIPC_GROUP_JOIN: + seq.type = 0; + if (tsk->group) + tipc_group_self(tsk->group, &seq, &scope); + value = seq.type; + break; default: res = -EINVAL; } From 31c82a2d9d51fccbb85cbd2be983eb115225301c Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:24 +0200 Subject: [PATCH 0929/2177] tipc: add second source address to recvmsg()/recvfrom() With group communication, it becomes important for a message receiver to identify not only from which socket (identfied by a node:port tuple) the message was sent, but also the logical identity (type:instance) of the sending member. We fix this by adding a second instance of struct sockaddr_tipc to the source address area when a message is read. The extra address struct is filled in with data found in the received message header (type,) and in the local member representation struct (instance.) Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/group.c | 3 +++ net/tipc/msg.h | 1 + net/tipc/socket.c | 49 +++++++++++++++++++++++++++++++---------------- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 3f0e1ce1e3b9..beb214a3420c 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -61,6 +61,7 @@ struct tipc_member { struct list_head list; u32 node; u32 port; + u32 instance; enum mbr_state state; u16 bc_rcv_nxt; }; @@ -282,6 +283,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, if (!tipc_group_is_receiver(m)) goto drop; + TIPC_SKB_CB(skb)->orig_member = m->instance; __skb_queue_tail(inputq, skb); m->bc_rcv_nxt = msg_grp_bc_seqno(hdr) + 1; @@ -388,6 +390,7 @@ void tipc_group_member_evt(struct tipc_group *grp, m->state = MBR_PUBLISHED; else m->state = MBR_JOINED; + m->instance = evt->found_lower; tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); } else if (evt->event == TIPC_WITHDRAWN) { if (!m) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index dad400935405..e438716d2372 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -100,6 +100,7 @@ struct plist; struct tipc_skb_cb { u32 bytes_read; + u32 orig_member; struct sk_buff *tail; bool validated; u16 chain_imp; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 64bbf9d03629..25ecf1201527 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -62,6 +62,11 @@ enum { TIPC_CONNECTING = TCP_SYN_SENT, }; +struct sockaddr_pair { + struct sockaddr_tipc sock; + struct sockaddr_tipc member; +}; + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API @@ -1222,26 +1227,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, } /** - * set_orig_addr - capture sender's address for received message + * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info - * @msg: received message header + * @hdr: received message header * * Note: Address is not captured if not requested by receiver. 
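From user space, the second source address shows up as a second struct sockaddr_tipc in msg_name: the first entry still carries the sending socket (node:port), the new one carries the sender's member identity (type:instance), and the returned msg_namelen tells which layout was filled in. A minimal receive sketch under that assumption (the helper name and buffer size are illustrative):

#include <linux/tipc.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void recv_one(int sd)
{
	struct sockaddr_tipc addr[2];	/* [0] socket id, [1] member name */
	char buf[1024];
	struct iovec iov = { buf, sizeof(buf) };
	struct msghdr msg = {
		.msg_name = addr,
		.msg_namelen = sizeof(addr),
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	ssize_t n = recvmsg(sd, &msg, 0);

	if (n < 0)
		return;
	printf("%zd bytes from node 0x%x port %u", n,
	       addr[0].addr.id.node, addr[0].addr.id.ref);
	if (msg.msg_namelen == sizeof(addr))	/* group message */
		printf(", member %u:%u", addr[1].addr.name.name.type,
		       addr[1].addr.name.name.instance);
	printf("\n");
}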
*/ -static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) +static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name); + DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name); + struct tipc_msg *hdr = buf_msg(skb); - if (addr) { - addr->family = AF_TIPC; - addr->addrtype = TIPC_ADDR_ID; - memset(&addr->addr, 0, sizeof(addr->addr)); - addr->addr.id.ref = msg_origport(msg); - addr->addr.id.node = msg_orignode(msg); - addr->addr.name.domain = 0; /* could leave uninitialized */ - addr->scope = 0; /* could leave uninitialized */ - m->msg_namelen = sizeof(struct sockaddr_tipc); - } + if (!srcaddr) + return; + + srcaddr->sock.family = AF_TIPC; + srcaddr->sock.addrtype = TIPC_ADDR_ID; + srcaddr->sock.addr.id.ref = msg_origport(hdr); + srcaddr->sock.addr.id.node = msg_orignode(hdr); + srcaddr->sock.addr.name.domain = 0; + srcaddr->sock.scope = 0; + m->msg_namelen = sizeof(struct sockaddr_tipc); + + if (!msg_in_group(hdr)) + return; + + /* Group message users may also want to know sending member's id */ + srcaddr->member.family = AF_TIPC; + srcaddr->member.addrtype = TIPC_ADDR_NAME; + srcaddr->member.addr.name.name.type = msg_nametype(hdr); + srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; + srcaddr->member.addr.name.domain = 0; + m->msg_namelen = sizeof(*srcaddr); } /** @@ -1432,7 +1449,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, } while (1); /* Collect msg meta data, including error code and rejected data */ - set_orig_addr(m, hdr); + tipc_sk_set_orig_addr(m, skb); rc = tipc_sk_anc_data_recv(m, hdr, tsk); if (unlikely(rc)) goto exit; @@ -1526,7 +1543,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Collect msg meta data, incl. error code and rejected data */ if (!copied) { - set_orig_addr(m, hdr); + tipc_sk_set_orig_addr(m, skb); rc = tipc_sk_anc_data_recv(m, hdr, tsk); if (rc) break; From ae236fb208a6fbbd2e7a6913385e8fb78ac807f8 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:25 +0200 Subject: [PATCH 0930/2177] tipc: receive group membership events via member socket Like with any other service, group members' availability can be subscribed for by connecting to be topology server. However, because the events arrive via a different socket than the member socket, there is a real risk that membership events my arrive out of synch with the actual JOIN/LEAVE action. I.e., it is possible to receive the first messages from a new member before the corresponding JOIN event arrives, just as it is possible to receive the last messages from a leaving member after the LEAVE event has already been received. Since each member socket is internally also subscribing for membership events, we now fix this problem by passing those events on to the user via the member socket. We leverage the already present member synch- ronization protocol to guarantee correct message/event order. An event is delivered to the user as an empty message where the two source addresses identify the new/lost member. Furthermore, we set the MSG_OOB bit in the message flags to mark it as an event. If the event is an indication about a member loss we also set the MSG_EOR bit, so it can be distinguished from a member addition event. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
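Putting the pieces together from a user-space point of view: a member joins with TIPC_GROUP_MEMBER_EVTS set and can then tell data messages from membership events by the MSG_OOB flag, with MSG_EOR additionally marking a member loss. A hedged sketch of that usage (group id 4711, instance 17 and the buffer size are arbitrary example values):

#include <linux/tipc.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/uio.h>

int main(void)
{
	int sd = socket(AF_TIPC, SOCK_RDM, 0);
	struct tipc_group_req mreq = {
		.type = 4711,				/* group identity */
		.instance = 17,				/* this member's identity */
		.scope = TIPC_CLUSTER_SCOPE,
		.flags = TIPC_GROUP_MEMBER_EVTS,	/* deliver events on this socket */
	};
	struct sockaddr_tipc member[2];
	char buf[1024];
	struct iovec iov = { buf, sizeof(buf) };
	struct msghdr msg = {
		.msg_name = member,
		.msg_namelen = sizeof(member),
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};

	if (sd < 0 ||
	    setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &mreq, sizeof(mreq)))
		return 1;

	for (;;) {
		if (recvmsg(sd, &msg, 0) < 0)
			break;
		if (msg.msg_flags & MSG_OOB)		/* membership event */
			printf("member %u %s\n",
			       member[1].addr.name.name.instance,
			       (msg.msg_flags & MSG_EOR) ? "left" : "joined");
		else
			printf("data message received\n");
		msg.msg_namelen = sizeof(member);	/* reset before next call */
	}
	setsockopt(sd, SOL_TIPC, TIPC_GROUP_LEAVE, NULL, 0);
	return 0;
}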
Miller --- include/uapi/linux/tipc.h | 1 + net/tipc/group.c | 60 ++++++++++++++++++++++++++++++--------- net/tipc/group.h | 2 ++ net/tipc/msg.h | 22 ++++++++++++-- net/tipc/socket.c | 49 ++++++++++++++++++++------------ 5 files changed, 101 insertions(+), 33 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 5f7b2c4a09ab..ef41c11a7f38 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -238,6 +238,7 @@ struct sockaddr_tipc { * Flag values */ #define TIPC_GROUP_LOOPBACK 0x1 /* Receive copy of sent msg when match */ +#define TIPC_GROUP_MEMBER_EVTS 0x2 /* Receive membership events in socket */ struct tipc_group_req { __u32 type; /* group id */ diff --git a/net/tipc/group.c b/net/tipc/group.c index beb214a3420c..1bfa9348b26d 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -59,6 +59,7 @@ enum mbr_state { struct tipc_member { struct rb_node tree_node; struct list_head list; + struct sk_buff *event_msg; u32 node; u32 port; u32 instance; @@ -79,6 +80,7 @@ struct tipc_group { u16 member_cnt; u16 bc_snd_nxt; bool loopback; + bool events; }; static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, @@ -117,6 +119,7 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, grp->instance = mreq->instance; grp->scope = mreq->scope; grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; + grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid)) return grp; kfree(grp); @@ -279,6 +282,13 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, if (!msg_in_group(hdr)) goto drop; + if (mtyp == TIPC_GRP_MEMBER_EVT) { + if (!grp->events) + goto drop; + __skb_queue_tail(inputq, skb); + return; + } + m = tipc_group_find_member(grp, node, port); if (!tipc_group_is_receiver(m)) goto drop; @@ -311,6 +321,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, } void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, + struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { u32 node = msg_orignode(hdr); @@ -332,10 +343,12 @@ void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, m->bc_rcv_nxt = msg_grp_bc_syncpt(hdr); /* Wait until PUBLISH event is received */ - if (m->state == MBR_DISCOVERED) + if (m->state == MBR_DISCOVERED) { m->state = MBR_JOINING; - else if (m->state == MBR_PUBLISHED) + } else if (m->state == MBR_PUBLISHED) { m->state = MBR_JOINED; + __skb_queue_tail(inputq, m->event_msg); + } return; case GRP_LEAVE_MSG: if (!m) @@ -347,6 +360,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, return; } /* Otherwise deliver already received WITHDRAW event */ + __skb_queue_tail(inputq, m->event_msg); tipc_group_delete_member(grp, m); return; default: @@ -354,16 +368,17 @@ void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, } } -/* tipc_group_member_evt() - receive and handle a member up/down event - */ void tipc_group_member_evt(struct tipc_group *grp, struct sk_buff *skb, + struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); struct tipc_event *evt = (void *)msg_data(hdr); + u32 instance = evt->found_lower; u32 node = evt->port.node; u32 port = evt->port.ref; + int event = evt->event; struct tipc_member *m; struct net *net; u32 self; @@ -376,32 +391,51 @@ void tipc_group_member_evt(struct tipc_group *grp, if (!grp->loopback && node == self && port == grp->portid) goto drop; + /* 
Convert message before delivery to user */ + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); + msg_set_type(hdr, TIPC_GRP_MEMBER_EVT); + msg_set_origport(hdr, port); + msg_set_orignode(hdr, node); + msg_set_nametype(hdr, grp->type); + msg_set_grp_evt(hdr, event); + m = tipc_group_find_member(grp, node, port); - if (evt->event == TIPC_PUBLISHED) { + if (event == TIPC_PUBLISHED) { if (!m) m = tipc_group_create_member(grp, node, port, MBR_DISCOVERED); if (!m) goto drop; - /* Wait if JOIN message not yet received */ - if (m->state == MBR_DISCOVERED) + /* Hold back event if JOIN message not yet received */ + if (m->state == MBR_DISCOVERED) { + m->event_msg = skb; m->state = MBR_PUBLISHED; - else + } else { + __skb_queue_tail(inputq, skb); m->state = MBR_JOINED; - m->instance = evt->found_lower; + } + m->instance = instance; + TIPC_SKB_CB(skb)->orig_member = m->instance; tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); - } else if (evt->event == TIPC_WITHDRAWN) { + } else if (event == TIPC_WITHDRAWN) { if (!m) goto drop; - /* Keep back event if more messages might be expected */ - if (m->state != MBR_LEAVING && tipc_node_is_up(net, node)) + TIPC_SKB_CB(skb)->orig_member = m->instance; + + /* Hold back event if more messages might be expected */ + if (m->state != MBR_LEAVING && tipc_node_is_up(net, node)) { + m->event_msg = skb; m->state = MBR_LEAVING; - else + } else { + __skb_queue_tail(inputq, skb); tipc_group_delete_member(grp, m); + } } + return; drop: kfree_skb(skb); } diff --git a/net/tipc/group.h b/net/tipc/group.h index 9bdf4479fc03..5d3f10d28967 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -54,9 +54,11 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *xmitq); void tipc_group_member_evt(struct tipc_group *grp, struct sk_buff *skb, + struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, + struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_update_bc_members(struct tipc_group *grp); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index e438716d2372..1b527b154e46 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -65,7 +65,8 @@ struct plist; #define TIPC_MCAST_MSG 1 #define TIPC_NAMED_MSG 2 #define TIPC_DIRECT_MSG 3 -#define TIPC_GRP_BCAST_MSG 4 +#define TIPC_GRP_MEMBER_EVT 4 +#define TIPC_GRP_BCAST_MSG 5 /* * Internal message users @@ -258,7 +259,14 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n) static inline int msg_in_group(struct tipc_msg *m) { - return (msg_type(m) == TIPC_GRP_BCAST_MSG); + int mtyp = msg_type(m); + + return (mtyp == TIPC_GRP_BCAST_MSG) || (mtyp == TIPC_GRP_MEMBER_EVT); +} + +static inline bool msg_is_grp_evt(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_GRP_MEMBER_EVT; } static inline u32 msg_named(struct tipc_msg *m) @@ -824,6 +832,16 @@ static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n) /* Word 10 */ +static inline u16 msg_grp_evt(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x3); +} + +static inline void msg_set_grp_evt(struct tipc_msg *m, int n) +{ + msg_set_bits(m, 10, 0, 0x3, n); +} + static inline u16 msg_grp_bc_seqno(struct tipc_msg *m) { return msg_bits(m, 10, 16, 0xffff); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 25ecf1201527..0a2eac309177 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -709,41 +709,43 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, 
poll_table *wait) { struct sock *sk = sock->sk; + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; - u32 mask = 0; + u32 revents = 0; sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + revents |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + revents |= POLLHUP; switch (sk->sk_state) { case TIPC_ESTABLISHED: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) - mask |= POLLOUT; + revents |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: case TIPC_CONNECTING: - if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); + if (skb) + revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: if (!grp || tipc_group_size(grp)) if (!tsk->cong_link_cnt) - mask |= POLLOUT; - if (tipc_sk_type_connectionless(sk) && - (!skb_queue_empty(&sk->sk_receive_queue))) - mask |= (POLLIN | POLLRDNORM); + revents |= POLLOUT; + if (!tipc_sk_type_connectionless(sk)) + break; + if (!skb) + break; + revents |= POLLIN | POLLRDNORM; break; case TIPC_DISCONNECTING: - mask = (POLLIN | POLLRDNORM | POLLHUP); + revents = POLLIN | POLLRDNORM | POLLHUP; break; } - - return mask; + return revents; } /** @@ -1415,11 +1417,12 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buflen, int flags) { struct sock *sk = sock->sk; - struct tipc_sock *tsk = tipc_sk(sk); - struct sk_buff *skb; - struct tipc_msg *hdr; bool connected = !tipc_sk_type_connectionless(sk); + struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct tipc_msg *hdr; + struct sk_buff *skb; + bool grp_evt; long timeout; /* Catch invalid receive requests */ @@ -1443,6 +1446,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); err = msg_errcode(hdr); + grp_evt = msg_is_grp_evt(hdr); if (likely(dlen || err)) break; tsk_advance_rx_queue(sk); @@ -1469,11 +1473,20 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; + /* Mark message as group event if applicable */ + if (unlikely(grp_evt)) { + if (msg_grp_evt(hdr) == TIPC_WITHDRAWN) + m->msg_flags |= MSG_EOR; + m->msg_flags |= MSG_OOB; + copy = 0; + } + /* Caption of data or error code/rejected data was successful */ if (unlikely(flags & MSG_PEEK)) goto exit; tsk_advance_rx_queue(sk); + if (likely(!connected)) goto exit; @@ -1648,10 +1661,10 @@ static void tipc_sk_proto_rcv(struct sock *sk, sk->sk_write_space(sk); break; case GROUP_PROTOCOL: - tipc_group_proto_rcv(grp, hdr, xmitq); + tipc_group_proto_rcv(grp, hdr, inputq, xmitq); break; case TOP_SRV: - tipc_group_member_evt(tsk->group, skb, xmitq); + tipc_group_member_evt(tsk->group, skb, inputq, xmitq); skb = NULL; break; default: From b7d42635517fde2b095deddd0fba37be2302a285 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:26 +0200 Subject: [PATCH 0931/2177] tipc: introduce flow control for group broadcast messages We introduce an end-to-end flow control mechanism for group broadcast messages. This ensures that no messages are ever lost because of destination receive buffer overflow, with minimal impact on performance. For now, the algorithm is based on the assumption that there is only one active transmitter at any moment in time. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/group.c | 148 ++++++++++++++++++++++++++++++++++++++++++++-- net/tipc/group.h | 11 ++-- net/tipc/msg.h | 5 +- net/tipc/socket.c | 48 +++++++++++---- 4 files changed, 190 insertions(+), 22 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 1bfa9348b26d..b8ed70abba01 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -46,6 +46,7 @@ #define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1) #define ADV_IDLE ADV_UNIT +#define ADV_ACTIVE (ADV_UNIT * 12) enum mbr_state { MBR_QUARANTINED, @@ -59,16 +60,22 @@ enum mbr_state { struct tipc_member { struct rb_node tree_node; struct list_head list; + struct list_head congested; struct sk_buff *event_msg; + struct tipc_group *group; u32 node; u32 port; u32 instance; enum mbr_state state; + u16 advertised; + u16 window; u16 bc_rcv_nxt; + bool usr_pending; }; struct tipc_group { struct rb_root members; + struct list_head congested; struct tipc_nlist dests; struct net *net; int subid; @@ -86,11 +93,24 @@ struct tipc_group { static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq); +static int tipc_group_rcvbuf_limit(struct tipc_group *grp) +{ + int mcnt = grp->member_cnt + 1; + + /* Scale to bytes, considering worst-case truesize/msgsize ratio */ + return mcnt * ADV_ACTIVE * FLOWCTL_BLK_SZ * 4; +} + u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) { return grp->bc_snd_nxt; } +static bool tipc_group_is_enabled(struct tipc_member *m) +{ + return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING; +} + static bool tipc_group_is_receiver(struct tipc_member *m) { return m && m->state >= MBR_JOINED; @@ -111,6 +131,7 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, if (!grp) return NULL; tipc_nlist_init(&grp->dests, tipc_own_addr(net)); + INIT_LIST_HEAD(&grp->congested); grp->members = RB_ROOT; grp->net = net; grp->portid = portid; @@ -213,6 +234,8 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, if (!m) return NULL; INIT_LIST_HEAD(&m->list); + INIT_LIST_HEAD(&m->congested); + m->group = grp; m->node = node; m->port = port; grp->member_cnt++; @@ -233,6 +256,7 @@ static void tipc_group_delete_member(struct tipc_group *grp, rb_erase(&m->tree_node, &grp->members); grp->member_cnt--; list_del_init(&m->list); + list_del_init(&m->congested); /* If last member on a node, remove node from dest list */ if (!tipc_group_find_node(grp, m->node)) @@ -255,11 +279,59 @@ void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, *scope = grp->scope; } -void tipc_group_update_bc_members(struct tipc_group *grp) +void tipc_group_update_member(struct tipc_member *m, int len) { + struct tipc_group *grp = m->group; + struct tipc_member *_m, *tmp; + + if (!tipc_group_is_enabled(m)) + return; + + m->window -= len; + + if (m->window >= ADV_IDLE) + return; + + if (!list_empty(&m->congested)) + return; + + /* Sort member into congested members' list */ + list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { + if (m->window > _m->window) + continue; + list_add_tail(&m->congested, &_m->congested); + return; + } + list_add_tail(&m->congested, &grp->congested); +} + +void tipc_group_update_bc_members(struct tipc_group *grp, int len) +{ + struct tipc_member *m; + struct rb_node *n; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (tipc_group_is_enabled(m)) + tipc_group_update_member(m, len); + } grp->bc_snd_nxt++; } +bool 
tipc_group_bc_cong(struct tipc_group *grp, int len) +{ + struct tipc_member *m; + + if (list_empty(&grp->congested)) + return false; + + m = list_first_entry(&grp->congested, struct tipc_member, congested); + if (m->window >= len) + return false; + + return true; +} + /* tipc_group_filter_msg() - determine if we should accept arriving message */ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, @@ -302,11 +374,36 @@ drop: kfree_skb(skb); } +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq) +{ + struct tipc_member *m; + + m = tipc_group_find_member(grp, node, port); + if (!m) + return; + + m->advertised -= blks; + + switch (m->state) { + case MBR_JOINED: + if (m->advertised <= (ADV_ACTIVE - ADV_UNIT)) + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + break; + case MBR_DISCOVERED: + case MBR_JOINING: + case MBR_LEAVING: + default: + break; + } +} + static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq) { struct tipc_msg *hdr; struct sk_buff *skb; + int adv = 0; skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0, m->node, tipc_own_addr(grp->net), @@ -314,14 +411,24 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, if (!skb) return; + if (m->state == MBR_JOINED) + adv = ADV_ACTIVE - m->advertised; + hdr = buf_msg(skb); - if (mtyp == GRP_JOIN_MSG) + + if (mtyp == GRP_JOIN_MSG) { msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_ADV_MSG) { + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } __skb_queue_tail(xmitq, skb); } -void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, - struct sk_buff_head *inputq, +void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, + struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { u32 node = msg_orignode(hdr); @@ -341,14 +448,22 @@ void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, if (!m) return; m->bc_rcv_nxt = msg_grp_bc_syncpt(hdr); + m->window += msg_adv_win(hdr); /* Wait until PUBLISH event is received */ if (m->state == MBR_DISCOVERED) { m->state = MBR_JOINING; } else if (m->state == MBR_PUBLISHED) { m->state = MBR_JOINED; + *usr_wakeup = true; + m->usr_pending = false; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); __skb_queue_tail(inputq, m->event_msg); } + if (m->window < ADV_IDLE) + tipc_group_update_member(m, 0); + else + list_del_init(&m->congested); return; case GRP_LEAVE_MSG: if (!m) @@ -361,14 +476,28 @@ void tipc_group_proto_rcv(struct tipc_group *grp, struct tipc_msg *hdr, } /* Otherwise deliver already received WITHDRAW event */ __skb_queue_tail(inputq, m->event_msg); + *usr_wakeup = m->usr_pending; tipc_group_delete_member(grp, m); + list_del_init(&m->congested); + return; + case GRP_ADV_MSG: + if (!m) + return; + m->window += msg_adv_win(hdr); + *usr_wakeup = m->usr_pending; + m->usr_pending = false; + list_del_init(&m->congested); return; default: pr_warn("Received unknown GROUP_PROTO message\n"); } } +/* tipc_group_member_evt() - receive and handle a member up/down event + */ void tipc_group_member_evt(struct tipc_group *grp, + bool *usr_wakeup, + int *sk_rcvbuf, struct sk_buff *skb, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) @@ -416,16 +545,25 @@ void tipc_group_member_evt(struct tipc_group *grp, } else { __skb_queue_tail(inputq, skb); m->state = MBR_JOINED; + 
*usr_wakeup = true; + m->usr_pending = false; } m->instance = instance; TIPC_SKB_CB(skb)->orig_member = m->instance; tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + if (m->window < ADV_IDLE) + tipc_group_update_member(m, 0); + else + list_del_init(&m->congested); } else if (event == TIPC_WITHDRAWN) { if (!m) goto drop; TIPC_SKB_CB(skb)->orig_member = m->instance; + *usr_wakeup = m->usr_pending; + m->usr_pending = false; + /* Hold back event if more messages might be expected */ if (m->state != MBR_LEAVING && tipc_node_is_up(net, node)) { m->event_msg = skb; @@ -434,7 +572,9 @@ void tipc_group_member_evt(struct tipc_group *grp, __skb_queue_tail(inputq, skb); tipc_group_delete_member(grp, m); } + list_del_init(&m->congested); } + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); return; drop: kfree_skb(skb); diff --git a/net/tipc/group.h b/net/tipc/group.h index 5d3f10d28967..0e2740e1da90 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -52,15 +52,18 @@ void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); -void tipc_group_member_evt(struct tipc_group *grp, - struct sk_buff *skb, +void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup, + int *sk_rcvbuf, struct sk_buff *skb, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); -void tipc_group_proto_rcv(struct tipc_group *grp, +void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); -void tipc_group_update_bc_members(struct tipc_group *grp); +void tipc_group_update_bc_members(struct tipc_group *grp, int len); +bool tipc_group_bc_cong(struct tipc_group *grp, int len); +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); int tipc_group_size(struct tipc_group *grp); #endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 1b527b154e46..237d007499f9 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -538,6 +538,7 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) */ #define GRP_JOIN_MSG 0 #define GRP_LEAVE_MSG 1 +#define GRP_ADV_MSG 2 /* * Word 1 @@ -790,12 +791,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 16, 0xffff, n); } -static inline u32 msg_adv_win(struct tipc_msg *m) +static inline u16 msg_adv_win(struct tipc_msg *m) { return msg_bits(m, 9, 0, 0xffff); } -static inline void msg_set_adv_win(struct tipc_msg *m, u32 n) +static inline void msg_set_adv_win(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 0, 0xffff, n); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 0a2eac309177..50145c95ac96 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -201,6 +201,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk) return tsk->snt_unacked > tsk->snd_win; } +static u16 tsk_blocks(int len) +{ + return ((len / FLOWCTL_BLK_SZ) + 1); +} + /* tsk_blocks(): translate a buffer size in bytes to number of * advertisable blocks, taking into account the ratio truesize(len)/len * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ @@ -831,6 +836,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, struct tipc_group *grp = tsk->group; struct tipc_nlist *dsts = tipc_group_dests(grp); struct tipc_mc_method *method = &tsk->mc_method; + int blks = tsk_blocks(MCAST_H_SIZE + dlen); struct tipc_msg *hdr = 
&tsk->phdr; int mtu = tipc_bcast_get_mtu(net); struct sk_buff_head pkts; @@ -839,14 +845,15 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, if (!dsts->local && !dsts->remote) return -EHOSTUNREACH; - /* Block or return if any destination link is congested */ - rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); + /* Block or return if any destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt && + !tipc_group_bc_cong(grp, blks)); if (unlikely(rc)) return rc; /* Complete message header */ msg_set_type(hdr, TIPC_GRP_BCAST_MSG); - msg_set_hdr_sz(hdr, MCAST_H_SIZE); + msg_set_hdr_sz(hdr, GROUP_H_SIZE); msg_set_destport(hdr, 0); msg_set_destnode(hdr, 0); msg_set_nameinst(hdr, 0); @@ -864,9 +871,8 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, if (unlikely(rc)) return rc; - /* Update broadcast sequence number */ - tipc_group_update_bc_members(tsk->group); - + /* Update broadcast sequence number and send windows */ + tipc_group_update_bc_members(tsk->group, blks); return dlen; } @@ -1024,7 +1030,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; - if (unlikely(grp)) + if (unlikely(grp && !dest)) return tipc_send_group_bcast(sock, m, dlen, timeout); if (unlikely(!dest)) { @@ -1420,6 +1426,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb; bool grp_evt; @@ -1436,8 +1443,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, } timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + /* Step rcv queue to first msg with data or error; wait if necessary */ do { - /* Look at first msg in receive queue; wait if necessary */ rc = tipc_wait_for_rcvmsg(sock, &timeout); if (unlikely(rc)) goto exit; @@ -1485,12 +1492,21 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(flags & MSG_PEEK)) goto exit; + /* Send group flow control advertisement when applicable */ + if (tsk->group && msg_in_group(hdr) && !grp_evt) { + skb_queue_head_init(&xmitq); + tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), + msg_orignode(hdr), msg_origport(hdr), + &xmitq); + tipc_node_distr_xmit(sock_net(sk), &xmitq); + } + tsk_advance_rx_queue(sk); if (likely(!connected)) goto exit; - /* Send connection flow control ack when applicable */ + /* Send connection flow control advertisement when applicable */ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) tipc_sk_send_ack(tsk); @@ -1650,6 +1666,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); struct tipc_group *grp = tsk->group; + bool wakeup = false; switch (msg_user(hdr)) { case CONN_MANAGER: @@ -1658,19 +1675,23 @@ static void tipc_sk_proto_rcv(struct sock *sk, case SOCK_WAKEUP: tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); tsk->cong_link_cnt--; - sk->sk_write_space(sk); + wakeup = true; break; case GROUP_PROTOCOL: - tipc_group_proto_rcv(grp, hdr, inputq, xmitq); + tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq); break; case TOP_SRV: - tipc_group_member_evt(tsk->group, skb, inputq, xmitq); + tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, + skb, inputq, xmitq); skb = NULL; 
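The advertisement arithmetic used here can be summarized as follows: both ends count traffic in FLOWCTL_BLK_SZ blocks, the sender charges each message against the member's window and blocks when it runs out, and the receiver decrements what it has advertised as data is read, topping the window back up towards ADV_ACTIVE via GRP_ADV_MSG once at least ADV_UNIT blocks have been consumed. A stand-alone model of that accounting (the ADV_UNIT value is an approximation and the structs are illustrative only):

#include <stdbool.h>
#include <stdio.h>

#define FLOWCTL_BLK_SZ	1024
#define ADV_UNIT	65	/* approx ((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1 */
#define ADV_ACTIVE	(ADV_UNIT * 12)

struct member {
	int window;		/* sender view: blocks still allowed */
	int advertised;		/* receiver view: blocks advertised  */
};

/* Bytes -> advertisable blocks, as in tsk_blocks() */
static int blocks(int len)
{
	return len / FLOWCTL_BLK_SZ + 1;
}

/* Sender side: charge a message against the member's window */
static bool tx_msg(struct member *m, int len)
{
	if (m->window < blocks(len))
		return false;		/* would block in tipc_wait_for_cond() */
	m->window -= blocks(len);
	return true;
}

/* Receiver side: consume a message; return the advertisement (if any)
 * that a GRP_ADV_MSG would carry back to the sender.
 */
static int rx_msg(struct member *m, int len)
{
	int adv = 0;

	m->advertised -= blocks(len);
	if (m->advertised <= ADV_ACTIVE - ADV_UNIT) {
		adv = ADV_ACTIVE - m->advertised;
		m->advertised += adv;
	}
	return adv;
}

int main(void)
{
	struct member tx = { .window = ADV_ACTIVE };
	struct member rx = { .advertised = ADV_ACTIVE };
	int len = 60000, sent = 0, i;

	while (tx_msg(&tx, len))	/* no feedback yet: sender stalls */
		sent++;
	printf("sent %d msgs, window down to %d blocks\n", sent, tx.window);

	for (i = 0; i < sent; i++)	/* receiver reads, advertisements return */
		tx.window += rx_msg(&rx, len);
	printf("after advertisements, can send again: %d\n", tx_msg(&tx, len));
	return 0;
}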
break; default: break; } + if (wakeup) + sk->sk_write_space(sk); + kfree_skb(skb); } @@ -1785,6 +1806,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); + if (unlikely(msg_in_group(hdr))) + return sk->sk_rcvbuf; + if (unlikely(!msg_connected(hdr))) return sk->sk_rcvbuf << msg_importance(hdr); From 27bd9ec027f396457d1a147043c92ff22fc4c71e Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:27 +0200 Subject: [PATCH 0932/2177] tipc: introduce group unicast messaging We now make it possible to send connectionless unicast messages within a communication group. To send a message, the sender can use either a direct port address, aka port identity, or an indirect port name to be looked up. This type of messages are subject to the same start synchronization and flow control mechanism as group broadcast messages. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/group.c | 45 ++++++++++++++++++- net/tipc/group.h | 3 ++ net/tipc/msg.h | 3 +- net/tipc/socket.c | 112 +++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 150 insertions(+), 13 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index b8ed70abba01..18440be5b5fc 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -186,6 +186,17 @@ struct tipc_member *tipc_group_find_member(struct tipc_group *grp, return NULL; } +static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp, + u32 node, u32 port) +{ + struct tipc_member *m; + + m = tipc_group_find_member(grp, node, port); + if (m && tipc_group_is_enabled(m)) + return m; + return NULL; +} + static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, u32 node) { @@ -318,9 +329,39 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len) grp->bc_snd_nxt++; } +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **mbr) +{ + struct sk_buff_head xmitq; + struct tipc_member *m; + int adv, state; + + m = tipc_group_find_dest(grp, dnode, dport); + *mbr = m; + if (!m) + return false; + if (m->usr_pending) + return true; + if (m->window >= len) + return false; + m->usr_pending = true; + + /* If not fully advertised, do it now to prevent mutual blocking */ + adv = m->advertised; + state = m->state; + if (state < MBR_JOINED) + return true; + if (state == MBR_JOINED && adv == ADV_IDLE) + return true; + skb_queue_head_init(&xmitq); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); + tipc_node_distr_xmit(grp->net, &xmitq); + return true; +} + bool tipc_group_bc_cong(struct tipc_group *grp, int len) { - struct tipc_member *m; + struct tipc_member *m = NULL; if (list_empty(&grp->congested)) return false; @@ -329,7 +370,7 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) if (m->window >= len) return false; - return true; + return tipc_group_cong(grp, m->node, m->port, len, &m); } /* tipc_group_filter_msg() - determine if we should accept arriving message diff --git a/net/tipc/group.h b/net/tipc/group.h index 0e2740e1da90..8f77290bb415 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -61,9 +61,12 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_update_bc_members(struct tipc_group *grp, int len); +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **m); bool tipc_group_bc_cong(struct tipc_group *grp, int 
len); void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); +void tipc_group_update_member(struct tipc_member *m, int len); int tipc_group_size(struct tipc_group *grp); #endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 237d007499f9..f5033f4a7951 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -67,6 +67,7 @@ struct plist; #define TIPC_DIRECT_MSG 3 #define TIPC_GRP_MEMBER_EVT 4 #define TIPC_GRP_BCAST_MSG 5 +#define TIPC_GRP_UCAST_MSG 6 /* * Internal message users @@ -261,7 +262,7 @@ static inline int msg_in_group(struct tipc_msg *m) { int mtyp = msg_type(m); - return (mtyp == TIPC_GRP_BCAST_MSG) || (mtyp == TIPC_GRP_MEMBER_EVT); + return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG; } static inline bool msg_is_grp_evt(struct tipc_msg *m) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 50145c95ac96..e71c8d23acb9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -817,6 +817,93 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, return rc ? rc : dlen; } +/** + * tipc_send_group_msg - send a message to a member in the group + * @net: network namespace + * @m: message to send + * @mb: group member + * @dnode: destination node + * @dport: destination port + * @dlen: total length of message data + */ +static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, + struct msghdr *m, struct tipc_member *mb, + u32 dnode, u32 dport, int dlen) +{ + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + struct sk_buff_head pkts; + int mtu, rc; + + /* Complete message header */ + msg_set_type(hdr, TIPC_GRP_UCAST_MSG); + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, dport); + msg_set_destnode(hdr, dnode); + + /* Build message as chain of buffers */ + skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { + tipc_dest_push(&tsk->cong_links, dnode, 0); + tsk->cong_link_cnt++; + } + + /* Update send window and sequence number */ + tipc_group_update_member(mb, blks); + + return dlen; +} + +/** + * tipc_send_group_unicast - send message to a member in the group + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct net *net = sock_net(sk); + struct tipc_member *mb = NULL; + u32 node, port; + int rc; + + node = dest->addr.id.node; + port = dest->addr.id.ref; + if (!port && !node) + return -EHOSTUNREACH; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(&tsk->cong_links, node, 0) && + !tipc_group_cong(grp, node, port, blks, &mb)); + if (unlikely(rc)) + return rc; + + if (unlikely(!mb)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, 
m, mb, node, port, dlen); + + return rc ? rc : dlen; +} + /** * tipc_send_group_bcast - send message to all members in communication group * @sk: socket structure @@ -1030,8 +1117,20 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; - if (unlikely(grp && !dest)) - return tipc_send_group_bcast(sock, m, dlen, timeout); + if (likely(dest)) { + if (unlikely(m->msg_namelen < sizeof(*dest))) + return -EINVAL; + if (unlikely(dest->family != AF_TIPC)) + return -EINVAL; + } + + if (grp) { + if (!dest) + return tipc_send_group_bcast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_ID) + return tipc_send_group_unicast(sock, m, dlen, timeout); + return -EINVAL; + } if (unlikely(!dest)) { dest = &tsk->peer; @@ -1039,12 +1138,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) return -EDESTADDRREQ; } - if (unlikely(m->msg_namelen < sizeof(*dest))) - return -EINVAL; - - if (unlikely(dest->family != AF_TIPC)) - return -EINVAL; - if (unlikely(syn)) { if (sk->sk_state == TIPC_LISTEN) return -EPIPE; @@ -1077,7 +1170,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) msg_set_destport(hdr, dport); if (unlikely(!dport && !dnode)) return -EHOSTUNREACH; - } else if (dest->addrtype == TIPC_ADDR_ID) { dnode = dest->addr.id.node; msg_set_type(hdr, TIPC_DIRECT_MSG); @@ -1846,7 +1938,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(!msg_isdata(hdr))) tipc_sk_proto_rcv(sk, &inputq, xmitq); - else if (unlikely(msg_type(hdr) > TIPC_GRP_BCAST_MSG)) + else if (unlikely(msg_type(hdr) > TIPC_GRP_UCAST_MSG)) return kfree_skb(skb); if (unlikely(grp)) From ee106d7f942dabce1352e01c6fe9ca4a720c2331 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:28 +0200 Subject: [PATCH 0933/2177] tipc: introduce group anycast messaging In this commit, we make it possible to send connectionless unicast messages to any member corresponding to the given member identity, when there is more than one such member. The sender must use a TIPC_ADDR_NAME address to achieve this effect. We also perform load balancing between the destinations, i.e., we primarily select one which has advertised sufficient send window to not cause a block/EAGAIN delay, if any. This mechanism is overlayed on the always present round-robin selection. Anycast messages are subject to the same start synchronization and flow control mechanism as group broadcast messages. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/group.c | 7 ++++ net/tipc/group.h | 3 ++ net/tipc/name_table.c | 41 +++++++++++++++++++++ net/tipc/name_table.h | 3 ++ net/tipc/socket.c | 84 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 138 insertions(+) diff --git a/net/tipc/group.c b/net/tipc/group.c index 18440be5b5fc..16aaaa97a005 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -116,6 +116,13 @@ static bool tipc_group_is_receiver(struct tipc_member *m) return m && m->state >= MBR_JOINED; } +u32 tipc_group_exclude(struct tipc_group *grp) +{ + if (!grp->loopback) + return grp->portid; + return 0; +} + int tipc_group_size(struct tipc_group *grp) { return grp->member_cnt; diff --git a/net/tipc/group.h b/net/tipc/group.h index 8f77290bb415..e432066a211e 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -49,6 +49,7 @@ void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port); struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, int *scope); +u32 tipc_group_exclude(struct tipc_group *grp); void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); @@ -68,5 +69,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); void tipc_group_update_member(struct tipc_member *m, int len); +struct tipc_member *tipc_group_find_sender(struct tipc_group *grp, + u32 node, u32 port); int tipc_group_size(struct tipc_group *grp); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 114d72bab827..2856e19e036e 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -597,6 +597,47 @@ not_found: return ref; } +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, + struct list_head *dsts, int *dstcnt, u32 exclude, + bool all) +{ + u32 self = tipc_own_addr(net); + struct publication *publ; + struct name_info *info; + struct name_seq *seq; + struct sub_seq *sseq; + + if (!tipc_in_scope(domain, self)) + return false; + + *dstcnt = 0; + rcu_read_lock(); + seq = nametbl_find_seq(net, type); + if (unlikely(!seq)) + goto exit; + spin_lock_bh(&seq->lock); + sseq = nameseq_find_subseq(seq, instance); + if (likely(sseq)) { + info = sseq->info; + list_for_each_entry(publ, &info->zone_list, zone_list) { + if (!tipc_in_scope(domain, publ->node)) + continue; + if (publ->ref == exclude && publ->node == self) + continue; + tipc_dest_push(dsts, publ->node, publ->ref); + (*dstcnt)++; + if (all) + continue; + list_move_tail(&publ->zone_list, &info->zone_list); + break; + } + } + spin_unlock_bh(&seq->lock); +exit: + rcu_read_unlock(); + return !list_empty(dsts); +} + int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, u32 limit, struct list_head *dports) { diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 97646b17a4a2..71926e429446 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -107,6 +107,9 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, u32 upper, u32 domain, struct tipc_nlist *nodes); +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, + struct list_head *dsts, int *dstcnt, u32 exclude, + bool all); struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, u32 upper, u32 scope, u32 port_ref, u32 key); diff --git 
a/net/tipc/socket.c b/net/tipc/socket.c index e71c8d23acb9..66165e12d2f4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -904,6 +904,88 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, return rc ? rc : dlen; } +/** + * tipc_send_group_anycast - send message to any member with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct list_head *cong_links = &tsk->cong_links; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_group *grp = tsk->group; + struct tipc_member *first = NULL; + struct tipc_member *mbr = NULL; + struct net *net = sock_net(sk); + u32 node, port, exclude; + u32 type, inst, domain; + struct list_head dsts; + int lookups = 0; + int dstcnt, rc; + bool cong; + + INIT_LIST_HEAD(&dsts); + + type = dest->addr.name.name.type; + inst = dest->addr.name.name.instance; + domain = addr_domain(net, dest->scope); + exclude = tipc_group_exclude(grp); + + while (++lookups < 4) { + first = NULL; + + /* Look for a non-congested destination member, if any */ + while (1) { + if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts, + &dstcnt, exclude, false)) + return -EHOSTUNREACH; + tipc_dest_pop(&dsts, &node, &port); + cong = tipc_group_cong(grp, node, port, blks, &mbr); + if (!cong) + break; + if (mbr == first) + break; + if (!first) + first = mbr; + } + + /* Start over if destination was not in member list */ + if (unlikely(!mbr)) + continue; + + if (likely(!cong && !tipc_dest_find(cong_links, node, 0))) + break; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(cong_links, node, 0) && + !tipc_group_cong(grp, node, port, + blks, &mbr)); + if (unlikely(rc)) + return rc; + + /* Send, unless destination disappeared while waiting */ + if (likely(mbr)) + break; + } + + if (unlikely(lookups >= 4)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen); + + return rc ? rc : dlen; +} + /** * tipc_send_group_bcast - send message to all members in communication group * @sk: socket structure @@ -1127,6 +1209,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (grp) { if (!dest) return tipc_send_group_bcast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_NAME) + return tipc_send_group_anycast(sock, m, dlen, timeout); if (dest->addrtype == TIPC_ADDR_ID) return tipc_send_group_unicast(sock, m, dlen, timeout); return -EINVAL; From 5b8dddb63769587badc50725ec9857caaeba4de0 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:29 +0200 Subject: [PATCH 0934/2177] tipc: introduce group multicast messaging The previously introduced message transport to all group members is based on the tipc multicast service, but is logically a broadcast service within the group, and that is what we call it. We now add functionality for sending messages to all group members having a certain identity. Correspondingly, we call this feature 'group multicast'. 
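From the application's point of view, such a group multicast is simply a send to a TIPC_ADDR_MCAST destination whose name sequence covers exactly one instance (lower == upper), as required by tipc_send_group_mcast() below. The following minimal userspace sketch assumes the AF_TIPC group API introduced with this series (TIPC_GROUP_JOIN, struct tipc_group_req); the type/instance values are arbitrary and error handling is omitted:

/* Join group 4711 as member instance 17, then send a group multicast
 * to every member published with instance 42.
 */
#include <linux/tipc.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sd = socket(AF_TIPC, SOCK_RDM, 0);
	struct tipc_group_req mreq = {
		.type = 4711,			/* group identity */
		.instance = 17,			/* own member identity */
		.scope = TIPC_CLUSTER_SCOPE,
	};
	struct sockaddr_tipc dst = {
		.family = AF_TIPC,
		.addrtype = TIPC_ADDR_MCAST,	/* group multicast */
		.scope = TIPC_CLUSTER_SCOPE,
		.addr.nameseq = { .type = 4711, .lower = 42, .upper = 42 },
	};
	char msg[] = "hello";

	setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &mreq, sizeof(mreq));
	sendto(sd, msg, sizeof(msg), 0, (struct sockaddr *)&dst, sizeof(dst));
	close(sd);
	return 0;
}
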
The service is using unicast when only one destination is found, otherwise it will use the bearer broadcast service to transfer the messages. In the latter case, the receiving members filter arriving messages by looking at the intended destination instance. If there is no match, the message will be dropped, while still being considered received and read as seen by the flow control mechanism. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/group.c | 14 +++++++++++- net/tipc/msg.h | 11 ++++++++-- net/tipc/socket.c | 56 +++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 16aaaa97a005..ffac2f33fce2 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -413,10 +413,22 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, if (!tipc_group_is_receiver(m)) goto drop; + m->bc_rcv_nxt = msg_grp_bc_seqno(hdr) + 1; + + /* Drop multicast here if not for this member */ + if (mtyp == TIPC_GRP_MCAST_MSG) { + if (msg_nameinst(hdr) != grp->instance) { + m->bc_rcv_nxt = msg_grp_bc_seqno(hdr) + 1; + tipc_group_update_rcv_win(grp, msg_blocks(hdr), + node, port, xmitq); + kfree_skb(skb); + return; + } + } + TIPC_SKB_CB(skb)->orig_member = m->instance; __skb_queue_tail(inputq, skb); - m->bc_rcv_nxt = msg_grp_bc_seqno(hdr) + 1; return; drop: kfree_skb(skb); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index f5033f4a7951..d6f98215267e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -67,7 +67,8 @@ struct plist; #define TIPC_DIRECT_MSG 3 #define TIPC_GRP_MEMBER_EVT 4 #define TIPC_GRP_BCAST_MSG 5 -#define TIPC_GRP_UCAST_MSG 6 +#define TIPC_GRP_MCAST_MSG 6 +#define TIPC_GRP_UCAST_MSG 7 /* * Internal message users @@ -195,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m) return msg_bits(m, 0, 0, 0x1ffff); } +static inline u32 msg_blocks(struct tipc_msg *m) +{ + return (msg_size(m) / 1024) + 1; +} + static inline u32 msg_data_sz(struct tipc_msg *m) { return msg_size(m) - msg_hdr_sz(m); @@ -279,7 +285,8 @@ static inline u32 msg_mcast(struct tipc_msg *m) { int mtyp = msg_type(m); - return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG)); + return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) || + (mtyp == TIPC_GRP_MCAST_MSG)); } static inline u32 msg_connected(struct tipc_msg *m) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 66165e12d2f4..8fdd969e12bd 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -999,6 +999,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); @@ -1021,11 +1022,16 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, return rc; /* Complete message header */ - msg_set_type(hdr, TIPC_GRP_BCAST_MSG); + if (dest) { + msg_set_type(hdr, TIPC_GRP_MCAST_MSG); + msg_set_nameinst(hdr, dest->addr.name.name.instance); + } else { + msg_set_type(hdr, TIPC_GRP_BCAST_MSG); + msg_set_nameinst(hdr, 0); + } msg_set_hdr_sz(hdr, GROUP_H_SIZE); msg_set_destport(hdr, 0); msg_set_destnode(hdr, 0); - msg_set_nameinst(hdr, 0); msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp)); /* Build message as chain of buffers */ @@ -1045,6 +1051,48 @@ static int tipc_send_group_bcast(struct socket *sock, struct 
msghdr *m, return dlen; } +/** + * tipc_send_group_mcast - send message to all members with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_name_seq *seq = &dest->addr.nameseq; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct net *net = sock_net(sk); + u32 domain, exclude, dstcnt; + struct list_head dsts; + + INIT_LIST_HEAD(&dsts); + + if (seq->lower != seq->upper) + return -ENOTSUPP; + + domain = addr_domain(net, dest->scope); + exclude = tipc_group_exclude(grp); + if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain, + &dsts, &dstcnt, exclude, true)) + return -EHOSTUNREACH; + + if (dstcnt == 1) { + tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref); + return tipc_send_group_unicast(sock, m, dlen, timeout); + } + + tipc_dest_list_purge(&dsts); + return tipc_send_group_bcast(sock, m, dlen, timeout); +} + /** * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets * @arrvq: queue with arriving messages, to be cloned after destination lookup @@ -1213,6 +1261,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) return tipc_send_group_anycast(sock, m, dlen, timeout); if (dest->addrtype == TIPC_ADDR_ID) return tipc_send_group_unicast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_MCAST) + return tipc_send_group_mcast(sock, m, dlen, timeout); return -EINVAL; } @@ -2022,8 +2072,6 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(!msg_isdata(hdr))) tipc_sk_proto_rcv(sk, &inputq, xmitq); - else if (unlikely(msg_type(hdr) > TIPC_GRP_UCAST_MSG)) - return kfree_skb(skb); if (unlikely(grp)) tipc_group_filter_msg(grp, &inputq, xmitq); From b87a5ea31c935a7f7e11ca85df2ec7917921e96d Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:30 +0200 Subject: [PATCH 0935/2177] tipc: guarantee group unicast doesn't bypass group broadcast Group unicast messages don't follow the same path as broadcast messages, and there is a high risk that unicasts sent from a socket might bypass previously sent broadcasts from the same socket. We fix this by letting all unicast messages carry the sequence number of the next sent broadcast from the same node, but without updating this number at the receiver. This way, a receiver can check and if necessary re-order such messages before they are added to the socket receive buffer. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/group.c | 89 ++++++++++++++++++++++++++++++++++++++--------- net/tipc/socket.c | 2 ++ 2 files changed, 75 insertions(+), 16 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index ffac2f33fce2..985e0ce32e8e 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -62,6 +62,7 @@ struct tipc_member { struct list_head list; struct list_head congested; struct sk_buff *event_msg; + struct sk_buff_head deferredq; struct tipc_group *group; u32 node; u32 port; @@ -253,6 +254,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, return NULL; INIT_LIST_HEAD(&m->list); INIT_LIST_HEAD(&m->congested); + __skb_queue_head_init(&m->deferredq); m->group = grp; m->node = node; m->port = port; @@ -380,29 +382,54 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) return tipc_group_cong(grp, m->node, m->port, len, &m); } +/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number + */ +static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq) +{ + struct tipc_msg *_hdr, *hdr = buf_msg(skb); + u16 bc_seqno = msg_grp_bc_seqno(hdr); + struct sk_buff *_skb, *tmp; + int mtyp = msg_type(hdr); + + /* Bcast may be bypassed by unicast, - sort it in */ + if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) { + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (!less(bc_seqno, msg_grp_bc_seqno(_hdr))) + continue; + __skb_queue_before(defq, _skb, skb); + return; + } + /* Bcast was not bypassed, - add to tail */ + } + /* Unicasts are never bypassed, - always add to tail */ + __skb_queue_tail(defq, skb); +} + /* tipc_group_filter_msg() - determine if we should accept arriving message */ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct sk_buff *skb = __skb_dequeue(inputq); + struct sk_buff_head *defq; struct tipc_member *m; struct tipc_msg *hdr; + bool deliver, update; u32 node, port; - int mtyp; + int mtyp, blks; if (!skb) return; hdr = buf_msg(skb); - mtyp = msg_type(hdr); node = msg_orignode(hdr); port = msg_origport(hdr); if (!msg_in_group(hdr)) goto drop; - if (mtyp == TIPC_GRP_MEMBER_EVT) { + if (msg_is_grp_evt(hdr)) { if (!grp->events) goto drop; __skb_queue_tail(inputq, skb); @@ -413,22 +440,52 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, if (!tipc_group_is_receiver(m)) goto drop; - m->bc_rcv_nxt = msg_grp_bc_seqno(hdr) + 1; - - /* Drop multicast here if not for this member */ - if (mtyp == TIPC_GRP_MCAST_MSG) { - if (msg_nameinst(hdr) != grp->instance) { - m->bc_rcv_nxt = msg_grp_bc_seqno(hdr) + 1; - tipc_group_update_rcv_win(grp, msg_blocks(hdr), - node, port, xmitq); - kfree_skb(skb); - return; - } - } + if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + goto drop; TIPC_SKB_CB(skb)->orig_member = m->instance; - __skb_queue_tail(inputq, skb); + defq = &m->deferredq; + tipc_group_sort_msg(skb, defq); + while ((skb = skb_peek(defq))) { + hdr = buf_msg(skb); + mtyp = msg_type(hdr); + deliver = true; + update = false; + + if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + break; + + /* Decide what to do with message */ + switch (mtyp) { + case TIPC_GRP_MCAST_MSG: + if (msg_nameinst(hdr) != grp->instance) { + update = true; + deliver = false; + } + /* Fall thru */ + case TIPC_GRP_BCAST_MSG: + m->bc_rcv_nxt++; + break; + case TIPC_GRP_UCAST_MSG: + break; + default: + break; + } + + /* Execute decisions */ + __skb_dequeue(defq); + if (deliver) + __skb_queue_tail(inputq, skb); + else + 
kfree_skb(skb); + + if (!update) + continue; + + blks = msg_blocks(hdr); + tipc_group_update_rcv_win(grp, blks, node, port, xmitq); + } return; drop: kfree_skb(skb); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8fdd969e12bd..3276b7a0d445 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -830,6 +830,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, struct msghdr *m, struct tipc_member *mb, u32 dnode, u32 dport, int dlen) { + u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group); int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_msg *hdr = &tsk->phdr; struct sk_buff_head pkts; @@ -840,6 +841,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, msg_set_hdr_sz(hdr, GROUP_H_SIZE); msg_set_destport(hdr, dport); msg_set_destnode(hdr, dnode); + msg_set_grp_bc_seqno(hdr, bc_snd_nxt); /* Build message as chain of buffers */ skb_queue_head_init(&pkts); From 2f487712b89376fce267223bbb0db93d393d4b09 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:31 +0200 Subject: [PATCH 0936/2177] tipc: guarantee that group broadcast doesn't bypass group unicast We need a mechanism guaranteeing that group unicasts sent out from a socket are not bypassed by later sent broadcasts from the same socket. We do this as follows: - Each time a unicast is sent, we set a the broadcast method for the socket to "replicast" and "mandatory". This forces the first subsequent broadcast message to follow the same network and data path as the preceding unicast to a destination, hence preventing it from overtaking the latter. - In order to make the 'same data path' statement above true, we let group unicasts pass through the multicast link input queue, instead of as previously through the unicast link input queue. - In the first broadcast following a unicast, we set a new header flag, requiring all recipients to immediately acknowledge its reception. - During the period before all the expected acknowledges are received, the socket refuses to accept any more broadcast attempts, i.e., by blocking or returning EAGAIN. This period should typically not be longer than a few microseconds. - When all acknowledges have been received, the sending socket will open up for subsequent broadcasts, this time giving the link layer freedom to itself select the best transmission method. - The forced and/or abrupt transmission method changes described above may lead to broadcasts arriving out of order to the recipients. We remedy this by introducing code that checks and if necessary re-orders such messages at the receiving end. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/group.c | 47 +++++++++++++++++++++++++++++++++++++++++------ net/tipc/group.h | 4 +--- net/tipc/link.c | 5 ++--- net/tipc/msg.h | 21 +++++++++++++++++++++ net/tipc/socket.c | 34 +++++++++++++++++++++++++++++----- 5 files changed, 94 insertions(+), 17 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 985e0ce32e8e..eab862e047dc 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -71,6 +71,7 @@ struct tipc_member { u16 advertised; u16 window; u16 bc_rcv_nxt; + u16 bc_acked; bool usr_pending; }; @@ -87,6 +88,7 @@ struct tipc_group { u32 portid; u16 member_cnt; u16 bc_snd_nxt; + u16 bc_ackers; bool loopback; bool events; }; @@ -258,6 +260,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->group = grp; m->node = node; m->port = port; + m->bc_acked = grp->bc_snd_nxt - 1; grp->member_cnt++; tipc_group_add_to_tree(grp, m); tipc_nlist_add(&grp->dests, m->node); @@ -275,6 +278,11 @@ static void tipc_group_delete_member(struct tipc_group *grp, { rb_erase(&m->tree_node, &grp->members); grp->member_cnt--; + + /* Check if we were waiting for replicast ack from this member */ + if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1)) + grp->bc_ackers--; + list_del_init(&m->list); list_del_init(&m->congested); @@ -325,16 +333,23 @@ void tipc_group_update_member(struct tipc_member *m, int len) list_add_tail(&m->congested, &grp->congested); } -void tipc_group_update_bc_members(struct tipc_group *grp, int len) +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) { + u16 prev = grp->bc_snd_nxt - 1; struct tipc_member *m; struct rb_node *n; for (n = rb_first(&grp->members); n; n = rb_next(n)) { m = container_of(n, struct tipc_member, tree_node); - if (tipc_group_is_enabled(m)) + if (tipc_group_is_enabled(m)) { tipc_group_update_member(m, len); + m->bc_acked = prev; + } } + + /* Mark number of acknowledges to expect, if any */ + if (ack) + grp->bc_ackers = grp->member_cnt; grp->bc_snd_nxt++; } @@ -372,6 +387,10 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) { struct tipc_member *m = NULL; + /* If prev bcast was replicast, reject until all receivers have acked */ + if (grp->bc_ackers) + return true; + if (list_empty(&grp->congested)) return false; @@ -391,7 +410,7 @@ static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq) struct sk_buff *_skb, *tmp; int mtyp = msg_type(hdr); - /* Bcast may be bypassed by unicast, - sort it in */ + /* Bcast may be bypassed by unicast or other bcast, - sort it in */ if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) { skb_queue_walk_safe(defq, _skb, tmp) { _hdr = buf_msg(_skb); @@ -412,10 +431,10 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct sk_buff *skb = __skb_dequeue(inputq); + bool ack, deliver, update; struct sk_buff_head *defq; struct tipc_member *m; struct tipc_msg *hdr; - bool deliver, update; u32 node, port; int mtyp, blks; @@ -451,6 +470,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, hdr = buf_msg(skb); mtyp = msg_type(hdr); deliver = true; + ack = false; update = false; if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) @@ -466,6 +486,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, /* Fall thru */ case TIPC_GRP_BCAST_MSG: m->bc_rcv_nxt++; + ack = msg_grp_bc_ack_req(hdr); break; case TIPC_GRP_UCAST_MSG: break; @@ -480,6 +501,9 @@ void tipc_group_filter_msg(struct tipc_group 
*grp, struct sk_buff_head *inputq, else kfree_skb(skb); + if (ack) + tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq); + if (!update) continue; @@ -540,6 +564,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, } else if (mtyp == GRP_ADV_MSG) { msg_set_adv_win(hdr, adv); m->advertised += adv; + } else if (mtyp == GRP_ACK_MSG) { + msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt); } __skb_queue_tail(xmitq, skb); } @@ -593,7 +619,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, } /* Otherwise deliver already received WITHDRAW event */ __skb_queue_tail(inputq, m->event_msg); - *usr_wakeup = m->usr_pending; + *usr_wakeup = true; tipc_group_delete_member(grp, m); list_del_init(&m->congested); return; @@ -605,6 +631,15 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, m->usr_pending = false; list_del_init(&m->congested); return; + case GRP_ACK_MSG: + if (!m) + return; + m->bc_acked = msg_grp_bc_acked(hdr); + if (--grp->bc_ackers) + break; + *usr_wakeup = true; + m->usr_pending = false; + return; default: pr_warn("Received unknown GROUP_PROTO message\n"); } @@ -678,7 +713,7 @@ void tipc_group_member_evt(struct tipc_group *grp, TIPC_SKB_CB(skb)->orig_member = m->instance; - *usr_wakeup = m->usr_pending; + *usr_wakeup = true; m->usr_pending = false; /* Hold back event if more messages might be expected */ diff --git a/net/tipc/group.h b/net/tipc/group.h index e432066a211e..d525e1cd7de5 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -61,7 +61,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); -void tipc_group_update_bc_members(struct tipc_group *grp, int len); +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, int len, struct tipc_member **m); bool tipc_group_bc_cong(struct tipc_group *grp, int len); @@ -69,7 +69,5 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); void tipc_group_update_member(struct tipc_member *m, int len); -struct tipc_member *tipc_group_find_sender(struct tipc_group *grp, - u32 node, u32 port); int tipc_group_size(struct tipc_group *grp); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index bd25bff63925..70a21499804d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1046,13 +1046,12 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: - if (unlikely(msg_mcast(hdr))) { + if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) { skb_queue_tail(l->bc_rcvlink->inputq, skb); return true; } - case CONN_MANAGER: case GROUP_PROTOCOL: - skb_queue_tail(inputq, skb); + case CONN_MANAGER: return true; case NAME_DISTRIBUTOR: l->bc_rcvlink->state = LINK_ESTABLISHED; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d6f98215267e..52c6a2e01995 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -547,6 +547,7 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) #define GRP_JOIN_MSG 0 #define GRP_LEAVE_MSG 1 #define GRP_ADV_MSG 2 +#define GRP_ACK_MSG 3 /* * Word 1 @@ -839,6 +840,16 @@ static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n) msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u16 msg_grp_bc_acked(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + 
+static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + /* Word 10 */ static inline u16 msg_grp_evt(struct tipc_msg *m) @@ -851,6 +862,16 @@ static inline void msg_set_grp_evt(struct tipc_msg *m, int n) msg_set_bits(m, 10, 0, 0x3, n); } +static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x1); +} + +static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n) +{ + msg_set_bits(m, 10, 0, 0x1, n); +} + static inline u16 msg_grp_bc_seqno(struct tipc_msg *m) { return msg_bits(m, 10, 16, 0xffff); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3276b7a0d445..b1f1c3c2b1e2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -831,6 +831,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, u32 dnode, u32 dport, int dlen) { u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group); + struct tipc_mc_method *method = &tsk->mc_method; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_msg *hdr = &tsk->phdr; struct sk_buff_head pkts; @@ -857,9 +858,12 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, tsk->cong_link_cnt++; } - /* Update send window and sequence number */ + /* Update send window */ tipc_group_update_member(mb, blks); + /* A broadcast sent within next EXPIRE period must follow same path */ + method->rcast = true; + method->mandatory = true; return dlen; } @@ -1008,6 +1012,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, struct tipc_group *grp = tsk->group; struct tipc_nlist *dsts = tipc_group_dests(grp); struct tipc_mc_method *method = &tsk->mc_method; + bool ack = method->mandatory && method->rcast; int blks = tsk_blocks(MCAST_H_SIZE + dlen); struct tipc_msg *hdr = &tsk->phdr; int mtu = tipc_bcast_get_mtu(net); @@ -1036,6 +1041,9 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, msg_set_destnode(hdr, 0); msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp)); + /* Avoid getting stuck with repeated forced replicasts */ + msg_set_grp_bc_ack_req(hdr, ack); + /* Build message as chain of buffers */ skb_queue_head_init(&pkts); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); @@ -1043,13 +1051,17 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, return rc; /* Send message */ - rc = tipc_mcast_xmit(net, &pkts, method, dsts, - &tsk->cong_link_cnt); + rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt); if (unlikely(rc)) return rc; /* Update broadcast sequence number and send windows */ - tipc_group_update_bc_members(tsk->group, blks); + tipc_group_update_bc_members(tsk->group, blks, ack); + + /* Broadcast link is now free to choose method for next broadcast */ + method->mandatory = false; + method->expires = jiffies; + return dlen; } @@ -1113,7 +1125,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, u32 portid, oport, onode; struct list_head dports; struct tipc_msg *msg; - int hsz; + int user, mtyp, hsz; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); @@ -1121,6 +1133,18 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { msg = buf_msg(skb); + user = msg_user(msg); + mtyp = msg_type(msg); + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + __skb_dequeue(arrvq); + __skb_queue_tail(inputq, skb); + } + refcount_dec(&skb->users); 
+ spin_unlock_bh(&inputq->lock); + continue; + } hsz = skb_headroom(skb) + msg_hdr_sz(msg); oport = msg_origport(msg); onode = msg_orignode(msg); From 399574d41963285e72ba28dd46783c96316a81d1 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:32 +0200 Subject: [PATCH 0937/2177] tipc: guarantee delivery of UP event before first broadcast The following scenario is possible: - A user joins a group, and immediately sends out a broadcast message to its members. - The broadcast message, following a different data path than the initial JOIN message sent out during the joining procedure, arrives to a receiver before the latter.. - The receiver drops the message, since it is not ready to accept any messages until the JOIN has arrived. We avoid this by treating group protocol JOIN messages like unicast messages. - We let them pass through the recipient's multicast input queue, just like ordinary unicasts. - We force the first following broadacst to be sent as replicated unicast and being acknowledged by the recipient before accepting any more broadcast transmissions. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 7 +++++-- net/tipc/socket.c | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 70a21499804d..723dd6998684 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1039,6 +1039,7 @@ int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker, static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { + struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq; struct tipc_msg *hdr = buf_msg(skb); switch (msg_user(hdr)) { @@ -1047,12 +1048,14 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) { - skb_queue_tail(l->bc_rcvlink->inputq, skb); + skb_queue_tail(mc_inputq, skb); return true; } - case GROUP_PROTOCOL: case CONN_MANAGER: return true; + case GROUP_PROTOCOL: + skb_queue_tail(mc_inputq, skb); + return true; case NAME_DISTRIBUTOR: l->bc_rcvlink->state = LINK_ESTABLISHED; skb_queue_tail(l->namedq, skb); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b1f1c3c2b1e2..2bbab4fe2f53 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2762,6 +2762,10 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) rc = tipc_sk_publish(tsk, mreq->scope, &seq); if (rc) tipc_group_delete(net, grp); + + /* Eliminate any risk that a broadcast overtakes the sent JOIN */ + tsk->mc_method.rcast = true; + tsk->mc_method.mandatory = true; return rc; } From a3bada70660fb020430135ec8a774ae1ea6bc9a9 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:33 +0200 Subject: [PATCH 0938/2177] tipc: guarantee delivery of last broadcast before DOWN event The following scenario is possible: - A user sends a broadcast message, and thereafter immediately leaves the group. - The LEAVE message, following a different path than the broadcast, arrives ahead of the broadcast, and the sending member is removed from the receiver's list. - The broadcast message arrives, but is dropped because the sender now is unknown to the receipient. We fix this by sequence numbering membership events, just like ordinary unicast messages. 
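The reordering described here and in the preceding commits relies on wraparound-safe comparison of 16-bit sequence numbers, which is what the less()/more() helpers used in the diffs above provide. A standalone illustration of the idiom (plain userspace C; this shows the general technique, not TIPC's actual helper definitions):

#include <stdint.h>
#include <stdio.h>

/* True if sequence number a was generated before b, even across the
 * 0xffff -> 0x0000 wrap, judged by the signed 16-bit distance.
 */
static int seqno_before(uint16_t a, uint16_t b)
{
	return (int16_t)(a - b) < 0;
}

int main(void)
{
	/* 0xfffe precedes 0x0001 although it is numerically larger */
	printf("%d\n", seqno_before(0xfffe, 0x0001));	/* prints 1 */
	printf("%d\n", seqno_before(0x0005, 0x0002));	/* prints 0 */
	return 0;
}

With such a comparison, a message whose broadcast seqno lies before the member's expected bc_rcv_nxt can be dropped as stale, while one that lies ahead of it is held back in the deferred queue; this is exactly how less() and more() are used in tipc_group_filter_msg() above.
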
Currently, when a JOIN is sent to a peer, it contains a synchronization point, - the sequence number of the next sent broadcast, in order to give the receiver a start synchronization point. We now let even LEAVE messages contain such an "end synchronization" point, so that the recipient can delay the removal of the sending member until it knows that all messages have been received. The received synchronization points are added as sequence numbers to the generated membership events, making it possible to handle them almost the same way as regular unicasts in the receiving filter function. In particular, a DOWN event with a too high sequence number will be kept in the reordering queue until the missing broadcast(s) arrive and have been delivered. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/group.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index eab862e047dc..8f0eb5d22e8f 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -71,6 +71,7 @@ struct tipc_member { u16 advertised; u16 window; u16 bc_rcv_nxt; + u16 bc_syncpt; u16 bc_acked; bool usr_pending; }; @@ -410,7 +411,7 @@ static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq) struct sk_buff *_skb, *tmp; int mtyp = msg_type(hdr); - /* Bcast may be bypassed by unicast or other bcast, - sort it in */ + /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */ if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) { skb_queue_walk_safe(defq, _skb, tmp) { _hdr = buf_msg(_skb); @@ -431,7 +432,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct sk_buff *skb = __skb_dequeue(inputq); - bool ack, deliver, update; + bool ack, deliver, update, leave = false; struct sk_buff_head *defq; struct tipc_member *m; struct tipc_msg *hdr; @@ -448,13 +449,6 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, if (!msg_in_group(hdr)) goto drop; - if (msg_is_grp_evt(hdr)) { - if (!grp->events) - goto drop; - __skb_queue_tail(inputq, skb); - return; - } - m = tipc_group_find_member(grp, node, port); if (!tipc_group_is_receiver(m)) goto drop; @@ -490,6 +484,12 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, break; case TIPC_GRP_UCAST_MSG: break; + case TIPC_GRP_MEMBER_EVT: + if (m->state == MBR_LEAVING) + leave = true; + if (!grp->events) + deliver = false; + break; default: break; } @@ -504,6 +504,11 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, if (ack) tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq); + if (leave) { + tipc_group_delete_member(grp, m); + __skb_queue_purge(defq); + break; + } if (!update) continue; @@ -561,6 +566,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); msg_set_adv_win(hdr, adv); m->advertised += adv; + } else if (mtyp == GRP_LEAVE_MSG) { + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); } else if (mtyp == GRP_ADV_MSG) { msg_set_adv_win(hdr, adv); m->advertised += adv; @@ -577,6 +584,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, u32 node = msg_orignode(hdr); u32 port = msg_origport(hdr); struct tipc_member *m; + struct tipc_msg *ehdr; if (!grp) return; @@ -590,7 +598,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, MBR_QUARANTINED); if (!m) return; 
- m->bc_rcv_nxt = msg_grp_bc_syncpt(hdr); + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + m->bc_rcv_nxt = m->bc_syncpt; m->window += msg_adv_win(hdr); /* Wait until PUBLISH event is received */ @@ -601,6 +610,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, *usr_wakeup = true; m->usr_pending = false; tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + ehdr = buf_msg(m->event_msg); + msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); __skb_queue_tail(inputq, m->event_msg); } if (m->window < ADV_IDLE) @@ -611,6 +622,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, case GRP_LEAVE_MSG: if (!m) return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); /* Wait until WITHDRAW event is received */ if (m->state != MBR_LEAVING) { @@ -618,9 +630,10 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, return; } /* Otherwise deliver already received WITHDRAW event */ + ehdr = buf_msg(m->event_msg); + msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); __skb_queue_tail(inputq, m->event_msg); *usr_wakeup = true; - tipc_group_delete_member(grp, m); list_del_init(&m->congested); return; case GRP_ADV_MSG: @@ -662,6 +675,7 @@ void tipc_group_member_evt(struct tipc_group *grp, int event = evt->event; struct tipc_member *m; struct net *net; + bool node_up; u32 self; if (!grp) @@ -695,6 +709,7 @@ void tipc_group_member_evt(struct tipc_group *grp, m->event_msg = skb; m->state = MBR_PUBLISHED; } else { + msg_set_grp_bc_seqno(hdr, m->bc_syncpt); __skb_queue_tail(inputq, skb); m->state = MBR_JOINED; *usr_wakeup = true; @@ -715,14 +730,18 @@ void tipc_group_member_evt(struct tipc_group *grp, *usr_wakeup = true; m->usr_pending = false; + node_up = tipc_node_is_up(net, node); /* Hold back event if more messages might be expected */ - if (m->state != MBR_LEAVING && tipc_node_is_up(net, node)) { + if (m->state != MBR_LEAVING && node_up) { m->event_msg = skb; m->state = MBR_LEAVING; } else { + if (node_up) + msg_set_grp_bc_seqno(hdr, m->bc_syncpt); + else + msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); __skb_queue_tail(inputq, skb); - tipc_group_delete_member(grp, m); } list_del_init(&m->congested); } From 04d7b574b245c66001a33cb9da2c0311063af73f Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 13 Oct 2017 11:04:34 +0200 Subject: [PATCH 0939/2177] tipc: add multipoint-to-point flow control We already have point-to-multipoint flow control within a group. But we even need the opposite; -a scheme which can handle that potentially hundreds of sources may try to send messages to the same destination simultaneously without causing buffer overflow at the recipient. This commit adds such a mechanism. The algorithm works as follows: - When a member detects a new, joining member, it initially set its state to JOINED and advertises a minimum window to the new member. This window is chosen so that the new member can send exactly one maximum sized message, or several smaller ones, to the recipient before it must stop and wait for an additional advertisement. This minimum window ADV_IDLE is set to 65 1kB blocks. - When a member receives the first data message from a JOINED member, it changes the state of the latter to ACTIVE, and advertises a larger window ADV_ACTIVE = 12 x ADV_IDLE blocks to the sender, so it can continue sending with minimal disturbances to the data flow. - The active members are kept in a dedicated linked list. Each time a message is received from an active member, it will be moved to the tail of that list. 
This way, we keep a record of which members have been most (tail) and least (head) recently active. - There is a maximum number (16) of permitted simultaneous active senders per receiver. When this limit is reached, the receiver will not advertise anything immediately to a new sender, but instead put it in a PENDING state, and add it to a corresponding queue. At the same time, it will pick the least recently active member, send it an advertisement RECLAIM message, and set this member to state RECLAIMING. - The reclaimee member has to respond with a REMIT message, meaning that it goes back to a send window of ADV_IDLE, and returns its unused advertised blocks beyond that value to the reclaiming member. - When the reclaiming member receives the REMIT message, it unlinks the reclaimee from its active list, resets its state to JOINED, and notes that it is now back at ADV_IDLE advertised blocks to that member. If there are still unread data messages sent out by reclaimee before the REMIT, the member goes into an intermediate state REMITTED, where it stays until the said messages have been consumed. - The returned advertised blocks can now be re-advertised to the pending member, which is now set to state ACTIVE and added to the active member list. - To be proactive, i.e., to minimize the risk that any member will end up in the pending queue, we start reclaiming resources already when the number of active members exceeds 3/4 of the permitted maximum. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/group.c | 131 ++++++++++++++++++++++++++++++++++++++++++++--- net/tipc/msg.h | 12 +++++ 2 files changed, 137 insertions(+), 6 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 8f0eb5d22e8f..7821085a7dd8 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -54,6 +54,10 @@ enum mbr_state { MBR_JOINING, MBR_PUBLISHED, MBR_JOINED, + MBR_PENDING, + MBR_ACTIVE, + MBR_RECLAIMING, + MBR_REMITTED, MBR_LEAVING }; @@ -79,6 +83,9 @@ struct tipc_member { struct tipc_group { struct rb_root members; struct list_head congested; + struct list_head pending; + struct list_head active; + struct list_head reclaiming; struct tipc_nlist dests; struct net *net; int subid; @@ -88,6 +95,8 @@ struct tipc_group { u32 scope; u32 portid; u16 member_cnt; + u16 active_cnt; + u16 max_active; u16 bc_snd_nxt; u16 bc_ackers; bool loopback; @@ -97,12 +106,29 @@ struct tipc_group { static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq); +static void tipc_group_decr_active(struct tipc_group *grp, + struct tipc_member *m) +{ + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING) + grp->active_cnt--; +} + static int tipc_group_rcvbuf_limit(struct tipc_group *grp) { + int max_active, active_pool, idle_pool; int mcnt = grp->member_cnt + 1; + /* Limit simultaneous reception from other members */ + max_active = min(mcnt / 8, 64); + max_active = max(max_active, 16); + grp->max_active = max_active; + + /* Reserve blocks for active and idle members */ + active_pool = max_active * ADV_ACTIVE; + idle_pool = (mcnt - max_active) * ADV_IDLE; + /* Scale to bytes, considering worst-case truesize/msgsize ratio */ - return mcnt * ADV_ACTIVE * FLOWCTL_BLK_SZ * 4; + return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4; } u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) @@ -143,6 +169,9 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, return NULL; tipc_nlist_init(&grp->dests, tipc_own_addr(net)); 
INIT_LIST_HEAD(&grp->congested); + INIT_LIST_HEAD(&grp->active); + INIT_LIST_HEAD(&grp->pending); + INIT_LIST_HEAD(&grp->reclaiming); grp->members = RB_ROOT; grp->net = net; grp->portid = portid; @@ -286,6 +315,7 @@ static void tipc_group_delete_member(struct tipc_group *grp, list_del_init(&m->list); list_del_init(&m->congested); + tipc_group_decr_active(grp, m); /* If last member on a node, remove node from dest list */ if (!tipc_group_find_node(grp, m->node)) @@ -378,6 +408,10 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, return true; if (state == MBR_JOINED && adv == ADV_IDLE) return true; + if (state == MBR_ACTIVE && adv == ADV_ACTIVE) + return true; + if (state == MBR_PENDING && adv == ADV_IDLE) + return true; skb_queue_head_init(&xmitq); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); tipc_node_distr_xmit(grp->net, &xmitq); @@ -523,7 +557,11 @@ drop: void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq) { - struct tipc_member *m; + struct list_head *active = &grp->active; + int max_active = grp->max_active; + int reclaim_limit = max_active * 3 / 4; + int active_cnt = grp->active_cnt; + struct tipc_member *m, *rm; m = tipc_group_find_member(grp, node, port); if (!m) @@ -533,9 +571,41 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, switch (m->state) { case MBR_JOINED: - if (m->advertised <= (ADV_ACTIVE - ADV_UNIT)) - tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + /* Reclaim advertised space from least active member */ + if (!list_empty(active) && active_cnt >= reclaim_limit) { + rm = list_first_entry(active, struct tipc_member, list); + rm->state = MBR_RECLAIMING; + list_move_tail(&rm->list, &grp->reclaiming); + tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq); + } + /* If max active, become pending and wait for reclaimed space */ + if (active_cnt >= max_active) { + m->state = MBR_PENDING; + list_add_tail(&m->list, &grp->pending); + break; + } + /* Otherwise become active */ + m->state = MBR_ACTIVE; + list_add_tail(&m->list, &grp->active); + grp->active_cnt++; + /* Fall through */ + case MBR_ACTIVE: + if (!list_is_last(&m->list, &grp->active)) + list_move_tail(&m->list, &grp->active); + if (m->advertised > (ADV_ACTIVE * 3 / 4)) + break; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); break; + case MBR_REMITTED: + if (m->advertised > ADV_IDLE) + break; + m->state = MBR_JOINED; + if (m->advertised < ADV_IDLE) { + pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } + break; + case MBR_RECLAIMING: case MBR_DISCOVERED: case MBR_JOINING: case MBR_LEAVING: @@ -557,8 +627,10 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, if (!skb) return; - if (m->state == MBR_JOINED) + if (m->state == MBR_ACTIVE) adv = ADV_ACTIVE - m->advertised; + else if (m->state == MBR_JOINED || m->state == MBR_PENDING) + adv = ADV_IDLE - m->advertised; hdr = buf_msg(skb); @@ -573,6 +645,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, m->advertised += adv; } else if (mtyp == GRP_ACK_MSG) { msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt); + } else if (mtyp == GRP_REMIT_MSG) { + msg_set_grp_remitted(hdr, m->window); } __skb_queue_tail(xmitq, skb); } @@ -583,8 +657,9 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, { u32 node = msg_orignode(hdr); u32 port = msg_origport(hdr); - struct tipc_member *m; + struct tipc_member *m, *pm; struct 
tipc_msg *ehdr; + u16 remitted, in_flight; if (!grp) return; @@ -626,6 +701,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, /* Wait until WITHDRAW event is received */ if (m->state != MBR_LEAVING) { + tipc_group_decr_active(grp, m); m->state = MBR_LEAVING; return; } @@ -653,6 +729,48 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, *usr_wakeup = true; m->usr_pending = false; return; + case GRP_RECLAIM_MSG: + if (!m) + return; + *usr_wakeup = m->usr_pending; + m->usr_pending = false; + tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); + m->window = ADV_IDLE; + return; + case GRP_REMIT_MSG: + if (!m || m->state != MBR_RECLAIMING) + return; + + list_del_init(&m->list); + grp->active_cnt--; + remitted = msg_grp_remitted(hdr); + + /* Messages preceding the REMIT still in receive queue */ + if (m->advertised > remitted) { + m->state = MBR_REMITTED; + in_flight = m->advertised - remitted; + } + /* All messages preceding the REMIT have been read */ + if (m->advertised <= remitted) { + m->state = MBR_JOINED; + in_flight = 0; + } + /* ..and the REMIT overtaken by more messages => re-advertise */ + if (m->advertised < remitted) + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + + m->advertised = ADV_IDLE + in_flight; + + /* Set oldest pending member to active and advertise */ + if (list_empty(&grp->pending)) + return; + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + if (pm->advertised <= (ADV_ACTIVE * 3 / 4)) + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + return; default: pr_warn("Received unknown GROUP_PROTO message\n"); } @@ -735,6 +853,7 @@ void tipc_group_member_evt(struct tipc_group *grp, /* Hold back event if more messages might be expected */ if (m->state != MBR_LEAVING && node_up) { m->event_msg = skb; + tipc_group_decr_active(grp, m); m->state = MBR_LEAVING; } else { if (node_up) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 52c6a2e01995..cedf811317fb 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -548,6 +548,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) #define GRP_LEAVE_MSG 1 #define GRP_ADV_MSG 2 #define GRP_ACK_MSG 3 +#define GRP_RECLAIM_MSG 4 +#define GRP_REMIT_MSG 5 /* * Word 1 @@ -850,6 +852,16 @@ static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n) msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u16 msg_grp_remitted(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + /* Word 10 */ static inline u16 msg_grp_evt(struct tipc_msg *m) From 4e8b86c062695454df0b76f3fee4fab8dc4bb716 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 7 Sep 2017 04:00:06 -0700 Subject: [PATCH 0940/2177] mqprio: Introduce new hardware offload mode and shaper in mqprio The offload types currently supported in mqprio are 0 (no offload) and 1 (offload only TCs) by setting these values for the 'hw' option. If offloads are supported by setting the 'hw' option to 1, the default offload mode is 'dcb' where only the TC values are offloaded to the device. This patch introduces a new hardware offload mode called 'channel' with 'hw' set to 1 in mqprio which makes full use of the mqprio options, the TCs, the queue configurations and the QoS parameters for the TCs. 
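Below is a driver-side sketch of how an ndo_setup_tc() handler might consume the tc_mqprio_qopt_offload structure that this patch introduces further down. The foo_*() helpers are hypothetical placeholders for device-specific programming; no real driver is being quoted:

#include <linux/netdevice.h>
#include <net/pkt_cls.h>

/* Hypothetical device-specific helpers, assumed to exist in the driver */
int foo_apply_tc_map(struct net_device *dev, struct tc_mqprio_qopt *qopt);
int foo_set_tc_rate(struct net_device *dev, int tc, u64 min_bps, u64 max_bps);

static int foo_setup_tc_mqprio(struct net_device *dev,
			       struct tc_mqprio_qopt_offload *mqprio)
{
	int i;

	/* num_tc == 0 is the teardown case (see mqprio_destroy() below) */
	if (!mqprio->qopt.num_tc)
		return foo_apply_tc_map(dev, &mqprio->qopt);

	/* Default 'dcb' mode: only the TC-to-queue mapping is offloaded */
	if (mqprio->mode == TC_MQPRIO_MODE_DCB)
		return foo_apply_tc_map(dev, &mqprio->qopt);

	if (mqprio->mode != TC_MQPRIO_MODE_CHANNEL)
		return -EOPNOTSUPP;

	/* 'channel' mode: queue counts/offsets plus optional per-TC rates */
	for (i = 0; i < mqprio->qopt.num_tc; i++) {
		u64 min = 0, max = 0;

		if (mqprio->shaper == TC_MQPRIO_SHAPER_BW_RATE) {
			if (mqprio->flags & TC_MQPRIO_F_MIN_RATE)
				min = mqprio->min_rate[i];
			if (mqprio->flags & TC_MQPRIO_F_MAX_RATE)
				max = mqprio->max_rate[i];
		}
		foo_set_tc_rate(dev, i, min, max);
	}
	return foo_apply_tc_map(dev, &mqprio->qopt);
}
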
This is achieved through a new netlink attribute for the 'mode' option which takes values such as 'dcb' (default) and 'channel'. The 'channel' mode also supports QoS attributes for traffic class such as minimum and maximum values for bandwidth rate limits. This patch enables configuring additional HW shaper attributes associated with a traffic class. Currently the shaper for bandwidth rate limiting is supported which takes options such as minimum and maximum bandwidth rates and are offloaded to the hardware in the 'channel' mode. The min and max limits for bandwidth rates are provided by the user along with the TCs and the queue configurations when creating the mqprio qdisc. The interface can be extended to support new HW shapers in future through the 'shaper' attribute. Introduces a new data structure 'tc_mqprio_qopt_offload' for offloading mqprio queue options and use this to be shared between the kernel and device driver. This contains a copy of the existing data structure for mqprio queue options. This new data structure can be extended when adding new attributes for traffic class such as mode, shaper, shaper parameters (bandwidth rate limits). The existing data structure for mqprio queue options will be shared between the kernel and userspace. Example: queues 4@0 4@4 hw 1 mode channel shaper bw_rlimit\ min_rate 1Gbit 2Gbit max_rate 4Gbit 5Gbit To dump the bandwidth rates: qdisc mqprio 804a: root tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 queues:(0:3) (4:7) mode:channel shaper:bw_rlimit min_rate:1Gbit 2Gbit max_rate:4Gbit 5Gbit Signed-off-by: Amritha Nambiar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/net/pkt_cls.h | 9 ++ include/uapi/linux/pkt_sched.h | 32 ++++++ net/sched/sch_mqprio.c | 183 +++++++++++++++++++++++++++++++-- 3 files changed, 215 insertions(+), 9 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f5263743076b..60d39789e4f0 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -546,6 +546,15 @@ struct tc_cls_bpf_offload { u32 gen_flags; }; +struct tc_mqprio_qopt_offload { + /* struct tc_mqprio_qopt must always be the first element */ + struct tc_mqprio_qopt qopt; + u16 mode; + u16 shaper; + u32 flags; + u64 min_rate[TC_QOPT_MAX_QUEUE]; + u64 max_rate[TC_QOPT_MAX_QUEUE]; +}; /* This structure holds cookie structure that is passed from user * to the kernel for actions and classifiers diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 099bf5528fed..e95b5c9b9fad 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -625,6 +625,22 @@ enum { #define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) +enum { + TC_MQPRIO_MODE_DCB, + TC_MQPRIO_MODE_CHANNEL, + __TC_MQPRIO_MODE_MAX +}; + +#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1) + +enum { + TC_MQPRIO_SHAPER_DCB, + TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */ + __TC_MQPRIO_SHAPER_MAX +}; + +#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) + struct tc_mqprio_qopt { __u8 num_tc; __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; @@ -633,6 +649,22 @@ struct tc_mqprio_qopt { __u16 offset[TC_QOPT_MAX_QUEUE]; }; +#define TC_MQPRIO_F_MODE 0x1 +#define TC_MQPRIO_F_SHAPER 0x2 +#define TC_MQPRIO_F_MIN_RATE 0x4 +#define TC_MQPRIO_F_MAX_RATE 0x8 + +enum { + TCA_MQPRIO_UNSPEC, + TCA_MQPRIO_MODE, + TCA_MQPRIO_SHAPER, + TCA_MQPRIO_MIN_RATE64, + TCA_MQPRIO_MAX_RATE64, + __TCA_MQPRIO_MAX, +}; + +#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1) + /* SFB */ enum { diff --git a/net/sched/sch_mqprio.c 
b/net/sched/sch_mqprio.c index 6bcdfe6e7b63..f1ae9be83934 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -18,10 +18,16 @@ #include #include #include +#include struct mqprio_sched { struct Qdisc **qdiscs; + u16 mode; + u16 shaper; int hw_offload; + u32 flags; + u64 min_rate[TC_QOPT_MAX_QUEUE]; + u64 max_rate[TC_QOPT_MAX_QUEUE]; }; static void mqprio_destroy(struct Qdisc *sch) @@ -39,9 +45,17 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { - struct tc_mqprio_qopt mqprio = {}; + struct tc_mqprio_qopt_offload mqprio = { { 0 } }; - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio); + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + case TC_MQPRIO_MODE_CHANNEL: + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, + &mqprio); + break; + default: + return; + } } else { netdev_set_num_tc(dev, 0); } @@ -97,6 +111,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) return 0; } +static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = { + [TCA_MQPRIO_MODE] = { .len = sizeof(u16) }, + [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) }, + [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED }, + [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED }, +}; + +static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy, int len) +{ + int nested_len = nla_len(nla) - NLA_ALIGN(len); + + if (nested_len >= nla_attr_size(0)) + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) { struct net_device *dev = qdisc_dev(sch); @@ -105,6 +139,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; + struct nlattr *tb[TCA_MQPRIO_MAX + 1]; + struct nlattr *attr; + int rem; + int len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); @@ -122,6 +160,58 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (mqprio_parse_opt(dev, qopt)) return -EINVAL; + if (len > 0) { + err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, + sizeof(*qopt)); + if (err < 0) + return err; + + if (!qopt->hw) + return -EINVAL; + + if (tb[TCA_MQPRIO_MODE]) { + priv->flags |= TC_MQPRIO_F_MODE; + priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); + } + + if (tb[TCA_MQPRIO_SHAPER]) { + priv->flags |= TC_MQPRIO_F_SHAPER; + priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); + } + + if (tb[TCA_MQPRIO_MIN_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return -EINVAL; + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) + return -EINVAL; + if (i >= qopt->num_tc) + break; + priv->min_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MIN_RATE; + } + + if (tb[TCA_MQPRIO_MAX_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return -EINVAL; + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) + return -EINVAL; + if (i >= qopt->num_tc) + break; + priv->max_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MAX_RATE; + } + } + /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, 
sizeof(priv->qdiscs[0]), GFP_KERNEL); @@ -146,14 +236,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { - struct tc_mqprio_qopt mqprio = *qopt; + struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt}; - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + if (priv->shaper != TC_MQPRIO_SHAPER_DCB) + return -EINVAL; + break; + case TC_MQPRIO_MODE_CHANNEL: + mqprio.flags = priv->flags; + if (priv->flags & TC_MQPRIO_F_MODE) + mqprio.mode = priv->mode; + if (priv->flags & TC_MQPRIO_F_SHAPER) + mqprio.shaper = priv->shaper; + if (priv->flags & TC_MQPRIO_F_MIN_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.min_rate[i] = priv->min_rate[i]; + if (priv->flags & TC_MQPRIO_F_MAX_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.max_rate[i] = priv->max_rate[i]; + break; + default: + return -EINVAL; + } + err = dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_MQPRIO, &mqprio); if (err) return err; - priv->hw_offload = mqprio.hw; + priv->hw_offload = mqprio.qopt.hw; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -223,11 +335,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, return 0; } +static int dump_rates(struct mqprio_sched *priv, + struct tc_mqprio_qopt *opt, struct sk_buff *skb) +{ + struct nlattr *nest; + int i; + + if (priv->flags & TC_MQPRIO_F_MIN_RATE) { + nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < opt->num_tc; i++) { + if (nla_put(skb, TCA_MQPRIO_MIN_RATE64, + sizeof(priv->min_rate[i]), + &priv->min_rate[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + } + + if (priv->flags & TC_MQPRIO_F_MAX_RATE) { + nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < opt->num_tc; i++) { + if (nla_put(skb, TCA_MQPRIO_MAX_RATE64, + sizeof(priv->max_rate[i]), + &priv->max_rate[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + } + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb); struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; unsigned int i; @@ -258,12 +410,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) opt.offset[i] = dev->tc_to_txq[i].offset; } - if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) + if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt)) goto nla_put_failure; - return skb->len; + if ((priv->flags & TC_MQPRIO_F_MODE) && + nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode)) + goto nla_put_failure; + + if ((priv->flags & TC_MQPRIO_F_SHAPER) && + nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper)) + goto nla_put_failure; + + if ((priv->flags & TC_MQPRIO_F_MIN_RATE || + priv->flags & TC_MQPRIO_F_MAX_RATE) && + (dump_rates(priv, &opt, skb) != 0)) + goto nla_put_failure; + + return nla_nest_end(skb, nla); nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_trim(skb, nla); return -1; } From ff4241881232265dbc237591901116fe272967d6 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 7 Sep 2017 04:00:11 -0700 Subject: [PATCH 0941/2177] i40e: Add macro for PF reset bit Introduce a macro for the bit setting the PF reset flag and update its 
usages. This makes it easier to use this flag in functions to be introduced in future without encountering checkpatch issues related to alignment and line over 80 characters. Signed-off-by: Amritha Nambiar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 ++ drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 3 +-- drivers/net/ethernet/intel/i40e/i40e_main.c | 9 ++++----- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 ++--- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 8139b4ee1dc3..e7c7a853cf7f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -157,6 +157,8 @@ enum i40e_state_t { __I40E_STATE_SIZE__, }; +#define I40E_PF_RESET_FLAG BIT_ULL(__I40E_PF_RESET_REQUESTED) + /* VSI state flags */ enum i40e_vsi_state_t { __I40E_VSI_DOWN, diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 6f2725fc50a1..2b8bbc84e34f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -798,8 +798,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, */ if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, - BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4de52001a2b9..6190257eecfe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5747,7 +5747,7 @@ err_setup_rx: err_setup_tx: i40e_vsi_free_tx_resources(vsi); if (vsi == pf->vsi[pf->lan_vsi]) - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true); + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); return err; } @@ -5875,7 +5875,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) wr32(&pf->hw, I40E_GLGEN_RTRIG, val); i40e_flush(&pf->hw); - } else if (reset_flags & BIT_ULL(__I40E_PF_RESET_REQUESTED)) { + } else if (reset_flags & I40E_PF_RESET_FLAG) { /* Request a PF Reset * @@ -9223,7 +9223,7 @@ static int i40e_set_features(struct net_device *netdev, need_reset = i40e_set_ntuple(pf, features); if (need_reset) - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true); + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); return 0; } @@ -9475,8 +9475,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; else pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), - true); + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); break; } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 0c4fa225c7be..e7f98e306554 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1425,8 +1425,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) { if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, - BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } return i40e_pci_sriov_enable(pdev, num_vfs); } @@ -1434,7 +1433,7 @@ int i40e_pci_sriov_configure(struct 
pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pf->pdev)) { i40e_free_vfs(pf); pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); return -EINVAL; From 8f88b3034db3be2eb600b9f57012bc63f1ea197f Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 7 Sep 2017 04:00:17 -0700 Subject: [PATCH 0942/2177] i40e: Add infrastructure for queue channel support This patch sets up the infrastructure for offloading TCs and queue configurations to the hardware by creating HW channels(VSI). A new channel is created for each of the traffic class configuration offloaded via mqprio framework except for the first TC (TC0). TC0 for the main VSI is also reconfigured as per user provided queue parameters. Queue counts that are not power-of-2 are handled by reconfiguring RSS by reprogramming LUTs using the queue count value. This patch also handles configuring the TX rings for the channels, setting up the RX queue map for channel. Also, the channels so created are removed and all the queue configuration is set to default when the qdisc is detached from the root of the device. Signed-off-by: Amritha Nambiar Signed-off-by: Kiran Patil Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 32 + drivers/net/ethernet/intel/i40e/i40e_main.c | 718 +++++++++++++++++++- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 + 3 files changed, 743 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index e7c7a853cf7f..bde982541772 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -87,6 +87,7 @@ #define I40E_AQ_LEN 256 #define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ #define I40E_MAX_USER_PRIORITY 8 +#define I40E_MAX_QUEUES_PER_CH 64 #define I40E_DEFAULT_TRAFFIC_CLASS BIT(0) #define I40E_DEFAULT_MSG_ENABLE 4 #define I40E_QUEUE_WAIT_RETRY_LIMIT 10 @@ -340,6 +341,23 @@ struct i40e_flex_pit { u8 pit_index; }; +struct i40e_channel { + struct list_head list; + bool initialized; + u8 type; + u16 vsi_number; /* Assigned VSI number from AQ 'Add VSI' response */ + u16 stat_counter_idx; + u16 base_queue; + u16 num_queue_pairs; /* Requested by user */ + u16 seid; + + u8 enabled_tc; + struct i40e_aqc_vsi_properties_data info; + + /* track this channel belongs to which VSI */ + struct i40e_vsi *parent_vsi; +}; + /* struct that defines the Ethernet device */ struct i40e_pf { struct pci_dev *pdev; @@ -456,6 +474,7 @@ struct i40e_pf { #define I40E_FLAG_CLIENT_RESET BIT(26) #define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(27) #define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(28) +#define I40E_FLAG_TC_MQPRIO BIT(29) struct i40e_client_instance *cinst; bool stat_offsets_loaded; @@ -536,6 +555,8 @@ struct i40e_pf { u32 ioremap_len; u32 fd_inv; u16 phy_led_val; + + u16 override_q_count; }; /** @@ -700,6 +721,15 @@ struct i40e_vsi { bool current_isup; /* Sync 'link up' logging */ enum i40e_aq_link_speed current_speed; /* Sync link speed logging */ + /* channel specific fields */ + u16 cnt_q_avail; /* num of queues available for channel usage */ + u16 orig_rss_size; + u16 current_rss_size; + + u16 next_base_queue; /* next queue to be used for channel setup */ + + struct list_head ch_list; + void *priv; /* client driver data reference. 
*/ /* VSI specific handlers */ @@ -1004,4 +1034,6 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) { return !!vsi->xdp_prog; } + +int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6190257eecfe..e23105bee6d1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2881,7 +2881,7 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) { int cpu; - if (!ring->q_vector || !ring->netdev) + if (!ring->q_vector || !ring->netdev || ring->ch) return; /* We only initialize XPS once, so as not to overwrite user settings */ @@ -2944,7 +2944,14 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) * initialization. This has to be done regardless of * DCB as by default everything is mapped to TC0. */ - tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]); + + if (ring->ch) + tx_ctx.rdylist = + le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]); + + else + tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]); + tx_ctx.rdylist_act = 0; /* clear the context in the HMC */ @@ -2966,12 +2973,23 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) } /* Now associate this queue with this PCI function */ - if (vsi->type == I40E_VSI_VMDQ2) { - qtx_ctl = I40E_QTX_CTL_VM_QUEUE; - qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) & - I40E_QTX_CTL_VFVM_INDX_MASK; + if (ring->ch) { + if (ring->ch->type == I40E_VSI_VMDQ2) + qtx_ctl = I40E_QTX_CTL_VM_QUEUE; + else + return -EINVAL; + + qtx_ctl |= (ring->ch->vsi_number << + I40E_QTX_CTL_VFVM_INDX_SHIFT) & + I40E_QTX_CTL_VFVM_INDX_MASK; } else { - qtx_ctl = I40E_QTX_CTL_PF_QUEUE; + if (vsi->type == I40E_VSI_VMDQ2) { + qtx_ctl = I40E_QTX_CTL_VM_QUEUE; + qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) & + I40E_QTX_CTL_VFVM_INDX_MASK; + } else { + qtx_ctl = I40E_QTX_CTL_PF_QUEUE; + } } qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & @@ -5161,6 +5179,643 @@ out: return ret; } +/** + * i40e_remove_queue_channels - Remove queue channels for the TCs + * @vsi: VSI to be configured + * + * Remove queue channels for the TCs + **/ +static void i40e_remove_queue_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int ret, i; + + /* Reset rss size that was stored when reconfiguring rss for + * channel VSIs with non-power-of-2 queue count. 
+ */ + vsi->current_rss_size = 0; + + /* perform cleanup for channels if they exist */ + if (list_empty(&vsi->ch_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + struct i40e_vsi *p_vsi; + + list_del(&ch->list); + p_vsi = ch->parent_vsi; + if (!p_vsi || !ch->initialized) { + kfree(ch); + continue; + } + /* Reset queue contexts */ + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = NULL; + + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = NULL; + } + + /* delete VSI from FW */ + ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, + NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "unable to remove channel (%d) for parent VSI(%d)\n", + ch->seid, p_vsi->seid); + kfree(ch); + } + INIT_LIST_HEAD(&vsi->ch_list); +} + +/** + * i40e_is_any_channel - channel exist or not + * @vsi: ptr to VSI to which channels are associated with + * + * Returns true or false if channel(s) exist for associated VSI or not + **/ +static bool i40e_is_any_channel(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + if (ch->initialized) + return true; + } + + return false; +} + +/** + * i40e_get_max_queues_for_channel + * @vsi: ptr to VSI to which channels are associated with + * + * Helper function which returns max value among the queue counts set on the + * channels/TCs created. + **/ +static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int max = 0; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + if (!ch->initialized) + continue; + if (ch->num_queue_pairs > max) + max = ch->num_queue_pairs; + } + + return max; +} + +/** + * i40e_validate_num_queues - validate num_queues w.r.t channel + * @pf: ptr to PF device + * @num_queues: number of queues + * @vsi: the parent VSI + * @reconfig_rss: indicates should the RSS be reconfigured or not + * + * This function validates number of queues in the context of new channel + * which is being established and determines if RSS should be reconfigured + * or not for parent VSI. + **/ +static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues, + struct i40e_vsi *vsi, bool *reconfig_rss) +{ + int max_ch_queues; + + if (!reconfig_rss) + return -EINVAL; + + *reconfig_rss = false; + + if (num_queues > I40E_MAX_QUEUES_PER_CH) { + dev_err(&pf->pdev->dev, + "Failed to create VMDq VSI. User requested num_queues (%d) > I40E_MAX_QUEUES_PER_VSI (%u)\n", + num_queues, I40E_MAX_QUEUES_PER_CH); + return -EINVAL; + } + + if (vsi->current_rss_size) { + if (num_queues > vsi->current_rss_size) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) > vsi's current_size(%d)\n", + num_queues, vsi->current_rss_size); + return -EINVAL; + } else if ((num_queues < vsi->current_rss_size) && + (!is_power_of_2(num_queues))) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n", + num_queues, vsi->current_rss_size); + return -EINVAL; + } + } + + if (!is_power_of_2(num_queues)) { + /* Find the max num_queues configured for channel if channel + * exist. + * if channel exist, then enforce 'num_queues' to be more than + * max ever queues configured for channel. 
+ */ + max_ch_queues = i40e_get_max_queues_for_channel(vsi); + if (num_queues < max_ch_queues) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) < max queues configured for channel(%d)\n", + num_queues, max_ch_queues); + return -EINVAL; + } + *reconfig_rss = true; + } + + return 0; +} + +/** + * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size + * @vsi: the VSI being setup + * @rss_size: size of RSS, accordingly LUT gets reprogrammed + * + * This function reconfigures RSS by reprogramming LUTs using 'rss_size' + **/ +static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size) +{ + struct i40e_pf *pf = vsi->back; + u8 seed[I40E_HKEY_ARRAY_SIZE]; + struct i40e_hw *hw = &pf->hw; + int local_rss_size; + u8 *lut; + int ret; + + if (!vsi->rss_size) + return -EINVAL; + + if (rss_size > vsi->rss_size) + return -EINVAL; + + local_rss_size = min_t(int, vsi->rss_size, rss_size); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Ignoring user configured lut if there is one */ + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size); + + /* Use user configured hash key if there is one, otherwise + * use default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + + ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + kfree(lut); + return ret; + } + kfree(lut); + + /* Do the update w.r.t. storing rss_size */ + if (!vsi->orig_rss_size) + vsi->orig_rss_size = vsi->rss_size; + vsi->current_rss_size = local_rss_size; + + return ret; +} + +/** + * i40e_channel_setup_queue_map - Setup a channel queue map + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ctxt: VSI context structure + * @ch: ptr to channel structure + * + * Setup queue map for a specific channel + **/ +static void i40e_channel_setup_queue_map(struct i40e_pf *pf, + struct i40e_vsi_context *ctxt, + struct i40e_channel *ch) +{ + u16 qcount, qmap, sections = 0; + u8 offset = 0; + int pow; + + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + + qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix); + ch->num_queue_pairs = qcount; + + /* find the next higher power-of-2 of num queue pairs */ + pow = ilog2(qcount); + if (!is_power_of_2(qcount)) + pow++; + + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + + ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */ + ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue); + ctxt->info.valid_sections |= cpu_to_le16(sections); +} + +/** + * i40e_add_channel - add a channel by adding VSI + * @pf: ptr to PF device + * @uplink_seid: underlying HW switching element (VEB) ID + * @ch: ptr to channel structure + * + * Add a channel (VSI) using add_vsi and queue_map + **/ +static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, + struct i40e_channel *ch) +{ + struct i40e_hw *hw = &pf->hw; + struct i40e_vsi_context ctxt; + u8 enabled_tc = 0x1; /* TC0 enabled */ + int ret; + + if (ch->type != I40E_VSI_VMDQ2) { + dev_info(&pf->pdev->dev, + "add new vsi failed, ch->type %d\n", ch->type); + return 
-EINVAL; + } + + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.pf_num = hw->pf_id; + ctxt.vf_num = 0; + ctxt.uplink_seid = uplink_seid; + ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; + if (ch->type == I40E_VSI_VMDQ2) + ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2; + + if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) { + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id = + cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); + } + + /* Set queue map for a given VSI context */ + i40e_channel_setup_queue_map(pf, &ctxt, ch); + + /* Now time to create VSI */ + ret = i40e_aq_add_vsi(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "add new vsi failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return -ENOENT; + } + + /* Success, update channel */ + ch->enabled_tc = enabled_tc; + ch->seid = ctxt.seid; + ch->vsi_number = ctxt.vsi_number; + ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx); + + /* copy just the sections touched not the entire info + * since not all sections are valid as returned by + * update vsi params + */ + ch->info.mapping_flags = ctxt.info.mapping_flags; + memcpy(&ch->info.queue_mapping, + &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping)); + memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping, + sizeof(ctxt.info.tc_mapping)); + + return 0; +} + +static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch, + u8 *bw_share) +{ + struct i40e_aqc_configure_vsi_tc_bw_data bw_data; + i40e_status ret; + int i; + + bw_data.tc_valid_bits = ch->enabled_tc; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + bw_data.tc_bw_credits[i] = bw_share[i]; + + ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid, + &bw_data, NULL); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n", + vsi->back->hw.aq.asq_last_status, ch->seid); + return -EINVAL; + } + + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + ch->info.qs_handle[i] = bw_data.qs_handles[i]; + + return 0; +} + +/** + * i40e_channel_config_tx_ring - config TX ring associated with new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Configure TX rings associated with channel (VSI) since queues are being + * from parent VSI. 
+ **/ +static int i40e_channel_config_tx_ring(struct i40e_pf *pf, + struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + i40e_status ret; + int i; + u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0}; + + /* Enable ETS TCs with equal BW Share for now across all VSIs */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (ch->enabled_tc & BIT(i)) + bw_share[i] = 1; + } + + /* configure BW for new VSI */ + ret = i40e_channel_config_bw(vsi, ch, bw_share); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed configuring TC map %d for channel (seid %u)\n", + ch->enabled_tc, ch->seid); + return ret; + } + + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + + /* Get to TX ring ptr of main VSI, for re-setup TX queue + * context + */ + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = ch; + + /* Get the RX ring ptr */ + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = ch; + } + + return 0; +} + +/** + * i40e_setup_hw_channel - setup new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * @uplink_seid: underlying HW switching element (VEB) ID + * @type: type of channel to be created (VMDq2/VF) + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and configures TX rings accordingly + **/ +static inline int i40e_setup_hw_channel(struct i40e_pf *pf, + struct i40e_vsi *vsi, + struct i40e_channel *ch, + u16 uplink_seid, u8 type) +{ + int ret; + + ch->initialized = false; + ch->base_queue = vsi->next_base_queue; + ch->type = type; + + /* Proceed with creation of channel (VMDq2) VSI */ + ret = i40e_add_channel(pf, uplink_seid, ch); + if (ret) { + dev_info(&pf->pdev->dev, + "failed to add_channel using uplink_seid %u\n", + uplink_seid); + return ret; + } + + /* Mark the successful creation of channel */ + ch->initialized = true; + + /* Reconfigure TX queues using QTX_CTL register */ + ret = i40e_channel_config_tx_ring(pf, vsi, ch); + if (ret) { + dev_info(&pf->pdev->dev, + "failed to configure TX rings for channel %u\n", + ch->seid); + return ret; + } + + /* update 'next_base_queue' */ + vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs; + dev_dbg(&pf->pdev->dev, + "Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n", + ch->seid, ch->vsi_number, ch->stat_counter_idx, + ch->num_queue_pairs, + vsi->next_base_queue); + return ret; +} + +/** + * i40e_setup_channel - setup new channel using uplink element + * @pf: ptr to PF device + * @type: type of channel to be created (VMDq2/VF) + * @uplink_seid: underlying HW switching element (VEB) ID + * @ch: ptr to channel structure + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and uplink switching element (uplink_seid) + **/ +static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + u8 vsi_type; + u16 seid; + int ret; + + if (vsi->type == I40E_VSI_MAIN) { + vsi_type = I40E_VSI_VMDQ2; + } else { + dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n", + vsi->type); + return false; + } + + /* underlying switching element */ + seid = pf->vsi[pf->lan_vsi]->uplink_seid; + + /* create channel (VSI), configure TX rings */ + ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type); + if (ret) { + dev_err(&pf->pdev->dev, "failed to setup hw_channel\n"); + return false; + } + + return ch->initialized ? 
true : false; +} + +/** + * i40e_create_queue_channel - function to create channel + * @vsi: VSI to be configured + * @ch: ptr to channel (it contains channel specific params) + * + * This function creates channel (VSI) using num_queues specified by user, + * reconfigs RSS if needed. + **/ +int i40e_create_queue_channel(struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + struct i40e_pf *pf = vsi->back; + bool reconfig_rss; + int err; + + if (!ch) + return -EINVAL; + + if (!ch->num_queue_pairs) { + dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n", + ch->num_queue_pairs); + return -EINVAL; + } + + /* validate user requested num_queues for channel */ + err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi, + &reconfig_rss); + if (err) { + dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n", + ch->num_queue_pairs); + return -EINVAL; + } + + /* By default we are in VEPA mode, if this is the first VF/VMDq + * VSI to be added switch to VEB mode. + */ + if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) || + (!i40e_is_any_channel(vsi))) { + if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) { + dev_dbg(&pf->pdev->dev, + "Failed to create channel. Override queues (%u) not power of 2\n", + vsi->tc_config.tc_info[0].qcount); + return -EINVAL; + } + + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + + if (vsi->type == I40E_VSI_MAIN) { + if (pf->flags & I40E_FLAG_TC_MQPRIO) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, + true); + else + i40e_do_reset_safe(pf, + I40E_PF_RESET_FLAG); + } + } + /* now onwards for main VSI, number of queues will be value + * of TC0's queue count + */ + } + + /* By this time, vsi->cnt_q_avail shall be set to non-zero and + * it should be more than num_queues + */ + if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) { + dev_dbg(&pf->pdev->dev, + "Error: cnt_q_avail (%u) less than num_queues %d\n", + vsi->cnt_q_avail, ch->num_queue_pairs); + return -EINVAL; + } + + /* reconfig_rss only if vsi type is MAIN_VSI */ + if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) { + err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs); + if (err) { + dev_info(&pf->pdev->dev, + "Error: unable to reconfig rss for num_queues (%u)\n", + ch->num_queue_pairs); + return -EINVAL; + } + } + + if (!i40e_setup_channel(pf, vsi, ch)) { + dev_info(&pf->pdev->dev, "Failed to setup channel\n"); + return -EINVAL; + } + + dev_info(&pf->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + ch->seid, ch->num_queue_pairs); + + /* in case of VF, this will be main SRIOV VSI */ + ch->parent_vsi = vsi; + + /* and update main_vsi's count for queue_available to use */ + vsi->cnt_q_avail -= ch->num_queue_pairs; + + return 0; +} + +/** + * i40e_configure_queue_channels - Add queue channel for the given TCs + * @vsi: VSI to be configured + * + * Configures queue channel mapping to the given TCs + **/ +static int i40e_configure_queue_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch; + int ret = 0, i; + + /* Create app vsi with the TCs. 
Main VSI with TC0 is already set up */ + for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (vsi->tc_config.enabled_tc & BIT(i)) { + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + + INIT_LIST_HEAD(&ch->list); + ch->num_queue_pairs = + vsi->tc_config.tc_info[i].qcount; + ch->base_queue = + vsi->tc_config.tc_info[i].qoffset; + + list_add_tail(&ch->list, &vsi->ch_list); + + ret = i40e_create_queue_channel(vsi, ch); + if (ret) { + dev_err(&vsi->back->pdev->dev, + "Failed creating queue channel with TC%d: queues %d\n", + i, ch->num_queue_pairs); + goto err_free; + } + } + } + return ret; + +err_free: + i40e_remove_queue_channels(vsi); + return ret; +} + /** * i40e_veb_config_tc - Configure TCs for given VEB * @veb: given VEB @@ -5612,10 +6267,18 @@ static int i40e_setup_tc(struct net_device *netdev, u8 tc) goto exit; } - /* Unquiesce VSI */ - i40e_unquiesce_vsi(vsi); + if (pf->flags & I40E_FLAG_TC_MQPRIO) { + ret = i40e_configure_queue_channels(vsi); + if (ret) { + netdev_info(netdev, + "Failed configuring queue channels\n"); + goto exit; + } + } exit: + /* Unquiesce VSI */ + i40e_unquiesce_vsi(vsi); return ret; } @@ -7030,6 +7693,35 @@ static void i40e_fdir_teardown(struct i40e_pf *pf) i40e_vsi_release(vsi); } +/** + * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset + * @vsi: PF main vsi + * + * Rebuilds channel VSIs if they existed before reset + **/ +static int i40e_rebuild_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + i40e_status ret; + + if (list_empty(&vsi->ch_list)) + return 0; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + if (!ch->initialized) + break; + /* Proceed with creation of channel (VMDq2) VSI */ + ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "failed to rebuild channels using uplink_seid %u\n", + vsi->uplink_seid); + return ret; + } + } + return 0; +} + /** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure @@ -7295,6 +7987,13 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } } + /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs + * for this main VSI if they exist + */ + ret = i40e_rebuild_channels(pf->vsi[pf->lan_vsi]); + if (ret) + goto end_unlock; + /* Reconfigure hardware for allowing smaller MSS in the case * of TSO, so that we avoid the MDD being fired and causing * a reset in the case of small MSS+TSO. @@ -11608,6 +12307,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis; } + INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list); /* Make sure flow control is set according to current settings */ err = i40e_set_fc(hw, &set_fc_aq_fail, true); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index ff57ae451524..fbae1182e2ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -426,6 +426,8 @@ struct i40e_ring { * i40e_clean_rx_ring_irq() is called * for this ring. 
*/ + + struct i40e_channel *ch; } ____cacheline_internodealigned_in_smp; static inline bool ring_uses_build_skb(struct i40e_ring *ring) From a9ce82f744dc401ec27c787e2eacf3bbb33565ec Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 7 Sep 2017 04:00:22 -0700 Subject: [PATCH 0943/2177] i40e: Enable 'channel' mode in mqprio for TC configs The i40e driver is modified to enable the new mqprio hardware offload mode and factor the TCs and queue configuration by creating channel VSIs. In this mode, the priority to traffic class mapping and the user specified queue ranges are used to configure the traffic classes by setting the mode option to 'channel'. Example: map 0 0 0 0 1 2 2 3 queues 2@0 2@2 1@4 1@5\ hw 1 mode channel qdisc mqprio 8038: root tc 4 map 0 0 0 0 1 2 2 3 0 0 0 0 0 0 0 0 queues:(0:1) (2:3) (4:4) (5:5) mode:channel shaper:dcb The HW channels created are removed and all the queue configuration is set to default when the qdisc is detached from the root of the device. This patch also disables setting up channels via ethtool (ethtool -L) when the TCs are configured using mqprio scheduler. The patch also limits setting ethtool Rx flow hash indirection (ethtool -X eth0 equal N) to max queues configured via mqprio. The Rx flow hash indirection input through ethtool should be validated so that it is within in the queue range configured via tc/mqprio. The bound checking is achieved by reporting the current rss size to the kernel when queues are configured via mqprio. Example: map 0 0 0 1 0 2 3 0 queues 2@0 4@2 8@6 11@14\ hw 1 mode channel Cannot set RX flow hash configuration: Invalid argument Signed-off-by: Amritha Nambiar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 3 + .../net/ethernet/intel/i40e/i40e_ethtool.c | 8 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 461 ++++++++++++++---- 3 files changed, 364 insertions(+), 108 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index bde982541772..024c88474951 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -54,6 +54,7 @@ #include #include #include +#include #include "i40e_type.h" #include "i40e_prototype.h" #include "i40e_client.h" @@ -700,6 +701,7 @@ struct i40e_vsi { enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */ s16 vf_id; /* Virtual function ID for SRIOV VSIs */ + struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */ struct i40e_tc_configuration tc_config; struct i40e_aqc_vsi_properties_data info; @@ -725,6 +727,7 @@ struct i40e_vsi { u16 cnt_q_avail; /* num of queues available for channel usage */ u16 orig_rss_size; u16 current_rss_size; + bool reconfig_rss; u16 next_base_queue; /* next queue to be used for channel setup */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index afd3ca8d9851..72d5f2cdf419 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2652,7 +2652,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - cmd->data = vsi->num_queue_pairs; + cmd->data = vsi->rss_size; ret = 0; break; case ETHTOOL_GRXFH: @@ -3897,6 +3897,12 @@ static int i40e_set_channels(struct net_device *dev, if (vsi->type != I40E_VSI_MAIN) return -EINVAL; + /* We do not support setting channels via ethtool when TCs are + * configured through mqprio + */ + if 
(pf->flags & I40E_FLAG_TC_MQPRIO) + return -EINVAL; + /* verify they are not requesting separate vectors */ if (!count || ch->rx_count || ch->tx_count) return -EINVAL; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e23105bee6d1..e803aa1552c6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1588,6 +1588,170 @@ static int i40e_set_mac(struct net_device *netdev, void *p) return 0; } +/** + * i40e_config_rss_aq - Prepare for RSS using AQ commands + * @vsi: vsi structure + * @seed: RSS hash seed + **/ +static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret = 0; + + if (seed) { + struct i40e_aqc_get_set_rss_key_data *seed_dw = + (struct i40e_aqc_get_set_rss_key_data *)seed; + ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS key, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } + if (lut) { + bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + + ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } + return ret; +} + +/** + * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used + * @vsi: VSI structure + **/ +static int i40e_vsi_config_rss(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + u8 seed[I40E_HKEY_ARRAY_SIZE]; + u8 *lut; + int ret; + + if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)) + return 0; + if (!vsi->rss_size) + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use the user configured hash keys and lookup table if there is one, + * otherwise use default + */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); + else + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + return ret; +} + +/** + * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config + * @vsi: the VSI being configured, + * @ctxt: VSI context structure + * @enabled_tc: number of traffic classes to enable + * + * Prepares VSI tc_config to have queue configurations based on MQPRIO options. + **/ +static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi, + struct i40e_vsi_context *ctxt, + u8 enabled_tc) +{ + u16 qcount = 0, max_qcount, qmap, sections = 0; + int i, override_q, pow, num_qps, ret; + u8 netdev_tc = 0, offset = 0; + + if (vsi->type != I40E_VSI_MAIN) + return -EINVAL; + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc; + vsi->tc_config.enabled_tc = enabled_tc ? 
enabled_tc : 1; + num_qps = vsi->mqprio_qopt.qopt.count[0]; + + /* find the next higher power-of-2 of num queue pairs */ + pow = ilog2(num_qps); + if (!is_power_of_2(num_qps)) + pow++; + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup queue offset/count for all TCs for given VSI */ + max_qcount = vsi->mqprio_qopt.qopt.count[0]; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + /* See if the given TC is enabled for the given VSI */ + if (vsi->tc_config.enabled_tc & BIT(i)) { + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount = vsi->mqprio_qopt.qopt.count[i]; + if (qcount > max_qcount) + max_qcount = qcount; + vsi->tc_config.tc_info[i].qoffset = offset; + vsi->tc_config.tc_info[i].qcount = qcount; + vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++; + } else { + /* TC is not enabled so set the offset to + * default queue and allocate one queue + * for the given TC. + */ + vsi->tc_config.tc_info[i].qoffset = 0; + vsi->tc_config.tc_info[i].qcount = 1; + vsi->tc_config.tc_info[i].netdev_tc = 0; + } + } + + /* Set actual Tx/Rx queue pairs */ + vsi->num_queue_pairs = offset + qcount; + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); + ctxt->info.valid_sections |= cpu_to_le16(sections); + + /* Reconfigure RSS for main VSI with max queue count */ + vsi->rss_size = max_qcount; + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed to reconfig rss for num_queues (%u)\n", + max_qcount); + return ret; + } + vsi->reconfig_rss = true; + dev_dbg(&vsi->back->pdev->dev, + "Reconfigured rss with num_queues (%u)\n", max_qcount); + + /* Find queue count available for channel VSIs and starting offset + * for channel VSIs + */ + override_q = vsi->mqprio_qopt.qopt.count[0]; + if (override_q && override_q < vsi->num_queue_pairs) { + vsi->cnt_q_avail = vsi->num_queue_pairs - override_q; + vsi->next_base_queue = override_q; + } + return 0; +} + /** * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc * @vsi: the VSI being setup @@ -1626,7 +1790,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, numtc = 1; } } else { - /* At least TC0 is enabled in case of non-DCB case */ + /* At least TC0 is enabled in non-DCB, non-MQPRIO case */ numtc = 1; } @@ -3158,6 +3322,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) rx_ring->dcb_tc = 0; tx_ring->dcb_tc = 0; } + return; } for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) { @@ -4873,6 +5038,24 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg) return enabled_tc; } +/** + * i40e_mqprio_get_enabled_tc - Get enabled traffic classes + * @pf: PF being queried + * + * Query the current MQPRIO configuration and return the number of + * traffic classes enabled. 
+ **/ +static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf) +{ + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + u8 num_tc = vsi->mqprio_qopt.qopt.num_tc; + u8 enabled_tc = 1, i; + + for (i = 1; i < num_tc; i++) + enabled_tc |= BIT(i); + return enabled_tc; +} + /** * i40e_pf_get_num_tc - Get enabled traffic classes for PF * @pf: PF being queried @@ -4886,7 +5069,10 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) u8 num_tc = 0; struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; - /* If DCB is not enabled then always in single TC */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) + return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc; + + /* If neither MQPRIO nor DCB is enabled, then always use single TC */ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) return 1; @@ -4915,7 +5101,12 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) **/ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) { - /* If DCB is not enabled for this PF then just return default TC */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) + return i40e_mqprio_get_enabled_tc(pf); + + /* If neither MQPRIO nor DCB is enabled for this PF then just return + * default TC + */ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) return I40E_DEFAULT_TRAFFIC_CLASS; @@ -5005,6 +5196,9 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, i40e_status ret; int i; + if ((vsi->back->flags & I40E_FLAG_TC_MQPRIO) || + !vsi->mqprio_qopt.qopt.hw) + return 0; bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; @@ -5067,6 +5261,9 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc) vsi->tc_config.tc_info[i].qoffset); } + if (pf->flags & I40E_FLAG_TC_MQPRIO) + return; + /* Assign UP2TC map for the VSI */ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { /* Get the actual TC# for the UP */ @@ -5117,7 +5314,8 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) int i; /* Check if enabled_tc is same as existing or new TCs */ - if (vsi->tc_config.enabled_tc == enabled_tc) + if (vsi->tc_config.enabled_tc == enabled_tc && + vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) return ret; /* Enable ETS TCs with equal BW Share for now across all VSIs */ @@ -5140,15 +5338,37 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) ctxt.vf_num = 0; ctxt.uplink_seid = vsi->uplink_seid; ctxt.info = vsi->info; - i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); + if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) { + ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc); + if (ret) + goto out; + } else { + i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); + } + /* On destroying the qdisc, reset vsi->rss_size, as number of enabled + * queues changed. 
+ */ + if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) { + vsi->rss_size = min_t(int, vsi->back->alloc_rss_size, + vsi->num_queue_pairs); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed to reconfig rss for num_queues\n"); + return ret; + } + vsi->reconfig_rss = false; + } if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; } - /* Update the VSI after updating the VSI queue-mapping information */ + /* Update the VSI after updating the VSI queue-mapping + * information + */ ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, @@ -6216,54 +6436,157 @@ void i40e_down(struct i40e_vsi *vsi) } +/** + * i40e_validate_mqprio_qopt- validate queue mapping info + * @vsi: the VSI being configured + * @mqprio_qopt: queue parametrs + **/ +static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + int i; + + if (mqprio_qopt->qopt.offset[0] != 0 || + mqprio_qopt->qopt.num_tc < 1 || + mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS) + return -EINVAL; + for (i = 0; ; i++) { + if (!mqprio_qopt->qopt.count[i]) + return -EINVAL; + if (mqprio_qopt->min_rate[i] || mqprio_qopt->max_rate[i]) + return -EINVAL; + if (i >= mqprio_qopt->qopt.num_tc - 1) + break; + if (mqprio_qopt->qopt.offset[i + 1] != + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + } + if (vsi->num_queue_pairs < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { + return -EINVAL; + } + return 0; +} + +/** + * i40e_vsi_set_default_tc_config - set default values for tc configuration + * @vsi: the VSI being configured + **/ +static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) +{ + u16 qcount; + int i; + + /* Only TC0 is enabled */ + vsi->tc_config.numtc = 1; + vsi->tc_config.enabled_tc = 1; + qcount = min_t(int, vsi->alloc_queue_pairs, + i40e_pf_get_max_q_per_tc(vsi->back)); + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + /* For the TC that is not enabled set the offset to to default + * queue and allocate one queue for the given TC. 
+ */ + vsi->tc_config.tc_info[i].qoffset = 0; + if (i == 0) + vsi->tc_config.tc_info[i].qcount = qcount; + else + vsi->tc_config.tc_info[i].qcount = 1; + vsi->tc_config.tc_info[i].netdev_tc = 0; + } +} + /** * i40e_setup_tc - configure multiple traffic classes * @netdev: net device to configure - * @tc: number of traffic classes to enable + * @type_data: tc offload data **/ -static int i40e_setup_tc(struct net_device *netdev, u8 tc) +static int i40e_setup_tc(struct net_device *netdev, void *type_data) { + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u8 enabled_tc = 0; + u8 enabled_tc = 0, num_tc, hw; + bool need_reset = false; int ret = -EINVAL; + u16 mode; int i; - /* Check if DCB enabled to continue */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) { - netdev_info(netdev, "DCB is not enabled for adapter\n"); - goto exit; + num_tc = mqprio_qopt->qopt.num_tc; + hw = mqprio_qopt->qopt.hw; + mode = mqprio_qopt->mode; + if (!hw) { + pf->flags &= ~I40E_FLAG_TC_MQPRIO; + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + goto config_tc; } /* Check if MFP enabled */ if (pf->flags & I40E_FLAG_MFP_ENABLED) { - netdev_info(netdev, "Configuring TC not supported in MFP mode\n"); - goto exit; - } - - /* Check whether tc count is within enabled limit */ - if (tc > i40e_pf_get_num_tc(pf)) { - netdev_info(netdev, "TC count greater than enabled on link for adapter\n"); - goto exit; + netdev_info(netdev, + "Configuring TC not supported in MFP mode\n"); + return ret; + } + switch (mode) { + case TC_MQPRIO_MODE_DCB: + pf->flags &= ~I40E_FLAG_TC_MQPRIO; + + /* Check if DCB enabled to continue */ + if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) { + netdev_info(netdev, + "DCB is not enabled for adapter\n"); + return ret; + } + + /* Check whether tc count is within enabled limit */ + if (num_tc > i40e_pf_get_num_tc(pf)) { + netdev_info(netdev, + "TC count greater than enabled on link for adapter\n"); + return ret; + } + break; + case TC_MQPRIO_MODE_CHANNEL: + if (pf->flags & I40E_FLAG_DCB_ENABLED) { + netdev_info(netdev, + "Full offload of TC Mqprio options is not supported when DCB is enabled\n"); + return ret; + } + if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + return ret; + ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt); + if (ret) + return ret; + memcpy(&vsi->mqprio_qopt, mqprio_qopt, + sizeof(*mqprio_qopt)); + pf->flags |= I40E_FLAG_TC_MQPRIO; + pf->flags &= ~I40E_FLAG_DCB_ENABLED; + break; + default: + return -EINVAL; } +config_tc: /* Generate TC map for number of tc requested */ - for (i = 0; i < tc; i++) + for (i = 0; i < num_tc; i++) enabled_tc |= BIT(i); /* Requesting same TC configuration as already enabled */ - if (enabled_tc == vsi->tc_config.enabled_tc) + if (enabled_tc == vsi->tc_config.enabled_tc && + mode != TC_MQPRIO_MODE_CHANNEL) return 0; /* Quiesce VSI queues */ i40e_quiesce_vsi(vsi); + if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO)) + i40e_remove_queue_channels(vsi); + /* Configure VSI for enabled TCs */ ret = i40e_vsi_config_tc(vsi, enabled_tc); if (ret) { netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n", vsi->seid); + need_reset = true; goto exit; } @@ -6272,11 +6595,18 @@ static int i40e_setup_tc(struct net_device *netdev, u8 tc) if (ret) { netdev_info(netdev, "Failed configuring queue channels\n"); + need_reset = true; goto exit; } } exit: + /* Reset the configuration data to defaults, only TC0 is enabled */ + if (need_reset) { + 
i40e_vsi_set_default_tc_config(vsi); + need_reset = false; + } + /* Unquiesce VSI */ i40e_unquiesce_vsi(vsi); return ret; @@ -6285,14 +6615,10 @@ exit: static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { - struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) return -EOPNOTSUPP; - mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; - - return i40e_setup_tc(netdev, mqprio->num_tc); + return i40e_setup_tc(netdev, type_data); } /** @@ -9153,45 +9479,6 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) return err; } -/** - * i40e_config_rss_aq - Prepare for RSS using AQ commands - * @vsi: vsi structure - * @seed: RSS hash seed - **/ -static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, - u8 *lut, u16 lut_size) -{ - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - int ret = 0; - - if (seed) { - struct i40e_aqc_get_set_rss_key_data *seed_dw = - (struct i40e_aqc_get_set_rss_key_data *)seed; - ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot set RSS key, err %s aq_err %s\n", - i40e_stat_str(hw, ret), - i40e_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } - } - if (lut) { - bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; - - ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40e_stat_str(hw, ret), - i40e_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } - } - return ret; -} - /** * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands * @vsi: Pointer to vsi structure @@ -9238,46 +9525,6 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, return ret; } -/** - * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used - * @vsi: VSI structure - **/ -static int i40e_vsi_config_rss(struct i40e_vsi *vsi) -{ - u8 seed[I40E_HKEY_ARRAY_SIZE]; - struct i40e_pf *pf = vsi->back; - u8 *lut; - int ret; - - if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)) - return 0; - - if (!vsi->rss_size) - vsi->rss_size = min_t(int, pf->alloc_rss_size, - vsi->num_queue_pairs); - if (!vsi->rss_size) - return -EINVAL; - - lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); - if (!lut) - return -ENOMEM; - /* Use the user configured hash keys and lookup table if there is one, - * otherwise use default - */ - if (vsi->rss_lut_user) - memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); - else - i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); - if (vsi->rss_hkey_user) - memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); - else - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); - kfree(lut); - - return ret; -} - /** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure From 5ecae4120a6b50fb8a31d2f335eab390bcf5ad66 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 7 Sep 2017 04:00:27 -0700 Subject: [PATCH 0944/2177] i40e: Refactor VF BW rate limiting This patch refactors the BW rate limiting for Tx traffic on the VF to be reused in the next patch for rate limiting Tx traffic for the VSIs on the PF as well. 
Signed-off-by: Amritha Nambiar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 5 ++ drivers/net/ethernet/intel/i40e/i40e_main.c | 64 +++++++++++++++++++ .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 45 +------------ 3 files changed, 71 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 024c88474951..524aa06a9e0e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -128,6 +128,10 @@ /* default to trying for four seconds */ #define I40E_TRY_LINK_TIMEOUT (4 * HZ) +/* BW rate limiting */ +#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */ +#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ + /* driver state flags */ enum i40e_state_t { __I40E_TESTING, @@ -1039,4 +1043,5 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) } int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); +int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e803aa1552c6..fc6eaf44d87c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5399,6 +5399,70 @@ out: return ret; } +/** + * i40e_get_link_speed - Returns link speed for the interface + * @vsi: VSI to be configured + * + **/ +int i40e_get_link_speed(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + + switch (pf->hw.phy.link_info.link_speed) { + case I40E_LINK_SPEED_40GB: + return 40000; + case I40E_LINK_SPEED_25GB: + return 25000; + case I40E_LINK_SPEED_20GB: + return 20000; + case I40E_LINK_SPEED_10GB: + return 10000; + case I40E_LINK_SPEED_1GB: + return 1000; + default: + return -EINVAL; + } +} + +/** + * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate + * @vsi: VSI to be configured + * @seid: seid of the channel/VSI + * @max_tx_rate: max TX rate to be configured as BW limit + * + * Helper function to set BW limit for a given VSI + **/ +int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) +{ + struct i40e_pf *pf = vsi->back; + int speed = 0; + int ret = 0; + + speed = i40e_get_link_speed(vsi); + if (max_tx_rate > speed) { + dev_err(&pf->pdev->dev, + "Invalid max tx rate %llu specified for VSI seid %d.", + max_tx_rate, seid); + return -EINVAL; + } + if (max_tx_rate && max_tx_rate < 50) { + dev_warn(&pf->pdev->dev, + "Setting max tx rate to minimum usable value of 50Mbps.\n"); + max_tx_rate = 50; + } + + /* Tx rate credits are in values of 50Mbps, 0 is disabled */ + ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, + max_tx_rate / I40E_BW_CREDIT_DIVISOR, + I40E_MAX_BW_INACTIVE_ACCUM, NULL); + if (ret) + dev_err(&pf->pdev->dev, + "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n", + max_tx_rate, seid, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + return ret; +} + /** * i40e_remove_queue_channels - Remove queue channels for the TCs * @vsi: VSI to be configured diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index e7f98e306554..ce0981e2f605 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3117,8 +3117,6 @@ error_pvid: return ret; } -#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW 
credit */ -#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* device can accumulate 4 credits max */ /** * i40e_ndo_set_vf_bw * @netdev: network interface device structure @@ -3134,7 +3132,6 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, struct i40e_pf *pf = np->vsi->back; struct i40e_vsi *vsi; struct i40e_vf *vf; - int speed = 0; int ret = 0; /* validate the request */ @@ -3159,48 +3156,10 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, goto error; } - switch (pf->hw.phy.link_info.link_speed) { - case I40E_LINK_SPEED_40GB: - speed = 40000; - break; - case I40E_LINK_SPEED_25GB: - speed = 25000; - break; - case I40E_LINK_SPEED_20GB: - speed = 20000; - break; - case I40E_LINK_SPEED_10GB: - speed = 10000; - break; - case I40E_LINK_SPEED_1GB: - speed = 1000; - break; - default: - break; - } - - if (max_tx_rate > speed) { - dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.\n", - max_tx_rate, vf->vf_id); - ret = -EINVAL; + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (ret) goto error; - } - if ((max_tx_rate < 50) && (max_tx_rate > 0)) { - dev_warn(&pf->pdev->dev, "Setting max Tx rate to minimum usable value of 50Mbps.\n"); - max_tx_rate = 50; - } - - /* Tx rate credits are in values of 50Mbps, 0 is disabled*/ - ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, - max_tx_rate / I40E_BW_CREDIT_DIVISOR, - I40E_MAX_BW_INACTIVE_ACCUM, NULL); - if (ret) { - dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n", - ret); - ret = -EIO; - goto error; - } vf->tx_rate = max_tx_rate; error: return ret; From 2027d4deacb129579f022746830ea05b72fe114a Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 7 Sep 2017 04:00:32 -0700 Subject: [PATCH 0945/2177] i40e: Add support setting TC max bandwidth rates This patch enables setting up maximum Tx rates for the traffic classes in i40e. The maximum rate is offloaded to the hardware through the mqprio framework by specifying the mode option as 'channel' and shaper option as 'bw_rlimit' and is configured for the VSI. Configuring minimum Tx rate limit is not supported in the device. The minimum usable value for Tx rate is 50Mbps. 
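The mqprio interface hands the driver per-TC max_rate values in bytes per second, while the firmware works in Mbit/s, so the patch divides by (1000000 / 8) and verifies that the sum of the requested rates does not exceed the link speed. A standalone sketch of that arithmetic, using made-up rates that match the tc example which follows (illustrative only, not driver code):

    #include <stdio.h>

    int main(void)
    {
            /* bytes/s as passed by mqprio: 4Gbit/s and 5Gbit/s */
            unsigned long long max_rate[2] = { 500000000ULL, 625000000ULL };
            unsigned long long link_speed_mbps = 10000; /* assume a 10G link */
            unsigned long long sum_mbps = 0;
            int i;

            for (i = 0; i < 2; i++)
                    sum_mbps += max_rate[i] / (1000000 / 8); /* bytes/s -> Mbit/s */

            printf("requested %llu Mbit/s: %s\n", sum_mbps,
                   sum_mbps > link_speed_mbps ? "rejected" : "accepted");
            return 0;
    }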
Example: # tc qdisc add dev eth0 root mqprio num_tc 2 map 0 0 0 0 1 1 1 1\ queues 4@0 4@4 hw 1 mode channel shaper bw_rlimit\ max_rate 4Gbit 5Gbit To dump the bandwidth rates: # tc qdisc show dev eth0 qdisc mqprio 804a: root tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 queues:(0:3) (4:7) mode:channel shaper:bw_rlimit max_rate:4Gbit 5Gbit Signed-off-by: Amritha Nambiar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 + drivers/net/ethernet/intel/i40e/i40e_main.c | 100 ++++++++++++++++++-- 2 files changed, 93 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 524aa06a9e0e..266e1dc5e786 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -359,6 +359,8 @@ struct i40e_channel { u8 enabled_tc; struct i40e_aqc_vsi_properties_data info; + u64 max_tx_rate; + /* track this channel belongs to which VSI */ struct i40e_vsi *parent_vsi; }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index fc6eaf44d87c..bb31d53c4923 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5196,9 +5196,16 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, i40e_status ret; int i; - if ((vsi->back->flags & I40E_FLAG_TC_MQPRIO) || - !vsi->mqprio_qopt.qopt.hw) + if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) return 0; + if (!vsi->mqprio_qopt.qopt.hw) { + ret = i40e_set_bw_limit(vsi, vsi->seid, 0); + if (ret) + dev_info(&vsi->back->pdev->dev, + "Failed to reset tx rate for vsi->seid %u\n", + vsi->seid); + return ret; + } bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; @@ -5505,6 +5512,13 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) rx_ring->ch = NULL; } + /* Reset BW configured for this VSI via mqprio */ + ret = i40e_set_bw_limit(vsi, ch->seid, 0); + if (ret) + dev_info(&vsi->back->pdev->dev, + "Failed to reset tx rate for ch->seid %u\n", + ch->seid); + /* delete VSI from FW */ ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, NULL); @@ -6047,6 +6061,17 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, "Setup channel (id:%u) utilizing num_queues %d\n", ch->seid, ch->num_queue_pairs); + /* configure VSI for BW limit */ + if (ch->max_tx_rate) { + if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate)) + return -EINVAL; + + dev_dbg(&pf->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + ch->max_tx_rate, + ch->max_tx_rate / I40E_BW_CREDIT_DIVISOR, ch->seid); + } + /* in case of VF, this will be main SRIOV VSI */ ch->parent_vsi = vsi; @@ -6082,6 +6107,12 @@ static int i40e_configure_queue_channels(struct i40e_vsi *vsi) ch->base_queue = vsi->tc_config.tc_info[i].qoffset; + /* Bandwidth limit through tc interface is in bytes/s, + * change to Mbit/s + */ + ch->max_tx_rate = + vsi->mqprio_qopt.max_rate[i] / (1000000 / 8); + list_add_tail(&ch->list, &vsi->ch_list); ret = i40e_create_queue_channel(vsi, ch); @@ -6508,6 +6539,7 @@ void i40e_down(struct i40e_vsi *vsi) static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, struct tc_mqprio_qopt_offload *mqprio_qopt) { + u64 sum_max_rate = 0; int i; if (mqprio_qopt->qopt.offset[0] != 0 || @@ -6517,8 +6549,13 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, for (i = 0; ; i++) { if (!mqprio_qopt->qopt.count[i]) return -EINVAL; - if 
(mqprio_qopt->min_rate[i] || mqprio_qopt->max_rate[i]) + if (mqprio_qopt->min_rate[i]) { + dev_err(&vsi->back->pdev->dev, + "Invalid min tx rate (greater than 0) specified\n"); return -EINVAL; + } + sum_max_rate += (mqprio_qopt->max_rate[i] / (1000000 / 8)); + if (i >= mqprio_qopt->qopt.num_tc - 1) break; if (mqprio_qopt->qopt.offset[i + 1] != @@ -6529,6 +6566,11 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { return -EINVAL; } + if (sum_max_rate > i40e_get_link_speed(vsi)) { + dev_err(&vsi->back->pdev->dev, + "Invalid max tx rate specified\n"); + return -EINVAL; + } return 0; } @@ -6655,6 +6697,21 @@ config_tc: } if (pf->flags & I40E_FLAG_TC_MQPRIO) { + if (vsi->mqprio_qopt.max_rate[0]) { + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0] / + (1000000 / 8); + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (!ret) { + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + max_tx_rate, + max_tx_rate / I40E_BW_CREDIT_DIVISOR, + vsi->seid); + } else { + need_reset = true; + goto exit; + } + } ret = i40e_configure_queue_channels(vsi); if (ret) { netdev_info(netdev, @@ -8108,6 +8165,17 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) vsi->uplink_seid); return ret; } + if (ch->max_tx_rate) { + if (i40e_set_bw_limit(vsi, ch->seid, + ch->max_tx_rate)) + return -EINVAL; + + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + ch->max_tx_rate, + ch->max_tx_rate / I40E_BW_CREDIT_DIVISOR, + ch->seid); + } } return 0; } @@ -8248,6 +8316,7 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; u8 set_fc_aq_fail = 0; i40e_status ret; @@ -8330,7 +8399,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * If there were VEBs but the reconstitution failed, we'll try * try to recover minimal use by getting the basic PF VSI working. 
*/ - if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) { + if (vsi->uplink_seid != pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n"); /* find the one VEB connected to the MAC, and find orphans */ for (v = 0; v < I40E_MAX_VEB; v++) { @@ -8354,8 +8423,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) dev_info(&pf->pdev->dev, "rebuild of switch failed: %d, will try to set up simple PF connection\n", ret); - pf->vsi[pf->lan_vsi]->uplink_seid - = pf->mac_seid; + vsi->uplink_seid = pf->mac_seid; break; } else if (pf->veb[v]->uplink_seid == 0) { dev_info(&pf->pdev->dev, @@ -8366,10 +8434,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } } - if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) { + if (vsi->uplink_seid == pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n"); /* no VEB, so rebuild only the Main VSI */ - ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]); + ret = i40e_add_vsi(vsi); if (ret) { dev_info(&pf->pdev->dev, "rebuild of Main VSI failed: %d\n", ret); @@ -8377,10 +8445,24 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } } + if (vsi->mqprio_qopt.max_rate[0]) { + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0] / (1000000 / 8); + + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (!ret) + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + max_tx_rate, + max_tx_rate / I40E_BW_CREDIT_DIVISOR, + vsi->seid); + else + goto end_unlock; + } + /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs * for this main VSI if they exist */ - ret = i40e_rebuild_channels(pf->vsi[pf->lan_vsi]); + ret = i40e_rebuild_channels(vsi); if (ret) goto end_unlock; From b06da8f939ff8dcf28eb9b86aae933850658742e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 15:11:38 +0100 Subject: [PATCH 0946/2177] i40e: make const array patterns static, reduces object code size Don't populate const array patterns on the stack, instead make it static. Makes the object code smaller by over 60 bytes: Before: text data bss dec hex filename 1953 496 0 2449 991 i40e_diag.o After: text data bss dec hex filename 1798 584 0 2382 94e i40e_diag.o (gcc 6.3.0, x86-64) Signed-off-by: Colin Ian King Acked-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_diag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c index f141e78d409e..76ed56641864 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_diag.c +++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c @@ -36,7 +36,9 @@ static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw, u32 reg, u32 mask) { - const u32 patterns[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; + static const u32 patterns[] = { + 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF + }; u32 pat, val, orig_val; int i; From 8fdb69dd383f1f937f7e2f1f24efe97c5268a84c Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 11 Oct 2017 14:49:42 -0700 Subject: [PATCH 0947/2177] i40e: fix link reporting When querying the NVM for supported phy_types, on some firmware versions, we were failing to actually fill out the phy_types which means ethtool wouldn't report any link types. Testing-hints: Check 'ethtool ' if you have the right (wrong?) firmware. Without this patch, no link modes will be reported. 
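The fix below falls back to building the 64-bit phy_types mask directly from the reported abilities: the 32-bit phy_type word fills the lower half and the phy_type_ext byte is shifted into the upper half. A standalone illustration of that packing, with hypothetical capability bits (the real driver additionally byte-swaps phy_type with le32_to_cpu):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t phy_type = 0x00000008; /* hypothetical low 32 capability bits */
            uint8_t phy_type_ext = 0x02;    /* hypothetical extended capability bits */
            uint64_t phy_types;

            phy_types = (uint64_t)phy_type | ((uint64_t)phy_type_ext << 32);
            printf("phy_types = 0x%016llx\n", (unsigned long long)phy_types);
            return 0;
    }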
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_common.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 53aad378d49c..aeb497258f20 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1611,8 +1611,13 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw, if (report_init) { if (hw->mac.type == I40E_MAC_XL710 && hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) + hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { status = i40e_aq_get_link_info(hw, true, NULL, NULL); + } else { + hw->phy.phy_types = le32_to_cpu(abilities->phy_type); + hw->phy.phy_types |= + ((u64)abilities->phy_type_ext << 32); + } } return status; From 17a9422de78c3a59b490b400f555635c477f1476 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 11 Oct 2017 14:49:43 -0700 Subject: [PATCH 0948/2177] i40e/i40evf: don't trust VF to reset itself When using 'ethtool -L' on a VF to change number of requested queues from PF, we shouldn't trust the VF to reset itself after making the request. Doing it that way opens the door for a potentially malicious VF to do nasty things to the PF which should never be the case. This makes it such that after VF makes a successful request, PF will then reset the VF to institute required changes. Only if the request fails will PF send a message back to VF letting it know the request was unsuccessful. Testing-hints: There should be no real functional changes. This is simply hardening against a potentially malicious VF. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 9 +++++++-- drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 7 +++---- include/linux/avf/virtchnl.h | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index ce0981e2f605..f8a794b72462 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2045,8 +2045,9 @@ error_param: * @msglen: msg length * * VFs get a default number of queues but can use this message to request a - * different number. Will respond with either the number requested or the - * maximum we can support. + * different number. If the request is successful, PF will reset the VF and + * return 0. If unsuccessful, PF will send message informing VF of number of + * available queues and return result of sending VF a message. 
**/ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) { @@ -2077,7 +2078,11 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) pf->queues_left); vfres->num_queue_pairs = pf->queues_left + cur_pairs; } else { + /* successful request */ vf->num_req_queues = req_pairs; + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + return 0; } return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 2bb81c39d85f..46c8b8a3907c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -407,6 +407,7 @@ int i40evf_request_queues(struct i40evf_adapter *adapter, int num) vfres.num_queue_pairs = num; adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES, (u8 *)&vfres, sizeof(vfres)); } @@ -1098,15 +1099,13 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case VIRTCHNL_OP_REQUEST_QUEUES: { struct virtchnl_vf_res_request *vfres = (struct virtchnl_vf_res_request *)msg; - if (vfres->num_queue_pairs == adapter->num_req_queues) { - adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; - i40evf_schedule_reset(adapter); - } else { + if (vfres->num_queue_pairs != adapter->num_req_queues) { dev_info(&adapter->pdev->dev, "Requested %d queues, PF can support %d\n", adapter->num_req_queues, vfres->num_queue_pairs); adapter->num_req_queues = 0; + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; } } break; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 60e5d90cb18a..3ce61342fa31 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -333,8 +333,8 @@ struct virtchnl_vsi_queue_config_info { * additional queues must be negotiated. This is a best effort request as it * is possible the PF does not have enough queues left to support the request. * If the PF cannot support the number requested it will respond with the - * maximum number it is able to support; otherwise it will respond with the - * number requested. + * maximum number it is able to support. If the request is successful, PF will + * then reset the VF to institute required changes. */ /* VF resource request */ From b133e0c4bc953a3c156ce7d89ae49cc27957869c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 13 Oct 2017 17:54:01 +0200 Subject: [PATCH 0949/2177] Bluetooth: btbcm: Add entry for BCM4356A2 UART bluetooth This patch adds the device ID for the bluetooth chip used in the Broadcom BCM4356 PCI-E WiFi / UART BT chip. Successfully tested using Firmware version 0273 The upper nibble of the rev field is 2 on this device, so this commit also adds handling of 2 to the switch-case done on the upper nibble. 
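The driver picks a firmware image by taking the upper nibble of the revision field and, for recognised values, matching the subversion against a name table; this patch adds nibble value 2 alongside the existing 0, 1 and 3, plus the 0x230f table entry. A condensed standalone illustration of that lookup (register readings are hypothetical; the table entries are taken from the patch):

    #include <stdio.h>
    #include <stdint.h>

    static const struct { uint16_t subver; const char *name; } uart_subver_table[] = {
            { 0x6119, "BCM4345C0" },
            { 0x230f, "BCM4356A2" },
            { 0, NULL }
    };

    int main(void)
    {
            uint16_t rev = 0x2100, subver = 0x230f; /* hypothetical readings */
            int i;

            switch ((rev & 0xf000) >> 12) {
            case 0: case 1: case 2: case 3:
                    for (i = 0; uart_subver_table[i].name; i++)
                            if (subver == uart_subver_table[i].subver)
                                    printf("use firmware for %s\n",
                                           uart_subver_table[i].name);
                    break;
            default:
                    printf("unknown revision\n");
            }
            return 0;
    }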
Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btbcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 06e8bed4f5eb..ae1fa390f508 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -328,6 +328,7 @@ static const struct { { 0x610c, "BCM4354" }, /* 003.001.012 */ { 0x2209, "BCM43430A1" }, /* 001.002.009 */ { 0x6119, "BCM4345C0" }, /* 003.001.025 */ + { 0x230f, "BCM4356A2" }, /* 001.003.015 */ { } }; @@ -362,6 +363,7 @@ int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len) switch ((rev & 0xf000) >> 12) { case 0: case 1: + case 2: case 3: for (i = 0; bcm_uart_subver_table[i].name; i++) { if (subver == bcm_uart_subver_table[i].subver) { From 61d220a6c2c001473011d44f34d6f36f09eb2224 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 13 Oct 2017 17:54:02 +0200 Subject: [PATCH 0950/2177] Bluetooth: hci_bcm: Add support for BCM2E7E Tested on a GPD win with a BCM4356 PCI-E wifi/bt combo card. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index c69da0e8b79f..707c2d1b84c7 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -941,6 +941,7 @@ static const struct acpi_device_id bcm_acpi_match[] = { { "BCM2E71", (kernel_ulong_t)&acpi_bcm_int_last_gpios }, { "BCM2E7B", (kernel_ulong_t)&acpi_bcm_int_last_gpios }, { "BCM2E7C", (kernel_ulong_t)&acpi_bcm_int_last_gpios }, + { "BCM2E7E", (kernel_ulong_t)&acpi_bcm_int_first_gpios }, { "BCM2E95", (kernel_ulong_t)&acpi_bcm_int_first_gpios }, { "BCM2E96", (kernel_ulong_t)&acpi_bcm_int_first_gpios }, { "BCM2EA4", (kernel_ulong_t)&acpi_bcm_int_first_gpios }, From 2d13e347498f69b4f4e95830eda88937c72d7ba6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 13 Oct 2017 17:54:03 +0200 Subject: [PATCH 0951/2177] Revert "Bluetooth: btusb: Add workaround for Broadcom devices without product id" Commit 9834e586fa66 ("Bluetooth: btusb: Add workaround for Broadcom devices without product id") was added to deal with the BT part of the BCM4356A2 on GPD pocket laptops having an usb vid:pid of 0000:0000. After another commit to add support for the BCM UART connected BT ACPI-id BCM2E7E used on the GPD win, it turns out that the BT on the GPD pocket is connected via both USB and UART. Adding support for the BCM2E7E ACPI-id causes it to switch to UART mode. The Windows shipped with the device is using it in UART mode and the presence of the BCM2E7E ACPI-id combined with the all 0 USB vid:pid indicates that the BT part was never meant to be used in USB mode. With the recent patches to use serdev device enumeration / instantiation for UART attached ACPI enumerated BT devices, everything work OOTB in UART mode and the workaround for the all 0 USB vid:pid is no longer needed. This reverts commit 9834e586fa ("Bluetooth: btusb: Add workaround for Broadcom devices without product id"). 
Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 7a5c06aaa181..c054d7bce490 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -66,7 +66,6 @@ static struct usb_driver btusb_driver; #define BTUSB_BCM2045 0x40000 #define BTUSB_IFNUM_2 0x80000 #define BTUSB_CW6622 0x100000 -#define BTUSB_BCM_NO_PRODID 0x200000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -171,10 +170,6 @@ static const struct usb_device_id btusb_table[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x0930, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, - /* Broadcom devices with missing product id */ - { USB_DEVICE_AND_INTERFACE_INFO(0x0000, 0x0000, 0xff, 0x01, 0x01), - .driver_info = BTUSB_BCM_PATCHRAM | BTUSB_BCM_NO_PRODID }, - /* Intel Bluetooth USB Bootloader (RAM module) */ { USB_DEVICE(0x8087, 0x0a5a), .driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC }, @@ -2909,19 +2904,6 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info == BTUSB_IGNORE) return -ENODEV; - if (id->driver_info & BTUSB_BCM_NO_PRODID) { - struct usb_device *udev = interface_to_usbdev(intf); - - /* For the broken Broadcom devices that show 0000:0000 - * as USB vendor and product information, check that the - * manufacturer string identifies them as Broadcom based - * devices. - */ - if (!udev->manufacturer || - strcmp(udev->manufacturer, "Broadcom Corp")) - return -ENODEV; - } - if (id->driver_info & BTUSB_ATH3012) { struct usb_device *udev = interface_to_usbdev(intf); From fac72b243cc789bb209e6eca824919b42d98cfe2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 12 Oct 2017 17:24:02 -0500 Subject: [PATCH 0952/2177] Bluetooth: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. In this particular case, notice that I replaced the "deliberate fall-through..." comment with a "fall through" comment, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bcm203x.c | 2 +- drivers/bluetooth/hci_ll.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c index 5ce6d4176dc3..8e9547f195ef 100644 --- a/drivers/bluetooth/bcm203x.c +++ b/drivers/bluetooth/bcm203x.c @@ -121,7 +121,7 @@ static void bcm203x_complete(struct urb *urb) } data->state = BCM203X_LOAD_FIRMWARE; - + /* fall through */ case BCM203X_LOAD_FIRMWARE: if (data->fw_sent == data->fw_size) { usb_fill_int_urb(urb, udev, usb_rcvintpipe(udev, BCM203X_IN_EP), diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 424c15aa7bb7..e2c078d61730 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -242,7 +242,7 @@ static void ll_device_want_to_wakeup(struct hci_uart *hu) * perfectly safe to always send one. 
*/ BT_DBG("dual wake-up-indication"); - /* deliberate fall-through - do not add break */ + /* fall through */ case HCILL_ASLEEP: /* acknowledge device wake up */ if (send_hcill_cmd(HCILL_WAKE_UP_ACK, hu) < 0) { From 258bbb1b0e594ad5f5652cb526b3c63e6a7fad3d Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 12 Oct 2017 16:12:37 +0200 Subject: [PATCH 0953/2177] icmp: don't fail on fragment reassembly time exceeded The ICMP implementation currently replies to an ICMP time exceeded message (type 11) with an ICMP host unreachable message (type 3, code 1). However, time exceeded messages can either represent "time to live exceeded in transit" (code 0) or "fragment reassembly time exceeded" (code 1). Unconditionally replying to "fragment reassembly time exceeded" with host unreachable messages might cause unjustified connection resets which are now easily triggered as UFO has been removed, because, in turn, sending large buffers triggers IP fragmentation. The issue can be easily reproduced by running a lot of UDP streams which is likely to trigger IP fragmentation: # start netserver in the test namespace ip netns add test ip netns exec test netserver # create a VETH pair ip link add name veth0 type veth peer name veth0 netns test ip link set veth0 up ip -n test link set veth0 up for i in $(seq 20 29); do # assign addresses to both ends ip addr add dev veth0 192.168.$i.1/24 ip -n test addr add dev veth0 192.168.$i.2/24 # start the traffic netperf -L 192.168.$i.1 -H 192.168.$i.2 -t UDP_STREAM -l 0 & done # wait send_data: data send error: No route to host (errno 113) netperf: send_omni: send_data failed: No route to host We need to differentiate instead: if fragment reassembly time exceeded is reported, we need to silently drop the packet, if time to live exceeded is reported, maintain the current behaviour. In both cases increment the related error count "icmpInTimeExcds". While at it, fix a typo in a comment, and convert the if statement into a switch to mate it more readable. Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 681e33998e03..3c1570d3e22f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -782,7 +782,7 @@ static bool icmp_tag_validation(int proto) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and * ICMP_PARAMETERPROB. */ @@ -810,7 +810,8 @@ static bool icmp_unreach(struct sk_buff *skb) if (iph->ihl < 5) /* Mangled header, drop. */ goto out_err; - if (icmph->type == ICMP_DEST_UNREACH) { + switch (icmph->type) { + case ICMP_DEST_UNREACH: switch (icmph->code & 15) { case ICMP_NET_UNREACH: case ICMP_HOST_UNREACH: @@ -846,8 +847,16 @@ static bool icmp_unreach(struct sk_buff *skb) } if (icmph->code > NR_ICMP_UNREACH) goto out; - } else if (icmph->type == ICMP_PARAMETERPROB) + break; + case ICMP_PARAMETERPROB: info = ntohl(icmph->un.gateway) >> 24; + break; + case ICMP_TIME_EXCEEDED: + __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS); + if (icmph->code == ICMP_EXC_FRAGTIME) + goto out; + break; + } /* * Throw it at our lower layers From 52a76235d0c4dd259cd0df503afed4757c04ba1d Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 13 Oct 2017 10:58:36 +0100 Subject: [PATCH 0954/2177] net: stmmac: Use correct values in TQS/RQS fields Currently we are using all the available fifo size in RQS and TQS fields. 
This will not work correctly in multi-queues IP's because total fifo size must be splitted to the enabled queues. Correct this by computing the available fifo size per queue and setting the right value in TQS and RQS fields. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 ++- .../net/ethernet/stmicro/stmmac/dwmac4_dma.c | 15 ++++++++----- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 22 +++++++++++++++++-- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e82b4b70b7be..c26c8a7f957f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -443,7 +443,8 @@ struct stmmac_dma_ops { int rxfifosz); void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel, int fifosz); - void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel); + void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel, + int fifosz); /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index e84831e1b63b..898849bbc7d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -271,9 +271,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, } static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, - u32 channel) + u32 channel, int fifosz) { u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel)); + unsigned int tqs = fifosz / 256 - 1; if (mode == SF_DMA_MODE) { pr_debug("GMAC: enable TX store and forward mode\n"); @@ -306,12 +307,14 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, * For an IP with DWC_EQOS_NUM_TXQ > 1, the fields TXQEN and TQS are R/W * with reset values: TXQEN off, TQS 256 bytes. * - * Write the bits in both cases, since it will have no effect when RO. - * For DWC_EQOS_NUM_TXQ > 1, the top bits in MTL_OP_MODE_TQS_MASK might - * be RO, however, writing the whole TQS field will result in a value - * equal to DWC_EQOS_TXFIFO_SIZE, just like for DWC_EQOS_NUM_TXQ == 1. + * TXQEN must be written for multi-channel operation and TQS must + * reflect the available fifo size per queue (total fifo size / number + * of enabled queues). 
*/ - mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK; + mtl_tx_op |= MTL_OP_MODE_TXQEN; + mtl_tx_op &= ~MTL_OP_MODE_TQS_MASK; + mtl_tx_op |= tqs << MTL_OP_MODE_TQS_SHIFT; + writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel)); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f41661a04f23..edf245b8bce3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1750,12 +1750,19 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 rx_channels_count = priv->plat->rx_queues_to_use; u32 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; + int txfifosz = priv->plat->tx_fifo_size; u32 txmode = 0; u32 rxmode = 0; u32 chan = 0; if (rxfifosz == 0) rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + /* Adjust for real per queue fifo size */ + rxfifosz /= rx_channels_count; + txfifosz /= tx_channels_count; if (priv->plat->force_thresh_dma_mode) { txmode = tc; @@ -1783,7 +1790,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) rxfifosz); for (chan = 0; chan < tx_channels_count; chan++) - priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan); + priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, + txfifosz); } else { priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, rxfifosz); @@ -1946,15 +1954,25 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan) static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, u32 rxmode, u32 chan) { + u32 rx_channels_count = priv->plat->rx_queues_to_use; + u32 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; + int txfifosz = priv->plat->tx_fifo_size; if (rxfifosz == 0) rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + /* Adjust for real per queue fifo size */ + rxfifosz /= rx_channels_count; + txfifosz /= tx_channels_count; if (priv->synopsys_id >= DWMAC_CORE_4_00) { priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, rxfifosz); - priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan); + priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, + txfifosz); } else { priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, rxfifosz); From a0daae13776994cf90e9a7bc81cd8e4ad3959093 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 13 Oct 2017 10:58:37 +0100 Subject: [PATCH 0955/2177] net: stmmac: Disable flow ctrl for RX AVB queues and really enable TX AVB queues Flow control must be disabled for AVB enabled queues and TX AVB queues must be enabled by setting BIT(2) of TXQEN. Correct this by passing the queue mode to DMA callbacks and by checking in these functions wether we are in AVB performing the necessary adjustments. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 4 ++-- drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 2 ++ .../net/ethernet/stmicro/stmmac/dwmac4_dma.c | 16 +++++++++---- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 +++++++++++++------ 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index c26c8a7f957f..e1e5ac053760 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -442,9 +442,9 @@ struct stmmac_dma_ops { void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode, int rxfifosz); void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel, - int fifosz); + int fifosz, u8 qmode); void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel, - int fifosz); + int fifosz, u8 qmode); /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d74cedf2a397..aeda3ab2d761 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -225,6 +225,8 @@ enum power_event { #define MTL_CHAN_RX_DEBUG(x) (MTL_CHANX_BASE_ADDR(x) + 0x38) #define MTL_OP_MODE_RSF BIT(5) +#define MTL_OP_MODE_TXQEN_MASK GENMASK(3, 2) +#define MTL_OP_MODE_TXQEN_AV BIT(2) #define MTL_OP_MODE_TXQEN BIT(3) #define MTL_OP_MODE_TSF BIT(1) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 898849bbc7d4..c110f6850ffa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -191,7 +191,7 @@ static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan) } static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, - u32 channel, int fifosz) + u32 channel, int fifosz, u8 qmode) { unsigned int rqs = fifosz / 256 - 1; u32 mtl_rx_op, mtl_rx_int; @@ -218,8 +218,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, mtl_rx_op &= ~MTL_OP_MODE_RQS_MASK; mtl_rx_op |= rqs << MTL_OP_MODE_RQS_SHIFT; - /* enable flow control only if each channel gets 4 KiB or more FIFO */ - if (fifosz >= 4096) { + /* Enable flow control only if each channel gets 4 KiB or more FIFO and + * only if channel is not an AVB channel. + */ + if ((fifosz >= 4096) && (qmode != MTL_QUEUE_AVB)) { unsigned int rfd, rfa; mtl_rx_op |= MTL_OP_MODE_EHFC; @@ -271,7 +273,7 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, } static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, - u32 channel, int fifosz) + u32 channel, int fifosz, u8 qmode) { u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel)); unsigned int tqs = fifosz / 256 - 1; @@ -311,7 +313,11 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, * reflect the available fifo size per queue (total fifo size / number * of enabled queues). 
*/ - mtl_tx_op |= MTL_OP_MODE_TXQEN; + mtl_tx_op &= ~MTL_OP_MODE_TXQEN_MASK; + if (qmode != MTL_QUEUE_AVB) + mtl_tx_op |= MTL_OP_MODE_TXQEN; + else + mtl_tx_op |= MTL_OP_MODE_TXQEN_AV; mtl_tx_op &= ~MTL_OP_MODE_TQS_MASK; mtl_tx_op |= tqs << MTL_OP_MODE_TQS_SHIFT; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index edf245b8bce3..0e1b0a3d7b76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1754,6 +1754,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 txmode = 0; u32 rxmode = 0; u32 chan = 0; + u8 qmode = 0; if (rxfifosz == 0) rxfifosz = priv->dma_cap.rx_fifo_size; @@ -1785,13 +1786,19 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) /* configure all channels */ if (priv->synopsys_id >= DWMAC_CORE_4_00) { - for (chan = 0; chan < rx_channels_count; chan++) - priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, - rxfifosz); + for (chan = 0; chan < rx_channels_count; chan++) { + qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; + + priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, + rxfifosz, qmode); + } + + for (chan = 0; chan < tx_channels_count; chan++) { + qmode = priv->plat->tx_queues_cfg[chan].mode_to_use; - for (chan = 0; chan < tx_channels_count; chan++) priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, - txfifosz); + txfifosz, qmode); + } } else { priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, rxfifosz); @@ -1954,6 +1961,8 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan) static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, u32 rxmode, u32 chan) { + u8 rxqmode = priv->plat->rx_queues_cfg[chan].mode_to_use; + u8 txqmode = priv->plat->tx_queues_cfg[chan].mode_to_use; u32 rx_channels_count = priv->plat->rx_queues_to_use; u32 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; @@ -1970,9 +1979,9 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, if (priv->synopsys_id >= DWMAC_CORE_4_00) { priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, - rxfifosz); + rxfifosz, rxqmode); priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, - txfifosz); + txfifosz, txqmode); } else { priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, rxfifosz); From 1bdec44955edc22fb840f5965987d2972307dcc9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:07 -0700 Subject: [PATCH 0956/2177] bpf: verifier: set reg_type on context accesses in second pass Use a simplified is_valid_access() callback when verifier is used for program analysis by non-host JITs. This allows us to teach the verifier about packet start and packet end offsets for direct packet access. We can extend the callback as needed but for most packet processing needs there isn't much more the offloads may require. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2cdbcc4f8f6b..9755279d94cb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -813,6 +813,36 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, return err; } +static bool analyzer_is_valid_access(struct bpf_verifier_env *env, int off, + struct bpf_insn_access_aux *info) +{ + switch (env->prog->type) { + case BPF_PROG_TYPE_XDP: + switch (off) { + case offsetof(struct xdp_buff, data): + info->reg_type = PTR_TO_PACKET; + return true; + case offsetof(struct xdp_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + return true; + } + return false; + case BPF_PROG_TYPE_SCHED_CLS: + switch (off) { + case offsetof(struct sk_buff, data): + info->reg_type = PTR_TO_PACKET; + return true; + case offsetof(struct sk_buff, cb) + + offsetof(struct bpf_skb_data_end, data_end): + info->reg_type = PTR_TO_PACKET_END; + return true; + } + return false; + default: + return false; + } +} + /* check access to 'struct bpf_context' fields. Supports fixed offsets only */ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) @@ -821,12 +851,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, .reg_type = *reg_type, }; - /* for analyzer ctx accesses are already validated and converted */ - if (env->analyzer_ops) - return 0; - - if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t, &info)) { + if (env->analyzer_ops) { + if (analyzer_is_valid_access(env, off, &info)) { + *reg_type = info.reg_type; + return 0; + } + } else if (env->prog->aux->ops->is_valid_access && + env->prog->aux->ops->is_valid_access(off, size, t, &info)) { /* A non zero info.ctx_field_size indicates that this field is a * candidate for later verifier transformation to load the whole * field and then apply a mask when accessed with a narrower From bc8c80a8c978d24b2746dc7d9a8cef65ae82be3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:08 -0700 Subject: [PATCH 0957/2177] nfp: bpf: reorder arguments to emit_ld_field_any() ld_field instruction has the following format in NFP assembler: ld_field[dst, 1000, src, <<24] reoder parameters to emit_ld_field_any() to make it closer to the familiar assembler order. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 13148f30fc4c..cf8a6eb3ec99 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -402,8 +402,8 @@ __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, } static void -emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, - swreg dst, u8 bmask, swreg src, bool zero) +emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, + enum shf_sc sc, u8 shift, bool zero) { struct nfp_insn_re_regs reg; int err; @@ -424,7 +424,7 @@ static void emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, enum shf_sc sc, u8 shift) { - emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); + emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); } static void emit_nop(struct nfp_prog *nfp_prog) From 8283737065b2dab480cd10e00e6f8abbcb62b5b0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:09 -0700 Subject: [PATCH 0958/2177] nfp: bpf: add missing return in jne_imm optimization We optimize comparisons to immediate 0 as if (reg.lo | reg.hi). The early return statement was missing, however, which means we would generate two comparisons - optimized one followed by a normal 2x 32 bit compare. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index cf8a6eb3ec99..5ac834e91aed 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1191,6 +1191,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); emit_br(nfp_prog, BR_BNE, insn->off, 0); + return 0; } tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); From 26fa818dc07c649fcb37674580ebd5a3c7cae66c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:10 -0700 Subject: [PATCH 0959/2177] nfp: bpf: fix compare instructions Now that we have BPF assemebler support in LLVM 6 we can easily test all compare instructions (LLVM 4 didn't generate most of them from C). Fix the compare to immediates and refactor the order of compare to regs to make sure they both follow the same pattern. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 21 +++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 5ac834e91aed..e970f284c8a4 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -720,7 +720,10 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, enum br_mask br_mask, bool swap) { const struct bpf_insn *insn = &meta->insn; - u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2; + u8 areg, breg; + + areg = insn->dst_reg * 2; + breg = insn->src_reg * 2; if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -1129,22 +1132,22 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true); } static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false); } static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false); + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); } static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true); + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); } static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1227,22 +1230,22 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true); } static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false); } static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false); + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); } static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true); + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); } static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) From c000dfb5e29a2abaf303cf90502cb68227f29fae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:11 -0700 Subject: [PATCH 0960/2177] nfp: bpf: add mov helper Register move operation is encoded as alu no op. This means that one has to specify number of unused/none parameters to the emit_alu(). Add a helper. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 31 ++++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index e970f284c8a4..4f7cfa6adfc1 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -504,9 +504,14 @@ wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, FIELD_PREP(OP_BR_SPECIAL, special); } +static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) +{ + emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); +} + static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) { - emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src)); + wrp_mov(nfp_prog, reg_both(dst), reg_b(src)); } static int @@ -556,8 +561,7 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, reg_xfer(0), SHF_SC_R_SHF, shift * 8); else for (; i * 4 < size; i++) - emit_alu(nfp_prog, reg_both(i), - reg_none(), ALU_OP_NONE, reg_xfer(i)); + wrp_mov(nfp_prog, reg_both(i), reg_xfer(i)); if (i < 2) wrp_immed(nfp_prog, reg_both(1), 0); @@ -1032,8 +1036,8 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off == offsetof(struct sk_buff, len)) - emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), - reg_none(), ALU_OP_NONE, plen_reg(nfp_prog)); + wrp_mov(nfp_prog, + reg_both(meta->insn.dst_reg * 2), plen_reg(nfp_prog)); else return -EOPNOTSUPP; @@ -1048,7 +1052,7 @@ static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) meta->insn.off != offsetof(struct xdp_md, data_end)) return -EOPNOTSUPP; - emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, pptr_reg(nfp_prog)); + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); if (meta->insn.off == offsetof(struct xdp_md, data)) return 0; @@ -1438,8 +1442,7 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) * ife + tx 0x24 -> redir, count as stat1 */ emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); @@ -1466,8 +1469,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); /* Target for normal exits */ @@ -1476,8 +1478,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) /* if R0 > 7 jump to abort */ emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); wrp_immed(nfp_prog, reg_b(2), 0x41221211); wrp_immed(nfp_prog, reg_b(3), 0x41001211); @@ -1514,8 +1515,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); /* Target for normal exits */ @@ 
-1536,8 +1536,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); } From 3119d1fd46464c61c80731c3a9f40eee4434fc1d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:12 -0700 Subject: [PATCH 0961/2177] nfp: bpf: implement byte swap instruction Implement byte swaps with rotations, shifts and byte loads. Remember to clear upper parts of the 64 bit registers. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 38 ++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 4f7cfa6adfc1..5e8a6b766790 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -746,6 +746,14 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return 0; } +static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) +{ + emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in, + SHF_SC_R_ROT, 8); + emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out), + SHF_SC_R_ROT, 16); +} + /* --- Callbacks --- */ static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { @@ -982,6 +990,35 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 gpr = insn->dst_reg * 2; + + switch (insn->imm) { + case 16: + emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr), + SHF_SC_R_ROT, 8); + emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr), + SHF_SC_R_SHF, 16); + + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + break; + case 32: + wrp_end32(nfp_prog, reg_a(gpr), gpr); + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + break; + case 64: + wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1)); + + wrp_end32(nfp_prog, reg_a(gpr), gpr + 1); + wrp_end32(nfp_prog, imm_a(nfp_prog), gpr); + break; + } + + return 0; +} + static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), @@ -1297,6 +1334,7 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_ALU | BPF_END | BPF_X] = end_reg32, [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, [BPF_LD | BPF_ABS | BPF_B] = data_ld1, [BPF_LD | BPF_ABS | BPF_H] = data_ld2, From 0f6cf4ddf63fa4d645c36d96ed1092fe7a0a8d0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:13 -0700 Subject: [PATCH 0962/2177] nfp: bpf: support BPF offload only on little endian eBPF is host-endian specific. Translating both BE and LE eBPF to the NFP is feasible, but would require quite a bit of indirection. The fact that I don't have access to any BE hosts that would fit a 25G/40G/100G NIC is also limiting my ability to test big endian. For now restrict the offload to little endian hosts only. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 074726980994..6e74f8db1cc1 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -42,9 +42,11 @@ static bool nfp_net_ebpf_capable(struct nfp_net *nn) { +#ifdef __LITTLE_ENDIAN if (nn->cap & NFP_NET_CFG_CTRL_BPF && nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI) return true; +#endif return false; } From 943c57b97cde2ce0806e59b553c650c9889d8b69 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:14 -0700 Subject: [PATCH 0963/2177] nfp: bpf: fix context accesses Sizes of fields in struct xdp_md/xdp_buff and some in sk_buff depend on target architecture. Take that into account and use struct xdp_buff, not struct xdp_md. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 49 ++++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 5e8a6b766790..4b62f5497728 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1070,47 +1070,56 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) meta->insn.src_reg * 2, true, 4); } -static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 size) { - if (meta->insn.off == offsetof(struct sk_buff, len)) + switch (meta->insn.off) { + case offsetof(struct sk_buff, len): + if (size != FIELD_SIZEOF(struct sk_buff, len)) + return -EOPNOTSUPP; wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), plen_reg(nfp_prog)); - else + break; + default: return -EOPNOTSUPP; + } + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); return 0; } -static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 size) { swreg dst = reg_both(meta->insn.dst_reg * 2); - if (meta->insn.off != offsetof(struct xdp_md, data) && - meta->insn.off != offsetof(struct xdp_md, data_end)) + if (size != sizeof(void *)) + return -EINVAL; + + switch (meta->insn.off) { + case offsetof(struct xdp_buff, data): + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); + break; + case offsetof(struct xdp_buff, data_end): + emit_alu(nfp_prog, dst, + plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); + break; + default: return -EOPNOTSUPP; + } - wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); - - if (meta->insn.off == offsetof(struct xdp_md, data)) - return 0; - - emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, plen_reg(nfp_prog)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); return 0; } static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - int ret; - if (nfp_prog->act == NN_ACT_XDP) - ret = mem_ldx4_xdp(nfp_prog, meta); + return mem_ldx_xdp(nfp_prog, meta, 4); else - ret = mem_ldx4_skb(nfp_prog, meta); - - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); - - return ret; + return mem_ldx_skb(nfp_prog, meta, 4); } static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) From 0a7939775f8546268206c1e8efe78218f3c18aae Mon Sep 17 00:00:00 2001 From: Jakub 
Kicinski Date: Thu, 12 Oct 2017 10:34:15 -0700 Subject: [PATCH 0964/2177] nfp: bpf: separate I/O from checks for legacy data load Move data load into a separate function and separate it from packet length checks of legacy I/O. This makes the code more readable and easier to reuse. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 77 ++++++++++---------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 4b62f5497728..3e173da16428 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -515,63 +515,66 @@ static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) } static int -construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, - u16 src, bool src_valid, u8 size) +data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) { unsigned int i; u16 shift, sz; - swreg tmp_reg; /* We load the value from the address indicated in @offset and then * shift out the data we don't need. Note: this is big endian! */ - sz = size < 4 ? 4 : size; + sz = max(size, 4); shift = size < 4 ? 4 - size : 0; - if (src_valid) { - /* Calculate the true offset (src_reg + imm) */ - tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); - emit_alu(nfp_prog, imm_both(nfp_prog), - reg_a(src), ALU_OP_ADD, tmp_reg); - /* Check packet length (size guaranteed to fit b/c it's u8) */ - emit_alu(nfp_prog, imm_a(nfp_prog), - imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); - emit_alu(nfp_prog, reg_none(), - plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); - /* Load data */ - emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, - pptr_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); - } else { - /* Check packet length */ - tmp_reg = ur_load_imm_any(nfp_prog, offset + size, - imm_a(nfp_prog)); - emit_alu(nfp_prog, reg_none(), - plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); - /* Load data */ - tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); - emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, - pptr_reg(nfp_prog), tmp_reg, sz - 1, true); - } + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pptr_reg(nfp_prog), offset, sz - 1, true); i = 0; if (shift) - emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE, + emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE, reg_xfer(0), SHF_SC_R_SHF, shift * 8); else for (; i * 4 < size; i++) - wrp_mov(nfp_prog, reg_both(i), reg_xfer(i)); + wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); if (i < 2) - wrp_immed(nfp_prog, reg_both(1), 0); + wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); return 0; } +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) +{ + swreg tmp_reg; + + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg); + + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + + /* Load data */ + return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); +} + static int 
construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) { - return construct_data_ind_ld(nfp_prog, offset, 0, false, size); + swreg tmp_reg; + + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + return data_ld(nfp_prog, tmp_reg, 0, size); } static void @@ -1055,19 +1058,19 @@ static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return construct_data_ind_ld(nfp_prog, meta->insn.imm, - meta->insn.src_reg * 2, true, 1); + meta->insn.src_reg * 2, 1); } static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return construct_data_ind_ld(nfp_prog, meta->insn.imm, - meta->insn.src_reg * 2, true, 2); + meta->insn.src_reg * 2, 2); } static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return construct_data_ind_ld(nfp_prog, meta->insn.imm, - meta->insn.src_reg * 2, true, 4); + meta->insn.src_reg * 2, 4); } static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, From 2ca71441f524b0a0cc01d8e51c875b00fbe31275 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:16 -0700 Subject: [PATCH 0965/2177] nfp: bpf: add support for direct packet access - read In direct packet access bound checks are already done, we can simply dereference the packet pointer. Verifier/parser logic needs to record pointer type. Note that although verifier does protect us from CTX vs other pointer changes we will also want to differentiate between PACKET vs MAP_VALUE or STACK, so we can add the check already. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 85 ++++++++++++++++++- drivers/net/ethernet/netronome/nfp/bpf/main.h | 3 + .../net/ethernet/netronome/nfp/bpf/verifier.c | 21 +++-- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 3 + drivers/net/ethernet/netronome/nfp/nfp_asm.h | 4 + 5 files changed, 105 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 3e173da16428..975d63fbc1d5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -543,6 +543,36 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) return 0; } +static int +data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, + u8 dst_gpr, int size) +{ + unsigned int i; + u8 mask, sz; + + /* We load the value from the address indicated in @offset and then + * mask out the data we don't need. Note: this is little endian! + */ + sz = max(size, 4); + mask = size < 4 ? 
GENMASK(size - 1, 0) : 0; + + emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, + reg_a(src_gpr), offset, sz / 4 - 1, true); + + i = 0; + if (mask) + emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask, + reg_xfer(0), SHF_SC_NONE, 0, true); + else + for (; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + + return 0; +} + static int construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) { @@ -1117,12 +1147,53 @@ static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return 0; } +static int +mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg tmp_reg; + + tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg, + meta->insn.dst_reg * 2, size); +} + +static int +mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_CTX) { + if (nfp_prog->act == NN_ACT_XDP) + return mem_ldx_xdp(nfp_prog, meta, size); + else + return mem_ldx_skb(nfp_prog, meta, size); + } + + if (meta->ptr.type == PTR_TO_PACKET) + return mem_ldx_data(nfp_prog, meta, size); + + return -EOPNOTSUPP; +} + +static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 1); +} + +static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 2); +} + static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - if (nfp_prog->act == NN_ACT_XDP) - return mem_ldx_xdp(nfp_prog, meta, 4); - else - return mem_ldx_skb(nfp_prog, meta, 4); + return mem_ldx(nfp_prog, meta, 4); +} + +static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 8); } static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1137,6 +1208,9 @@ static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { + if (meta->ptr.type == PTR_TO_PACKET) + return -EOPNOTSUPP; + if (nfp_prog->act == NN_ACT_XDP) return mem_stx4_xdp(nfp_prog, meta); return mem_stx4_skb(nfp_prog, meta); @@ -1354,7 +1428,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, + [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, [BPF_JMP | BPF_JA | BPF_K] = jump, [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index b7a112acbdb7..d77e88a45409 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -36,6 +36,7 @@ #include #include +#include #include #include @@ -96,6 +97,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); /** * struct nfp_insn_meta - BPF instruction wrapper * @insn: BPF instruction + * @ptr: pointer type for memory operations * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @skip: skip this instruction (optimized out) @@ -104,6 +106,7 @@ typedef int 
(*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); */ struct nfp_insn_meta { struct bpf_insn insn; + struct bpf_reg_state ptr; unsigned int off; unsigned short n; bool skip; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 5b783a91b115..e361c0e3b788 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -112,12 +112,19 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, } static int -nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, - const struct bpf_verifier_env *env, u8 reg) +nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + const struct bpf_verifier_env *env, u8 reg) { - if (env->cur_state.regs[reg].type != PTR_TO_CTX) + if (env->cur_state.regs[reg].type != PTR_TO_CTX && + env->cur_state.regs[reg].type != PTR_TO_PACKET) return -EINVAL; + if (meta->ptr.type != NOT_INIT && + meta->ptr.type != env->cur_state.regs[reg].type) + return -EINVAL; + + meta->ptr = env->cur_state.regs[reg]; + return 0; } @@ -145,11 +152,11 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) return nfp_bpf_check_exit(priv->prog, env); if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) - return nfp_bpf_check_ctx_ptr(priv->prog, env, - meta->insn.src_reg); + return nfp_bpf_check_ptr(priv->prog, meta, env, + meta->insn.src_reg); if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) - return nfp_bpf_check_ctx_ptr(priv->prog, env, - meta->insn.dst_reg); + return nfp_bpf_check_ptr(priv->prog, meta, env, + meta->insn.dst_reg); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index de76e7444fc2..7cae99b3e00a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -42,6 +42,9 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { [CMD_TGT_WRITE8] = { 0x00, 0x42 }, [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ32] = { 0x00, 0x5c }, + [CMD_TGT_READ32_LE] = { 0x01, 0x5c }, + [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c }, [CMD_TGT_READ_LE] = { 0x01, 0x40 }, [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index c4c18dd5630a..e3df7a26724f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -153,6 +153,7 @@ enum shf_op { enum shf_sc { SHF_SC_R_ROT = 0, + SHF_SC_NONE = SHF_SC_R_ROT, SHF_SC_R_SHF = 1, SHF_SC_L_SHF = 2, SHF_SC_R_DSHF = 3, @@ -217,6 +218,9 @@ struct cmd_tgt_act { enum cmd_tgt_map { CMD_TGT_READ8, CMD_TGT_WRITE8, + CMD_TGT_READ32, + CMD_TGT_READ32_LE, + CMD_TGT_READ32_SWAP, CMD_TGT_READ_LE, CMD_TGT_READ_SWAP_LE, __CMD_TGT_MAP_SIZE, From e663fe3863ad20c5e6a84a1a1d47aff8e71f583f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:17 -0700 Subject: [PATCH 0966/2177] nfp: bpf: direct packet access - write This patch adds ability to write packet contents using pre-validated packet pointers (direct packet access). Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 114 +++++++++++++++++-- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_asm.h | 2 +- 3 files changed, 109 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 975d63fbc1d5..139a4ebdc774 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -607,6 +607,35 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) return data_ld(nfp_prog, tmp_reg, 0, size); } +static int +data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, + u8 src_gpr, u8 size) +{ + unsigned int i; + + for (i = 0; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); + + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(dst_gpr), offset, size - 1, true); + + return 0; +} + +static int +data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, + u64 imm, u8 size) +{ + wrp_immed(nfp_prog, reg_xfer(0), imm); + if (size == 8) + wrp_immed(nfp_prog, reg_xfer(1), imm >> 32); + + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(dst_gpr), offset, size - 1, true); + + return 0; +} + static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { @@ -1196,24 +1225,88 @@ static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return mem_ldx(nfp_prog, meta, 8); } -static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int +mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) { + u64 imm = meta->insn.imm; /* sign extend */ + swreg off_reg; + + off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, + imm, size); +} + +static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_PACKET) + return mem_st_data(nfp_prog, meta, size); + return -EOPNOTSUPP; } -static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { + return mem_st(nfp_prog, meta, 1); +} + +static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 2); +} + +static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 4); +} + +static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 8); +} + +static int +mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg off_reg; + + off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, + meta->insn.src_reg * 2, size); +} + +static int +mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_PACKET) + return mem_stx_data(nfp_prog, meta, size); + return -EOPNOTSUPP; } +static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 1); +} + +static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 2); +} + static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - if 
(meta->ptr.type == PTR_TO_PACKET) - return -EOPNOTSUPP; + return mem_stx(nfp_prog, meta, 4); +} - if (nfp_prog->act == NN_ACT_XDP) - return mem_stx4_xdp(nfp_prog, meta); - return mem_stx4_skb(nfp_prog, meta); +static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 8); } static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1432,7 +1525,14 @@ static const instr_cb_t instr_cb[256] = { [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, + [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, + [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, + [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, + [BPF_ST | BPF_MEM | BPF_B] = mem_st1, + [BPF_ST | BPF_MEM | BPF_H] = mem_st2, + [BPF_ST | BPF_MEM | BPF_W] = mem_st4, + [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, [BPF_JMP | BPF_JA | BPF_K] = jump, [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index 7cae99b3e00a..830f6de25f47 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -40,7 +40,7 @@ #include "nfp_asm.h" const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { - [CMD_TGT_WRITE8] = { 0x00, 0x42 }, + [CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 }, [CMD_TGT_READ8] = { 0x01, 0x43 }, [CMD_TGT_READ32] = { 0x00, 0x5c }, [CMD_TGT_READ32_LE] = { 0x01, 0x5c }, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index e3df7a26724f..c26aa7e4a839 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -217,7 +217,7 @@ struct cmd_tgt_act { enum cmd_tgt_map { CMD_TGT_READ8, - CMD_TGT_WRITE8, + CMD_TGT_WRITE8_SWAP, CMD_TGT_READ32, CMD_TGT_READ32_LE, CMD_TGT_READ32_SWAP, From bfddbc8adcd471806f2369d347a958d11e80f53b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Oct 2017 10:34:18 -0700 Subject: [PATCH 0967/2177] nfp: bpf: support direct packet access in TC Add support for direct packet access in TC, note that because writing the packet will cause the verifier to generate a csum fixup prologue we won't be able to offload packet writes from TC, just yet, only the reads will work. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 139a4ebdc774..23fb11a41cc4 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1135,12 +1135,25 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 size) { + swreg dst = reg_both(meta->insn.dst_reg * 2); + switch (meta->insn.off) { case offsetof(struct sk_buff, len): if (size != FIELD_SIZEOF(struct sk_buff, len)) return -EOPNOTSUPP; - wrp_mov(nfp_prog, - reg_both(meta->insn.dst_reg * 2), plen_reg(nfp_prog)); + wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); + break; + case offsetof(struct sk_buff, data): + if (size != sizeof(void *)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); + break; + case offsetof(struct sk_buff, cb) + + offsetof(struct bpf_skb_data_end, data_end): + if (size != sizeof(void *)) + return -EOPNOTSUPP; + emit_alu(nfp_prog, dst, + plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); break; default: return -EOPNOTSUPP; From ae904beaea48d369205c81dbffecc23afcec46de Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Mon, 14 Aug 2017 11:23:27 +0300 Subject: [PATCH 0968/2177] net/mlx5: File renaming towards ptp core implementation en_clock.c renamed clock.c and moved to lib/ as first step towards relocating code to core part of the driver to allow sharing between Ethernet and Infiniband. Signed-off-by: Feras Daoud Signed-off-by: Eitan Rabin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 2 +- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 4 ++-- .../ethernet/mellanox/mlx5/core/{en_clock.c => lib/clock.c} | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename drivers/net/ethernet/mellanox/mlx5/core/{en_clock.c => lib/clock.c} (100%) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index fdaef00465d7..25deaa5a534c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -6,6 +6,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" depends on MAY_USE_DEVLINK depends on PCI + imply PTP_1588_CLOCK default n ---help--- Core driver for low level functionality of the ConnectX-4 and @@ -29,7 +30,6 @@ config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m - imply PTP_1588_CLOCK default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 87a3099808f3..d9621b2152d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -4,7 +4,7 @@ subdir-ccflags-y += -I$(src) mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \ diag/fs_tracepoint.o mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o @@ -13,7 +13,7 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ fpga/ipsec.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ - en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tx.o en_rx.o en_rx_am.o en_txrx.o vxlan.o \ en_arfs.o en_fs_ethtool.o en_selftest.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c similarity index 100% rename from drivers/net/ethernet/mellanox/mlx5/core/en_clock.c rename to drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c From 7c39afb394c79e72c3795b4a42d55155b34ee073 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Tue, 15 Aug 2017 13:46:04 +0300 Subject: [PATCH 0969/2177] net/mlx5: PTP code migration to driver core section PTP code is moved to core section of mlx5 driver in order to share it between ethernet and infiniband. This movement involves the following changes: - Change mlx5e_ prefix to be mlx5_ - Add clock structs to Core - Add clock object to mlx5_core_dev - Call Init/Uninit clock from core init/cleanup - Rename mlx5e_tstamp to be mlx5_clock Signed-off-by: Feras Daoud Signed-off-by: Eitan Rabin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 39 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 7 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 95 ++- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 17 +- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 6 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 +- .../ethernet/mellanox/mlx5/core/lib/clock.c | 540 ++++++++---------- .../ethernet/mellanox/mlx5/core/lib/clock.h | 51 ++ .../net/ethernet/mellanox/mlx5/core/main.c | 4 + .../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + include/linux/mlx5/driver.h | 24 + 12 files changed, 412 insertions(+), 378 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc13d3dbd366..2059122eb089 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -267,28 +267,6 @@ struct mlx5e_dcbx { }; #endif -#define MAX_PIN_NUM 8 -struct mlx5e_pps { - u8 pin_caps[MAX_PIN_NUM]; - struct work_struct out_work; - u64 start[MAX_PIN_NUM]; - u8 enabled; -}; - -struct mlx5e_tstamp { - rwlock_t lock; - struct cyclecounter cycles; - struct timecounter clock; - struct hwtstamp_config hwtstamp_config; - u32 nominal_c_mult; - unsigned long overflow_period; - struct delayed_work overflow_work; - struct mlx5_core_dev *mdev; - struct ptp_clock *ptp; - struct ptp_clock_info ptp_info; - struct mlx5e_pps pps_info; -}; - enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_AM, @@ 
-375,9 +353,10 @@ struct mlx5e_txqsq { u8 min_inline_mode; u16 edge; struct device *pdev; - struct mlx5e_tstamp *tstamp; __be32 mkey_be; unsigned long state; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; /* control path */ struct mlx5_wq_ctrl wq_ctrl; @@ -543,10 +522,11 @@ struct mlx5e_rq { struct mlx5e_channel *channel; struct device *pdev; struct net_device *netdev; - struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; struct mlx5e_page_cache page_cache; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_post_rx_wqes post_wqes; @@ -588,7 +568,7 @@ struct mlx5e_channel { /* control */ struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; - struct mlx5e_tstamp *tstamp; + struct hwtstamp_config *tstamp; int ix; }; @@ -789,7 +769,7 @@ struct mlx5e_priv { struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_stats stats; - struct mlx5e_tstamp tstamp; + struct hwtstamp_config tstamp; u16 q_counter; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; @@ -873,12 +853,6 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); -void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, - struct skb_shared_hwtstamps *hwts); -void mlx5e_timestamp_init(struct mlx5e_priv *priv); -void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); -void mlx5e_pps_event_handler(struct mlx5e_priv *priv, - struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); @@ -889,6 +863,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_timestamp_set(struct mlx5e_priv *priv); struct mlx5e_redirect_rqt_param { bool is_rss; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d12e9fc0d76b..81a112e40fe3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1417,14 +1417,15 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info) { + struct mlx5_core_dev *mdev = priv->mdev; int ret; ret = ethtool_op_get_ts_info(priv->netdev, info); if (ret) return ret; - info->phc_index = priv->tstamp.ptp ? - ptp_clock_index(priv->tstamp.ptp) : -1; + info->phc_index = mdev->clock.ptp ? 
+ ptp_clock_index(mdev->clock.ptp) : -1; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return 0; @@ -1754,7 +1755,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; - if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) { + if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cc11bbbd0309..6df00dd9745a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -373,8 +373,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; - struct ptp_clock_event ptp_event; - struct mlx5_eqe *eqe = NULL; if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) return; @@ -384,14 +382,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; - case MLX5_DEV_EVENT_PPS: - eqe = (struct mlx5_eqe *)param; - ptp_event.index = eqe->data.pps.pin; - ptp_event.timestamp = - timecounter_cyc2time(&priv->tstamp.clock, - be64_to_cpu(eqe->data.pps.time_stamp)); - mlx5e_pps_event_handler(vpriv, &ptp_event); - break; default: break; } @@ -585,6 +575,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; rq->channel = c; rq->ix = c->ix; rq->mdev = mdev; @@ -1123,6 +1114,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->pdev = c->pdev; sq->tstamp = c->tstamp; + sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->channel = c; sq->txq_ix = txq_ix; @@ -2678,6 +2670,12 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, netif_carrier_on(netdev); } +void mlx5e_timestamp_set(struct mlx5e_priv *priv) +{ + priv->tstamp.tx_type = HWTSTAMP_TX_OFF; + priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -2693,7 +2691,7 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); - mlx5e_timestamp_init(priv); + mlx5e_timestamp_set(priv); if (priv->profile->update_stats) queue_delayed_work(priv->wq, &priv->update_stats_work, 0); @@ -2731,7 +2729,6 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); - mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_deactivate_priv_channels(priv); mlx5e_close_channels(&priv->channels); @@ -3403,6 +3400,80 @@ out: return err; } +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + /* Reset CQE compression to Admin default */ + 
mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + /* Disable CQE compression */ + netdev_warn(priv->netdev, "Disabling cqe compression"); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + if (err) { + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); + mutex_unlock(&priv->state_lock); + return err; + } + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + mutex_unlock(&priv->state_lock); + return -ERANGE; + } + + memcpy(&priv->tstamp, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config *cfg = &priv->tstamp; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; +} + static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 15a1687483cc..7e3bfe62ef6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -42,10 +42,11 @@ #include "en_rep.h" #include "ipoib/ipoib.h" #include "en_accel/ipsec_rxtx.h" +#include "lib/clock.h" -static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) +static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { - return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; + return config->rx_filter == HWTSTAMP_FILTER_ALL; } static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, @@ -661,7 +662,6 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; @@ -676,8 +676,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, rq->stats.lro_bytes += cqe_bcnt; } - if (unlikely(mlx5e_rx_hw_stamp(tstamp))) - mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = + mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); @@ -1163,7 +1164,6 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - struct mlx5e_tstamp *tstamp = rq->tstamp; char *pseudo_header; u8 *dgid; u8 g; @@ -1188,8 +1188,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); - if (unlikely(mlx5e_rx_hw_stamp(tstamp))) - mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + if 
(unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = + mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 1d6925d4369a..a7c208a1ad83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -35,6 +35,7 @@ #include "en.h" #include "ipoib/ipoib.h" #include "en_accel/ipsec_rxtx.h" +#include "lib/clock.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ @@ -452,8 +453,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) SKBTX_HW_TSTAMP)) { struct skb_shared_hwtstamps hwts = {}; - mlx5e_fill_hwstamp(sq->tstamp, - get_cqe_ts(cqe), &hwts); + hwts.hwtstamp = + mlx5_timecounter_cyc2time(sq->clock, + get_cqe_ts(cqe)); skb_tstamp_tx(skb, &hwts); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index fc606bfd1d6e..60771865c99c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -491,8 +491,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_PPS_EVENT: - if (dev->event) - dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe); + mlx5_pps_event(dev, eqe); break; case MLX5_EVENT_TYPE_FPGA_ERROR: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 145e392ab849..14dfb577691b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -404,7 +404,7 @@ static int mlx5i_open(struct net_device *netdev) mlx5e_refresh_tirs(priv, false); mlx5e_activate_priv_channels(priv); - mlx5e_timestamp_init(priv); + mlx5e_timestamp_set(priv); mutex_unlock(&priv->state_lock); return 0; @@ -429,7 +429,6 @@ static int mlx5i_close(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); - mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_deactivate_priv_channels(priv); mlx5e_close_channels(&priv->channels); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 84dd63e74041..fa8aed62b231 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -34,250 +34,164 @@ #include "en.h" enum { - MLX5E_CYCLES_SHIFT = 23 + MLX5_CYCLES_SHIFT = 23 }; enum { - MLX5E_PIN_MODE_IN = 0x0, - MLX5E_PIN_MODE_OUT = 0x1, + MLX5_PIN_MODE_IN = 0x0, + MLX5_PIN_MODE_OUT = 0x1, }; enum { - MLX5E_OUT_PATTERN_PULSE = 0x0, - MLX5E_OUT_PATTERN_PERIODIC = 0x1, + MLX5_OUT_PATTERN_PULSE = 0x0, + MLX5_OUT_PATTERN_PERIODIC = 0x1, }; enum { - MLX5E_EVENT_MODE_DISABLE = 0x0, - MLX5E_EVENT_MODE_REPETETIVE = 0x1, - MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, + MLX5_EVENT_MODE_DISABLE = 0x0, + MLX5_EVENT_MODE_REPETETIVE = 0x1, + MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2, }; enum { - MLX5E_MTPPS_FS_ENABLE = BIT(0x0), - MLX5E_MTPPS_FS_PATTERN = BIT(0x2), - MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3), - MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4), - MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), - MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), + MLX5_MTPPS_FS_ENABLE = BIT(0x0), + MLX5_MTPPS_FS_PATTERN = BIT(0x2), + MLX5_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + 
MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), }; -void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, - struct skb_shared_hwtstamps *hwts) +static u64 read_internal_timer(const struct cyclecounter *cc) { - u64 nsec; + struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); - read_lock(&tstamp->lock); - nsec = timecounter_cyc2time(&tstamp->clock, timestamp); - read_unlock(&tstamp->lock); - - hwts->hwtstamp = ns_to_ktime(nsec); + return mlx5_read_internal_timer(mdev) & cc->mask; } -static u64 mlx5e_read_internal_timer(const struct cyclecounter *cc) +static void mlx5_pps_out(struct work_struct *work) { - struct mlx5e_tstamp *tstamp = container_of(cc, struct mlx5e_tstamp, - cycles); - - return mlx5_read_internal_timer(tstamp->mdev) & cc->mask; -} - -static void mlx5e_pps_out(struct work_struct *work) -{ - struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps, - out_work); - struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp, - pps_info); + struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, + out_work); + struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, + pps_info); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; unsigned long flags; int i; - for (i = 0; i < tstamp->ptp_info.n_pins; i++) { + for (i = 0; i < clock->ptp_info.n_pins; i++) { u64 tstart; - write_lock_irqsave(&tstamp->lock, flags); - tstart = tstamp->pps_info.start[i]; - tstamp->pps_info.start[i] = 0; - write_unlock_irqrestore(&tstamp->lock, flags); + write_lock_irqsave(&clock->lock, flags); + tstart = clock->pps_info.start[i]; + clock->pps_info.start[i] = 0; + write_unlock_irqrestore(&clock->lock, flags); if (!tstart) continue; MLX5_SET(mtpps_reg, in, pin, i); MLX5_SET64(mtpps_reg, in, time_stamp, tstart); - MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP); - mlx5_set_mtpps(tstamp->mdev, in, sizeof(in)); + MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP); + mlx5_set_mtpps(mdev, in, sizeof(in)); } } -static void mlx5e_timestamp_overflow(struct work_struct *work) +static void mlx5_timestamp_overflow(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); - struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp, - overflow_work); - struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp); + struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock, + overflow_work); unsigned long flags; - write_lock_irqsave(&tstamp->lock, flags); - timecounter_read(&tstamp->clock); - write_unlock_irqrestore(&tstamp->lock, flags); - queue_delayed_work(priv->wq, &tstamp->overflow_work, - msecs_to_jiffies(tstamp->overflow_period * 1000)); + write_lock_irqsave(&clock->lock, flags); + timecounter_read(&clock->tc); + write_unlock_irqrestore(&clock->lock, flags); + schedule_delayed_work(&clock->overflow_work, clock->overflow_period); } -int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) { - struct hwtstamp_config config; - int err; - - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) - return -EOPNOTSUPP; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* TX HW timestamp */ - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - case HWTSTAMP_TX_ON: - break; - default: - return 
-ERANGE; - } - - mutex_lock(&priv->state_lock); - /* RX HW timestamp */ - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); - break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: - /* Disable CQE compression */ - netdev_warn(priv->netdev, "Disabling cqe compression"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); - if (err) { - netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); - mutex_unlock(&priv->state_lock); - return err; - } - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - mutex_unlock(&priv->state_lock); - return -ERANGE; - } - - memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config)); - mutex_unlock(&priv->state_lock); - - return copy_to_user(ifr->ifr_data, &config, - sizeof(config)) ? -EFAULT : 0; -} - -int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) -{ - struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config; - - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) - return -EOPNOTSUPP; - - return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; -} - -static int mlx5e_ptp_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) -{ - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); u64 ns = timespec64_to_ns(ts); unsigned long flags; - write_lock_irqsave(&tstamp->lock, flags); - timecounter_init(&tstamp->clock, &tstamp->cycles, ns); - write_unlock_irqrestore(&tstamp->lock, flags); + write_lock_irqsave(&clock->lock, flags); + timecounter_init(&clock->tc, &clock->cycles, ns); + write_unlock_irqrestore(&clock->lock, flags); return 0; } -static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp, - struct timespec64 *ts) +static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); u64 ns; unsigned long flags; - write_lock_irqsave(&tstamp->lock, flags); - ns = timecounter_read(&tstamp->clock); - write_unlock_irqrestore(&tstamp->lock, flags); + write_lock_irqsave(&clock->lock, flags); + ns = timecounter_read(&clock->tc); + write_unlock_irqrestore(&clock->lock, flags); *ts = ns_to_timespec64(ns); return 0; } -static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); unsigned long flags; - write_lock_irqsave(&tstamp->lock, flags); - timecounter_adjtime(&tstamp->clock, delta); - write_unlock_irqrestore(&tstamp->lock, flags); + write_lock_irqsave(&clock->lock, flags); + timecounter_adjtime(&clock->tc, 
delta); + write_unlock_irqrestore(&clock->lock, flags); return 0; } -static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) +static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) { u64 adj; u32 diff; unsigned long flags; int neg_adj = 0; - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); if (delta < 0) { neg_adj = 1; delta = -delta; } - adj = tstamp->nominal_c_mult; + adj = clock->nominal_c_mult; adj *= delta; diff = div_u64(adj, 1000000000ULL); - write_lock_irqsave(&tstamp->lock, flags); - timecounter_read(&tstamp->clock); - tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff : - tstamp->nominal_c_mult + diff; - write_unlock_irqrestore(&tstamp->lock, flags); + write_lock_irqsave(&clock->lock, flags); + timecounter_read(&clock->tc); + clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : + clock->nominal_c_mult + diff; + write_unlock_irqrestore(&clock->lock, flags); return 0; } -static int mlx5e_extts_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) +static int mlx5_extts_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) { - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); - struct mlx5e_priv *priv = - container_of(tstamp, struct mlx5e_priv, tstamp); + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; u32 field_select = 0; u8 pin_mode = 0; @@ -285,24 +199,24 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp, int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(priv->mdev)) + if (!MLX5_PPS_CAP(mdev)) return -EOPNOTSUPP; - if (rq->extts.index >= tstamp->ptp_info.n_pins) + if (rq->extts.index >= clock->ptp_info.n_pins) return -EINVAL; if (on) { - pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index); + pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index); if (pin < 0) return -EBUSY; - pin_mode = MLX5E_PIN_MODE_IN; + pin_mode = MLX5_PIN_MODE_IN; pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); - field_select = MLX5E_MTPPS_FS_PIN_MODE | - MLX5E_MTPPS_FS_PATTERN | - MLX5E_MTPPS_FS_ENABLE; + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE; } else { pin = rq->extts.index; - field_select = MLX5E_MTPPS_FS_ENABLE; + field_select = MLX5_MTPPS_FS_ENABLE; } MLX5_SET(mtpps_reg, in, pin, pin); @@ -311,22 +225,22 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, enable, on); MLX5_SET(mtpps_reg, in, field_select, field_select); - err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) return err; - return mlx5_set_mtppse(priv->mdev, pin, 0, - MLX5E_EVENT_MODE_REPETETIVE & on); + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); } -static int mlx5e_perout_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) +static int mlx5_perout_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) { - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); - struct mlx5e_priv *priv = - container_of(tstamp, struct mlx5e_priv, tstamp); + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct 
mlx5_core_dev, clock); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; u64 nsec_now, nsec_delta, time_stamp = 0; u64 cycles_now, cycles_delta; @@ -339,20 +253,20 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, int err = 0; s64 ns; - if (!MLX5_PPS_CAP(priv->mdev)) + if (!MLX5_PPS_CAP(mdev)) return -EOPNOTSUPP; - if (rq->perout.index >= tstamp->ptp_info.n_pins) + if (rq->perout.index >= clock->ptp_info.n_pins) return -EINVAL; if (on) { - pin = ptp_find_pin(tstamp->ptp, PTP_PF_PEROUT, + pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index); if (pin < 0) return -EBUSY; - pin_mode = MLX5E_PIN_MODE_OUT; - pattern = MLX5E_OUT_PATTERN_PERIODIC; + pin_mode = MLX5_PIN_MODE_OUT; + pattern = MLX5_OUT_PATTERN_PERIODIC; ts.tv_sec = rq->perout.period.sec; ts.tv_nsec = rq->perout.period.nsec; ns = timespec64_to_ns(&ts); @@ -363,21 +277,21 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, ts.tv_sec = rq->perout.start.sec; ts.tv_nsec = rq->perout.start.nsec; ns = timespec64_to_ns(&ts); - cycles_now = mlx5_read_internal_timer(tstamp->mdev); - write_lock_irqsave(&tstamp->lock, flags); - nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); + cycles_now = mlx5_read_internal_timer(mdev); + write_lock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); nsec_delta = ns - nsec_now; - cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, - tstamp->cycles.mult); - write_unlock_irqrestore(&tstamp->lock, flags); + cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, + clock->cycles.mult); + write_unlock_irqrestore(&clock->lock, flags); time_stamp = cycles_now + cycles_delta; - field_select = MLX5E_MTPPS_FS_PIN_MODE | - MLX5E_MTPPS_FS_PATTERN | - MLX5E_MTPPS_FS_ENABLE | - MLX5E_MTPPS_FS_TIME_STAMP; + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE | + MLX5_MTPPS_FS_TIME_STAMP; } else { pin = rq->perout.index; - field_select = MLX5E_MTPPS_FS_ENABLE; + field_select = MLX5_MTPPS_FS_ENABLE; } MLX5_SET(mtpps_reg, in, pin, pin); @@ -387,233 +301,225 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); MLX5_SET(mtpps_reg, in, field_select, field_select); - err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) return err; - return mlx5_set_mtppse(priv->mdev, pin, 0, - MLX5E_EVENT_MODE_REPETETIVE & on); + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); } -static int mlx5e_pps_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) +static int mlx5_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) { - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); - tstamp->pps_info.enabled = !!on; + clock->pps_info.enabled = !!on; return 0; } -static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) +static int mlx5_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) { switch (rq->type) { case PTP_CLK_REQ_EXTTS: - return mlx5e_extts_configure(ptp, rq, on); + return mlx5_extts_configure(ptp, rq, on); case PTP_CLK_REQ_PEROUT: - return mlx5e_perout_configure(ptp, rq, on); + return mlx5_perout_configure(ptp, rq, on); case PTP_CLK_REQ_PPS: - return mlx5e_pps_configure(ptp, rq, on); + return mlx5_pps_configure(ptp, rq, on); default: return 
-EOPNOTSUPP; } return 0; } -static int mlx5e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, - enum ptp_pin_function func, unsigned int chan) +static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) { return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0; } -static const struct ptp_clock_info mlx5e_ptp_clock_info = { +static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, + .name = "mlx5_p2p", .max_adj = 100000000, .n_alarm = 0, .n_ext_ts = 0, .n_per_out = 0, .n_pins = 0, .pps = 0, - .adjfreq = mlx5e_ptp_adjfreq, - .adjtime = mlx5e_ptp_adjtime, - .gettime64 = mlx5e_ptp_gettime, - .settime64 = mlx5e_ptp_settime, + .adjfreq = mlx5_ptp_adjfreq, + .adjtime = mlx5_ptp_adjtime, + .gettime64 = mlx5_ptp_gettime, + .settime64 = mlx5_ptp_settime, .enable = NULL, .verify = NULL, }; -static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp) -{ - tstamp->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; - tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; -} - -static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp) +static int mlx5_init_pin_config(struct mlx5_clock *clock) { int i; - tstamp->ptp_info.pin_config = - kzalloc(sizeof(*tstamp->ptp_info.pin_config) * - tstamp->ptp_info.n_pins, GFP_KERNEL); - if (!tstamp->ptp_info.pin_config) + clock->ptp_info.pin_config = + kzalloc(sizeof(*clock->ptp_info.pin_config) * + clock->ptp_info.n_pins, GFP_KERNEL); + if (!clock->ptp_info.pin_config) return -ENOMEM; - tstamp->ptp_info.enable = mlx5e_ptp_enable; - tstamp->ptp_info.verify = mlx5e_ptp_verify; - tstamp->ptp_info.pps = 1; + clock->ptp_info.enable = mlx5_ptp_enable; + clock->ptp_info.verify = mlx5_ptp_verify; + clock->ptp_info.pps = 1; - for (i = 0; i < tstamp->ptp_info.n_pins; i++) { - snprintf(tstamp->ptp_info.pin_config[i].name, - sizeof(tstamp->ptp_info.pin_config[i].name), + for (i = 0; i < clock->ptp_info.n_pins; i++) { + snprintf(clock->ptp_info.pin_config[i].name, + sizeof(clock->ptp_info.pin_config[i].name), "mlx5_pps%d", i); - tstamp->ptp_info.pin_config[i].index = i; - tstamp->ptp_info.pin_config[i].func = PTP_PF_NONE; - tstamp->ptp_info.pin_config[i].chan = i; + clock->ptp_info.pin_config[i].index = i; + clock->ptp_info.pin_config[i].func = PTP_PF_NONE; + clock->ptp_info.pin_config[i].chan = i; } return 0; } -static void mlx5e_get_pps_caps(struct mlx5e_priv *priv, - struct mlx5e_tstamp *tstamp) +static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) { + struct mlx5_clock *clock = &mdev->clock; u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - mlx5_query_mtpps(priv->mdev, out, sizeof(out)); + mlx5_query_mtpps(mdev, out, sizeof(out)); - tstamp->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, - cap_number_of_pps_pins); - tstamp->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, - cap_max_num_of_pps_in_pins); - tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, - cap_max_num_of_pps_out_pins); + clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, + cap_number_of_pps_pins); + clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_in_pins); + clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); - tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); - tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); - tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); - tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); - tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, 
cap_pin_4_mode); - tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); - tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); - tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); + clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); } -void mlx5e_pps_event_handler(struct mlx5e_priv *priv, - struct ptp_clock_event *event) +void mlx5_pps_event(struct mlx5_core_dev *mdev, + struct mlx5_eqe *eqe) { - struct net_device *netdev = priv->netdev; - struct mlx5e_tstamp *tstamp = &priv->tstamp; + struct mlx5_clock *clock = &mdev->clock; + struct ptp_clock_event ptp_event; struct timespec64 ts; u64 nsec_now, nsec_delta; u64 cycles_now, cycles_delta; - int pin = event->index; + int pin = eqe->data.pps.pin; s64 ns; unsigned long flags; - switch (tstamp->ptp_info.pin_config[pin].func) { + switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: - if (tstamp->pps_info.enabled) { - event->type = PTP_CLOCK_PPSUSR; - event->pps_times.ts_real = ns_to_timespec64(event->timestamp); + if (clock->pps_info.enabled) { + ptp_event.type = PTP_CLOCK_PPSUSR; + ptp_event.pps_times.ts_real = ns_to_timespec64(eqe->data.pps.time_stamp); } else { - event->type = PTP_CLOCK_EXTTS; + ptp_event.type = PTP_CLOCK_EXTTS; } - ptp_clock_event(tstamp->ptp, event); + ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: - mlx5e_ptp_gettime(&tstamp->ptp_info, &ts); - cycles_now = mlx5_read_internal_timer(tstamp->mdev); + mlx5_ptp_gettime(&clock->ptp_info, &ts); + cycles_now = mlx5_read_internal_timer(mdev); ts.tv_sec += 1; ts.tv_nsec = 0; ns = timespec64_to_ns(&ts); - write_lock_irqsave(&tstamp->lock, flags); - nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); + write_lock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); nsec_delta = ns - nsec_now; - cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, - tstamp->cycles.mult); - tstamp->pps_info.start[pin] = cycles_now + cycles_delta; - queue_work(priv->wq, &tstamp->pps_info.out_work); - write_unlock_irqrestore(&tstamp->lock, flags); + cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, + clock->cycles.mult); + clock->pps_info.start[pin] = cycles_now + cycles_delta; + schedule_work(&clock->pps_info.out_work); + write_unlock_irqrestore(&clock->lock, flags); break; default: - netdev_err(netdev, "%s: Unhandled event\n", __func__); + mlx5_core_err(mdev, " Unhandled event\n"); } } -void mlx5e_timestamp_init(struct mlx5e_priv *priv) +void mlx5_init_clock(struct mlx5_core_dev *mdev) { - struct mlx5e_tstamp *tstamp = &priv->tstamp; + struct mlx5_clock *clock = &mdev->clock; u64 ns; u64 frac = 0; u32 dev_freq; - mlx5e_timestamp_init_config(tstamp); - dev_freq = MLX5_CAP_GEN(priv->mdev, device_frequency_khz); + dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); if (!dev_freq) { - mlx5_core_warn(priv->mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + mlx5_core_warn(mdev, "invalid 
device_frequency_khz, aborting HW clock init\n"); return; } - rwlock_init(&tstamp->lock); - tstamp->cycles.read = mlx5e_read_internal_timer; - tstamp->cycles.shift = MLX5E_CYCLES_SHIFT; - tstamp->cycles.mult = clocksource_khz2mult(dev_freq, - tstamp->cycles.shift); - tstamp->nominal_c_mult = tstamp->cycles.mult; - tstamp->cycles.mask = CLOCKSOURCE_MASK(41); - tstamp->mdev = priv->mdev; + rwlock_init(&clock->lock); + clock->cycles.read = read_internal_timer; + clock->cycles.shift = MLX5_CYCLES_SHIFT; + clock->cycles.mult = clocksource_khz2mult(dev_freq, + clock->cycles.shift); + clock->nominal_c_mult = clock->cycles.mult; + clock->cycles.mask = CLOCKSOURCE_MASK(41); - timecounter_init(&tstamp->clock, &tstamp->cycles, + timecounter_init(&clock->tc, &clock->cycles, ktime_to_ns(ktime_get_real())); /* Calculate period in seconds to call the overflow watchdog - to make * sure counter is checked at least once every wrap around. */ - ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask, + ns = cyclecounter_cyc2ns(&clock->cycles, clock->cycles.mask, frac, &frac); do_div(ns, NSEC_PER_SEC / 2 / HZ); - tstamp->overflow_period = ns; + clock->overflow_period = ns; - INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out); - INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow); - if (tstamp->overflow_period) - queue_delayed_work(priv->wq, &tstamp->overflow_work, 0); + INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); + INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow); + if (clock->overflow_period) + schedule_delayed_work(&clock->overflow_work, 0); else - mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n"); + mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n"); /* Configure the PHC */ - tstamp->ptp_info = mlx5e_ptp_clock_info; - snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp"); + clock->ptp_info = mlx5_ptp_clock_info; /* Initialize 1PPS data structures */ - if (MLX5_PPS_CAP(priv->mdev)) - mlx5e_get_pps_caps(priv, tstamp); - if (tstamp->ptp_info.n_pins) - mlx5e_init_pin_config(tstamp); + if (MLX5_PPS_CAP(mdev)) + mlx5_get_pps_caps(mdev); + if (clock->ptp_info.n_pins) + mlx5_init_pin_config(clock); - tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, - &priv->mdev->pdev->dev); - if (IS_ERR(tstamp->ptp)) { - mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", - PTR_ERR(tstamp->ptp)); - tstamp->ptp = NULL; + clock->ptp = ptp_clock_register(&clock->ptp_info, + &mdev->pdev->dev); + if (IS_ERR(clock->ptp)) { + mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + PTR_ERR(clock->ptp)); + clock->ptp = NULL; } } -void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv) +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) { - struct mlx5e_tstamp *tstamp = &priv->tstamp; + struct mlx5_clock *clock = &mdev->clock; - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) return; - if (priv->tstamp.ptp) { - ptp_clock_unregister(priv->tstamp.ptp); - priv->tstamp.ptp = NULL; + if (clock->ptp) { + ptp_clock_unregister(clock->ptp); + clock->ptp = NULL; } - cancel_work_sync(&tstamp->pps_info.out_work); - cancel_delayed_work_sync(&tstamp->overflow_work); - kfree(tstamp->ptp_info.pin_config); + cancel_work_sync(&clock->pps_info.out_work); + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock->ptp_info.pin_config); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h new file mode 100644 
index 000000000000..a8eecedd46c2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_CLOCK_H__ +#define __LIB_CLOCK_H__ + +void mlx5_init_clock(struct mlx5_core_dev *mdev); +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + u64 nsec; + + read_lock(&clock->lock); + nsec = timecounter_cyc2time(&clock->tc, timestamp); + read_unlock(&clock->lock); + + return ns_to_ktime(nsec); +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0d2c8dcd6eae..ecbe9fad22d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -59,6 +59,7 @@ #include "lib/mlx5.h" #include "fpga/core.h" #include "accel/ipsec.h" +#include "lib/clock.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -889,6 +890,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_reserved_gids(dev); + mlx5_init_clock(dev); + err = mlx5_init_rl_table(dev); if (err) { dev_err(&pdev->dev, "Failed to init rate limiting\n"); @@ -949,6 +952,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b7c2900b75f9..8f00de2fe283 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -93,6 +93,7 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_core_page_fault(struct mlx5_core_dev *dev, struct mlx5_pagefault *pfault); +void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_port_module_event(struct mlx5_core_dev 
*dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 401c8972cc3a..08c77b7e59cb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -49,6 +49,8 @@ #include #include #include +#include +#include enum { MLX5_BOARD_ID_LEN = 64, @@ -760,6 +762,27 @@ struct mlx5_rsvd_gids { struct ida ida; }; +#define MAX_PIN_NUM 8 +struct mlx5_pps { + u8 pin_caps[MAX_PIN_NUM]; + struct work_struct out_work; + u64 start[MAX_PIN_NUM]; + u8 enabled; +}; + +struct mlx5_clock { + rwlock_t lock; + struct cyclecounter cycles; + struct timecounter tc; + struct hwtstamp_config hwtstamp_config; + u32 nominal_c_mult; + unsigned long overflow_period; + struct delayed_work overflow_work; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + struct mlx5_pps pps_info; +}; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -800,6 +823,7 @@ struct mlx5_core_dev { #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif + struct mlx5_clock clock; }; struct mlx5_db { From c8249eda7fac00b55eca17ab05207be291d91a3f Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 12 Sep 2017 14:11:29 +0300 Subject: [PATCH 0970/2177] net/mlx5e: IPoIB, Move underlay QP init/uninit to separate functions During the creation of the underlay QP the PKEY index is unknown, the PKEY index is known only when calling ndo_open. PKEY index attached to the QP during state modification. Splitting the functions will also make the code symmetric and more readable. This split is also required for later PKEY support to be called with the PKEY index during ndo_open. Signed-off-by: Alex Vesker Reviewed-by: Erez Shitrit Signed-off-by: Leon Romanovsky --- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 108 ++++++++++++------ 1 file changed, 70 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 14dfb577691b..feb94db6b921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -108,11 +108,68 @@ static void mlx5i_cleanup(struct mlx5e_priv *priv) /* Do nothing .. 
*/ } +static int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_qp *qp = &ipriv->qp; + struct mlx5_qp_context *context; + int ret; + + /* QP states */ + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + context->pri_path.port = 1; + context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); + goto err_qp_modify_to_err; + } + memset(context, 0, sizeof(*context)); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); + goto err_qp_modify_to_err; + } + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); + goto err_qp_modify_to_err; + } + + kfree(context); + return 0; + +err_qp_modify_to_err: + mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp); + kfree(context); + return ret; +} + +static void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_qp_context context; + int err; + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, + &ipriv->qp); + if (err) + mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err); +} + #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { - struct mlx5_qp_context *context = NULL; u32 *in = NULL; void *addr_path; int ret = 0; @@ -140,38 +197,7 @@ static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core goto out; } - /* QP states */ - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) { - ret = -ENOMEM; - goto out; - } - - context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); - context->pri_path.port = 1; - context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); - goto out; - } - memset(context, 0, sizeof(*context)); - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); - goto out; - } - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); - goto out; - } - out: - kfree(context); kvfree(in); return ret; } @@ -192,13 +218,23 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } + err = mlx5i_init_underlay_qp(priv); + if (err) { + mlx5_core_warn(priv->mdev, "intilize underlay QP failed, %d\n", err); + goto err_destroy_underlay_qp; + } + err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); - return err; + goto err_destroy_underlay_qp; } return 0; + +err_destroy_underlay_qp: + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); + return err; } static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) @@ -381,12 +417,8 @@ static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int 
cmd) static void mlx5i_dev_cleanup(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5i_priv *ipriv = priv->ppriv; - struct mlx5_qp_context context; - /* detach qp from flow-steering by reset it */ - mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, &ipriv->qp); + mlx5i_uninit_underlay_qp(priv); } static int mlx5i_open(struct net_device *netdev) From dae37456c8ac3afe8d5f306717f2b75ed5ca38d9 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Wed, 13 Sep 2017 11:37:02 +0300 Subject: [PATCH 0971/2177] net/mlx5: Support for attaching multiple underlay QPs to root flow table Previous support allowed connecting only a single QPN to the FT. Now using a linked list multiple QPNs can be attached to the same FT. Supporting attaching multiple underlay QPs is required for PKEY support in which child and parent share the same FT. The actual attaching/detaching FW commands will be called inside the function symmetrically. This change requires a change in IPoIB open and close functions, the attaching/detaching to/from the FT is done each time we open/close. Signed-off-by: Alex Vesker Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 13 +- .../net/ethernet/mellanox/mlx5/core/fs_cmd.h | 4 +- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 123 ++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 7 +- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 78 ++++++----- 5 files changed, 171 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 36ecc2b2e187..881e2e55840c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -40,7 +40,8 @@ #include "eswitch.h" int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, u32 underlay_qpn) + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; @@ -52,7 +53,15 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); - MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + if (disconnect) { + MLX5_SET(set_flow_table_root_in, in, op_mod, 1); + MLX5_SET(set_flow_table_root_in, in, table_id, 0); + } else { + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + } + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn); if (ft->vport) { MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index c6d7bdf255b6..71e2d0f37ad9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -71,8 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, unsigned int index); int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - u32 underlay_qpn); + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect); int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5a7bea688ec8..8a1a7ba9fe53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -693,8 +693,10 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio *prio) { struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_ft_underlay_qp *uqp; int min_level = INT_MAX; int err; + u32 qpn; if (root->root_ft) min_level = root->root_ft->level; @@ -702,10 +704,24 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio if (ft->level >= min_level) return 0; - err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn); + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, + false); + if (err) + break; + } + } + if (err) - mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", - ft->id); + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); else root->root_ft = ft; @@ -1661,23 +1677,43 @@ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) static int update_root_ft_destroy(struct mlx5_flow_table *ft) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_ft_underlay_qp *uqp; struct mlx5_flow_table *new_root_ft = NULL; + int err = 0; + u32 qpn; if (root->root_ft != ft) return 0; new_root_ft = find_next_ft(ft); - if (new_root_ft) { - int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, - root->underlay_qpn); - if (err) { - mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", - ft->id); - return err; + if (!new_root_ft) { + root->root_ft = NULL; + return 0; + } + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn, + false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, + qpn, false); + if (err) + break; } } - root->root_ft = new_root_ft; + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = new_root_ft; + return 0; } @@ -1965,6 +2001,8 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering root_ns->dev = steering->dev; root_ns->table_type = table_type; + INIT_LIST_HEAD(&root_ns->underlay_qpns); + ns = &root_ns->ns; fs_init_namespace(ns); mutex_init(&root_ns->chain_lock); @@ -2245,17 +2283,76 @@ err: int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) { struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *new_uqp; + int err = 0; + + new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL); + if (!new_uqp) + return -ENOMEM; + + mutex_lock(&root->chain_lock); + + if (!root->root_ft) { + err = -EINVAL; + goto update_ft_fail; + } + + err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false); + if (err) { + mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n", + underlay_qpn, err); + goto update_ft_fail; + } + + new_uqp->qpn = underlay_qpn; + list_add_tail(&new_uqp->list, 
&root->underlay_qpns); + + mutex_unlock(&root->chain_lock); - root->underlay_qpn = underlay_qpn; return 0; + +update_ft_fail: + mutex_unlock(&root->chain_lock); + kfree(new_uqp); + return err; } EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) { struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *uqp; + bool found = false; + int err = 0; + + mutex_lock(&root->chain_lock); + list_for_each_entry(uqp, &root->underlay_qpns, list) { + if (uqp->qpn == underlay_qpn) { + found = true; + break; + } + } + + if (!found) { + mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n", + underlay_qpn); + err = -EINVAL; + goto out; + } + + err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true); + if (err) + mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n", + underlay_qpn, err); + + list_del(&uqp->list); + mutex_unlock(&root->chain_lock); + kfree(uqp); - root->underlay_qpn = 0; return 0; + +out: + mutex_unlock(&root->chain_lock); + return err; } EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 48dd78975062..9bc048a89bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -147,6 +147,11 @@ struct mlx5_fc { struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +struct mlx5_ft_underlay_qp { + struct list_head list; + u32 qpn; +}; + #define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600 /* Calculate the fte_match_param length and without the reserved length. * Make sure the reserved field is the last. @@ -212,7 +217,7 @@ struct mlx5_flow_root_namespace { struct mlx5_flow_table *root_ft; /* Should be held when chaining flow tables */ struct mutex chain_lock; - u32 underlay_qpn; + struct list_head underlay_qpns; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index feb94db6b921..00f0e6a038bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -218,12 +218,6 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } - err = mlx5i_init_underlay_qp(priv); - if (err) { - mlx5_core_warn(priv->mdev, "intilize underlay QP failed, %d\n", err); - goto err_destroy_underlay_qp; - } - err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); @@ -285,7 +279,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { - struct mlx5i_priv *ipriv = priv->ppriv; int err; err = mlx5e_create_indirect_rqt(priv); @@ -304,18 +297,12 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_indirect_tirs; - err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); + err = mlx5i_create_flow_steering(priv); if (err) goto err_destroy_direct_tirs; - err = mlx5i_create_flow_steering(priv); - if (err) - goto err_remove_rx_underlay_qpn; - return 0; -err_remove_rx_underlay_qpn: - mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: @@ -329,9 +316,6 @@ err_destroy_indirect_rqts: static void 
mlx5i_cleanup_rx(struct mlx5e_priv *priv) { - struct mlx5i_priv *ipriv = priv->ppriv; - - mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); mlx5i_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); @@ -423,49 +407,71 @@ static void mlx5i_dev_cleanup(struct net_device *dev) static int mlx5i_open(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; int err; - mutex_lock(&priv->state_lock); + mutex_lock(&epriv->state_lock); - set_bit(MLX5E_STATE_OPENED, &priv->state); + set_bit(MLX5E_STATE_OPENED, &epriv->state); - err = mlx5e_open_channels(priv, &priv->channels); - if (err) + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err); goto err_clear_state_opened_flag; + } - mlx5e_refresh_tirs(priv, false); - mlx5e_activate_priv_channels(priv); - mlx5e_timestamp_set(priv); + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn); + if (err) { + mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err); + goto err_reset_qp; + } - mutex_unlock(&priv->state_lock); + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) + goto err_remove_fs_underlay_qp; + + mlx5e_refresh_tirs(epriv, false); + mlx5e_activate_priv_channels(epriv); + mlx5e_timestamp_set(epriv); + + mutex_unlock(&epriv->state_lock); return 0; +err_remove_fs_underlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); +err_reset_qp: + mlx5i_uninit_underlay_qp(epriv); err_clear_state_opened_flag: - clear_bit(MLX5E_STATE_OPENED, &priv->state); - mutex_unlock(&priv->state_lock); + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); return err; } static int mlx5i_close(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. */ - mutex_lock(&priv->state_lock); + mutex_lock(&epriv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &epriv->state)) goto unlock; - clear_bit(MLX5E_STATE_OPENED, &priv->state); + clear_bit(MLX5E_STATE_OPENED, &epriv->state); - netif_carrier_off(priv->netdev); - mlx5e_deactivate_priv_channels(priv); - mlx5e_close_channels(&priv->channels); + netif_carrier_off(epriv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); + mlx5i_uninit_underlay_qp(epriv); + mlx5e_deactivate_priv_channels(epriv); + mlx5e_close_channels(&epriv->channels);; unlock: - mutex_unlock(&priv->state_lock); + mutex_unlock(&epriv->state_lock); return 0; } From b4b678b06f6eef18bff44a338c01870234db0bc9 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 10 Oct 2017 10:36:41 +0300 Subject: [PATCH 0972/2177] IB/ipoib: Grab rtnl lock on heavy flush when calling ndo_open/stop When ndo_open and ndo_stop are called RTNL lock should be held. In this specific case ipoib_ib_dev_open calls the offloaded ndo_open which re-sets the number of TX queue assuming RTNL lock is held. Since RTNL lock is not held, RTNL assert will fail. 
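For reference, the constraint comes from the core queue-count helpers: a heavy flush re-runs the offloaded ndo_open (mlx5i_open in this series), which re-sets the number of TX queues. The sketch below shows the resulting locking pattern; it assumes the queue-count update goes through netif_set_real_num_tx_queues(), which asserts RTNL ownership for registered devices, and it is an illustration rather than the exact hunk that follows.

        /*
         * Simplified call chain that trips the assert when the flush path
         * does not hold the RTNL (assumption: the offloaded ndo_open updates
         * the TX queue count via netif_set_real_num_tx_queues(), which
         * contains an RTNL assertion):
         *
         *   __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY)
         *     -> ipoib_ib_dev_open(dev)
         *       -> offloaded ndo_open (mlx5i_open)
         *         -> netif_set_real_num_tx_queues(dev, n)  -- ASSERT_RTNL()
         *
         * Hence the heavy-flush branch takes the lock around stop/open itself:
         */
        rtnl_lock();
        if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
                ipoib_ib_dev_stop(dev);

        result = ipoib_ib_dev_open(dev);
        rtnl_unlock();
        if (result)
                return;
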
Signed-off-by: Alex Vesker --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 6cd61638b441..c97384c914a4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1203,10 +1203,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { + rtnl_lock(); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - if (ipoib_ib_dev_open(dev) != 0) + + result = ipoib_ib_dev_open(dev); + rtnl_unlock(); + if (result) return; + if (netif_queue_stopped(dev)) netif_start_queue(dev); } From 980f91c3a62852ebfd0e60387d4a9d13934d9760 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Wed, 14 Jun 2017 22:18:48 +0300 Subject: [PATCH 0973/2177] IB/ipoib: Add ability to set PKEY index to lower device driver To support passing child interfaces to the lower device a new rdma_netdev function was used, set_id. This will allow us to attach the PKEY index lower device resources such as TIS/QP. For devices that do not support offloads in IPoIB same logic will be used, setting the PKEY index to priv struct. Signed-off-by: Alex Vesker Reviewed-by: Erez Shitrit --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index c97384c914a4..fe690f82af29 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -893,13 +893,17 @@ dev_stop: void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); if (!(priv->pkey & 0x7fff) || ib_find_pkey(priv->ca, priv->port, priv->pkey, - &priv->pkey_index)) + &priv->pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - else + } else { + if (rn->set_id) + rn->set_id(dev, priv->pkey_index); set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + } } void ipoib_ib_dev_up(struct net_device *dev) From da34f1a85b78c2220dac1ce4f4c4595dd0cab5a9 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Wed, 13 Sep 2017 12:17:50 +0300 Subject: [PATCH 0974/2177] net/mlx5e: IPoIB, Support for setting PKEY index to underlay QP Added a function to set PKEY index to IPoIB device driver using the already present set_id function. PKEY index is attached to the QP during state modification. 
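Together with the previous patch this closes the loop from PKEY discovery in the ULP down to the hardware QP. The following is a condensed sketch assembled from the two hunks (fragments for orientation, not a standalone compilation unit):

        /* ULP (ipoib) side, previous patch: once the PKEY table index is
         * resolved, push it to the lower device through the rdma_netdev hook.
         */
        if (rn->set_id)
                rn->set_id(dev, priv->pkey_index);

        /* Lower device (mlx5 IPoIB) side, this patch: remember the index... */
        static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
        {
                struct mlx5i_priv *ipriv = netdev_priv(netdev);

                ipriv->pkey_index = (u16)id;
        }

        /* ...and program it into the underlay QP context when the QP is
         * moved RST2INIT in mlx5i_init_underlay_qp():
         */
        context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index);
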
Signed-off-by: Alex Vesker Reviewed-by: Erez Shitrit --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 9 +++++++++ drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 00f0e6a038bb..679c1f9af642 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -123,6 +123,7 @@ static int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); context->pri_path.port = 1; + context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index); context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); @@ -529,6 +530,13 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey); } +static void mlx5i_set_pkey_index(struct net_device *netdev, int id) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + ipriv->pkey_index = (u16)id; +} + static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) { if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) @@ -593,6 +601,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, rn->send = mlx5i_xmit; rn->attach_mcast = mlx5i_attach_mcast; rn->detach_mcast = mlx5i_detach_mcast; + rn->set_id = mlx5i_set_pkey_index; return netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index a0f405f520f7..9a729883c3b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -50,6 +50,7 @@ struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ struct mlx5_core_qp qp; u32 qkey; + u16 pkey_index; char *mlx5e_priv[0]; }; From 7e7f4780c3402bd181eea82ca6395013623e4fbf Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 14 Sep 2017 10:27:25 +0300 Subject: [PATCH 0975/2177] net/mlx5e: IPoIB, Use hash-table to map between QPN to child netdev This change is needed for PKEY support, since the RQs are shared between the child interface and the parent. The parent is responsible for NAPI and the precessing of RX completions. Using the dqpn in the completion descriptor we set the corresponding child IPoIB netdevice on the SKB. The mapping between the dqpn and the netdevice is done using a HT, each mlx5 IPoIB interface registers its mapping on creation. Signed-off-by: Alex Vesker Reviewed-by: Erez Shitrit --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 20 ++- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 16 +++ .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 12 ++ .../mellanox/mlx5/core/ipoib/ipoib_vlan.c | 136 ++++++++++++++++++ 5 files changed, 184 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d9621b2152d3..100fe4ecad9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -22,7 +22,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc. 
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o -mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7e3bfe62ef6e..2c3f2e9b6983 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1163,11 +1163,25 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - struct net_device *netdev = rq->netdev; + struct net_device *netdev; char *pseudo_header; + u32 qpn; u8 *dgid; u8 g; + qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff; + netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn); + + /* No mapping present, cannot process SKB. This might happen if a child + * interface is going down while having unprocessed CQEs on parent RQ + */ + if (unlikely(!netdev)) { + /* TODO: add drop counters support */ + skb->dev = NULL; + pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn); + return; + } + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; if ((!g) || dgid[0] != 0xff) @@ -1230,6 +1244,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_free_wqe; mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (unlikely(!skb->dev)) { + dev_kfree_skb_any(skb); + goto wq_free_wqe; + } napi_gro_receive(rq->cq.napi, skb); wq_free_wqe: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 679c1f9af642..c479fe54a6ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -382,6 +382,9 @@ static int mlx5i_dev_init(struct net_device *dev) dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff; dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff; + /* Add QPN to net-device mapping to HT */ + mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn); + return 0; } @@ -402,8 +405,12 @@ static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static void mlx5i_dev_cleanup(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; mlx5i_uninit_underlay_qp(priv); + + /* Delete QPN to net-device mapping from HT */ + mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn); } static int mlx5i_open(struct net_device *netdev) @@ -590,6 +597,12 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, if (!epriv->wq) goto err_free_netdev; + err = mlx5i_pkey_qpn_ht_init(netdev); + if (err) { + mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); + goto destroy_wq; + } + profile->init(mdev, netdev, profile, ipriv); mlx5e_attach_netdev(epriv); @@ -605,6 +618,8 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, return netdev; +destroy_wq: + destroy_workqueue(epriv->wq); err_free_netdev: free_netdev(netdev); free_mdev_resources: @@ -623,6 +638,7 @@ void mlx5_rdma_netdev_free(struct net_device *netdev) mlx5e_detach_netdev(priv); profile->cleanup(priv); destroy_workqueue(priv->wq); + mlx5i_pkey_qpn_ht_cleanup(netdev); free_netdev(netdev); mlx5e_destroy_mdev_resources(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 9a729883c3b3..e313f6d90729 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -51,9 +51,21 @@ struct mlx5i_priv { struct mlx5_core_qp qp; u32 qkey; u16 pkey_index; + struct mlx5i_pkey_qpn_ht *qpn_htbl; char *mlx5e_priv[0]; }; +/* Allocate/Free underlay QPN to net-device hash table */ +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev); +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev); + +/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */ +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn); +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn); + +/* Get the net-device corresponding to the given underlay QPN */ +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn); + /* Extract mlx5e_priv from IPoIB netdev */ #define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c new file mode 100644 index 000000000000..e4d39aa1f552 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include "ipoib.h" + +#define MLX5I_MAX_LOG_PKEY_SUP 7 + +struct qpn_to_netdev { + struct net_device *netdev; + struct hlist_node hlist; + u32 underlay_qpn; +}; + +struct mlx5i_pkey_qpn_ht { + struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP]; + spinlock_t ht_lock; /* Synchronise with NAPI */ +}; + +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *qpn_htbl; + + qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL); + if (!qpn_htbl) + return -ENOMEM; + + ipriv->qpn_htbl = qpn_htbl; + spin_lock_init(&qpn_htbl->ht_lock); + + return 0; +} + +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + kfree(ipriv->qpn_htbl); +} + +static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets, + u32 qpn) +{ + struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)]; + struct qpn_to_netdev *node; + + hlist_for_each_entry(node, h, hlist) { + if (node->underlay_qpn == qpn) + return node; + } + + return NULL; +} + +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP); + struct qpn_to_netdev *new_node; + + new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + + new_node->netdev = netdev; + new_node->underlay_qpn = qpn; + spin_lock_bh(&ht->ht_lock); + hlist_add_head(&new_node->hlist, &ht->buckets[key]); + spin_unlock_bh(&ht->ht_lock); + + return 0; +} + +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn); + if (!node) { + mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n"); + return -EINVAL; + } + + spin_lock_bh(&ht->ht_lock); + hlist_del_init(&node->hlist); + spin_unlock_bh(&ht->ht_lock); + kfree(node); + + return 0; +} + +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn); + if (!node) + return NULL; + + return node->netdev; +} From 4c6c615e3f308aee26277abebc7d4ffcd9a6abe2 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 14 Sep 2017 14:08:39 +0300 Subject: [PATCH 0976/2177] net/mlx5e: IPoIB, Add PKEY child interface nic profile Child interface profile will be called to support child interface specific behaviour. The child code is sparse compared to the parent since the RX channels are shared between the interfaces. Creating a septate profile for child and parent will make a smother code with a better ability for future expansion. The profile stuct is exposed to the parent using a getter function. 
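Concretely, "sparse" means only the TX-side callbacks of the child profile do real work, since RX resources stay with the parent. Below is a condensed view of the profile added in this patch; the comments are orientation notes and the complete initializer appears in the ipoib_vlan.c hunk.

        static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
                .init        = mlx5i_pkey_init,       /* dummy RQs, parent ops overridden */
                .cleanup     = mlx5i_pkey_cleanup,    /* nothing to undo */
                .init_tx     = mlx5i_pkey_init_tx,    /* create the child underlay QP */
                .cleanup_tx  = mlx5i_pkey_cleanup_tx, /* destroy the child underlay QP */
                .init_rx     = mlx5i_pkey_init_rx,    /* no-op: parent owns RX resources */
                .cleanup_rx  = mlx5i_pkey_cleanup_rx, /* no-op */
                .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
                .max_tc      = MLX5I_MAX_NUM_TC,
        };

        /* Parent code reaches the child profile only through the getter: */
        const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
        {
                return &mlx5i_pkey_nic_profile;
        }
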
Signed-off-by: Alex Vesker Reviewed-by: Erez Shitrit --- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 12 +-- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 13 +++ .../mellanox/mlx5/core/ipoib/ipoib_vlan.c | 83 +++++++++++++++++++ 3 files changed, 102 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index c479fe54a6ca..196771cc599e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -70,10 +70,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, } /* Called directly after IPoIB netdevice was created to initialize SW structs */ -static void mlx5i_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); @@ -169,7 +169,7 @@ static void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 -static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { u32 *in = NULL; void *addr_path; @@ -203,7 +203,7 @@ out: return ret; } -static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { mlx5_core_destroy_qp(mdev, qp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index e313f6d90729..c9895f7a2358 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -55,6 +55,10 @@ struct mlx5i_priv { char *mlx5e_priv[0]; }; +/* Underlay QP create/destroy functions */ +int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); + /* Allocate/Free underlay QPN to net-device hash table */ int mlx5i_pkey_qpn_ht_init(struct net_device *netdev); void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev); @@ -66,6 +70,15 @@ int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn); /* Get the net-device corresponding to the given underlay QPN */ struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn); +/* Parent profile functions */ +void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv); + +/* Get child interface nic profile */ +const struct mlx5e_profile *mlx5i_pkey_get_profile(void); + /* Extract mlx5e_priv from IPoIB netdev */ #define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index e4d39aa1f552..17c508d98dbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -134,3 +134,86 @@ struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn) return node->netdev; } + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +static void mlx5i_pkey_init(struct mlx5_core_dev 
*mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mlx5i_init(mdev, netdev, profile, ppriv); + + /* Override parent ndo */ + netdev->netdev_ops = NULL; + + /* Currently no ethtool support */ + netdev->ethtool_ops = NULL; + + /* Use dummy rqs */ + priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv) +{ + /* Do nothing .. */ +} + +static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp); + if (err) { + mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err); + return err; + } + + return 0; +} + +static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); +} + +static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource allocation and init + */ + return 0; +} + +static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource free and de-init + */ +} + +static const struct mlx5e_profile mlx5i_pkey_nic_profile = { + .init = mlx5i_pkey_init, + .cleanup = mlx5i_pkey_cleanup, + .init_tx = mlx5i_pkey_init_tx, + .cleanup_tx = mlx5i_pkey_cleanup_tx, + .init_rx = mlx5i_pkey_init_rx, + .cleanup_rx = mlx5i_pkey_cleanup_rx, + .enable = NULL, + .disable = NULL, + .update_stats = NULL, + .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ + .max_tc = MLX5I_MAX_NUM_TC, +}; + +const struct mlx5e_profile *mlx5i_pkey_get_profile(void) +{ + return &mlx5i_pkey_nic_profile; +} From af98cebcb3e66d349173c33c0aaef352d108a081 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 14 Sep 2017 16:33:35 +0300 Subject: [PATCH 0977/2177] net/mlx5e: IPoIB, Add PKEY child interface ndos Child interface ndos will be called to support child interface specific behaviour. 
ndo_init flow: -Acquire shared QPN to net-device HT from parent -Continue with the same flow as parent interface ndo_open flow: -Initialize child underlay QP and connect to shared FT -Create child send TIS -Open child send channels Signed-off-by: Alex Vesker Reviewed-by: Erez Shitrit --- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 10 +- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 8 ++ .../mellanox/mlx5/core/ipoib/ipoib_vlan.c | 133 +++++++++++++++++- 3 files changed, 144 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 196771cc599e..70706eb70d3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -40,8 +40,6 @@ static int mlx5i_open(struct net_device *netdev); static int mlx5i_close(struct net_device *netdev); -static int mlx5i_dev_init(struct net_device *dev); -static void mlx5i_dev_cleanup(struct net_device *dev); static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); @@ -108,7 +106,7 @@ static void mlx5i_cleanup(struct mlx5e_priv *priv) /* Do nothing .. */ } -static int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5i_priv *ipriv = priv->ppriv; @@ -154,7 +152,7 @@ err_qp_modify_to_err: return ret; } -static void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) { struct mlx5i_priv *ipriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; @@ -372,7 +370,7 @@ out: return err; } -static int mlx5i_dev_init(struct net_device *dev) +int mlx5i_dev_init(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv = priv->ppriv; @@ -402,7 +400,7 @@ static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } -static void mlx5i_dev_cleanup(struct net_device *dev) +void mlx5i_dev_cleanup(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv = priv->ppriv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index c9895f7a2358..80c0cfee7164 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -59,6 +59,10 @@ struct mlx5i_priv { int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); +/* Underlay QP state modification init/uninit functions */ +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv); +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv); + /* Allocate/Free underlay QPN to net-device hash table */ int mlx5i_pkey_qpn_ht_init(struct net_device *netdev); void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev); @@ -70,6 +74,10 @@ int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn); /* Get the net-device corresponding to the given underlay QPN */ struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn); +/* Shared ndo functionts */ +int mlx5i_dev_init(struct net_device *dev); +void mlx5i_dev_cleanup(struct net_device *dev); + /* Parent profile functions */ void mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev, diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 17c508d98dbb..d99bec6855de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -135,6 +135,137 @@ struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn) return node->netdev; } +static int mlx5i_pkey_open(struct net_device *netdev); +static int mlx5i_pkey_close(struct net_device *netdev); +static int mlx5i_pkey_dev_init(struct net_device *dev); +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev); +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu); + +static const struct net_device_ops mlx5i_pkey_netdev_ops = { + .ndo_open = mlx5i_pkey_open, + .ndo_stop = mlx5i_pkey_close, + .ndo_init = mlx5i_pkey_dev_init, + .ndo_uninit = mlx5i_pkey_dev_cleanup, + .ndo_change_mtu = mlx5i_pkey_change_mtu, +}; + +/* Child NDOs */ +static int mlx5i_pkey_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + /* Get QPN to netdevice hash table from parent */ + parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); + parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + if (!parent_dev) { + mlx5_core_warn(priv->mdev, "failed to get parent device\n"); + return -EINVAL; + } + + parent_ipriv = netdev_priv(parent_dev); + ipriv->qpn_htbl = parent_ipriv->qpn_htbl; + dev_put(parent_dev); + + return mlx5i_dev_init(dev); +} + +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) +{ + return mlx5i_dev_cleanup(netdev); +} + +static int mlx5i_pkey_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err); + goto err_release_lock; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn); + if (err) { + mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err); + goto err_unint_underlay_qp; + } + + err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]); + if (err) { + mlx5_core_warn(mdev, "create child tis failed, %d\n", err); + goto err_remove_rx_uderlay_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) { + mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); + goto err_clear_state_opened_flag; + } + mlx5e_refresh_tirs(epriv, false); + mlx5e_activate_priv_channels(epriv); + mutex_unlock(&epriv->state_lock); + + return 0; + +err_clear_state_opened_flag: + mlx5e_destroy_tis(mdev, epriv->tisn[0]); +err_remove_rx_uderlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); +err_unint_underlay_qp: + mlx5i_uninit_underlay_qp(epriv); +err_release_lock: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_pkey_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, 
&priv->state); + + netif_carrier_off(priv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); + mlx5i_uninit_underlay_qp(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + mlx5e_destroy_tis(mdev, priv->tisn[0]); +unlock: + mutex_unlock(&priv->state_lock); + return 0; +} + +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mutex_lock(&priv->state_lock); + netdev->mtu = new_mtu; + mutex_unlock(&priv->state_lock); + + return 0; +} + /* Called directly after IPoIB netdevice was created to initialize SW structs */ static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, struct net_device *netdev, @@ -146,7 +277,7 @@ static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, mlx5i_init(mdev, netdev, profile, ppriv); /* Override parent ndo */ - netdev->netdev_ops = NULL; + netdev->netdev_ops = &mlx5i_pkey_netdev_ops; /* Currently no ethtool support */ netdev->ethtool_ops = NULL; From 6a910233c1eb19673dd0f37f1d72d7cdc419e176 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 14 Sep 2017 18:02:31 +0300 Subject: [PATCH 0978/2177] net/mlx5e: IPoIB, Add PKEY child interface ethtool ops Similar to VLAN interfaces child interfaces have limited ethtool support. In current code the main limitation that does not allow child interface ethtool configuration is due to shared resources which are managed by the parent. Signed-off-by: Alex Vesker Reviewed-by: Erez Shitrit --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 43c126c63955..6f338a9219c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -250,3 +250,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_link_ksettings = mlx5i_get_link_ksettings, .get_link = ethtool_op_get_link, }; + +const struct ethtool_ops mlx5i_pkey_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_link = ethtool_op_get_link, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 80c0cfee7164..a50c1a19550e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -39,6 +39,7 @@ #define MLX5I_MAX_NUM_TC 1 extern const struct ethtool_ops mlx5i_ethtool_ops; +extern const struct ethtool_ops mlx5i_pkey_ethtool_ops; #define MLX5_IB_GRH_BYTES 40 #define MLX5_IPOIB_ENCAP_LEN 4 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index d99bec6855de..531b02cc979b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -279,8 +279,8 @@ static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, /* Override parent ndo */ netdev->netdev_ops = &mlx5i_pkey_netdev_ops; - /* Currently no ethtool support */ - netdev->ethtool_ops = NULL; + /* Set child limited ethtool support */ + netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops; /* Use dummy rqs */ priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; From 
b5ae577741bec22b584fa704076ccd8221cad19d Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 14 Sep 2017 18:22:50 +0300 Subject: [PATCH 0979/2177] net/mlx5e: IPoIB, Modify rdma netdev allocate and free to support PKEY Resources such as FT, QPN HT and mdev resources should be allocated only by parent netdev. Shared resources are allocated and freed by the parent interface since the parent is always present and created before the IPoIB PKEY sub-interface. Signed-off-by: Alex Vesker Reviewed-by: Erez Shitrit --- .../ethernet/mellanox/mlx5/core/en_common.c | 1 + .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 52 ++++++++++++------- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 1 + 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index ece3fb147e3e..157d02917237 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -134,6 +134,7 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mlx5_core_destroy_mkey(mdev, &res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); + memset(res, 0, sizeof(*res)); } int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 70706eb70d3e..abf270d7f556 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -560,12 +560,13 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, const char *name, void (*setup)(struct net_device *)) { - const struct mlx5e_profile *profile = &mlx5i_nic_profile; - int nch = profile->max_nch(mdev); + const struct mlx5e_profile *profile; struct net_device *netdev; struct mlx5i_priv *ipriv; struct mlx5e_priv *epriv; struct rdma_netdev *rn; + bool sub_interface; + int nch; int err; if (mlx5i_check_required_hca_cap(mdev)) { @@ -573,10 +574,15 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, return ERR_PTR(-EOPNOTSUPP); } - /* This function should only be called once per mdev */ - err = mlx5e_create_mdev_resources(mdev); - if (err) - return NULL; + /* TODO: Need to find a better way to check if child device*/ + sub_interface = (mdev->mlx5e_res.pdn != 0); + + if (sub_interface) + profile = mlx5i_pkey_get_profile(); + else + profile = &mlx5i_nic_profile; + + nch = profile->max_nch(mdev); netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv), name, NET_NAME_UNKNOWN, @@ -585,7 +591,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, nch); if (!netdev) { mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n"); - goto free_mdev_resources; + return NULL; } ipriv = netdev_priv(netdev); @@ -595,10 +601,18 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, if (!epriv->wq) goto err_free_netdev; - err = mlx5i_pkey_qpn_ht_init(netdev); - if (err) { - mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); - goto destroy_wq; + ipriv->sub_interface = sub_interface; + if (!ipriv->sub_interface) { + err = mlx5i_pkey_qpn_ht_init(netdev); + if (err) { + mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); + goto destroy_wq; + } + + /* This should only be called once per mdev */ + err = mlx5e_create_mdev_resources(mdev); + if (err) + goto destroy_ht; } profile->init(mdev, netdev, 
profile, ipriv); @@ -616,12 +630,12 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, return netdev; +destroy_ht: + mlx5i_pkey_qpn_ht_cleanup(netdev); destroy_wq: destroy_workqueue(epriv->wq); err_free_netdev: free_netdev(netdev); -free_mdev_resources: - mlx5e_destroy_mdev_resources(mdev); return NULL; } @@ -629,16 +643,18 @@ EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); void mlx5_rdma_netdev_free(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; const struct mlx5e_profile *profile = priv->profile; - struct mlx5_core_dev *mdev = priv->mdev; mlx5e_detach_netdev(priv); profile->cleanup(priv); destroy_workqueue(priv->wq); - mlx5i_pkey_qpn_ht_cleanup(netdev); - free_netdev(netdev); - mlx5e_destroy_mdev_resources(mdev); + if (!ipriv->sub_interface) { + mlx5i_pkey_qpn_ht_cleanup(netdev); + mlx5e_destroy_mdev_resources(priv->mdev); + } + free_netdev(netdev); } EXPORT_SYMBOL(mlx5_rdma_netdev_free); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index a50c1a19550e..49008022c306 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -50,6 +50,7 @@ extern const struct ethtool_ops mlx5i_pkey_ethtool_ops; struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ struct mlx5_core_qp qp; + bool sub_interface; u32 qkey; u16 pkey_index; struct mlx5i_pkey_qpn_ht *qpn_htbl; From ec0d0987f084cdaf7b7ff8bd8f9f946e29fb8d94 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 12 Oct 2017 16:11:32 -0500 Subject: [PATCH 0980/2177] atm: fore200e: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- drivers/atm/fore200e.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f8b7e86907cc..126855e6cb7d 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -358,26 +358,33 @@ fore200e_shutdown(struct fore200e* fore200e) case FORE200E_STATE_COMPLETE: kfree(fore200e->stats); + /* fall through */ case FORE200E_STATE_IRQ: free_irq(fore200e->irq, fore200e->atm_dev); + /* fall through */ case FORE200E_STATE_ALLOC_BUF: fore200e_free_rx_buf(fore200e); + /* fall through */ case FORE200E_STATE_INIT_BSQ: fore200e_uninit_bs_queue(fore200e); + /* fall through */ case FORE200E_STATE_INIT_RXQ: fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_rxq.status); fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_rxq.rpd); + /* fall through */ case FORE200E_STATE_INIT_TXQ: fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_txq.status); fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_txq.tpd); + /* fall through */ case FORE200E_STATE_INIT_CMDQ: fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_cmdq.status); + /* fall through */ case FORE200E_STATE_INITIALIZE: /* nothing to do for that state */ @@ -390,6 +397,7 @@ fore200e_shutdown(struct fore200e* fore200e) case FORE200E_STATE_MAP: fore200e->bus->unmap(fore200e); + /* fall through */ case FORE200E_STATE_CONFIGURE: /* nothing to do for that state */ From 04a69a17597c1366d764926577745dca4aa6b100 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 13 Oct 2017 14:18:05 -0400 Subject: [PATCH 0981/2177] net: dsa: mv88e6xxx: setup random mac address An Ethernet switch may support having a MAC address, which can be used as the switch's source address in transmitted full-duplex Pause frames. If a DSA switch supports the related .set_addr operation, the DSA core sets the master's MAC address on the switch. This won't make sense anymore in a multi-CPU ports system, because there won't be a unique master device assigned to a switch tree. Instead, setup the switch from within the Marvell driver with a random MAC address, and remove the .set_addr implementation. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 33 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d74c7335c512..76cf383dcf90 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -932,6 +932,19 @@ static int mv88e6xxx_irl_setup(struct mv88e6xxx_chip *chip) return 0; } +static int mv88e6xxx_mac_setup(struct mv88e6xxx_chip *chip) +{ + if (chip->info->ops->set_switch_mac) { + u8 addr[ETH_ALEN]; + + eth_random_addr(addr); + + return chip->info->ops->set_switch_mac(chip, addr); + } + + return 0; +} + static int mv88e6xxx_pvt_map(struct mv88e6xxx_chip *chip, int dev, int port) { u16 pvlan = 0; @@ -2013,6 +2026,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; + err = mv88e6xxx_mac_setup(chip); + if (err) + goto unlock; + err = mv88e6xxx_phy_setup(chip); if (err) goto unlock; @@ -2043,21 +2060,6 @@ unlock: return err; } -static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int err; - - if (!chip->info->ops->set_switch_mac) - return -EOPNOTSUPP; - - mutex_lock(&chip->reg_lock); - err = chip->info->ops->set_switch_mac(chip, addr); - mutex_unlock(&chip->reg_lock); - - return err; -} - static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) { struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; @@ -3785,7 +3787,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .probe = mv88e6xxx_drv_probe, .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, - .set_addr = mv88e6xxx_set_addr, .adjust_link = mv88e6xxx_adjust_link, .get_strings = mv88e6xxx_get_strings, .get_ethtool_stats = mv88e6xxx_get_ethtool_stats, From 1723ab4f5e9af710acb36ba754e9cbf02ee08215 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 13 Oct 2017 14:18:06 -0400 Subject: [PATCH 0982/2177] net: dsa: mv88e6060: fix switch MAC address The 88E6060 Ethernet switch always transmits the multicast bit of the switch MAC address as a zero. It re-uses the corresponding bit 8 of the register "Switch MAC Address Register Bytes 0 & 1" for "DiffAddr". If the "DiffAddr" bit is 0, then all ports transmit the same source address. If it is set to 1, then bit 2:0 are used for the port number. The mv88e6060 driver is currently wrongly shifting the MAC address byte 0 by 9. To fix this, shift it by 8 as usual and clear its bit 0. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6060.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 621cdc46ad81..d64be2b83d3c 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -214,8 +214,14 @@ static int mv88e6060_setup(struct dsa_switch *ds) static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr) { - /* Use the same MAC Address as FD Pause frames for all ports */ - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 9) | addr[1]); + u16 val = addr[0] << 8 | addr[1]; + + /* The multicast bit is always transmitted as a zero, so the switch uses + * bit 8 for "DiffAddr", where 0 means all ports transmit the same SA. 
+ */ + val &= 0xfeff; + + REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, val); REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); From 56c3ff9bf23e1f9e5f13e2343b0603365f8d9c7d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 13 Oct 2017 14:18:07 -0400 Subject: [PATCH 0983/2177] net: dsa: mv88e6060: setup random mac address As for mv88e6xxx, setup the switch from within the mv88e6060 driver with a random MAC address, and remove the .set_addr implementation. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6060.c | 43 ++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index d64be2b83d3c..6173be889d95 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -188,6 +189,27 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) return 0; } +static int mv88e6060_setup_addr(struct dsa_switch *ds) +{ + u8 addr[ETH_ALEN]; + u16 val; + + eth_random_addr(addr); + + val = addr[0] << 8 | addr[1]; + + /* The multicast bit is always transmitted as a zero, so the switch uses + * bit 8 for "DiffAddr", where 0 means all ports transmit the same SA. + */ + val &= 0xfeff; + + REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, val); + REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); + REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); + + return 0; +} + static int mv88e6060_setup(struct dsa_switch *ds) { int ret; @@ -203,6 +225,10 @@ static int mv88e6060_setup(struct dsa_switch *ds) if (ret < 0) return ret; + ret = mv88e6060_setup_addr(ds); + if (ret < 0) + return ret; + for (i = 0; i < MV88E6060_PORTS; i++) { ret = mv88e6060_setup_port(ds, i); if (ret < 0) @@ -212,22 +238,6 @@ static int mv88e6060_setup(struct dsa_switch *ds) return 0; } -static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr) -{ - u16 val = addr[0] << 8 | addr[1]; - - /* The multicast bit is always transmitted as a zero, so the switch uses - * bit 8 for "DiffAddr", where 0 means all ports transmit the same SA. - */ - val &= 0xfeff; - - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, val); - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); - - return 0; -} - static int mv88e6060_port_to_phy_addr(int port) { if (port >= 0 && port < MV88E6060_PORTS) @@ -262,7 +272,6 @@ static const struct dsa_switch_ops mv88e6060_switch_ops = { .get_tag_protocol = mv88e6060_get_tag_protocol, .probe = mv88e6060_drv_probe, .setup = mv88e6060_setup, - .set_addr = mv88e6060_set_addr, .phy_read = mv88e6060_phy_read, .phy_write = mv88e6060_phy_write, }; From 93004a934bc19ea52b687d74482fd1bdd25e7b87 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 13 Oct 2017 14:18:08 -0400 Subject: [PATCH 0984/2177] net: dsa: dsa_loop: remove .set_addr The .set_addr function does nothing, remove the dsa_loop implementation before getting rid of it completely in DSA. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/dsa_loop.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index d55051abf4ed..3a3f4f7ba364 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -110,13 +110,6 @@ static void dsa_loop_get_ethtool_stats(struct dsa_switch *ds, int port, data[i] = ps->ports[port].mib[i].val; } -static int dsa_loop_set_addr(struct dsa_switch *ds, u8 *addr) -{ - dev_dbg(ds->dev, "%s\n", __func__); - - return 0; -} - static int dsa_loop_phy_read(struct dsa_switch *ds, int port, int regnum) { struct dsa_loop_priv *ps = ds->priv; @@ -263,7 +256,6 @@ static const struct dsa_switch_ops dsa_loop_driver = { .get_strings = dsa_loop_get_strings, .get_ethtool_stats = dsa_loop_get_ethtool_stats, .get_sset_count = dsa_loop_get_sset_count, - .set_addr = dsa_loop_set_addr, .phy_read = dsa_loop_phy_read, .phy_write = dsa_loop_phy_write, .port_bridge_join = dsa_loop_port_bridge_join, From 841f4f24053acad69240c6ab7427a1d24bc29491 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 13 Oct 2017 14:18:09 -0400 Subject: [PATCH 0985/2177] net: dsa: remove .set_addr Now that there is no user for the .set_addr function, remove it from DSA. If a switch supports this feature (like mv88e6xxx), the implementation can be done in the driver setup. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - net/dsa/dsa2.c | 6 ------ net/dsa/legacy.c | 6 ------ 3 files changed, 13 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index ce1d622734d7..2746741f74cf 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -291,7 +291,6 @@ struct dsa_switch_ops { enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds); int (*setup)(struct dsa_switch *ds); - int (*set_addr)(struct dsa_switch *ds, u8 *addr); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 54ed054777bd..6ac9e11d385c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -336,12 +336,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err) return err; - if (ds->ops->set_addr) { - err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr); - if (err < 0) - return err; - } - if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 19ff6e0a21dc..b0fefbffe082 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -172,12 +172,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, if (ret) return ret; - if (ops->set_addr) { - ret = ops->set_addr(ds, master->dev_addr); - if (ret < 0) - return ret; - } - if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) From 4c7787ba3a1f583dba6ff3420817229f0056df84 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Oct 2017 03:50:35 +0100 Subject: [PATCH 0986/2177] nfp: Explicitly include linux/bug.h Today's -next build encountered an error due to a missing definition of WARN_ON(), caused by some header reorganization removing an implicit inclusion of linux/bug.h. Fix this with an explicit inclusion. Signed-off-by: Mark Brown Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_app.c | 1 + drivers/net/ethernet/netronome/nfp/nfp_asm.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 82c290763529..5d9e2eba5b49 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index c26aa7e4a839..86e7daee6099 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -35,6 +35,7 @@ #define __NFP_ASM_H__ 1 #include +#include #include #define REG_NONE 0 From ad75b7d32f2517a6cc92a5d70569c33455157453 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Fri, 13 Oct 2017 18:48:13 +0530 Subject: [PATCH 0987/2177] cxgb4: implement ethtool dump data operations Implement operations to set/get dump data via ethtool. Also add template header that precedes dump data, which helps in decoding and extracting the dump data. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 33 +++++++++ .../ethernet/chelsio/cxgb4/cudbg_lib_common.h | 65 +++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 73 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h | 32 ++++++++ .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 56 +++++++++++++- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 + 8 files changed, 265 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index fecd7aab673b..4c6041f45630 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ - cxgb4_ptp.o cxgb4_tc_flower.o + cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h new file mode 100644 index 000000000000..ebaa5b7063cf --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_IF_H__ +#define __CUDBG_IF_H__ + +#define CUDBG_MAJOR_VERSION 1 +#define CUDBG_MINOR_VERSION 14 + +enum cudbg_dbg_entity_type { + CUDBG_MAX_ENTITY = 70, +}; + +struct cudbg_init { + struct adapter *adap; /* Pointer to adapter structure */ + void *outbuf; /* Output buffer */ + u32 outbuf_size; /* Output buffer size */ +}; +#endif /* __CUDBG_IF_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h new file mode 100644 index 000000000000..eb1b36b72455 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_LIB_COMMON_H__ +#define __CUDBG_LIB_COMMON_H__ + +#define CUDBG_SIGNATURE 67856866 /* CUDB in ascii */ + +enum cudbg_dump_type { + CUDBG_DUMP_TYPE_MINI = 1, +}; + +enum cudbg_compression_type { + CUDBG_COMPRESSION_NONE = 1, +}; + +struct cudbg_hdr { + u32 signature; + u32 hdr_len; + u16 major_ver; + u16 minor_ver; + u32 data_len; + u32 hdr_flags; + u16 max_entities; + u8 chip_ver; + u8 dump_type:3; + u8 reserved1:1; + u8 compress_type:4; + u32 reserved[8]; +}; + +struct cudbg_entity_hdr { + u32 entity_type; + u32 start_offset; + u32 size; + int hdr_flags; + u32 sys_warn; + u32 sys_err; + u8 num_pad; + u8 flag; /* bit 0 is used to indicate ext data */ + u8 reserved1[2]; + u32 next_ext_offset; /* pointer to next extended entity meta data */ + u32 reserved[5]; +}; + +struct cudbg_buffer { + u32 size; + u32 offset; + char *data; +}; +#endif /* __CUDBG_LIB_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0db3ab6ad094..a749602fdc41 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -909,6 +909,9 @@ struct adapter { /* TC flower offload */ DECLARE_HASHTABLE(flower_anymatch_tbl, 9); struct timer_list flower_stats_timer; + + /* Ethtool Dump */ + struct ethtool_dump eth_dump; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c new file mode 100644 index 000000000000..a808150de208 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include "cxgb4.h" +#include "cxgb4_cudbg.h" + +u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag) +{ + return 0; +} + +int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, + u32 flag) +{ + struct cudbg_init cudbg_init = { 0 }; + struct cudbg_buffer dbg_buff = { 0 }; + u32 size, min_size, total_size = 0; + struct cudbg_hdr *cudbg_hdr; + + size = *buf_size; + + cudbg_init.adap = adap; + cudbg_init.outbuf = buf; + cudbg_init.outbuf_size = size; + + dbg_buff.data = buf; + dbg_buff.size = size; + dbg_buff.offset = 0; + + cudbg_hdr = (struct cudbg_hdr *)buf; + cudbg_hdr->signature = CUDBG_SIGNATURE; + cudbg_hdr->hdr_len = sizeof(struct cudbg_hdr); + cudbg_hdr->major_ver = CUDBG_MAJOR_VERSION; + cudbg_hdr->minor_ver = CUDBG_MINOR_VERSION; + cudbg_hdr->max_entities = CUDBG_MAX_ENTITY; + cudbg_hdr->chip_ver = adap->params.chip; + cudbg_hdr->dump_type = CUDBG_DUMP_TYPE_MINI; + cudbg_hdr->compress_type = CUDBG_COMPRESSION_NONE; + + min_size = sizeof(struct cudbg_hdr) + + sizeof(struct cudbg_entity_hdr) * + cudbg_hdr->max_entities; + if (size < min_size) + return -ENOMEM; + + dbg_buff.offset += min_size; + total_size = dbg_buff.offset; + + cudbg_hdr->data_len = total_size; + *buf_size = total_size; + return 0; +} + +void cxgb4_init_ethtool_dump(struct adapter *adapter) +{ + adapter->eth_dump.flag = CXGB4_ETH_DUMP_NONE; + adapter->eth_dump.version = adapter->params.fw_vers; + adapter->eth_dump.len = 0; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h new file mode 100644 index 000000000000..8c5dd6794f81 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". 
+ * + */ + +#ifndef __CXGB4_CUDBG_H__ +#define __CXGB4_CUDBG_H__ + +#include "cudbg_if.h" +#include "cudbg_lib_common.h" + +enum CXGB4_ETHTOOL_DUMP_FLAGS { + CXGB4_ETH_DUMP_NONE = ETH_FW_DUMP_DISABLE, +}; + +u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag); +int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, + u32 flag); +void cxgb4_init_ethtool_dump(struct adapter *adapter); +#endif /* __CXGB4_CUDBG_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index a71af1e587e2..796eb051cb2f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -21,6 +21,7 @@ #include "cxgb4.h" #include "t4_regs.h" #include "t4fw_api.h" +#include "cxgb4_cudbg.h" #define EEPROM_MAGIC 0x38E2F10C @@ -1374,6 +1375,56 @@ static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, return -EOPNOTSUPP; } +static int set_dump(struct net_device *dev, struct ethtool_dump *eth_dump) +{ + struct adapter *adapter = netdev2adap(dev); + u32 len = 0; + + len = sizeof(struct cudbg_hdr) + + sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY; + len += cxgb4_get_dump_length(adapter, eth_dump->flag); + + adapter->eth_dump.flag = eth_dump->flag; + adapter->eth_dump.len = len; + return 0; +} + +static int get_dump_flag(struct net_device *dev, struct ethtool_dump *eth_dump) +{ + struct adapter *adapter = netdev2adap(dev); + + eth_dump->flag = adapter->eth_dump.flag; + eth_dump->len = adapter->eth_dump.len; + eth_dump->version = adapter->eth_dump.version; + return 0; +} + +static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump, + void *buf) +{ + struct adapter *adapter = netdev2adap(dev); + u32 len = 0; + int ret = 0; + + if (adapter->eth_dump.flag == CXGB4_ETH_DUMP_NONE) + return -ENOENT; + + len = sizeof(struct cudbg_hdr) + + sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY; + len += cxgb4_get_dump_length(adapter, adapter->eth_dump.flag); + if (eth_dump->len < len) + return -ENOMEM; + + ret = cxgb4_cudbg_collect(adapter, buf, &len, adapter->eth_dump.flag); + if (ret) + return ret; + + eth_dump->flag = adapter->eth_dump.flag; + eth_dump->len = len; + eth_dump->version = adapter->eth_dump.version; + return 0; +} + static const struct ethtool_ops cxgb_ethtool_ops = { .get_link_ksettings = get_link_ksettings, .set_link_ksettings = set_link_ksettings, @@ -1404,7 +1455,10 @@ static const struct ethtool_ops cxgb_ethtool_ops = { .get_rxfh = get_rss_table, .set_rxfh = set_rss_table, .flash_device = set_flash, - .get_ts_info = get_ts_info + .get_ts_info = get_ts_info, + .set_dump = set_dump, + .get_dump_flag = get_dump_flag, + .get_dump_data = get_dump_data, }; void cxgb4_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index fe4cbe22d5d7..70c395d18087 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -81,6 +81,7 @@ #include "cxgb4_tc_u32.h" #include "cxgb4_tc_flower.h" #include "cxgb4_ptp.h" +#include "cxgb4_cudbg.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -5035,6 +5036,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) cxgb4_set_ethtool_ops(netdev); } + cxgb4_init_ethtool_dump(adapter); + pci_set_drvdata(pdev, adapter); if (adapter->flags & FW_OK) { From a7975a2f9a7984de9b9b318da9d1826033db32c7 Mon Sep 17 00:00:00 2001 From: Rahul 
Lakkireddy Date: Fri, 13 Oct 2017 18:48:14 +0530 Subject: [PATCH 0988/2177] cxgb4: collect register dump Add base to collect dump entities. Collect register dump and update template header accordingly. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 3 +- .../net/ethernet/chelsio/cxgb4/cudbg_common.c | 54 +++++++++++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 5 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 79 ++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 28 ++++++ .../ethernet/chelsio/cxgb4/cudbg_lib_common.h | 13 +++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 94 ++++++++++++++++++- .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h | 11 +++ 8 files changed, 285 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 4c6041f45630..70d454379996 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ - cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o + cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \ + cudbg_common.o cudbg_lib.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c new file mode 100644 index 000000000000..f78ba1743b5a --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include "cxgb4.h" +#include "cudbg_if.h" +#include "cudbg_lib_common.h" + +int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size, + struct cudbg_buffer *pin_buff) +{ + u32 offset; + + offset = pdbg_buff->offset; + if (offset + size > pdbg_buff->size) + return CUDBG_STATUS_NO_MEM; + + pin_buff->data = (char *)pdbg_buff->data + offset; + pin_buff->offset = offset; + pin_buff->size = size; + pdbg_buff->size -= size; + return 0; +} + +void cudbg_put_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pdbg_buff) +{ + pdbg_buff->size += pin_buff->size; + pin_buff->data = NULL; + pin_buff->offset = 0; + pin_buff->size = 0; +} + +void cudbg_update_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pout_buff) +{ + /* We already write to buffer provided by ethool, so just + * increment offset to next free space. 
+ */ + pout_buff->offset += pin_buff->size; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index ebaa5b7063cf..73725a8666df 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -18,10 +18,15 @@ #ifndef __CUDBG_IF_H__ #define __CUDBG_IF_H__ +/* Error codes */ +#define CUDBG_STATUS_NO_MEM -19 +#define CUDBG_SYSTEM_ERROR -29 + #define CUDBG_MAJOR_VERSION 1 #define CUDBG_MINOR_VERSION 14 enum cudbg_dbg_entity_type { + CUDBG_REG_DUMP = 1, CUDBG_MAX_ENTITY = 70, }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c new file mode 100644 index 000000000000..b37d8f7825dd --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include "cxgb4.h" +#include "cudbg_if.h" +#include "cudbg_lib_common.h" +#include "cudbg_lib.h" + +static void cudbg_write_and_release_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *dbg_buff) +{ + cudbg_update_buff(pin_buff, dbg_buff); + cudbg_put_buff(pin_buff, dbg_buff); +} + +/* This function will add additional padding bytes into debug_buffer to make it + * 4 byte aligned. + */ +void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, + struct cudbg_entity_hdr *entity_hdr) +{ + u8 zero_buf[4] = {0}; + u8 padding, remain; + + remain = (dbg_buff->offset - entity_hdr->start_offset) % 4; + padding = 4 - remain; + if (remain) { + memcpy(((u8 *)dbg_buff->data) + dbg_buff->offset, &zero_buf, + padding); + dbg_buff->offset += padding; + entity_hdr->num_pad = padding; + } + entity_hdr->size = dbg_buff->offset - entity_hdr->start_offset; +} + +struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i) +{ + struct cudbg_hdr *cudbg_hdr = (struct cudbg_hdr *)outbuf; + + return (struct cudbg_entity_hdr *) + ((char *)outbuf + cudbg_hdr->hdr_len + + (sizeof(struct cudbg_entity_hdr) * (i - 1))); +} + +int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + u32 buf_size = 0; + int rc = 0; + + if (is_t4(padap->params.chip)) + buf_size = T4_REGMAP_SIZE; + else if (is_t5(padap->params.chip) || is_t6(padap->params.chip)) + buf_size = T5_REGMAP_SIZE; + + rc = cudbg_get_buff(dbg_buff, buf_size, &temp_buff); + if (rc) + return rc; + t4_get_regs(padap, (void *)temp_buff.data, temp_buff.size); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h new file mode 100644 index 000000000000..5b0a0e964601 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_LIB_H__ +#define __CUDBG_LIB_H__ + +int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); + +struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); +void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, + struct cudbg_entity_hdr *entity_hdr); +#endif /* __CUDBG_LIB_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h index eb1b36b72455..422a5ceedd2e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h @@ -62,4 +62,17 @@ struct cudbg_buffer { u32 offset; char *data; }; + +struct cudbg_error { + int sys_err; + int sys_warn; + int app_err; +}; + +int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size, + struct cudbg_buffer *pin_buff); +void cudbg_put_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pdbg_buff); +void cudbg_update_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pout_buff); #endif /* __CUDBG_LIB_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index a808150de208..4ec322eec68c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -18,9 +18,94 @@ #include "cxgb4.h" #include "cxgb4_cudbg.h" +static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { + { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, +}; + +static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) +{ + u32 len = 0; + + switch (entity) { + case CUDBG_REG_DUMP: + switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { + case CHELSIO_T4: + len = T4_REGMAP_SIZE; + break; + case CHELSIO_T5: + case CHELSIO_T6: + len = T5_REGMAP_SIZE; + break; + default: + break; + } + break; + default: + break; + } + + return len; +} + u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag) { - return 0; + u32 i, entity; + u32 len = 0; + + if (flag & CXGB4_ETH_DUMP_HW) { + for (i = 0; i < ARRAY_SIZE(cxgb4_collect_hw_dump); i++) { + entity = cxgb4_collect_hw_dump[i].entity; + len += cxgb4_get_entity_length(adap, entity); + } + } + + return len; +} + +static void cxgb4_cudbg_collect_entity(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + const struct cxgb4_collect_entity *e_arr, + u32 arr_size, void *buf, u32 *tot_size) +{ + struct adapter *adap = pdbg_init->adap; + struct cudbg_error cudbg_err = { 0 }; + struct cudbg_entity_hdr *entity_hdr; + u32 entity_size, i; + u32 total_size = 0; + int ret; + + for (i = 0; i < arr_size; i++) { + const struct cxgb4_collect_entity *e = &e_arr[i]; + + /* Skip entities that won't fit in output buffer */ + entity_size = cxgb4_get_entity_length(adap, e->entity); + if (entity_size > + pdbg_init->outbuf_size - *tot_size - total_size) + continue; + + entity_hdr = cudbg_get_entity_hdr(buf, 
e->entity); + entity_hdr->entity_type = e->entity; + entity_hdr->start_offset = dbg_buff->offset; + memset(&cudbg_err, 0, sizeof(struct cudbg_error)); + ret = e->collect_cb(pdbg_init, dbg_buff, &cudbg_err); + if (ret) { + entity_hdr->size = 0; + dbg_buff->offset = entity_hdr->start_offset; + } else { + cudbg_align_debug_buffer(dbg_buff, entity_hdr); + } + + /* Log error and continue with next entity */ + if (cudbg_err.sys_err) + ret = CUDBG_SYSTEM_ERROR; + + entity_hdr->hdr_flags = ret; + entity_hdr->sys_err = cudbg_err.sys_err; + entity_hdr->sys_warn = cudbg_err.sys_warn; + total_size += entity_hdr->size; + } + + *tot_size += total_size; } int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, @@ -60,6 +145,13 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, dbg_buff.offset += min_size; total_size = dbg_buff.offset; + if (flag & CXGB4_ETH_DUMP_HW) + cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff, + cxgb4_collect_hw_dump, + ARRAY_SIZE(cxgb4_collect_hw_dump), + buf, + &total_size); + cudbg_hdr->data_len = total_size; *buf_size = total_size; return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h index 8c5dd6794f81..7369a7e22b89 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h @@ -20,9 +20,20 @@ #include "cudbg_if.h" #include "cudbg_lib_common.h" +#include "cudbg_lib.h" + +typedef int (*cudbg_collect_callback_t)(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); + +struct cxgb4_collect_entity { + enum cudbg_dbg_entity_type entity; + cudbg_collect_callback_t collect_cb; +}; enum CXGB4_ETHTOOL_DUMP_FLAGS { CXGB4_ETH_DUMP_NONE = ETH_FW_DUMP_DISABLE, + CXGB4_ETH_DUMP_HW = (1 << 1), /* various FW and HW dumps */ }; u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag); From b33af022e57996dc818ec960cbdf0f07cb5130d8 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Fri, 13 Oct 2017 18:48:15 +0530 Subject: [PATCH 0989/2177] cxgb4: collect on-chip memory dump Collect EDC0 and EDC1 memory dump. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 29 ++++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 8 ++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 136 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 6 + .../ethernet/chelsio/cxgb4/cudbg_lib_common.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 38 ++++- .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h | 1 + 7 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h new file mode 100644 index 000000000000..71a426dd22f5 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_ENTITY_H__ +#define __CUDBG_ENTITY_H__ + +#define EDC0_FLAG 3 +#define EDC1_FLAG 4 + +struct card_mem { + u16 size_edc0; + u16 size_edc1; + u16 mem_flag; +}; +#endif /* __CUDBG_ENTITY_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 73725a8666df..8bcea985af77 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -20,6 +20,7 @@ /* Error codes */ #define CUDBG_STATUS_NO_MEM -19 +#define CUDBG_STATUS_ENTITY_NOT_FOUND -24 #define CUDBG_SYSTEM_ERROR -29 #define CUDBG_MAJOR_VERSION 1 @@ -27,6 +28,8 @@ enum cudbg_dbg_entity_type { CUDBG_REG_DUMP = 1, + CUDBG_EDC0 = 18, + CUDBG_EDC1 = 19, CUDBG_MAX_ENTITY = 70, }; @@ -35,4 +38,9 @@ struct cudbg_init { void *outbuf; /* Output buffer */ u32 outbuf_size; /* Output buffer size */ }; + +static inline unsigned int cudbg_mbytes_to_bytes(unsigned int size) +{ + return size * 1024 * 1024; +} #endif /* __CUDBG_IF_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index b37d8f7825dd..fb0e97e6a6a0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -15,10 +15,12 @@ * */ +#include "t4_regs.h" #include "cxgb4.h" #include "cudbg_if.h" #include "cudbg_lib_common.h" #include "cudbg_lib.h" +#include "cudbg_entity.h" static void cudbg_write_and_release_buff(struct cudbg_buffer *pin_buff, struct cudbg_buffer *dbg_buff) @@ -27,6 +29,16 @@ static void cudbg_write_and_release_buff(struct cudbg_buffer *pin_buff, cudbg_put_buff(pin_buff, dbg_buff); } +static int is_fw_attached(struct cudbg_init *pdbg_init) +{ + struct adapter *padap = pdbg_init->adap; + + if (!(padap->flags & FW_OK) || padap->use_bd) + return 0; + + return 1; +} + /* This function will add additional padding bytes into debug_buffer to make it * 4 byte aligned. 
*/ @@ -77,3 +89,127 @@ int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, cudbg_write_and_release_buff(&temp_buff, dbg_buff); return rc; } + +static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, u8 mem_type, + unsigned long tot_len, + struct cudbg_error *cudbg_err) +{ + unsigned long bytes, bytes_left, bytes_read = 0; + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int rc = 0; + + bytes_left = tot_len; + while (bytes_left > 0) { + bytes = min_t(unsigned long, bytes_left, + (unsigned long)CUDBG_CHUNK_SIZE); + rc = cudbg_get_buff(dbg_buff, bytes, &temp_buff); + if (rc) + return rc; + spin_lock(&padap->win0_lock); + rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type, + bytes_read, bytes, + (__be32 *)temp_buff.data, + 1); + spin_unlock(&padap->win0_lock); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + bytes_left -= bytes; + bytes_read += bytes; + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + } + return rc; +} + +static void cudbg_collect_mem_info(struct cudbg_init *pdbg_init, + struct card_mem *mem_info) +{ + struct adapter *padap = pdbg_init->adap; + u32 value; + + value = t4_read_reg(padap, MA_EDRAM0_BAR_A); + value = EDRAM0_SIZE_G(value); + mem_info->size_edc0 = (u16)value; + + value = t4_read_reg(padap, MA_EDRAM1_BAR_A); + value = EDRAM1_SIZE_G(value); + mem_info->size_edc1 = (u16)value; + + value = t4_read_reg(padap, MA_TARGET_MEM_ENABLE_A); + if (value & EDRAM0_ENABLE_F) + mem_info->mem_flag |= (1 << EDC0_FLAG); + if (value & EDRAM1_ENABLE_F) + mem_info->mem_flag |= (1 << EDC1_FLAG); +} + +static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + int rc; + + if (is_fw_attached(pdbg_init)) { + /* Flush uP dcache before reading edcX/mcX */ + rc = t4_fwcache(padap, FW_PARAM_DEV_FWCACHE_FLUSH); + if (rc) + cudbg_err->sys_warn = rc; + } +} + +static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err, + u8 mem_type) +{ + struct card_mem mem_info = {0}; + unsigned long flag, size; + int rc; + + cudbg_t4_fwcache(pdbg_init, cudbg_err); + cudbg_collect_mem_info(pdbg_init, &mem_info); + switch (mem_type) { + case MEM_EDC0: + flag = (1 << EDC0_FLAG); + size = cudbg_mbytes_to_bytes(mem_info.size_edc0); + break; + case MEM_EDC1: + flag = (1 << EDC1_FLAG); + size = cudbg_mbytes_to_bytes(mem_info.size_edc1); + break; + default: + rc = CUDBG_STATUS_ENTITY_NOT_FOUND; + goto err; + } + + if (mem_info.mem_flag & flag) { + rc = cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, + size, cudbg_err); + if (rc) + goto err; + } else { + rc = CUDBG_STATUS_ENTITY_NOT_FOUND; + goto err; + } +err: + return rc; +} + +int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err, + MEM_EDC0); +} + +int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err, + MEM_EDC1); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 5b0a0e964601..f7be5090b172 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -21,6 +21,12 @@ int 
cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h index 422a5ceedd2e..b150c5d1f7c0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h @@ -69,6 +69,9 @@ struct cudbg_error { int app_err; }; +#define CDUMP_MAX_COMP_BUF_SIZE ((64 * 1024) - 1) +#define CUDBG_CHUNK_SIZE ((CDUMP_MAX_COMP_BUF_SIZE / 1024) * 1024) + int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size, struct cudbg_buffer *pin_buff); void cudbg_put_buff(struct cudbg_buffer *pin_buff, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 4ec322eec68c..286d172a9c19 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -15,16 +15,22 @@ * */ +#include "t4_regs.h" #include "cxgb4.h" #include "cxgb4_cudbg.h" +static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = { + { CUDBG_EDC0, cudbg_collect_edc0_meminfo }, + { CUDBG_EDC1, cudbg_collect_edc1_meminfo }, +}; + static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, }; static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) { - u32 len = 0; + u32 value, len = 0; switch (entity) { case CUDBG_REG_DUMP: @@ -40,6 +46,22 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) break; } break; + case CUDBG_EDC0: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & EDRAM0_ENABLE_F) { + value = t4_read_reg(adap, MA_EDRAM0_BAR_A); + len = EDRAM0_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; + case CUDBG_EDC1: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & EDRAM1_ENABLE_F) { + value = t4_read_reg(adap, MA_EDRAM1_BAR_A); + len = EDRAM1_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; default: break; } @@ -59,6 +81,13 @@ u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag) } } + if (flag & CXGB4_ETH_DUMP_MEM) { + for (i = 0; i < ARRAY_SIZE(cxgb4_collect_mem_dump); i++) { + entity = cxgb4_collect_mem_dump[i].entity; + len += cxgb4_get_entity_length(adap, entity); + } + } + return len; } @@ -152,6 +181,13 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, buf, &total_size); + if (flag & CXGB4_ETH_DUMP_MEM) + cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff, + cxgb4_collect_mem_dump, + ARRAY_SIZE(cxgb4_collect_mem_dump), + buf, + &total_size); + cudbg_hdr->data_len = total_size; *buf_size = total_size; return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h index 7369a7e22b89..c099b5aa2214 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h @@ -33,6 +33,7 @@ struct cxgb4_collect_entity { enum CXGB4_ETHTOOL_DUMP_FLAGS { CXGB4_ETH_DUMP_NONE = ETH_FW_DUMP_DISABLE, + CXGB4_ETH_DUMP_MEM = (1 << 0), /* On-Chip 
Memory Dumps */ CXGB4_ETH_DUMP_HW = (1 << 1), /* various FW and HW dumps */ }; From 844d1b6f0ef8051a1ac0403327ab881dd4d101a3 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Fri, 13 Oct 2017 18:48:16 +0530 Subject: [PATCH 0990/2177] cxgb4: collect firmware mbox and device log dump Collect firmware mbox and device logs before collecting the rest of the hardware dumps to snap the firmware state before the mailbox logs are updated by other hardware dumps. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 6 ++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 2 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 84 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 6 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 9 ++ 5 files changed, 107 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 71a426dd22f5..2b717e700bbc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -26,4 +26,10 @@ struct card_mem { u16 size_edc1; u16 mem_flag; }; + +struct cudbg_mbox_log { + struct mbox_cmd entry; + u32 hi[MBOX_LEN / 8]; + u32 lo[MBOX_LEN / 8]; +}; #endif /* __CUDBG_ENTITY_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 8bcea985af77..0a37d9b6cd32 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -28,8 +28,10 @@ enum cudbg_dbg_entity_type { CUDBG_REG_DUMP = 1, + CUDBG_DEV_LOG = 2, CUDBG_EDC0 = 18, CUDBG_EDC1 = 19, + CUDBG_MBOX_LOG = 66, CUDBG_MAX_ENTITY = 70, }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index fb0e97e6a6a0..960635e37a9d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -90,6 +90,45 @@ int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct devlog_params *dparams; + int rc = 0; + + rc = t4_init_devlog_params(padap); + if (rc < 0) { + cudbg_err->sys_err = rc; + return rc; + } + + dparams = &padap->params.devlog; + rc = cudbg_get_buff(dbg_buff, dparams->size, &temp_buff); + if (rc) + return rc; + + /* Collect FW devlog */ + if (dparams->start != 0) { + spin_lock(&padap->win0_lock); + rc = t4_memory_rw(padap, padap->params.drv_memwin, + dparams->memtype, dparams->start, + dparams->size, + (__be32 *)(char *)temp_buff.data, + 1); + spin_unlock(&padap->win0_lock); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, u8 mem_type, unsigned long tot_len, @@ -213,3 +252,48 @@ int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err, MEM_EDC1); } + +int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_mbox_log *mboxlog = NULL; + struct cudbg_buffer 
temp_buff = { 0 }; + struct mbox_cmd_log *log = NULL; + struct mbox_cmd *entry; + unsigned int entry_idx; + u16 mbox_cmds; + int i, k, rc; + u64 flit; + u32 size; + + log = padap->mbox_log; + mbox_cmds = padap->mbox_log->size; + size = sizeof(struct cudbg_mbox_log) * mbox_cmds; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + mboxlog = (struct cudbg_mbox_log *)temp_buff.data; + for (k = 0; k < mbox_cmds; k++) { + entry_idx = log->cursor + k; + if (entry_idx >= log->size) + entry_idx -= log->size; + + entry = mbox_cmd_log_entry(log, entry_idx); + /* skip over unused entries */ + if (entry->timestamp == 0) + continue; + + memcpy(&mboxlog->entry, entry, sizeof(struct mbox_cmd)); + for (i = 0; i < MBOX_LEN / 8; i++) { + flit = entry->cmd[i]; + mboxlog->hi[i] = (u32)(flit >> 32); + mboxlog->lo[i] = (u32)flit; + } + mboxlog++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index f7be5090b172..690591b36d4c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -21,12 +21,18 @@ int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 286d172a9c19..f8c4f4199ce6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -18,6 +18,7 @@ #include "t4_regs.h" #include "cxgb4.h" #include "cxgb4_cudbg.h" +#include "cudbg_entity.h" static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = { { CUDBG_EDC0, cudbg_collect_edc0_meminfo }, @@ -25,6 +26,8 @@ static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = { }; static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { + { CUDBG_MBOX_LOG, cudbg_collect_mbox_log }, + { CUDBG_DEV_LOG, cudbg_collect_fw_devlog }, { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, }; @@ -46,6 +49,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) break; } break; + case CUDBG_DEV_LOG: + len = adap->params.devlog.size; + break; case CUDBG_EDC0: value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); if (value & EDRAM0_ENABLE_F) { @@ -62,6 +68,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) } len = cudbg_mbytes_to_bytes(len); break; + case CUDBG_MBOX_LOG: + len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size; + break; default: break; } From 5ccf9d049615994349e9b0a1f0d4b9a398b9b0c2 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Fri, 13 Oct 2017 18:48:17 +0530 Subject: [PATCH 0991/2177] cxgb4: update API for TP indirect register access Try to access TP indirect registers via firmware first. 
If this fails, fallback and access them directly. This ensures that driver and firmware do not conflict each other while accessing the TP indirect registers. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 36 +- .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 13 +- .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 14 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 +- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 312 +++++++++++------- 5 files changed, 239 insertions(+), 140 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a749602fdc41..d4032e373927 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1459,7 +1459,7 @@ unsigned int qtimer_val(const struct adapter *adap, int t4_init_devlog_params(struct adapter *adapter); int t4_init_sge_params(struct adapter *adapter); -int t4_init_tp_params(struct adapter *adap); +int t4_init_tp_params(struct adapter *adap, bool sleep_ok); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_init_rss_mode(struct adapter *adap, int mbox); int t4_init_portinfo(struct port_info *pi, int mbox, @@ -1473,14 +1473,15 @@ int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid, unsigned int flags, unsigned int defq); int t4_read_rss(struct adapter *adapter, u16 *entries); -void t4_read_rss_key(struct adapter *adapter, u32 *key); -void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx); +void t4_read_rss_key(struct adapter *adapter, u32 *key, bool sleep_ok); +void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx, + bool sleep_ok); void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, - u32 *valp); + u32 *valp, bool sleep_ok); void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, - u32 *vfl, u32 *vfh); -u32 t4_read_rss_pf_map(struct adapter *adapter); -u32 t4_read_rss_pf_mask(struct adapter *adapter); + u32 *vfl, u32 *vfh, bool sleep_ok); +u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok); +u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok); unsigned int t4_get_mps_bg_map(struct adapter *adapter, int pidx); unsigned int t4_get_tp_ch_map(struct adapter *adapter, int pidx); @@ -1511,14 +1512,18 @@ void t4_read_cong_tbl(struct adapter *adap, u16 incr[NMTUS][NCCTRL_WIN]); void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr, unsigned int mask, unsigned int val); void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr); -void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st); -void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st); -void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st); -void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st); +void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st, + bool sleep_ok); +void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st, + bool sleep_ok); +void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st, + bool sleep_ok); +void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st, + bool sleep_ok); void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, - struct tp_tcp_stats *v6); + struct tp_tcp_stats *v6, bool sleep_ok); void 
t4_get_fcoe_stats(struct adapter *adap, unsigned int idx, - struct tp_fcoe_stats *st); + struct tp_fcoe_stats *st, bool sleep_ok); void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, const unsigned short *alpha, const unsigned short *beta); @@ -1627,6 +1632,11 @@ void t4_idma_monitor(struct adapter *adapter, int hz, int ticks); int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, unsigned int naddr, u8 *addr); +void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok); +void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok); + void t4_uld_mem_free(struct adapter *adap); int t4_uld_mem_alloc(struct adapter *adap); void t4_uld_clean_up(struct adapter *adap); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 76540b0e082d..917663b35603 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2211,7 +2211,7 @@ static int rss_key_show(struct seq_file *seq, void *v) { u32 key[10]; - t4_read_rss_key(seq->private, key); + t4_read_rss_key(seq->private, key, true); seq_printf(seq, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x\n", key[9], key[8], key[7], key[6], key[5], key[4], key[3], key[2], key[1], key[0]); @@ -2248,7 +2248,7 @@ static ssize_t rss_key_write(struct file *file, const char __user *buf, } } - t4_write_rss_key(adap, key, -1); + t4_write_rss_key(adap, key, -1, true); return count; } @@ -2325,12 +2325,13 @@ static int rss_pf_config_open(struct inode *inode, struct file *file) return -ENOMEM; pfconf = (struct rss_pf_conf *)p->data; - rss_pf_map = t4_read_rss_pf_map(adapter); - rss_pf_mask = t4_read_rss_pf_mask(adapter); + rss_pf_map = t4_read_rss_pf_map(adapter, true); + rss_pf_mask = t4_read_rss_pf_mask(adapter, true); for (pf = 0; pf < 8; pf++) { pfconf[pf].rss_pf_map = rss_pf_map; pfconf[pf].rss_pf_mask = rss_pf_mask; - t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config); + t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config, + true); } return 0; } @@ -2393,7 +2394,7 @@ static int rss_vf_config_open(struct inode *inode, struct file *file) vfconf = (struct rss_vf_conf *)p->data; for (vf = 0; vf < vfcount; vf++) { t4_read_rss_vf_config(adapter, vf, &vfconf[vf].rss_vf_vfl, - &vfconf[vf].rss_vf_vfh); + &vfconf[vf].rss_vf_vfh, true); } return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 796eb051cb2f..1b7f6b9ccc8b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -336,10 +336,10 @@ static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s) memset(s, 0, sizeof(*s)); spin_lock(&adap->stats_lock); - t4_tp_get_tcp_stats(adap, &v4, &v6); - t4_tp_get_rdma_stats(adap, &rdma_stats); - t4_get_usm_stats(adap, &usm_stats); - t4_tp_get_err_stats(adap, &err_stats); + t4_tp_get_tcp_stats(adap, &v4, &v6, false); + t4_tp_get_rdma_stats(adap, &rdma_stats, false); + t4_get_usm_stats(adap, &usm_stats, false); + t4_tp_get_err_stats(adap, &err_stats, false); spin_unlock(&adap->stats_lock); s->db_drop = adap->db_stats.db_drop; @@ -389,9 +389,9 @@ static void collect_channel_stats(struct adapter *adap, struct channel_stats *s, memset(s, 0, sizeof(*s)); spin_lock(&adap->stats_lock); - t4_tp_get_cpl_stats(adap, &cpl_stats); - t4_tp_get_err_stats(adap, &err_stats); - 
t4_get_fcoe_stats(adap, i, &fcoe_stats); + t4_tp_get_cpl_stats(adap, &cpl_stats, false); + t4_tp_get_err_stats(adap, &err_stats, false); + t4_get_fcoe_stats(adap, i, &fcoe_stats, false); spin_unlock(&adap->stats_lock); s->cpl_req = cpl_stats.req[i]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 70c395d18087..8d97ae6039aa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1639,7 +1639,7 @@ void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4, struct adapter *adap = pci_get_drvdata(pdev); spin_lock(&adap->stats_lock); - t4_tp_get_tcp_stats(adap, v4, v6); + t4_tp_get_tcp_stats(adap, v4, v6, false); spin_unlock(&adap->stats_lock); } EXPORT_SYMBOL(cxgb4_get_tcp_stats); @@ -4077,7 +4077,7 @@ static int adap_init0(struct adapter *adap) } t4_init_sge_params(adap); adap->flags |= FW_OK; - t4_init_tp_params(adap); + t4_init_tp_params(adap, true); return 0; /* diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index b3fd1f457639..0f12bf507d56 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -5052,23 +5052,26 @@ static unsigned int t4_use_ldst(struct adapter *adap) } /** - * t4_fw_tp_pio_rw - Access TP PIO through LDST - * @adap: the adapter - * @vals: where the indirect register values are stored/written - * @nregs: how many indirect registers to read/write - * @start_idx: index of first indirect register to read/write - * @rw: Read (1) or Write (0) + * t4_tp_fw_ldst_rw - Access TP indirect register through LDST + * @adap: the adapter + * @cmd: TP fw ldst address space type + * @vals: where the indirect register values are stored/written + * @nregs: how many indirect registers to read/write + * @start_idx: index of first indirect register to read/write + * @rw: Read (1) or Write (0) + * @sleep_ok: if true we may sleep while awaiting command completion * - * Access TP PIO registers through LDST + * Access TP indirect registers through LDST */ -static void t4_fw_tp_pio_rw(struct adapter *adap, u32 *vals, unsigned int nregs, - unsigned int start_index, unsigned int rw) +static int t4_tp_fw_ldst_rw(struct adapter *adap, int cmd, u32 *vals, + unsigned int nregs, unsigned int start_index, + unsigned int rw, bool sleep_ok) { - int ret, i; - int cmd = FW_LDST_ADDRSPC_TP_PIO; + int ret = 0; + unsigned int i; struct fw_ldst_cmd c; - for (i = 0 ; i < nregs; i++) { + for (i = 0; i < nregs; i++) { memset(&c, 0, sizeof(c)); c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) | FW_CMD_REQUEST_F | @@ -5079,26 +5082,127 @@ static void t4_fw_tp_pio_rw(struct adapter *adap, u32 *vals, unsigned int nregs, c.u.addrval.addr = cpu_to_be32(start_index + i); c.u.addrval.val = rw ? 
0 : cpu_to_be32(vals[i]); - ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c); - if (!ret && rw) + ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, + sleep_ok); + if (ret) + return ret; + + if (rw) vals[i] = be32_to_cpu(c.u.addrval.val); } + return 0; +} + +/** + * t4_tp_indirect_rw - Read/Write TP indirect register through LDST or backdoor + * @adap: the adapter + * @reg_addr: Address Register + * @reg_data: Data register + * @buff: where the indirect register values are stored/written + * @nregs: how many indirect registers to read/write + * @start_index: index of first indirect register to read/write + * @rw: READ(1) or WRITE(0) + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Read/Write TP indirect registers through LDST if possible. + * Else, use backdoor access + **/ +static void t4_tp_indirect_rw(struct adapter *adap, u32 reg_addr, u32 reg_data, + u32 *buff, u32 nregs, u32 start_index, int rw, + bool sleep_ok) +{ + int rc = -EINVAL; + int cmd; + + switch (reg_addr) { + case TP_PIO_ADDR_A: + cmd = FW_LDST_ADDRSPC_TP_PIO; + break; + case TP_MIB_INDEX_A: + cmd = FW_LDST_ADDRSPC_TP_MIB; + break; + default: + goto indirect_access; + } + + if (t4_use_ldst(adap)) + rc = t4_tp_fw_ldst_rw(adap, cmd, buff, nregs, start_index, rw, + sleep_ok); + +indirect_access: + + if (rc) { + if (rw) + t4_read_indirect(adap, reg_addr, reg_data, buff, nregs, + start_index); + else + t4_write_indirect(adap, reg_addr, reg_data, buff, nregs, + start_index); + } +} + +/** + * t4_tp_pio_read - Read TP PIO registers + * @adap: the adapter + * @buff: where the indirect register values are written + * @nregs: how many indirect registers to read + * @start_index: index of first indirect register to read + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Read TP PIO Registers + **/ +void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok) +{ + t4_tp_indirect_rw(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, buff, nregs, + start_index, 1, sleep_ok); +} + +/** + * t4_tp_pio_write - Write TP PIO registers + * @adap: the adapter + * @buff: where the indirect register values are stored + * @nregs: how many indirect registers to write + * @start_index: index of first indirect register to write + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Write TP PIO Registers + **/ +static void t4_tp_pio_write(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok) +{ + t4_tp_indirect_rw(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, buff, nregs, + start_index, 0, sleep_ok); +} + +/** + * t4_tp_mib_read - Read TP MIB registers + * @adap: the adapter + * @buff: where the indirect register values are written + * @nregs: how many indirect registers to read + * @start_index: index of first indirect register to read + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Read TP MIB Registers + **/ +void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, + bool sleep_ok) +{ + t4_tp_indirect_rw(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, buff, nregs, + start_index, 1, sleep_ok); } /** * t4_read_rss_key - read the global RSS key * @adap: the adapter * @key: 10-entry array holding the 320-bit RSS key + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the global 320-bit RSS key. 
*/ -void t4_read_rss_key(struct adapter *adap, u32 *key) +void t4_read_rss_key(struct adapter *adap, u32 *key, bool sleep_ok) { - if (t4_use_ldst(adap)) - t4_fw_tp_pio_rw(adap, key, 10, TP_RSS_SECRET_KEY0_A, 1); - else - t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10, - TP_RSS_SECRET_KEY0_A); + t4_tp_pio_read(adap, key, 10, TP_RSS_SECRET_KEY0_A, sleep_ok); } /** @@ -5106,12 +5210,14 @@ void t4_read_rss_key(struct adapter *adap, u32 *key) * @adap: the adapter * @key: 10-entry array holding the 320-bit RSS key * @idx: which RSS key to write + * @sleep_ok: if true we may sleep while awaiting command completion * * Writes one of the RSS keys with the given 320-bit value. If @idx is * 0..15 the corresponding entry in the RSS key table is written, * otherwise the global RSS key is written. */ -void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx) +void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx, + bool sleep_ok) { u8 rss_key_addr_cnt = 16; u32 vrt = t4_read_reg(adap, TP_RSS_CONFIG_VRT_A); @@ -5124,11 +5230,7 @@ void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx) (vrt & KEYEXTEND_F) && (KEYMODE_G(vrt) == 3)) rss_key_addr_cnt = 32; - if (t4_use_ldst(adap)) - t4_fw_tp_pio_rw(adap, (void *)key, 10, TP_RSS_SECRET_KEY0_A, 0); - else - t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10, - TP_RSS_SECRET_KEY0_A); + t4_tp_pio_write(adap, (void *)key, 10, TP_RSS_SECRET_KEY0_A, sleep_ok); if (idx >= 0 && idx < rss_key_addr_cnt) { if (rss_key_addr_cnt > 16) @@ -5146,19 +5248,15 @@ void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx) * @adapter: the adapter * @index: the entry in the PF RSS table to read * @valp: where to store the returned value + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the PF RSS Configuration Table at the specified index and returns * the value found there. */ void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, - u32 *valp) + u32 *valp, bool sleep_ok) { - if (t4_use_ldst(adapter)) - t4_fw_tp_pio_rw(adapter, valp, 1, - TP_RSS_PF0_CONFIG_A + index, 1); - else - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - valp, 1, TP_RSS_PF0_CONFIG_A + index); + t4_tp_pio_read(adapter, valp, 1, TP_RSS_PF0_CONFIG_A + index, sleep_ok); } /** @@ -5167,12 +5265,13 @@ void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, * @index: the entry in the VF RSS table to read * @vfl: where to store the returned VFL * @vfh: where to store the returned VFH + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the VF RSS Configuration Table at the specified index and returns * the (VFL, VFH) values found there. */ void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, - u32 *vfl, u32 *vfh) + u32 *vfl, u32 *vfh, bool sleep_ok) { u32 vrt, mask, data; @@ -5193,50 +5292,37 @@ void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, /* Grab the VFL/VFH values ... 
*/ - if (t4_use_ldst(adapter)) { - t4_fw_tp_pio_rw(adapter, vfl, 1, TP_RSS_VFL_CONFIG_A, 1); - t4_fw_tp_pio_rw(adapter, vfh, 1, TP_RSS_VFH_CONFIG_A, 1); - } else { - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - vfl, 1, TP_RSS_VFL_CONFIG_A); - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - vfh, 1, TP_RSS_VFH_CONFIG_A); - } + t4_tp_pio_read(adapter, vfl, 1, TP_RSS_VFL_CONFIG_A, sleep_ok); + t4_tp_pio_read(adapter, vfh, 1, TP_RSS_VFH_CONFIG_A, sleep_ok); } /** * t4_read_rss_pf_map - read PF RSS Map * @adapter: the adapter + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the PF RSS Map register and returns its value. */ -u32 t4_read_rss_pf_map(struct adapter *adapter) +u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok) { u32 pfmap; - if (t4_use_ldst(adapter)) - t4_fw_tp_pio_rw(adapter, &pfmap, 1, TP_RSS_PF_MAP_A, 1); - else - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - &pfmap, 1, TP_RSS_PF_MAP_A); + t4_tp_pio_read(adapter, &pfmap, 1, TP_RSS_PF_MAP_A, sleep_ok); return pfmap; } /** * t4_read_rss_pf_mask - read PF RSS Mask * @adapter: the adapter + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the PF RSS Mask register and returns its value. */ -u32 t4_read_rss_pf_mask(struct adapter *adapter) +u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok) { u32 pfmask; - if (t4_use_ldst(adapter)) - t4_fw_tp_pio_rw(adapter, &pfmask, 1, TP_RSS_PF_MSK_A, 1); - else - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - &pfmask, 1, TP_RSS_PF_MSK_A); + t4_tp_pio_read(adapter, &pfmask, 1, TP_RSS_PF_MSK_A, sleep_ok); return pfmask; } @@ -5245,12 +5331,13 @@ u32 t4_read_rss_pf_mask(struct adapter *adapter) * @adap: the adapter * @v4: holds the TCP/IP counter values * @v6: holds the TCP/IPv6 counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's TCP/IP and TCP/IPv6 MIB counters. * Either @v4 or @v6 may be %NULL to skip the corresponding stats. */ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, - struct tp_tcp_stats *v6) + struct tp_tcp_stats *v6, bool sleep_ok) { u32 val[TP_MIB_TCP_RXT_SEG_LO_A - TP_MIB_TCP_OUT_RST_A + 1]; @@ -5259,16 +5346,16 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, #define STAT64(x) (((u64)STAT(x##_HI) << 32) | STAT(x##_LO)) if (v4) { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, - ARRAY_SIZE(val), TP_MIB_TCP_OUT_RST_A); + t4_tp_mib_read(adap, val, ARRAY_SIZE(val), + TP_MIB_TCP_OUT_RST_A, sleep_ok); v4->tcp_out_rsts = STAT(OUT_RST); v4->tcp_in_segs = STAT64(IN_SEG); v4->tcp_out_segs = STAT64(OUT_SEG); v4->tcp_retrans_segs = STAT64(RXT_SEG); } if (v6) { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, - ARRAY_SIZE(val), TP_MIB_TCP_V6OUT_RST_A); + t4_tp_mib_read(adap, val, ARRAY_SIZE(val), + TP_MIB_TCP_V6OUT_RST_A, sleep_ok); v6->tcp_out_rsts = STAT(OUT_RST); v6->tcp_in_segs = STAT64(IN_SEG); v6->tcp_out_segs = STAT64(OUT_SEG); @@ -5283,63 +5370,66 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, * t4_tp_get_err_stats - read TP's error MIB counters * @adap: the adapter * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's error counters. 
*/ -void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st) +void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st, + bool sleep_ok) { int nchan = adap->params.arch.nchan; - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tnl_cong_drops, nchan, TP_MIB_TNL_CNG_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->ofld_chan_drops, nchan, TP_MIB_OFD_CHN_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->ofld_vlan_drops, nchan, TP_MIB_OFD_VLN_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tcp6_in_errs, nchan, TP_MIB_TCP_V6IN_ERR_0_A); - - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A); + t4_tp_mib_read(adap, st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A, + sleep_ok); + t4_tp_mib_read(adap, st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A, + sleep_ok); + t4_tp_mib_read(adap, st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A, + sleep_ok); + t4_tp_mib_read(adap, st->tnl_cong_drops, nchan, + TP_MIB_TNL_CNG_DROP_0_A, sleep_ok); + t4_tp_mib_read(adap, st->ofld_chan_drops, nchan, + TP_MIB_OFD_CHN_DROP_0_A, sleep_ok); + t4_tp_mib_read(adap, st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A, + sleep_ok); + t4_tp_mib_read(adap, st->ofld_vlan_drops, nchan, + TP_MIB_OFD_VLN_DROP_0_A, sleep_ok); + t4_tp_mib_read(adap, st->tcp6_in_errs, nchan, + TP_MIB_TCP_V6IN_ERR_0_A, sleep_ok); + t4_tp_mib_read(adap, &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A, + sleep_ok); } /** * t4_tp_get_cpl_stats - read TP's CPL MIB counters * @adap: the adapter * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's CPL counters. */ -void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st) +void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st, + bool sleep_ok) { int nchan = adap->params.arch.nchan; - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req, - nchan, TP_MIB_CPL_IN_REQ_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp, - nchan, TP_MIB_CPL_OUT_RSP_0_A); + t4_tp_mib_read(adap, st->req, nchan, TP_MIB_CPL_IN_REQ_0_A, sleep_ok); + t4_tp_mib_read(adap, st->rsp, nchan, TP_MIB_CPL_OUT_RSP_0_A, sleep_ok); } /** * t4_tp_get_rdma_stats - read TP's RDMA MIB counters * @adap: the adapter * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's RDMA counters. 
*/ -void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st) +void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st, + bool sleep_ok) { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->rqe_dfr_pkt, - 2, TP_MIB_RQE_DFR_PKT_A); + t4_tp_mib_read(adap, &st->rqe_dfr_pkt, 2, TP_MIB_RQE_DFR_PKT_A, + sleep_ok); } /** @@ -5347,20 +5437,24 @@ void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st) * @adap: the adapter * @idx: the port index * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's FCoE counters for the selected port. */ void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx, - struct tp_fcoe_stats *st) + struct tp_fcoe_stats *st, bool sleep_ok) { u32 val[2]; - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->frames_ddp, - 1, TP_MIB_FCOE_DDP_0_A + idx); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->frames_drop, - 1, TP_MIB_FCOE_DROP_0_A + idx); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, - 2, TP_MIB_FCOE_BYTE_0_HI_A + 2 * idx); + t4_tp_mib_read(adap, &st->frames_ddp, 1, TP_MIB_FCOE_DDP_0_A + idx, + sleep_ok); + + t4_tp_mib_read(adap, &st->frames_drop, 1, + TP_MIB_FCOE_DROP_0_A + idx, sleep_ok); + + t4_tp_mib_read(adap, val, 2, TP_MIB_FCOE_BYTE_0_HI_A + 2 * idx, + sleep_ok); + st->octets_ddp = ((u64)val[0] << 32) | val[1]; } @@ -5368,15 +5462,16 @@ void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx, * t4_get_usm_stats - read TP's non-TCP DDP MIB counters * @adap: the adapter * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's counters for non-TCP directly-placed packets. */ -void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st) +void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st, + bool sleep_ok) { u32 val[4]; - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, 4, - TP_MIB_USM_PKTS_A); + t4_tp_mib_read(adap, val, 4, TP_MIB_USM_PKTS_A, sleep_ok); st->frames = val[0]; st->drops = val[1]; st->octets = ((u64)val[2] << 32) | val[3]; @@ -8663,10 +8758,11 @@ int t4_init_sge_params(struct adapter *adapter) /** * t4_init_tp_params - initialize adap->params.tp * @adap: the adapter + * @sleep_ok: if true we may sleep while awaiting command completion * * Initialize various fields of the adapter's TP Parameters structure. */ -int t4_init_tp_params(struct adapter *adap) +int t4_init_tp_params(struct adapter *adap, bool sleep_ok) { int chan; u32 v; @@ -8682,19 +8778,11 @@ int t4_init_tp_params(struct adapter *adap) /* Cache the adapter's Compressed Filter Mode and global Incress * Configuration. */ - if (t4_use_ldst(adap)) { - t4_fw_tp_pio_rw(adap, &adap->params.tp.vlan_pri_map, 1, - TP_VLAN_PRI_MAP_A, 1); - t4_fw_tp_pio_rw(adap, &adap->params.tp.ingress_config, 1, - TP_INGRESS_CONFIG_A, 1); - } else { - t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, - &adap->params.tp.vlan_pri_map, 1, - TP_VLAN_PRI_MAP_A); - t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, - &adap->params.tp.ingress_config, 1, - TP_INGRESS_CONFIG_A); - } + t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1, + TP_VLAN_PRI_MAP_A, sleep_ok); + t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1, + TP_INGRESS_CONFIG_A, sleep_ok); + /* For T6, cache the adapter's compressed error vector * and passing outer header info for encapsulated packets. 
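A minimal usage sketch of the reworked accessors (function names are taken from the hunks above; the caller and its locking are hypothetical and merely mirror the ethtool and debugfs call sites): code that holds a spinlock passes sleep_ok = false so the mailbox path must not sleep, while process-context callers pass true and allow the firmware LDST route.

	/* Hedged sketch, not part of the patch: how sleep_ok is chosen by
	 * calling context (assumes the cxgb4.h declarations added above).
	 */
	static void example_read_tp_state(struct adapter *adap,
					  u32 *vlan_pri_map,
					  struct tp_cpl_stats *cpl)
	{
		/* Process context (probe/debugfs): sleeping in the firmware
		 * mailbox path is fine, so pass sleep_ok = true.
		 */
		t4_tp_pio_read(adap, vlan_pri_map, 1, TP_VLAN_PRI_MAP_A, true);

		/* Atomic context: stats are collected under the stats_lock
		 * spinlock, so pass false and the mailbox code busy-waits.
		 */
		spin_lock(&adap->stats_lock);
		t4_tp_get_cpl_stats(adap, cpl, false);
		spin_unlock(&adap->stats_lock);
	}
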
*/ From 4359cf33680c3f276c6bba11730836c41d3540a2 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Fri, 13 Oct 2017 18:48:18 +0530 Subject: [PATCH 0992/2177] cxgb4: collect TP dump Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 72 +++++++++++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 1 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 114 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 3 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 2 + .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 21 +++- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 20 +++ drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 2 + 8 files changed, 234 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 2b717e700bbc..a7446fd09366 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -32,4 +32,76 @@ struct cudbg_mbox_log { u32 hi[MBOX_LEN / 8]; u32 lo[MBOX_LEN / 8]; }; + +struct ireg_field { + u32 ireg_addr; + u32 ireg_data; + u32 ireg_local_offset; + u32 ireg_offset_range; +}; + +struct ireg_buf { + struct ireg_field tp_pio; + u32 outbuf[32]; +}; + +#define IREG_NUM_ELEM 4 + +static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = { + {0x7e40, 0x7e44, 0x020, 28}, /* t6_tp_pio_regs_20_to_3b */ + {0x7e40, 0x7e44, 0x040, 10}, /* t6_tp_pio_regs_40_to_49 */ + {0x7e40, 0x7e44, 0x050, 10}, /* t6_tp_pio_regs_50_to_59 */ + {0x7e40, 0x7e44, 0x060, 14}, /* t6_tp_pio_regs_60_to_6d */ + {0x7e40, 0x7e44, 0x06F, 1}, /* t6_tp_pio_regs_6f */ + {0x7e40, 0x7e44, 0x070, 6}, /* t6_tp_pio_regs_70_to_75 */ + {0x7e40, 0x7e44, 0x130, 18}, /* t6_tp_pio_regs_130_to_141 */ + {0x7e40, 0x7e44, 0x145, 19}, /* t6_tp_pio_regs_145_to_157 */ + {0x7e40, 0x7e44, 0x160, 1}, /* t6_tp_pio_regs_160 */ + {0x7e40, 0x7e44, 0x230, 25}, /* t6_tp_pio_regs_230_to_248 */ + {0x7e40, 0x7e44, 0x24a, 3}, /* t6_tp_pio_regs_24c */ + {0x7e40, 0x7e44, 0x8C0, 1} /* t6_tp_pio_regs_8c0 */ +}; + +static const u32 t5_tp_pio_array[][IREG_NUM_ELEM] = { + {0x7e40, 0x7e44, 0x020, 28}, /* t5_tp_pio_regs_20_to_3b */ + {0x7e40, 0x7e44, 0x040, 19}, /* t5_tp_pio_regs_40_to_52 */ + {0x7e40, 0x7e44, 0x054, 2}, /* t5_tp_pio_regs_54_to_55 */ + {0x7e40, 0x7e44, 0x060, 13}, /* t5_tp_pio_regs_60_to_6c */ + {0x7e40, 0x7e44, 0x06F, 1}, /* t5_tp_pio_regs_6f */ + {0x7e40, 0x7e44, 0x120, 4}, /* t5_tp_pio_regs_120_to_123 */ + {0x7e40, 0x7e44, 0x12b, 2}, /* t5_tp_pio_regs_12b_to_12c */ + {0x7e40, 0x7e44, 0x12f, 21}, /* t5_tp_pio_regs_12f_to_143 */ + {0x7e40, 0x7e44, 0x145, 19}, /* t5_tp_pio_regs_145_to_157 */ + {0x7e40, 0x7e44, 0x230, 25}, /* t5_tp_pio_regs_230_to_248 */ + {0x7e40, 0x7e44, 0x8C0, 1} /* t5_tp_pio_regs_8c0 */ +}; + +static const u32 t6_tp_tm_pio_array[][IREG_NUM_ELEM] = { + {0x7e18, 0x7e1c, 0x0, 12} +}; + +static const u32 t5_tp_tm_pio_array[][IREG_NUM_ELEM] = { + {0x7e18, 0x7e1c, 0x0, 12} +}; + +static const u32 t6_tp_mib_index_array[6][IREG_NUM_ELEM] = { + {0x7e50, 0x7e54, 0x0, 13}, + {0x7e50, 0x7e54, 0x10, 6}, + {0x7e50, 0x7e54, 0x18, 21}, + {0x7e50, 0x7e54, 0x30, 32}, + {0x7e50, 0x7e54, 0x50, 22}, + {0x7e50, 0x7e54, 0x68, 12} +}; + +static const u32 t5_tp_mib_index_array[9][IREG_NUM_ELEM] = { + {0x7e50, 0x7e54, 0x0, 13}, + {0x7e50, 0x7e54, 0x10, 6}, + {0x7e50, 0x7e54, 0x18, 8}, + {0x7e50, 0x7e54, 0x20, 13}, + {0x7e50, 0x7e54, 0x30, 16}, + {0x7e50, 0x7e54, 0x40, 16}, + {0x7e50, 0x7e54, 0x50, 16}, + {0x7e50, 0x7e54, 
0x60, 6}, + {0x7e50, 0x7e54, 0x68, 4} +}; #endif /* __CUDBG_ENTITY_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 0a37d9b6cd32..53ea447c9103 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -31,6 +31,7 @@ enum cudbg_dbg_entity_type { CUDBG_DEV_LOG = 2, CUDBG_EDC0 = 18, CUDBG_EDC1 = 19, + CUDBG_TP_INDIRECT = 36, CUDBG_MBOX_LOG = 66, CUDBG_MAX_ENTITY = 70, }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 960635e37a9d..6efa1de3723c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -253,6 +253,120 @@ int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, MEM_EDC1); } +int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ch_tp_pio; + int i, rc, n = 0; + u32 size; + + if (is_t5(padap->params.chip)) + n = sizeof(t5_tp_pio_array) + + sizeof(t5_tp_tm_pio_array) + + sizeof(t5_tp_mib_index_array); + else + n = sizeof(t6_tp_pio_array) + + sizeof(t6_tp_tm_pio_array) + + sizeof(t6_tp_mib_index_array); + + n = n / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + ch_tp_pio = (struct ireg_buf *)temp_buff.data; + + /* TP_PIO */ + if (is_t5(padap->params.chip)) + n = sizeof(t5_tp_pio_array) / (IREG_NUM_ELEM * sizeof(u32)); + else if (is_t6(padap->params.chip)) + n = sizeof(t6_tp_pio_array) / (IREG_NUM_ELEM * sizeof(u32)); + + for (i = 0; i < n; i++) { + struct ireg_field *tp_pio = &ch_tp_pio->tp_pio; + u32 *buff = ch_tp_pio->outbuf; + + if (is_t5(padap->params.chip)) { + tp_pio->ireg_addr = t5_tp_pio_array[i][0]; + tp_pio->ireg_data = t5_tp_pio_array[i][1]; + tp_pio->ireg_local_offset = t5_tp_pio_array[i][2]; + tp_pio->ireg_offset_range = t5_tp_pio_array[i][3]; + } else if (is_t6(padap->params.chip)) { + tp_pio->ireg_addr = t6_tp_pio_array[i][0]; + tp_pio->ireg_data = t6_tp_pio_array[i][1]; + tp_pio->ireg_local_offset = t6_tp_pio_array[i][2]; + tp_pio->ireg_offset_range = t6_tp_pio_array[i][3]; + } + t4_tp_pio_read(padap, buff, tp_pio->ireg_offset_range, + tp_pio->ireg_local_offset, true); + ch_tp_pio++; + } + + /* TP_TM_PIO */ + if (is_t5(padap->params.chip)) + n = sizeof(t5_tp_tm_pio_array) / (IREG_NUM_ELEM * sizeof(u32)); + else if (is_t6(padap->params.chip)) + n = sizeof(t6_tp_tm_pio_array) / (IREG_NUM_ELEM * sizeof(u32)); + + for (i = 0; i < n; i++) { + struct ireg_field *tp_pio = &ch_tp_pio->tp_pio; + u32 *buff = ch_tp_pio->outbuf; + + if (is_t5(padap->params.chip)) { + tp_pio->ireg_addr = t5_tp_tm_pio_array[i][0]; + tp_pio->ireg_data = t5_tp_tm_pio_array[i][1]; + tp_pio->ireg_local_offset = t5_tp_tm_pio_array[i][2]; + tp_pio->ireg_offset_range = t5_tp_tm_pio_array[i][3]; + } else if (is_t6(padap->params.chip)) { + tp_pio->ireg_addr = t6_tp_tm_pio_array[i][0]; + tp_pio->ireg_data = t6_tp_tm_pio_array[i][1]; + tp_pio->ireg_local_offset = t6_tp_tm_pio_array[i][2]; + tp_pio->ireg_offset_range = t6_tp_tm_pio_array[i][3]; + } + t4_tp_tm_pio_read(padap, buff, tp_pio->ireg_offset_range, + tp_pio->ireg_local_offset, true); + ch_tp_pio++; + } + + /* TP_MIB_INDEX */ + if (is_t5(padap->params.chip)) + n = sizeof(t5_tp_mib_index_array) / + (IREG_NUM_ELEM * 
sizeof(u32)); + else if (is_t6(padap->params.chip)) + n = sizeof(t6_tp_mib_index_array) / + (IREG_NUM_ELEM * sizeof(u32)); + + for (i = 0; i < n ; i++) { + struct ireg_field *tp_pio = &ch_tp_pio->tp_pio; + u32 *buff = ch_tp_pio->outbuf; + + if (is_t5(padap->params.chip)) { + tp_pio->ireg_addr = t5_tp_mib_index_array[i][0]; + tp_pio->ireg_data = t5_tp_mib_index_array[i][1]; + tp_pio->ireg_local_offset = + t5_tp_mib_index_array[i][2]; + tp_pio->ireg_offset_range = + t5_tp_mib_index_array[i][3]; + } else if (is_t6(padap->params.chip)) { + tp_pio->ireg_addr = t6_tp_mib_index_array[i][0]; + tp_pio->ireg_data = t6_tp_mib_index_array[i][1]; + tp_pio->ireg_local_offset = + t6_tp_mib_index_array[i][2]; + tp_pio->ireg_offset_range = + t6_tp_mib_index_array[i][3]; + } + t4_tp_mib_read(padap, buff, tp_pio->ireg_offset_range, + tp_pio->ireg_local_offset, true); + ch_tp_pio++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 690591b36d4c..7a927ec71a5f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -30,6 +30,9 @@ int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init, int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index d4032e373927..4eaca05ebd3a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1634,6 +1634,8 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, unsigned int naddr, u8 *addr); void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); +void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok); void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index f8c4f4199ce6..7dfee6adc51e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -29,11 +29,12 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_MBOX_LOG, cudbg_collect_mbox_log }, { CUDBG_DEV_LOG, cudbg_collect_fw_devlog }, { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, + { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect }, }; static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) { - u32 value, len = 0; + u32 value, n = 0, len = 0; switch (entity) { case CUDBG_REG_DUMP: @@ -68,6 +69,24 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) } len = cudbg_mbytes_to_bytes(len); break; + case CUDBG_TP_INDIRECT: + switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { + case CHELSIO_T5: + n = sizeof(t5_tp_pio_array) + + sizeof(t5_tp_tm_pio_array) + + sizeof(t5_tp_mib_index_array); + break; + case CHELSIO_T6: + n = sizeof(t6_tp_pio_array) + + sizeof(t6_tp_tm_pio_array) + + 
sizeof(t6_tp_mib_index_array); + break; + default: + break; + } + n = n / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + break; case CUDBG_MBOX_LOG: len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size; break; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 0f12bf507d56..8fa40f9e75c4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -5118,6 +5118,9 @@ static void t4_tp_indirect_rw(struct adapter *adap, u32 reg_addr, u32 reg_data, case TP_PIO_ADDR_A: cmd = FW_LDST_ADDRSPC_TP_PIO; break; + case TP_TM_PIO_ADDR_A: + cmd = FW_LDST_ADDRSPC_TP_TM_PIO; + break; case TP_MIB_INDEX_A: cmd = FW_LDST_ADDRSPC_TP_MIB; break; @@ -5175,6 +5178,23 @@ static void t4_tp_pio_write(struct adapter *adap, u32 *buff, u32 nregs, start_index, 0, sleep_ok); } +/** + * t4_tp_tm_pio_read - Read TP TM PIO registers + * @adap: the adapter + * @buff: where the indirect register values are written + * @nregs: how many indirect registers to read + * @start_index: index of first indirect register to read + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Read TP TM PIO Registers + **/ +void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok) +{ + t4_tp_indirect_rw(adap, TP_TM_PIO_ADDR_A, TP_TM_PIO_DATA_A, buff, + nregs, start_index, 1, sleep_ok); +} + /** * t4_tp_mib_read - Read TP MIB registers * @adap: the adapter diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index dac90837842b..82614e078f50 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1447,6 +1447,8 @@ #define LKPTBLQUEUE0_M 0x3ffU #define LKPTBLQUEUE0_G(x) (((x) >> LKPTBLQUEUE0_S) & LKPTBLQUEUE0_M) +#define TP_TM_PIO_ADDR_A 0x7e18 +#define TP_TM_PIO_DATA_A 0x7e1c #define TP_PIO_ADDR_A 0x7e40 #define TP_PIO_DATA_A 0x7e44 #define TP_MIB_INDEX_A 0x7e50 From 270d39bf324ecdb9ab3f9c521e6b7fd9cc6c27b8 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Fri, 13 Oct 2017 18:48:19 +0530 Subject: [PATCH 0993/2177] cxgb4: collect hardware module dumps Collect SGE, PCIE, PM, UP CIM, MA and HMA dumps. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 74 +++++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 6 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 289 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 18 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 35 +++ 5 files changed, 422 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index a7446fd09366..d7f3392f618f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -104,4 +104,78 @@ static const u32 t5_tp_mib_index_array[9][IREG_NUM_ELEM] = { {0x7e50, 0x7e54, 0x60, 6}, {0x7e50, 0x7e54, 0x68, 4} }; + +static const u32 t5_sge_dbg_index_array[2][IREG_NUM_ELEM] = { + {0x10cc, 0x10d0, 0x0, 16}, + {0x10cc, 0x10d4, 0x0, 16}, +}; + +static const u32 t5_pcie_pdbg_array[][IREG_NUM_ELEM] = { + {0x5a04, 0x5a0c, 0x00, 0x20}, /* t5_pcie_pdbg_regs_00_to_20 */ + {0x5a04, 0x5a0c, 0x21, 0x20}, /* t5_pcie_pdbg_regs_21_to_40 */ + {0x5a04, 0x5a0c, 0x41, 0x10}, /* t5_pcie_pdbg_regs_41_to_50 */ +}; + +static const u32 t5_pcie_cdbg_array[][IREG_NUM_ELEM] = { + {0x5a10, 0x5a18, 0x00, 0x20}, /* t5_pcie_cdbg_regs_00_to_20 */ + {0x5a10, 0x5a18, 0x21, 0x18}, /* t5_pcie_cdbg_regs_21_to_37 */ +}; + +static const u32 t5_pm_rx_array[][IREG_NUM_ELEM] = { + {0x8FD0, 0x8FD4, 0x10000, 0x20}, /* t5_pm_rx_regs_10000_to_10020 */ + {0x8FD0, 0x8FD4, 0x10021, 0x0D}, /* t5_pm_rx_regs_10021_to_1002c */ +}; + +static const u32 t5_pm_tx_array[][IREG_NUM_ELEM] = { + {0x8FF0, 0x8FF4, 0x10000, 0x20}, /* t5_pm_tx_regs_10000_to_10020 */ + {0x8FF0, 0x8FF4, 0x10021, 0x1D}, /* t5_pm_tx_regs_10021_to_1003c */ +}; + +static const u32 t6_ma_ireg_array[][IREG_NUM_ELEM] = { + {0x78f8, 0x78fc, 0xa000, 23}, /* t6_ma_regs_a000_to_a016 */ + {0x78f8, 0x78fc, 0xa400, 30}, /* t6_ma_regs_a400_to_a41e */ + {0x78f8, 0x78fc, 0xa800, 20} /* t6_ma_regs_a800_to_a813 */ +}; + +static const u32 t6_ma_ireg_array2[][IREG_NUM_ELEM] = { + {0x78f8, 0x78fc, 0xe400, 17}, /* t6_ma_regs_e400_to_e600 */ + {0x78f8, 0x78fc, 0xe640, 13} /* t6_ma_regs_e640_to_e7c0 */ +}; + +static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM] = { + {0x7b50, 0x7b54, 0x2000, 0x20}, /* up_cim_2000_to_207c */ + {0x7b50, 0x7b54, 0x2080, 0x1d}, /* up_cim_2080_to_20fc */ + {0x7b50, 0x7b54, 0x00, 0x20}, /* up_cim_00_to_7c */ + {0x7b50, 0x7b54, 0x80, 0x20}, /* up_cim_80_to_fc */ + {0x7b50, 0x7b54, 0x100, 0x11}, /* up_cim_100_to_14c */ + {0x7b50, 0x7b54, 0x200, 0x10}, /* up_cim_200_to_23c */ + {0x7b50, 0x7b54, 0x240, 0x2}, /* up_cim_240_to_244 */ + {0x7b50, 0x7b54, 0x250, 0x2}, /* up_cim_250_to_254 */ + {0x7b50, 0x7b54, 0x260, 0x2}, /* up_cim_260_to_264 */ + {0x7b50, 0x7b54, 0x270, 0x2}, /* up_cim_270_to_274 */ + {0x7b50, 0x7b54, 0x280, 0x20}, /* up_cim_280_to_2fc */ + {0x7b50, 0x7b54, 0x300, 0x20}, /* up_cim_300_to_37c */ + {0x7b50, 0x7b54, 0x380, 0x14}, /* up_cim_380_to_3cc */ + +}; + +static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM] = { + {0x7b50, 0x7b54, 0x2000, 0x20}, /* up_cim_2000_to_207c */ + {0x7b50, 0x7b54, 0x2080, 0x19}, /* up_cim_2080_to_20ec */ + {0x7b50, 0x7b54, 0x00, 0x20}, /* up_cim_00_to_7c */ + {0x7b50, 0x7b54, 0x80, 0x20}, /* up_cim_80_to_fc */ + {0x7b50, 0x7b54, 0x100, 0x11}, /* up_cim_100_to_14c */ + {0x7b50, 0x7b54, 0x200, 0x10}, /* up_cim_200_to_23c */ + {0x7b50, 0x7b54, 0x240, 0x2}, /* up_cim_240_to_244 */ + {0x7b50, 0x7b54, 0x250, 0x2}, /* up_cim_250_to_254 */ + {0x7b50, 0x7b54, 0x260, 0x2}, /* up_cim_260_to_264 */ + {0x7b50, 0x7b54, 0x270, 0x2}, 
/* up_cim_270_to_274 */ + {0x7b50, 0x7b54, 0x280, 0x20}, /* up_cim_280_to_2fc */ + {0x7b50, 0x7b54, 0x300, 0x20}, /* up_cim_300_to_37c */ + {0x7b50, 0x7b54, 0x380, 0x14}, /* up_cim_380_to_3cc */ +}; + +static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = { + {0x51320, 0x51324, 0xa000, 32} /* t6_hma_regs_a000_to_a01f */ +}; #endif /* __CUDBG_ENTITY_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 53ea447c9103..01d282081b2d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -32,7 +32,13 @@ enum cudbg_dbg_entity_type { CUDBG_EDC0 = 18, CUDBG_EDC1 = 19, CUDBG_TP_INDIRECT = 36, + CUDBG_SGE_INDIRECT = 37, + CUDBG_PCIE_INDIRECT = 50, + CUDBG_PM_INDIRECT = 51, + CUDBG_MA_INDIRECT = 61, + CUDBG_UP_CIM_INDIRECT = 64, CUDBG_MBOX_LOG = 66, + CUDBG_HMA_INDIRECT = 67, CUDBG_MAX_ENTITY = 70, }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 6efa1de3723c..0149f1ca9f51 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -367,6 +367,258 @@ int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ch_sge_dbg; + int i, rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(*ch_sge_dbg) * 2, &temp_buff); + if (rc) + return rc; + + ch_sge_dbg = (struct ireg_buf *)temp_buff.data; + for (i = 0; i < 2; i++) { + struct ireg_field *sge_pio = &ch_sge_dbg->tp_pio; + u32 *buff = ch_sge_dbg->outbuf; + + sge_pio->ireg_addr = t5_sge_dbg_index_array[i][0]; + sge_pio->ireg_data = t5_sge_dbg_index_array[i][1]; + sge_pio->ireg_local_offset = t5_sge_dbg_index_array[i][2]; + sge_pio->ireg_offset_range = t5_sge_dbg_index_array[i][3]; + t4_read_indirect(padap, + sge_pio->ireg_addr, + sge_pio->ireg_data, + buff, + sge_pio->ireg_offset_range, + sge_pio->ireg_local_offset); + ch_sge_dbg++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ch_pcie; + int i, rc, n; + u32 size; + + n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n * 2; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + ch_pcie = (struct ireg_buf *)temp_buff.data; + /* PCIE_PDBG */ + for (i = 0; i < n; i++) { + struct ireg_field *pcie_pio = &ch_pcie->tp_pio; + u32 *buff = ch_pcie->outbuf; + + pcie_pio->ireg_addr = t5_pcie_pdbg_array[i][0]; + pcie_pio->ireg_data = t5_pcie_pdbg_array[i][1]; + pcie_pio->ireg_local_offset = t5_pcie_pdbg_array[i][2]; + pcie_pio->ireg_offset_range = t5_pcie_pdbg_array[i][3]; + t4_read_indirect(padap, + pcie_pio->ireg_addr, + pcie_pio->ireg_data, + buff, + pcie_pio->ireg_offset_range, + pcie_pio->ireg_local_offset); + ch_pcie++; + } + + /* PCIE_CDBG */ + n = sizeof(t5_pcie_cdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); + for (i = 0; i < n; i++) { + struct ireg_field *pcie_pio = &ch_pcie->tp_pio; + u32 *buff = ch_pcie->outbuf; + + pcie_pio->ireg_addr = t5_pcie_cdbg_array[i][0]; + pcie_pio->ireg_data = 
t5_pcie_cdbg_array[i][1]; + pcie_pio->ireg_local_offset = t5_pcie_cdbg_array[i][2]; + pcie_pio->ireg_offset_range = t5_pcie_cdbg_array[i][3]; + t4_read_indirect(padap, + pcie_pio->ireg_addr, + pcie_pio->ireg_data, + buff, + pcie_pio->ireg_offset_range, + pcie_pio->ireg_local_offset); + ch_pcie++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ch_pm; + int i, rc, n; + u32 size; + + n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n * 2; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + ch_pm = (struct ireg_buf *)temp_buff.data; + /* PM_RX */ + for (i = 0; i < n; i++) { + struct ireg_field *pm_pio = &ch_pm->tp_pio; + u32 *buff = ch_pm->outbuf; + + pm_pio->ireg_addr = t5_pm_rx_array[i][0]; + pm_pio->ireg_data = t5_pm_rx_array[i][1]; + pm_pio->ireg_local_offset = t5_pm_rx_array[i][2]; + pm_pio->ireg_offset_range = t5_pm_rx_array[i][3]; + t4_read_indirect(padap, + pm_pio->ireg_addr, + pm_pio->ireg_data, + buff, + pm_pio->ireg_offset_range, + pm_pio->ireg_local_offset); + ch_pm++; + } + + /* PM_TX */ + n = sizeof(t5_pm_tx_array) / (IREG_NUM_ELEM * sizeof(u32)); + for (i = 0; i < n; i++) { + struct ireg_field *pm_pio = &ch_pm->tp_pio; + u32 *buff = ch_pm->outbuf; + + pm_pio->ireg_addr = t5_pm_tx_array[i][0]; + pm_pio->ireg_data = t5_pm_tx_array[i][1]; + pm_pio->ireg_local_offset = t5_pm_tx_array[i][2]; + pm_pio->ireg_offset_range = t5_pm_tx_array[i][3]; + t4_read_indirect(padap, + pm_pio->ireg_addr, + pm_pio->ireg_data, + buff, + pm_pio->ireg_offset_range, + pm_pio->ireg_local_offset); + ch_pm++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ma_indr; + int i, rc, n; + u32 size, j; + + if (CHELSIO_CHIP_VERSION(padap->params.chip) < CHELSIO_T6) + return CUDBG_STATUS_ENTITY_NOT_FOUND; + + n = sizeof(t6_ma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n * 2; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + ma_indr = (struct ireg_buf *)temp_buff.data; + for (i = 0; i < n; i++) { + struct ireg_field *ma_fli = &ma_indr->tp_pio; + u32 *buff = ma_indr->outbuf; + + ma_fli->ireg_addr = t6_ma_ireg_array[i][0]; + ma_fli->ireg_data = t6_ma_ireg_array[i][1]; + ma_fli->ireg_local_offset = t6_ma_ireg_array[i][2]; + ma_fli->ireg_offset_range = t6_ma_ireg_array[i][3]; + t4_read_indirect(padap, ma_fli->ireg_addr, ma_fli->ireg_data, + buff, ma_fli->ireg_offset_range, + ma_fli->ireg_local_offset); + ma_indr++; + } + + n = sizeof(t6_ma_ireg_array2) / (IREG_NUM_ELEM * sizeof(u32)); + for (i = 0; i < n; i++) { + struct ireg_field *ma_fli = &ma_indr->tp_pio; + u32 *buff = ma_indr->outbuf; + + ma_fli->ireg_addr = t6_ma_ireg_array2[i][0]; + ma_fli->ireg_data = t6_ma_ireg_array2[i][1]; + ma_fli->ireg_local_offset = t6_ma_ireg_array2[i][2]; + for (j = 0; j < t6_ma_ireg_array2[i][3]; j++) { + t4_read_indirect(padap, ma_fli->ireg_addr, + ma_fli->ireg_data, buff, 1, + ma_fli->ireg_local_offset); + buff++; + ma_fli->ireg_local_offset += 0x20; + } + ma_indr++; + } + 
cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *up_cim; + int i, rc, n; + u32 size; + + n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + up_cim = (struct ireg_buf *)temp_buff.data; + for (i = 0; i < n; i++) { + struct ireg_field *up_cim_reg = &up_cim->tp_pio; + u32 *buff = up_cim->outbuf; + + if (is_t5(padap->params.chip)) { + up_cim_reg->ireg_addr = t5_up_cim_reg_array[i][0]; + up_cim_reg->ireg_data = t5_up_cim_reg_array[i][1]; + up_cim_reg->ireg_local_offset = + t5_up_cim_reg_array[i][2]; + up_cim_reg->ireg_offset_range = + t5_up_cim_reg_array[i][3]; + } else if (is_t6(padap->params.chip)) { + up_cim_reg->ireg_addr = t6_up_cim_reg_array[i][0]; + up_cim_reg->ireg_data = t6_up_cim_reg_array[i][1]; + up_cim_reg->ireg_local_offset = + t6_up_cim_reg_array[i][2]; + up_cim_reg->ireg_offset_range = + t6_up_cim_reg_array[i][3]; + } + + rc = t4_cim_read(padap, up_cim_reg->ireg_local_offset, + up_cim_reg->ireg_offset_range, buff); + if (rc) { + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + up_cim++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) @@ -411,3 +663,40 @@ int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, cudbg_write_and_release_buff(&temp_buff, dbg_buff); return rc; } + +int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *hma_indr; + int i, rc, n; + u32 size; + + if (CHELSIO_CHIP_VERSION(padap->params.chip) < CHELSIO_T6) + return CUDBG_STATUS_ENTITY_NOT_FOUND; + + n = sizeof(t6_hma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + hma_indr = (struct ireg_buf *)temp_buff.data; + for (i = 0; i < n; i++) { + struct ireg_field *hma_fli = &hma_indr->tp_pio; + u32 *buff = hma_indr->outbuf; + + hma_fli->ireg_addr = t6_hma_ireg_array[i][0]; + hma_fli->ireg_data = t6_hma_ireg_array[i][1]; + hma_fli->ireg_local_offset = t6_hma_ireg_array[i][2]; + hma_fli->ireg_offset_range = t6_hma_ireg_array[i][3]; + t4_read_indirect(padap, hma_fli->ireg_addr, hma_fli->ireg_data, + buff, hma_fli->ireg_offset_range, + hma_fli->ireg_local_offset); + hma_indr++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 7a927ec71a5f..4838d823750f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -33,9 +33,27 @@ int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, + struct 
cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 7dfee6adc51e..1f6d800dd1be 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -30,6 +30,12 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_DEV_LOG, cudbg_collect_fw_devlog }, { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect }, + { CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect }, + { CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect }, + { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect }, + { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, + { CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect }, + { CUDBG_HMA_INDIRECT, cudbg_collect_hma_indirect }, }; static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) @@ -87,9 +93,38 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) n = n / (IREG_NUM_ELEM * sizeof(u32)); len = sizeof(struct ireg_buf) * n; break; + case CUDBG_SGE_INDIRECT: + len = sizeof(struct ireg_buf) * 2; + break; + case CUDBG_PCIE_INDIRECT: + n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + break; + case CUDBG_PM_INDIRECT: + n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + break; + case CUDBG_MA_INDIRECT: + if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + n = sizeof(t6_ma_ireg_array) / + (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + } + break; + case CUDBG_UP_CIM_INDIRECT: + n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + break; case CUDBG_MBOX_LOG: len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size; break; + case CUDBG_HMA_INDIRECT: + if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + n = sizeof(t6_hma_ireg_array) / + (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + } + break; default: break; } From 7c075ce221cf10a7aaef96b002d1d4c5dc715822 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Fri, 13 Oct 2017 18:48:20 +0530 Subject: [PATCH 0994/2177] cxgb4: collect IBQ and OBQ dumps Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 14 ++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 165 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 42 +++++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 32 ++++ 4 files changed, 253 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 01d282081b2d..9b8005e67811 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -29,10 +29,24 @@ enum cudbg_dbg_entity_type { CUDBG_REG_DUMP = 1, CUDBG_DEV_LOG = 2, + CUDBG_CIM_IBQ_TP0 = 6, + CUDBG_CIM_IBQ_TP1 = 7, + CUDBG_CIM_IBQ_ULP = 8, + CUDBG_CIM_IBQ_SGE0 = 9, + CUDBG_CIM_IBQ_SGE1 = 10, + CUDBG_CIM_IBQ_NCSI = 11, + CUDBG_CIM_OBQ_ULP0 = 12, + CUDBG_CIM_OBQ_ULP1 = 13, + CUDBG_CIM_OBQ_ULP2 = 14, + CUDBG_CIM_OBQ_ULP3 = 15, + CUDBG_CIM_OBQ_SGE = 16, + CUDBG_CIM_OBQ_NCSI = 17, CUDBG_EDC0 = 18, CUDBG_EDC1 = 19, CUDBG_TP_INDIRECT = 36, CUDBG_SGE_INDIRECT = 37, + CUDBG_CIM_OBQ_RXQ0 = 47, + CUDBG_CIM_OBQ_RXQ1 = 48, CUDBG_PCIE_INDIRECT = 50, CUDBG_PM_INDIRECT = 51, CUDBG_MA_INDIRECT = 61, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 0149f1ca9f51..c451b2e42a6c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -129,6 +129,171 @@ int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, return rc; } +static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err, int qid) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int no_of_read_words, rc = 0; + u32 qsize; + + /* collect CIM IBQ */ + qsize = CIM_IBQ_SIZE * 4 * sizeof(u32); + rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff); + if (rc) + return rc; + + /* t4_read_cim_ibq will return no. 
of read words or error */ + no_of_read_words = t4_read_cim_ibq(padap, qid, + (u32 *)((u32 *)temp_buff.data + + temp_buff.offset), qsize); + /* no_of_read_words is less than or equal to 0 means error */ + if (no_of_read_words <= 0) { + if (!no_of_read_words) + rc = CUDBG_SYSTEM_ERROR; + else + rc = no_of_read_words; + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 0); +} + +int cudbg_collect_cim_ibq_tp1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 1); +} + +int cudbg_collect_cim_ibq_ulp(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 2); +} + +int cudbg_collect_cim_ibq_sge0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 3); +} + +int cudbg_collect_cim_ibq_sge1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 4); +} + +int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 5); +} + +static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err, int qid) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int no_of_read_words, rc = 0; + u32 qsize; + + /* collect CIM OBQ */ + qsize = 6 * CIM_OBQ_SIZE * 4 * sizeof(u32); + rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff); + if (rc) + return rc; + + /* t4_read_cim_obq will return no. 
of read words or error */ + no_of_read_words = t4_read_cim_obq(padap, qid, + (u32 *)((u32 *)temp_buff.data + + temp_buff.offset), qsize); + /* no_of_read_words is less than or equal to 0 means error */ + if (no_of_read_words <= 0) { + if (!no_of_read_words) + rc = CUDBG_SYSTEM_ERROR; + else + rc = no_of_read_words; + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + temp_buff.size = no_of_read_words * 4; + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 0); +} + +int cudbg_collect_cim_obq_ulp1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 1); +} + +int cudbg_collect_cim_obq_ulp2(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 2); +} + +int cudbg_collect_cim_obq_ulp3(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 3); +} + +int cudbg_collect_cim_obq_sge(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 4); +} + +int cudbg_collect_cim_obq_ncsi(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 5); +} + +int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 6); +} + +int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 7); +} + static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, u8 mem_type, unsigned long tot_len, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 4838d823750f..c4440c1d0142 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -24,6 +24,42 @@ int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_tp1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_ulp(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_sge0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_sge1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init, + struct cudbg_buffer 
*dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ulp1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ulp2(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ulp3(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_sge(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ncsi(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); @@ -36,6 +72,12 @@ int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 1f6d800dd1be..9d97080a9d17 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -29,8 +29,22 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_MBOX_LOG, cudbg_collect_mbox_log }, { CUDBG_DEV_LOG, cudbg_collect_fw_devlog }, { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, + { CUDBG_CIM_IBQ_TP0, cudbg_collect_cim_ibq_tp0 }, + { CUDBG_CIM_IBQ_TP1, cudbg_collect_cim_ibq_tp1 }, + { CUDBG_CIM_IBQ_ULP, cudbg_collect_cim_ibq_ulp }, + { CUDBG_CIM_IBQ_SGE0, cudbg_collect_cim_ibq_sge0 }, + { CUDBG_CIM_IBQ_SGE1, cudbg_collect_cim_ibq_sge1 }, + { CUDBG_CIM_IBQ_NCSI, cudbg_collect_cim_ibq_ncsi }, + { CUDBG_CIM_OBQ_ULP0, cudbg_collect_cim_obq_ulp0 }, + { CUDBG_CIM_OBQ_ULP1, cudbg_collect_cim_obq_ulp1 }, + { CUDBG_CIM_OBQ_ULP2, cudbg_collect_cim_obq_ulp2 }, + { CUDBG_CIM_OBQ_ULP3, cudbg_collect_cim_obq_ulp3 }, + { CUDBG_CIM_OBQ_SGE, cudbg_collect_cim_obq_sge }, + { CUDBG_CIM_OBQ_NCSI, cudbg_collect_cim_obq_ncsi }, { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect }, { CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect }, + { CUDBG_CIM_OBQ_RXQ0, cudbg_collect_obq_sge_rx_q0 }, + { CUDBG_CIM_OBQ_RXQ1, cudbg_collect_obq_sge_rx_q1 }, { CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect }, { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect }, { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, @@ -59,6 +73,24 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) case CUDBG_DEV_LOG: len = adap->params.devlog.size; break; + case CUDBG_CIM_IBQ_TP0: + case CUDBG_CIM_IBQ_TP1: + case CUDBG_CIM_IBQ_ULP: + case CUDBG_CIM_IBQ_SGE0: + case CUDBG_CIM_IBQ_SGE1: + case CUDBG_CIM_IBQ_NCSI: + len = CIM_IBQ_SIZE * 4 * sizeof(u32); + break; + case CUDBG_CIM_OBQ_ULP0: + case CUDBG_CIM_OBQ_ULP1: + case CUDBG_CIM_OBQ_ULP2: + case CUDBG_CIM_OBQ_ULP3: + case CUDBG_CIM_OBQ_SGE: + case CUDBG_CIM_OBQ_NCSI: + case CUDBG_CIM_OBQ_RXQ0: + case CUDBG_CIM_OBQ_RXQ1: + len = 6 * CIM_OBQ_SIZE * 4 * sizeof(u32); + break; case CUDBG_EDC0: value = 
t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); if (value & EDRAM0_ENABLE_F) { From 5dc874252faa818426480a7c00fa05738fe05402 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 13 Oct 2017 17:29:00 +0100 Subject: [PATCH 0995/2177] cxgb4: fix missing break in switch and indent return statements The break statement for the Macronix case is missing and will fall through to the Winbond case and re-assign the size setting. Fix this by adding the missing break statement. Also correctly indent the return statements. Detected by CoverityScan, CID#1458020 ("Missing break in switch") Fixes: 96ac18f14a5a ("cxgb4: Add support for new flash parts") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 8fa40f9e75c4..006414758f65 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -8404,7 +8404,7 @@ static int t4_get_flash_params(struct adapter *adap) default: dev_err(adap->pdev_dev, "Micron Flash Part has bad size, ID = %#x, Density code = %#x\n", flashid, density); - return -EINVAL; + return -EINVAL; } break; } @@ -8423,8 +8423,9 @@ static int t4_get_flash_params(struct adapter *adap) default: dev_err(adap->pdev_dev, "Macronix Flash Part has bad size, ID = %#x, Density code = %#x\n", flashid, density); - return -EINVAL; + return -EINVAL; } + break; } case 0xef: { /* Winbond */ /* This Density -> Size decoding table is taken from Winbond @@ -8441,7 +8442,7 @@ static int t4_get_flash_params(struct adapter *adap) default: dev_err(adap->pdev_dev, "Winbond Flash Part has bad size, ID = %#x, Density code = %#x\n", flashid, density); - return -EINVAL; + return -EINVAL; } break; } From 47371300dfc269dd8d150e5b872bdbbda98ba809 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 13 Oct 2017 12:28:03 -0700 Subject: [PATCH 0996/2177] hv_netvsc: Rename ind_table to rx_table Rename this variable because it is the Receive indirection table. Signed-off-by: Haiyang Zhang Signed-off-by: David S. 
Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc_drv.c | 4 ++-- drivers/net/hyperv/rndis_filter.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a81335e8ebe8..65ceb3aec40e 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -179,7 +179,7 @@ struct rndis_device { u8 hw_mac_adr[ETH_ALEN]; u8 rss_key[NETVSC_HASH_KEYLEN]; - u16 ind_table[ITAB_NUM]; + u16 rx_table[ITAB_NUM]; }; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 44746de3dd4c..8fa964e733ad 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1434,7 +1434,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, rndis_dev = ndev->extension; if (indir) { for (i = 0; i < ITAB_NUM; i++) - indir[i] = rndis_dev->ind_table[i]; + indir[i] = rndis_dev->rx_table[i]; } if (key) @@ -1464,7 +1464,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, return -EINVAL; for (i = 0; i < ITAB_NUM; i++) - rndis_dev->ind_table[i] = indir[i]; + rndis_dev->rx_table[i] = indir[i]; } if (!key) { diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 065b204d8e17..addf9f69c58c 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -759,7 +759,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev, /* Set indirection table entries */ itab = (u32 *)(rssp + 1); for (i = 0; i < ITAB_NUM; i++) - itab[i] = rdev->ind_table[i]; + itab[i] = rdev->rx_table[i]; /* Set hask key values */ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); @@ -1284,8 +1284,8 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, net_device->num_chn = min(net_device->max_chn, device_info->num_chn); for (i = 0; i < ITAB_NUM; i++) - rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i, - net_device->num_chn); + rndis_device->rx_table[i] = ethtool_rxfh_indir_default( + i, net_device->num_chn); atomic_set(&net_device->open_chn, 1); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); From 39e91cfbf6f5fb26ba64cc2e8874372baf1671e7 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 13 Oct 2017 12:28:04 -0700 Subject: [PATCH 0997/2177] hv_netvsc: Rename tx_send_table to tx_table Simplify the variable name: tx_send_table Signed-off-by: Haiyang Zhang Signed-off-by: David S. 
Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 65ceb3aec40e..4958bb6b7376 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -731,7 +731,7 @@ struct net_device_context { u32 tx_checksum_mask; - u32 tx_send_table[VRSS_SEND_TAB_SIZE]; + u32 tx_table[VRSS_SEND_TAB_SIZE]; /* Ethtool settings */ u8 duplex; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 6e5194916bbe..d34cf37e949d 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1110,7 +1110,7 @@ static void netvsc_send_table(struct hv_device *hdev, nvmsg->msg.v5_msg.send_table.offset); for (i = 0; i < count; i++) - net_device_ctx->tx_send_table[i] = tab[i]; + net_device_ctx->tx_table[i] = tab[i]; } static void netvsc_send_vf(struct net_device_context *net_device_ctx, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 8fa964e733ad..da216ca4f2b2 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -252,8 +252,8 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev, struct sock *sk = skb->sk; int q_idx; - q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) & - (VRSS_SEND_TAB_SIZE - 1)]; + q_idx = ndc->tx_table[netvsc_get_hash(skb, ndc) & + (VRSS_SEND_TAB_SIZE - 1)]; /* If queue index changed record the new value */ if (q_idx != old_idx && From 6b0cbe315868d613123cf387052ccda5f09d49ea Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 13 Oct 2017 12:28:05 -0700 Subject: [PATCH 0998/2177] hv_netvsc: Add initialization of tx_table in netvsc_device_add() tx_table is part of the private data of kernel net_device. It is only zero-ed out when allocating net_device. We may recreate netvsc_device w/o recreating net_device, so the private netdev data, including tx_table, are not zeroed. It may contain channel numbers for the older netvsc_device. This patch adds initialization of tx_table each time we recreate netvsc_device. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d34cf37e949d..5bb6a20072dd 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1255,6 +1255,9 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, if (!net_device) return ERR_PTR(-ENOMEM); + for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) + net_device_ctx->tx_table[i] = 0; + net_device->ring_size = ring_size; /* Because the device uses NAPI, all the interrupt batching and From 657875944726af8f881df7d431afbbe8f50ad4d7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Oct 2017 12:58:13 -0700 Subject: [PATCH 0999/2177] net_sched: fix a compile warning in act_ife MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently ife_meta_id2name() is only called when CONFIG_MODULES is defined. This fixes: net/sched/act_ife.c:251:20: warning: ‘ife_meta_id2name’ defined but not used [-Wunused-function] static const char *ife_meta_id2name(u32 metaid) ^~~~~~~~~~~~~~~~ Fixes: d3f24ba895f0 ("net sched actions: fix module auto-loading") Cc: Roman Mashak Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/act_ife.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 252ee7d8c731..3007cb1310ea 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -248,6 +248,7 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) return ret; } +#ifdef CONFIG_MODULES static const char *ife_meta_id2name(u32 metaid) { switch (metaid) { @@ -261,6 +262,7 @@ static const char *ife_meta_id2name(u32 metaid) return "unknown"; } } +#endif /* called when adding new meta information * under ife->tcf_lock for existing action From e086101b150ae8e99e54ab26101ef3835fa9f48d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Oct 2017 13:03:16 -0700 Subject: [PATCH 1000/2177] tcp: add a tracepoint for tcp retransmission We need a real-time notification of tcp retransmissions for monitoring. Of course we could use ftrace to dynamically instrument this kernel function too; however, we can't retrieve the connection information at the same time: for example, perf-tools [1] reads /proc/net/tcp for socket details, which is slow when we have a lot of connections. Therefore, this patch adds a tracepoint for __tcp_retransmit_skb() and exposes the src/dst IP addresses and ports of the connection. This also makes it easier to integrate into perf. Note, I expose both IPv4 and IPv6 addresses at the same time: for an IPv4 socket, the v4-mapped address is used as the IPv6 address; for an IPv6 socket, LOOPBACK4_IPV6 is already filled in by the kernel. Also, add the sk and skb pointers as they are useful for BPF. 1. https://github.com/brendangregg/perf-tools/blob/master/net/tcpretrans Cc: Eric Dumazet Cc: Alexei Starovoitov Cc: Hannes Frederic Sowa Cc: Brendan Gregg Cc: Neal Cardwell Signed-off-by: Cong Wang Acked-by: Alexei Starovoitov Acked-by: Brendan Gregg Signed-off-by: David S.
Miller --- include/trace/events/tcp.h | 68 ++++++++++++++++++++++++++++++++++++++ net/core/net-traces.c | 1 + net/ipv4/tcp_output.c | 3 ++ 3 files changed, 72 insertions(+) create mode 100644 include/trace/events/tcp.h diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h new file mode 100644 index 000000000000..3d1cbd072b7e --- /dev/null +++ b/include/trace/events/tcp.h @@ -0,0 +1,68 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tcp + +#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TCP_H + +#include +#include +#include +#include + +TRACE_EVENT(tcp_retransmit_skb, + + TP_PROTO(struct sock *sk, struct sk_buff *skb), + + TP_ARGS(sk, skb), + + TP_STRUCT__entry( + __field(void *, skbaddr) + __field(void *, skaddr) + __field(__u16, sport) + __field(__u16, dport) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + __array(__u8, saddr_v6, 16) + __array(__u8, daddr_v6, 16) + ), + + TP_fast_assign( + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct in6_addr *pin6; + __be32 *p32; + + __entry->skbaddr = skb; + __entry->skaddr = sk; + + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + + p32 = (__be32 *) __entry->saddr; + *p32 = inet->inet_saddr; + + p32 = (__be32 *) __entry->daddr; + *p32 = inet->inet_daddr; + + if (np) { + pin6 = (struct in6_addr *)__entry->saddr_v6; + *pin6 = np->saddr; + pin6 = (struct in6_addr *)__entry->daddr_v6; + *pin6 = *(np->daddr_cache); + } else { + pin6 = (struct in6_addr *)__entry->saddr_v6; + ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); + pin6 = (struct in6_addr *)__entry->daddr_v6; + ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); + } + ), + + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6 daddrv6=%pI6", + __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, + __entry->saddr_v6, __entry->daddr_v6) +); + +#endif /* _TRACE_TCP_H */ + +/* This part must be outside protection */ +#include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 1132820c8e62..f4e4fa2db505 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 696b0a168f16..6c74f2a39778 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -42,6 +42,8 @@ #include #include +#include + /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse __read_mostly = 1; @@ -2875,6 +2877,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + trace_tcp_retransmit_skb(sk, skb); } else if (err != -EBUSY) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } From dd6b9c2c332b40f142740d1b11fb77c653ff98ea Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 13 Oct 2017 13:40:24 -0700 Subject: [PATCH 1001/2177] macvlan: Only deliver one copy of the frame to the macvlan interface This patch introduces a slight adjustment for macvlan to address the fact that in source mode I was seeing two copies of any packet addressed to the macvlan interface being delivered where there should have been only one. The issue appears to be that one copy was delivered based on the source MAC address and then the second copy was being delivered based on the destination MAC address.
To fix it I am just treating a unicast address match as though it is not a match, since source based macvlan isn't supposed to be matching based on the destination MAC anyway. Fixes: 79cf79abce71 ("macvlan: add source mode") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 858bd66511a2..a4ae8cd0f660 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -480,7 +480,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) struct macvlan_dev, list); else vlan = macvlan_hash_lookup(port, eth->h_dest); - if (vlan == NULL) + if (!vlan || vlan->mode == MACVLAN_MODE_SOURCE) return RX_HANDLER_PASS; dev = vlan->dev; From c8c41ea1bd707eb032e248c55d640f32c950ed19 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 13 Oct 2017 13:40:31 -0700 Subject: [PATCH 1002/2177] macvlan: Only update pkt_type if destination MAC address matches This patch updates the pkt_type to PACKET_HOST only if the destination MAC address matches on the source based macvlan. It didn't make sense to be updating broadcast, multicast, and non-local destined frames with PACKET_HOST. Signed-off-by: Alexander Duyck Signed-off-by: David S.
Miller --- .../tc-testing/tc-tests/actions/tests.json | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json index 6973bdc5b5bf..2ea00652f9ab 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json @@ -245,6 +245,27 @@ "$TC actions flush action gact" ] }, + { + "id": "3edf", + "name": "Flush gact actions", + "category": [ + "actions", + "gact" + ], + "setup": [ + "$TC actions add action reclassify index 101", + "$TC actions add action reclassify index 102", + "$TC actions add action reclassify index 103", + "$TC actions add action reclassify index 104", + "$TC actions add action reclassify index 105" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action reclassify", + "matchCount": "0", + "teardown": [] + }, { "id": "63ec", "name": "Delete pass action", @@ -468,6 +489,32 @@ "$TC actions flush action mirred" ] }, + { + "id": "58c3", + "name": "Flush mirred actions", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress mirror index 1 dev lo", + "$TC actions add action mirred egress redirect index 2 dev lo" + ], + "cmdUnderTest": "$TC actions show action mirred", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "[Mirror|Redirect] to device lo", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, { "id": "d7c0", "name": "Add invalid mirred direction", @@ -1162,4 +1209,4 @@ "$TC actions flush action ife" ] } -] \ No newline at end of file +] From 0923edf456fbd6c75031ad48ad14897a6b912954 Mon Sep 17 00:00:00 2001 From: Lucas Bates Date: Fri, 13 Oct 2017 17:51:23 -0400 Subject: [PATCH 1004/2177] tc-testing: Split test case files into smaller chunks The original submission had the test cases stored in one monolithic file. This can be unwieldy to edit, especially as more test cases are added. This patch removes the original tests.json file in favour of individual ones broken down by category. Signed-off-by: Lucas Bates Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- .../tc-testing/tc-tests/actions/gact.json | 469 +++++++ .../tc-testing/tc-tests/actions/ife.json | 52 + .../tc-testing/tc-tests/actions/mirred.json | 223 +++ .../tc-testing/tc-tests/actions/simple.json | 130 ++ .../tc-testing/tc-tests/actions/skbedit.json | 320 +++++ .../tc-testing/tc-tests/actions/tests.json | 1212 ----------------- 6 files changed, 1194 insertions(+), 1212 deletions(-) create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/gact.json create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/ife.json create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/simple.json create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/tests.json diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json new file mode 100644 index 000000000000..e2187b6e0b7a --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -0,0 +1,469 @@ +[ + { + "id": "e89a", + "name": "Add valid pass action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pass index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass.*index 8 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "a02c", + "name": "Add valid pipe action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pipe index 6", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pipe.*index 6 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "feef", + "name": "Add valid reclassify action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action reclassify index 5", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action reclassify.*index 5 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "8a7a", + "name": "Add valid drop action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action drop index 30", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 30 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "9a52", + "name": "Add valid continue action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action continue index 432", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action continue.*index 432 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "d700", + "name": "Add invalid action", + 
"category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pump index 386", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action.*index 386 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "9215", + "name": "Add action with duplicate index", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pipe index 15" + ], + "cmdUnderTest": "$TC actions add action drop index 15", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 15 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "798e", + "name": "Add action with index exceeding 32-bit maximum", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action drop index 4294967296", + "expExitCode": "255", + "verifyCmd": "actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 4294967296 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "22be", + "name": "Add action with index at 32-bit maximum", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action drop index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 4294967295 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "ac2a", + "name": "List actions", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 101", + "$TC actions add action reclassify index 102", + "$TC actions add action reclassify index 103", + "$TC actions add action reclassify index 104", + "$TC actions add action reclassify index 105" + ], + "cmdUnderTest": "$TC actions list action gact", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action reclassify", + "matchCount": "5", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "3edf", + "name": "Flush gact actions", + "category": [ + "actions", + "gact" + ], + "setup": [ + "$TC actions add action reclassify index 101", + "$TC actions add action reclassify index 102", + "$TC actions add action reclassify index 103", + "$TC actions add action reclassify index 104", + "$TC actions add action reclassify index 105" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action reclassify", + "matchCount": "0", + "teardown": [] + }, + { + "id": "63ec", + "name": "Delete pass action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pass index 1" + ], + "cmdUnderTest": "$TC actions del action gact index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass.*index 1 
ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "46be", + "name": "Delete pipe action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pipe index 9" + ], + "cmdUnderTest": "$TC actions del action gact index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pipe.*index 9 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "2e08", + "name": "Delete reclassify action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 65536" + ], + "cmdUnderTest": "$TC actions del action gact index 65536", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action reclassify.*index 65536 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "99c4", + "name": "Delete drop action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action drop index 16" + ], + "cmdUnderTest": "$TC actions del action gact index 16", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 16 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "fb6b", + "name": "Delete continue action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action continue index 32" + ], + "cmdUnderTest": "$TC actions del action gact index 32", + "expExitCode": "0", + "verifyCmd": "actions list action gact", + "matchPattern": "action order [0-9]*: gact action continue.*index 32 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "0eb3", + "name": "Delete non-existent action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions del action gact index 2", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "f02c", + "name": "Replace gact action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action drop index 10", + "$TC actions add action drop index 12" + ], + "cmdUnderTest": "$TC actions replace action ok index 12", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "action order [0-9]*: gact action pass", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "525f", + "name": "Get gact action by index", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action drop index 3900800700" + ], + "cmdUnderTest": "$TC actions get action gact index 3900800700", + "expExitCode": "0", + "verifyCmd": "$TC actions get action gact index 3900800700", + "matchPattern": "index 3900800700", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + } 
+] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json new file mode 100644 index 000000000000..9f34f0753969 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -0,0 +1,52 @@ +[ + { + "id": "a568", + "name": "Add action with ife type", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife encode type 0xDEAD index 1" + ], + "cmdUnderTest": "$TC actions get action ife index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "type 0xDEAD", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "b983", + "name": "Add action without ife type", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife encode index 1" + ], + "cmdUnderTest": "$TC actions get action ife index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "type 0xED3E", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json new file mode 100644 index 000000000000..0fcccf18399b --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -0,0 +1,223 @@ +[ + { + "id": "5124", + "name": "Add mirred mirror to egress action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 dev lo", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "6fb4", + "name": "Add mirred redirect to egress action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress redirect index 2 dev lo action pipe", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "ba38", + "name": "Get mirred actions", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress mirror index 1 dev lo", + "$TC actions add action mirred egress redirect index 2 dev lo" + ], + "cmdUnderTest": "$TC actions show action mirred", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "[Mirror|Redirect] to device lo", + "matchCount": "2", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "d7c0", + "name": "Add invalid mirred direction", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred inbound mirror index 20 dev lo", + "expExitCode": "255", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred 
\\(.*to device lo\\).*index 20 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "e213", + "name": "Add invalid mirred action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress remirror index 20 dev lo", + "expExitCode": "255", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress.*to device lo\\).*index 20 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "2d89", + "name": "Add mirred action with invalid device", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror index 20 dev eltoh", + "expExitCode": "255", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(.*to device eltoh\\).*index 20 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "300b", + "name": "Add mirred action with duplicate index", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress redirect index 15 dev lo" + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror index 15 dev lo", + "expExitCode": "255", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 15 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "a70e", + "name": "Delete mirred mirror action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress mirror index 5 dev lo" + ], + "cmdUnderTest": "$TC actions del action mirred index 5", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 5 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "3fb3", + "name": "Delete mirred redirect action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress redirect index 5 dev lo" + ], + "cmdUnderTest": "$TC actions del action mirred index 5", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 5 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json new file mode 100644 index 000000000000..e89a7aa4012d --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json @@ -0,0 +1,130 @@ +[ + { + "id": "b078", + "name": "Add simple action", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action simple sdata \"A triumph\" index 60", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple .*index 60 ref", + 
"matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "6d4c", + "name": "Add simple action with duplicate index", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"Aruba\" index 4" + ], + "cmdUnderTest": "$TC actions add action simple sdata \"Jamaica\" index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple .*ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "2542", + "name": "List simple actions", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"Rock\"", + "$TC actions add action simple sdata \"Paper\"", + "$TC actions add action simple sdata \"Scissors\" index 98" + ], + "cmdUnderTest": "$TC actions list action simple", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>", + "matchCount": "3", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "ea67", + "name": "Delete simple action", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"Blinkenlights\" index 1" + ], + "cmdUnderTest": "$TC actions delete action simple index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple .*index 1 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "8ff1", + "name": "Flush simple actions", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"Kirk\"", + "$TC actions add action simple sdata \"Spock\" index 50", + "$TC actions add action simple sdata \"McCoy\" index 9" + ], + "cmdUnderTest": "$TC actions flush action simple", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>", + "matchCount": "0", + "teardown": [ + "" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json new file mode 100644 index 000000000000..99635ea4722e --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json @@ -0,0 +1,320 @@ +[ + { + "id": "6236", + "name": "Add skbedit action with valid mark", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "407b", + "name": "Add skbedit action with invalid mark", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 666777888999", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark", + "matchCount": "0", + "teardown": [ + "$TC 
actions flush action skbedit" + ] + }, + { + "id": "081d", + "name": "Add skbedit action with priority", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit prio 99", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit priority :99", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "cc37", + "name": "Add skbedit action with invalid priority", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit prio foo", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit priority", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "3c95", + "name": "Add skbedit action with queue_mapping", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit queue_mapping 909", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "985c", + "name": "Add skbedit action with invalid queue_mapping", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit queue_mapping 67000", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit queue_mapping", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "224f", + "name": "Add skbedit action with ptype host", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit ptype host", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype host", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "d1a3", + "name": "Add skbedit action with ptype otherhost", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit ptype otherhost", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype otherhost", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "b9c6", + "name": "Add skbedit action with invalid ptype", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit ptype openair", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype openair", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "5172", + "name": "List skbedit actions", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions 
flush action skbedit", + 0, + 1, + 255 + ], + "$TC actions add action skbedit ptype otherhost", + "$TC actions add action skbedit ptype broadcast", + "$TC actions add action skbedit mark 59", + "$TC actions add action skbedit mark 409" + ], + "cmdUnderTest": "$TC actions list action skbedit", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit", + "matchCount": "4", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "a6d6", + "name": "Add skbedit action with index", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "index 4040404040", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "38f3", + "name": "Delete skbedit action", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "$TC actions add action skbedit mark 42 index 9009" + ], + "cmdUnderTest": "$TC actions del action skbedit index 9009", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 42", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "ce97", + "name": "Flush skbedit actions", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + "$TC actions add action skbedit mark 500", + "$TC actions add action skbedit mark 501", + "$TC actions add action skbedit mark 502", + "$TC actions add action skbedit mark 503", + "$TC actions add action skbedit mark 504", + "$TC actions add action skbedit mark 505", + "$TC actions add action skbedit mark 506" + ], + "cmdUnderTest": "$TC actions flush action skbedit", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json deleted file mode 100644 index 2ea00652f9ab..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json +++ /dev/null @@ -1,1212 +0,0 @@ -[ - { - "id": "e89a", - "name": "Add valid pass action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action pass index 8", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action pass.*index 8 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "a02c", - "name": "Add valid pipe action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action pipe index 6", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action pipe.*index 6 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "feef", - "name": "Add valid reclassify action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - 
], - "cmdUnderTest": "$TC actions add action reclassify index 5", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action reclassify.*index 5 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "8a7a", - "name": "Add valid drop action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action drop index 30", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action drop.*index 30 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "9a52", - "name": "Add valid continue action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action continue index 432", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action continue.*index 432 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "d700", - "name": "Add invalid action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action pump index 386", - "expExitCode": "255", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action.*index 386 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "9215", - "name": "Add action with duplicate index", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action pipe index 15" - ], - "cmdUnderTest": "$TC actions add action drop index 15", - "expExitCode": "255", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action drop.*index 15 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "798e", - "name": "Add action with index exceeding 32-bit maximum", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action drop index 4294967296", - "expExitCode": "255", - "verifyCmd": "actions list action gact", - "matchPattern": "action order [0-9]*: gact action drop.*index 4294967296 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "22be", - "name": "Add action with index at 32-bit maximum", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action drop index 4294967295", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action drop.*index 4294967295 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "ac2a", - "name": "List actions", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action reclassify index 101", - "$TC actions add action reclassify index 102", - "$TC actions add action reclassify index 103", - "$TC actions add action reclassify index 104", - "$TC actions add action reclassify 
index 105" - ], - "cmdUnderTest": "$TC actions list action gact", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action reclassify", - "matchCount": "5", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "3edf", - "name": "Flush gact actions", - "category": [ - "actions", - "gact" - ], - "setup": [ - "$TC actions add action reclassify index 101", - "$TC actions add action reclassify index 102", - "$TC actions add action reclassify index 103", - "$TC actions add action reclassify index 104", - "$TC actions add action reclassify index 105" - ], - "cmdUnderTest": "$TC actions flush action gact", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action reclassify", - "matchCount": "0", - "teardown": [] - }, - { - "id": "63ec", - "name": "Delete pass action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action pass index 1" - ], - "cmdUnderTest": "$TC actions del action gact index 1", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action pass.*index 1 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "46be", - "name": "Delete pipe action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action pipe index 9" - ], - "cmdUnderTest": "$TC actions del action gact index 9", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action pipe.*index 9 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "2e08", - "name": "Delete reclassify action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action reclassify index 65536" - ], - "cmdUnderTest": "$TC actions del action gact index 65536", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action reclassify.*index 65536 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "99c4", - "name": "Delete drop action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action drop index 16" - ], - "cmdUnderTest": "$TC actions del action gact index 16", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action drop.*index 16 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "fb6b", - "name": "Delete continue action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action continue index 32" - ], - "cmdUnderTest": "$TC actions del action gact index 32", - "expExitCode": "0", - "verifyCmd": "actions list action gact", - "matchPattern": "action order [0-9]*: gact action continue.*index 32 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "0eb3", - "name": "Delete non-existent action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ] - ], - 
"cmdUnderTest": "$TC actions del action gact index 2", - "expExitCode": "255", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order [0-9]*: gact action", - "matchCount": "0", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "5124", - "name": "Add mirred mirror to egress action", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 dev lo", - "expExitCode": "0", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 1 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "6fb4", - "name": "Add mirred redirect to egress action", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action mirred egress redirect index 2 dev lo action pipe", - "expExitCode": "0", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "ba38", - "name": "Get mirred actions", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ], - "$TC actions add action mirred egress mirror index 1 dev lo", - "$TC actions add action mirred egress redirect index 2 dev lo" - ], - "cmdUnderTest": "$TC actions show action mirred", - "expExitCode": "0", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "[Mirror|Redirect] to device lo", - "matchCount": "2", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "58c3", - "name": "Flush mirred actions", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ], - "$TC actions add action mirred egress mirror index 1 dev lo", - "$TC actions add action mirred egress redirect index 2 dev lo" - ], - "cmdUnderTest": "$TC actions show action mirred", - "expExitCode": "0", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "[Mirror|Redirect] to device lo", - "matchCount": "0", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "d7c0", - "name": "Add invalid mirred direction", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action mirred inbound mirror index 20 dev lo", - "expExitCode": "255", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 20 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "e213", - "name": "Add invalid mirred action", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action mirred egress remirror index 20 dev lo", - "expExitCode": "255", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "action order [0-9]*: mirred \\(Egress.*to device lo\\).*index 20 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "2d89", - "name": "Add mirred action with invalid device", - "category": [ - 
"actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action mirred egress mirror index 20 dev eltoh", - "expExitCode": "255", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "action order [0-9]*: mirred \\(.*to device eltoh\\).*index 20 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "300b", - "name": "Add mirred action with duplicate index", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ], - "$TC actions add action mirred egress redirect index 15 dev lo" - ], - "cmdUnderTest": "$TC actions add action mirred egress mirror index 15 dev lo", - "expExitCode": "255", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 15 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "a70e", - "name": "Delete mirred mirror action", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ], - "$TC actions add action mirred egress mirror index 5 dev lo" - ], - "cmdUnderTest": "$TC actions del action mirred index 5", - "expExitCode": "0", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 5 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "3fb3", - "name": "Delete mirred redirect action", - "category": [ - "actions", - "mirred" - ], - "setup": [ - [ - "$TC actions flush action mirred", - 0, - 1, - 255 - ], - "$TC actions add action mirred egress redirect index 5 dev lo" - ], - "cmdUnderTest": "$TC actions del action mirred index 5", - "expExitCode": "0", - "verifyCmd": "$TC actions list action mirred", - "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 5 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action mirred" - ] - }, - { - "id": "b078", - "name": "Add simple action", - "category": [ - "actions", - "simple" - ], - "setup": [ - [ - "$TC actions flush action simple", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action simple sdata \"A triumph\" index 60", - "expExitCode": "0", - "verifyCmd": "$TC actions list action simple", - "matchPattern": "action order [0-9]*: Simple .*index 60 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action simple" - ] - }, - { - "id": "6d4c", - "name": "Add simple action with duplicate index", - "category": [ - "actions", - "simple" - ], - "setup": [ - [ - "$TC actions flush action simple", - 0, - 1, - 255 - ], - "$TC actions add action simple sdata \"Aruba\" index 4" - ], - "cmdUnderTest": "$TC actions add action simple sdata \"Jamaica\" index 4", - "expExitCode": "255", - "verifyCmd": "$TC actions list action simple", - "matchPattern": "action order [0-9]*: Simple .*ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action simple" - ] - }, - { - "id": "2542", - "name": "List simple actions", - "category": [ - "actions", - "simple" - ], - "setup": [ - [ - "$TC actions flush action simple", - 0, - 1, - 255 - ], - "$TC actions add action simple sdata \"Rock\"", - "$TC actions add action simple sdata \"Paper\"", - "$TC actions add action simple sdata \"Scissors\" index 98" - ], - "cmdUnderTest": "$TC actions list action simple", - 
"expExitCode": "0", - "verifyCmd": "$TC actions list action simple", - "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>", - "matchCount": "3", - "teardown": [ - "$TC actions flush action simple" - ] - }, - { - "id": "ea67", - "name": "Delete simple action", - "category": [ - "actions", - "simple" - ], - "setup": [ - [ - "$TC actions flush action simple", - 0, - 1, - 255 - ], - "$TC actions add action simple sdata \"Blinkenlights\" index 1" - ], - "cmdUnderTest": "$TC actions delete action simple index 1", - "expExitCode": "0", - "verifyCmd": "$TC actions list action simple", - "matchPattern": "action order [0-9]*: Simple .*index 1 ref", - "matchCount": "0", - "teardown": [ - "$TC actions flush action simple" - ] - }, - { - "id": "8ff1", - "name": "Flush simple actions", - "category": [ - "actions", - "simple" - ], - "setup": [ - [ - "$TC actions flush action simple", - 0, - 1, - 255 - ], - "$TC actions add action simple sdata \"Kirk\"", - "$TC actions add action simple sdata \"Spock\" index 50", - "$TC actions add action simple sdata \"McCoy\" index 9" - ], - "cmdUnderTest": "$TC actions flush action simple", - "expExitCode": "0", - "verifyCmd": "$TC actions list action simple", - "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>", - "matchCount": "0", - "teardown": [ - "" - ] - }, - { - "id": "6236", - "name": "Add skbedit action with valid mark", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit mark 1", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit mark 1", - "matchCount": "1", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "407b", - "name": "Add skbedit action with invalid mark", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit mark 666777888999", - "expExitCode": "255", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit mark", - "matchCount": "0", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "081d", - "name": "Add skbedit action with priority", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit prio 99", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit priority :99", - "matchCount": "1", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "cc37", - "name": "Add skbedit action with invalid priority", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit prio foo", - "expExitCode": "255", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit priority", - "matchCount": "0", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "3c95", - "name": "Add skbedit action with queue_mapping", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", - 
"matchPattern": "action order [0-9]*: skbedit queue_mapping 909", - "matchCount": "1", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "985c", - "name": "Add skbedit action with invalid queue_mapping", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit queue_mapping 67000", - "expExitCode": "255", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit queue_mapping", - "matchCount": "0", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "224f", - "name": "Add skbedit action with ptype host", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit ptype host", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit ptype host", - "matchCount": "1", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "d1a3", - "name": "Add skbedit action with ptype otherhost", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit ptype otherhost", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit ptype otherhost", - "matchCount": "1", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "b9c6", - "name": "Add skbedit action with invalid ptype", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit ptype openair", - "expExitCode": "255", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit ptype openair", - "matchCount": "0", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "5172", - "name": "List skbedit actions", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ], - "$TC actions add action skbedit ptype otherhost", - "$TC actions add action skbedit ptype broadcast", - "$TC actions add action skbedit mark 59", - "$TC actions add action skbedit mark 409" - ], - "cmdUnderTest": "$TC actions list action skbedit", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit", - "matchCount": "4", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "a6d6", - "name": "Add skbedit action with index", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "index 4040404040", - "matchCount": "1", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "38f3", - "name": "Delete skbedit action", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - [ - "$TC actions flush action skbedit", - 0, - 1, - 255 - ], - "$TC actions add action skbedit mark 42 index 9009" - ], - "cmdUnderTest": "$TC actions del action skbedit index 9009", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", 
- "matchPattern": "action order [0-9]*: skbedit mark 42", - "matchCount": "0", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "ce97", - "name": "Flush skbedit actions", - "category": [ - "actions", - "skbedit" - ], - "setup": [ - "$TC actions add action skbedit mark 500", - "$TC actions add action skbedit mark 501", - "$TC actions add action skbedit mark 502", - "$TC actions add action skbedit mark 503", - "$TC actions add action skbedit mark 504", - "$TC actions add action skbedit mark 505", - "$TC actions add action skbedit mark 506" - ], - "cmdUnderTest": "$TC actions flush action skbedit", - "expExitCode": "0", - "verifyCmd": "$TC actions list action skbedit", - "matchPattern": "action order [0-9]*: skbedit", - "matchCount": "0", - "teardown": [ - "$TC actions flush action skbedit" - ] - }, - { - "id": "f02c", - "name": "Replace gact action", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action drop index 10", - "$TC actions add action drop index 12" - ], - "cmdUnderTest": "$TC actions replace action ok index 12", - "expExitCode": "0", - "verifyCmd": "$TC actions ls action gact", - "matchPattern": "action order [0-9]*: gact action pass", - "matchCount": "1", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "525f", - "name": "Get gact action by index", - "category": [ - "actions", - "gact" - ], - "setup": [ - [ - "$TC actions flush action gact", - 0, - 1, - 255 - ], - "$TC actions add action drop index 3900800700" - ], - "cmdUnderTest": "$TC actions get action gact index 3900800700", - "expExitCode": "0", - "verifyCmd": "$TC actions get action gact index 3900800700", - "matchPattern": "index 3900800700", - "matchCount": "1", - "teardown": [ - "$TC actions flush action gact" - ] - }, - { - "id": "a568", - "name": "Add action with ife type", - "category": [ - "actions", - "ife" - ], - "setup": [ - [ - "$TC actions flush action ife", - 0, - 1, - 255 - ], - "$TC actions add action ife encode type 0xDEAD index 1" - ], - "cmdUnderTest": "$TC actions get action ife index 1", - "expExitCode": "0", - "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "type 0xDEAD", - "matchCount": "1", - "teardown": [ - "$TC actions flush action ife" - ] - }, - { - "id": "b983", - "name": "Add action without ife type", - "category": [ - "actions", - "ife" - ], - "setup": [ - [ - "$TC actions flush action ife", - 0, - 1, - 255 - ], - "$TC actions add action ife encode index 1" - ], - "cmdUnderTest": "$TC actions get action ife index 1", - "expExitCode": "0", - "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "type 0xED3E", - "matchCount": "1", - "teardown": [ - "$TC actions flush action ife" - ] - } -] From cf797ac49b94df7247cf86e801ab3882d08b786d Mon Sep 17 00:00:00 2001 From: Lucas Bates Date: Fri, 13 Oct 2017 17:51:24 -0400 Subject: [PATCH 1005/2177] tc-testing: Add test cases for police and skbmod Add basic unit tests for police and skbmod actions in tc. Signed-off-by: Lucas Bates Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- .../tc-testing/tc-tests/actions/police.json | 527 ++++++++++++++++++ .../tc-testing/tc-tests/actions/skbmod.json | 372 +++++++++++++ 2 files changed, 899 insertions(+) create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/police.json create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json new file mode 100644 index 000000000000..0e602a3f9393 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -0,0 +1,527 @@ +[ + { + "id": "49aa", + "name": "Add valid basic police action", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 1Kbit burst 10Kb", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "3abe", + "name": "Add police action with duplicate index", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ], + "$TC actions add action police rate 4Mbit burst 120k index 9" + ], + "cmdUnderTest": "$TC actions add action police rate 8kbit burst 24k index 9", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "49fa", + "name": "Add valid police action with mtu", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 1k index 98", + "expExitCode": "0", + "verifyCmd": "$TC actions get action police index 98", + "matchPattern": "action order [0-9]*: police 0x62 rate 90Kbit burst 10Kb mtu 1Kb", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "7943", + "name": "Add valid police action with peakrate", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100kbit index 3", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x3 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Kbit", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "055e", + "name": "Add police action with peakrate and no mtu", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 5kbit burst 6kb peakrate 10kbit index 9", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x9 rate 5Kb burst 10Kb", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "f057", + "name": "Add police action with valid overhead", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 1mbit burst 100k 
overhead 64 index 64", + "expExitCode": "0", + "verifyCmd": "$TC actions get action police index 64", + "matchPattern": "action order [0-9]*: police 0x40 rate 1Mbit burst 100Kb mtu 2Kb action reclassify overhead 64b", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "7ffb", + "name": "Add police action with ethernet linklayer type", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer ethernet index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions show action police", + "matchPattern": "action order [0-9]*: police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "3dda", + "name": "Add police action with atm linklayer type", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer atm index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions show action police", + "matchPattern": "action order [0-9]*: police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "551b", + "name": "Add police actions with conform-exceed control continue/drop", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed continue/drop index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action police index 1", + "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action continue/drop", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "0c70", + "name": "Add police actions with conform-exceed control pass/reclassify", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/reclassify index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x4 rate 3Mbit burst 250Kb mtu 2Kb action pass/reclassify", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "d946", + "name": "Add police actions with conform-exceed control pass/pipe", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/pipe index 5", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x5 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "336e", + "name": "Delete police action", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ], + "$TC actions add action police rate 5mbit burst 2m index 12" + ], + "cmdUnderTest": "$TC actions delete action police index 12", + "expExitCode": "0", + "verifyCmd": "$TC 
actions ls action police", + "matchPattern": "action order [0-9]*: police 0xc rate 5Mb burst 2Mb", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "77fa", + "name": "Get single police action from many actions", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ], + "$TC actions add action police rate 1mbit burst 100k index 1", + "$TC actions add action police rate 2mbit burst 200k index 2", + "$TC actions add action police rate 3mbit burst 300k index 3", + "$TC actions add action police rate 4mbit burst 400k index 4", + "$TC actions add action police rate 5mbit burst 500k index 5", + "$TC actions add action police rate 6mbit burst 600k index 6", + "$TC actions add action police rate 7mbit burst 700k index 7", + "$TC actions add action police rate 8mbit burst 800k index 8" + ], + "cmdUnderTest": "$TC actions get action police index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions get action police index 4", + "matchPattern": "action order [0-9]*: police 0x4 rate 4Mbit burst 400Kb", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "aa43", + "name": "Get single police action without specifying index", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ], + "$TC actions add action police rate 1mbit burst 100k index 1" + ], + "cmdUnderTest": "$TC actions get action police", + "expExitCode": "255", + "verifyCmd": "$TC actions get action police", + "matchPattern": "action order [0-9]*: police", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "858b", + "name": "List police actions", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ], + "$TC actions add action police rate 1mbit burst 100k index 1", + "$TC actions add action police rate 2mbit burst 200k index 2", + "$TC actions add action police rate 3mbit burst 300k index 3", + "$TC actions add action police rate 4mbit burst 400k index 4", + "$TC actions add action police rate 5mbit burst 500k index 5", + "$TC actions add action police rate 6mbit burst 600k index 6", + "$TC actions add action police rate 7mbit burst 700k index 7", + "$TC actions add action police rate 8mbit burst 800k index 8" + ], + "cmdUnderTest": "$TC actions list action police", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x[1-8] rate [1-8]Mbit burst [1-8]00Kb", + "matchCount": "8", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "1c3a", + "name": "Flush police actions", + "category": [ + "actions", + "police" + ], + "setup": [ + "$TC actions add action police rate 1mbit burst 100k index 1", + "$TC actions add action police rate 2mbit burst 200k index 2", + "$TC actions add action police rate 3mbit burst 300k index 3", + "$TC actions add action police rate 4mbit burst 400k index 4", + "$TC actions add action police rate 5mbit burst 500k index 5", + "$TC actions add action police rate 6mbit burst 600k index 6", + "$TC actions add action police rate 7mbit burst 700k index 7", + "$TC actions add action police rate 8mbit burst 800k index 8" + ], + "cmdUnderTest": "$TC actions flush action police", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police", + "matchCount": "0", + "teardown": [ 
+ "" + ] + }, + { + "id": "7326", + "name": "Add police action with control continue", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action police index 1", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action continue", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "34fa", + "name": "Add police action with control drop", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m drop index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action drop", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "8dd5", + "name": "Add police action with control ok", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m ok index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pass", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "b9d1", + "name": "Add police action with control reclassify", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action police index 1", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action reclassify", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "c534", + "name": "Add police action with control pipe", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pipe", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json new file mode 100644 index 000000000000..e34075059c26 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json @@ -0,0 +1,372 @@ +[ + { + "id": "7d50", + "name": "Add skbmod action to set destination mac", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 5", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pipe set dmac 11:22:33:44:55:66\\s+index 5", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "9b29", + "name": "Add 
skbmod action to set source mac", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set smac 77:88:99:AA:BB:CC index 7", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbmod index 7", + "matchPattern": "action order [0-9]*: skbmod pipe set smac 77:88:99:aa:bb:cc\\s+index 7", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "1724", + "name": "Add skbmod action with invalid mac", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set smac 00:44:55:44:55", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pipe set smac 00:44:55:44:55", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "3cf1", + "name": "Add skbmod action with valid etype", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefe", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFE", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "a749", + "name": "Add skbmod action with invalid etype", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefef", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFEF", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "bfe6", + "name": "Add skbmod action to swap mac", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod swap mac", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbmod index 1", + "matchPattern": "action order [0-9]*: skbmod pipe swap mac", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "839b", + "name": "Add skbmod action with control pipe", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod swap mac pipe", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pipe swap mac", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "c167", + "name": "Add skbmod action with control reclassify", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set etype 0xbeef reclassify", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod reclassify set etype 0xBEEF", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "0c2f", + "name": "Add skbmod action with control drop", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC 
actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set etype 0x0001 drop", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbmod index 1", + "matchPattern": "action order [0-9]*: skbmod drop set etype 0x1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "d113", + "name": "Add skbmod action with control continue", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set etype 0x1 continue", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod continue set etype 0x1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "7242", + "name": "Add skbmod action with control pass", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set smac 00:00:00:00:00:01 pass", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pass set smac 00:00:00:00:00:01", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "58cb", + "name": "List skbmod actions", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ], + "$TC actions add action skbmod set etype 0x0001", + "$TC actions add action skbmod set etype 0x0011", + "$TC actions add action skbmod set etype 0x0021", + "$TC actions add action skbmod set etype 0x0031", + "$TC actions add action skbmod set etype 0x0041" + ], + "cmdUnderTest": "$TC actions ls action skbmod", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod", + "matchCount": "5", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "9aa8", + "name": "Get a single skbmod action from a list", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ], + "$TC actions add action skbmod set etype 0x0001", + "$TC actions add action skbmod set etype 0x0011", + "$TC actions add action skbmod set etype 0x0021", + "$TC actions add action skbmod set etype 0x0031", + "$TC actions add action skbmod set etype 0x0041" + ], + "cmdUnderTest": "$TC actions ls action skbmod", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbmod index 4", + "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x0031", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "e93a", + "name": "Delete an skbmod action", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ], + "$TC actions add action skbmod set etype 0x1111 index 909" + ], + "cmdUnderTest": "$TC actions del action skbmod index 909", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x1111\\s+index 909", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { + "id": "40c2", + "name": "Flush skbmod actions", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + "$TC actions add action skbmod set etype 0x0001", + "$TC actions add action skbmod set etype 0x0011", + "$TC actions 
add action skbmod set etype 0x0021", + "$TC actions add action skbmod set etype 0x0031", + "$TC actions add action skbmod set etype 0x0041" + ], + "cmdUnderTest": "$TC actions flush action skbmod", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbmod" + ] + } +] From 7f6661a78a50c3bae8c9f71f67487abbea64dbb3 Mon Sep 17 00:00:00 2001 From: Lucas Bates Date: Fri, 13 Oct 2017 17:51:25 -0400 Subject: [PATCH 1006/2177] tc-testing: fix the -l argument bug in tdc.py This patch fixes a bug in the tdc script, where executing tdc with the -l argument would cause the tests to start running as opposed to listing all the known test cases. Signed-off-by: Lucas Bates Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index cd61b7844c0d..d2391df2cf7d 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -49,7 +49,7 @@ def exec_cmd(command, nsonly=True): stderr=subprocess.PIPE) (rawout, serr) = proc.communicate() - if proc.returncode != 0: + if proc.returncode != 0 and len(serr) > 0: foutput = serr.decode("utf-8") else: foutput = rawout.decode("utf-8") @@ -203,7 +203,7 @@ def set_args(parser): help='Run tests only from the specified category, or if no category is specified, list known categories.') parser.add_argument('-f', '--file', type=str, help='Run tests from the specified file') - parser.add_argument('-l', '--list', type=str, nargs='?', const="", metavar='CATEGORY', + parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY', help='List all test cases, or those only within the specified category') parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID', help='Display the test case with specified id') @@ -357,10 +357,10 @@ def set_operation_mode(args): testcases = get_categorized_testlist(alltests, ucat) if args.list: - if (len(args.list) == 0): + if (args.list == "++"): list_test_cases(alltests) exit(0) - elif(len(args.list > 0)): + elif(len(args.list) > 0): if (args.list not in ucat): print("Unknown category " + args.list) print("Available categories:") From 32302902ff093891d8e64439cbb8ceae83e21ef8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 12 Oct 2017 11:38:45 -0700 Subject: [PATCH 1007/2177] mqprio: Reserve last 32 classid values for HW traffic classes and misc IDs This patch makes a slight tweak to mqprio in order to bring the classid values used back in line with what is used for mq. The general idea is to reserve values :ffe0 - :ffef to identify hardware traffic classes normally reported via dev->num_tc. By doing this we can maintain a consistent behavior with mq for classid where :1 - :ffdf will represent a physical qdisc mapped onto a Tx queue represented by classid - 1, and the traffic classes will be mapped onto a known subset of classid values reserved for our virtual qdiscs. Note I reserved the range from :fff0 - :ffff since this way we might be able to reuse these classid values with clsact and ingress which would mean that for mq, mqprio, ingress, and clsact we should be able to maintain a similar classid layout. Signed-off-by: Alexander Duyck Tested-by: Jesus Sanchez-Palencia Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 1 + net/sched/sch_mqprio.c | 79 ++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index e95b5c9b9fad..e7cc3d3c7421 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -74,6 +74,7 @@ struct tc_estimator { #define TC_H_INGRESS (0xFFFFFFF1U) #define TC_H_CLSACT TC_H_INGRESS +#define TC_H_MIN_PRIORITY 0xFFE0U #define TC_H_MIN_INGRESS 0xFFF2U #define TC_H_MIN_EGRESS 0xFFF3U diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index f1ae9be83934..cae91b4b08a6 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -153,6 +153,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; + /* make certain can allocate enough classids to handle queues */ + if (dev->num_tx_queues >= TC_H_MIN_PRIORITY) + return -ENOMEM; + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; @@ -305,7 +309,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, unsigned long cl) { struct net_device *dev = qdisc_dev(sch); - unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); + unsigned long ntx = cl - 1; if (ntx >= dev->num_tx_queues) return NULL; @@ -447,38 +451,35 @@ static unsigned long mqprio_find(struct Qdisc *sch, u32 classid) struct net_device *dev = qdisc_dev(sch); unsigned int ntx = TC_H_MIN(classid); - if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) - return 0; - return ntx; + /* There are essentially two regions here that have valid classid + * values. The first region will have a classid value of 1 through + * num_tx_queues. All of these are backed by actual Qdiscs. + */ + if (ntx < TC_H_MIN_PRIORITY) + return (ntx <= dev->num_tx_queues) ? ntx : 0; + + /* The second region represents the hardware traffic classes. These + * are represented by classid values of TC_H_MIN_PRIORITY through + * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1 + */ + return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0; } static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { - struct net_device *dev = qdisc_dev(sch); + if (cl < TC_H_MIN_PRIORITY) { + struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); + struct net_device *dev = qdisc_dev(sch); + int tc = netdev_txq_to_tc(dev, cl - 1); - if (cl <= netdev_get_num_tc(dev)) { + tcm->tcm_parent = (tc < 0) ? 
0 : + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(tc + TC_H_MIN_PRIORITY)); + tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + } else { tcm->tcm_parent = TC_H_ROOT; tcm->tcm_info = 0; - } else { - int i; - struct netdev_queue *dev_queue; - - dev_queue = mqprio_queue_get(sch, cl); - tcm->tcm_parent = 0; - for (i = 0; i < netdev_get_num_tc(dev); i++) { - struct netdev_tc_txq tc = dev->tc_to_txq[i]; - int q_idx = cl - netdev_get_num_tc(dev); - - if (q_idx > tc.offset && - q_idx <= tc.offset + tc.count) { - tcm->tcm_parent = - TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(i + 1)); - break; - } - } - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; } tcm->tcm_handle |= TC_H_MIN(cl); return 0; @@ -489,15 +490,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __releases(d->lock) __acquires(d->lock) { - struct net_device *dev = qdisc_dev(sch); - - if (cl <= netdev_get_num_tc(dev)) { + if (cl >= TC_H_MIN_PRIORITY) { int i; __u32 qlen = 0; struct Qdisc *qdisc; struct gnet_stats_queue qstats = {0}; struct gnet_stats_basic_packed bstats = {0}; - struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; + struct net_device *dev = qdisc_dev(sch); + struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; /* Drop lock here it will be reclaimed before touching * statistics this is required because the d->lock we @@ -550,12 +550,25 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* Walk hierarchy with a virtual class per tc */ arg->count = arg->skip; - for (ntx = arg->skip; - ntx < dev->num_tx_queues + netdev_get_num_tc(dev); - ntx++) { + for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) { + if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + + /* Pad the values and skip over unused traffic classes */ + if (ntx < TC_MAX_QUEUE) { + arg->count = TC_MAX_QUEUE; + ntx = TC_MAX_QUEUE; + } + + /* Reset offset, sort out remaining per-queue qdiscs */ + for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) { if (arg->fn(sch, ntx + 1, arg) < 0) { arg->stop = 1; - break; + return; } arg->count++; } From 69d78ef25c7b0058674145500efb12255738ba8a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:00:57 +0200 Subject: [PATCH 1008/2177] net: sched: store Qdisc pointer in struct block Prepare for removal of tp->q and store Qdisc pointer in the block structure. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 4 ++-- include/net/sch_generic.h | 1 + net/sched/cls_api.c | 3 ++- net/sched/sch_atm.c | 4 ++-- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_dsmark.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_hfsc.c | 4 ++-- net/sched/sch_htb.c | 4 ++-- net/sched/sch_ingress.c | 6 +++--- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_qfq.c | 2 +- net/sched/sch_sfb.c | 2 +- net/sched/sch_sfq.c | 2 +- 16 files changed, 23 insertions(+), 21 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 60d39789e4f0..e6c9e1e4d711 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -22,7 +22,7 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain); + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); void tcf_block_put(struct tcf_block *block); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -30,7 +30,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, #else static inline int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain) + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) { return 0; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 684d8ed27eaa..df4032ca1b7f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -270,6 +270,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + struct Qdisc *q; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2977b8a90851..f7d3f1f539b7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -241,7 +241,7 @@ tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, } int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain) + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) { struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); struct tcf_chain *chain; @@ -257,6 +257,7 @@ int tcf_block_get(struct tcf_block **p_block, goto err_chain_create; } tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); + block->q = q; *p_block = block; return 0; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index c5fcdf1a58a0..2dbd249c0b2f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -281,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } - error = tcf_block_get(&flow->block, &flow->filter_list); + error = tcf_block_get(&flow->block, &flow->filter_list, sch); if (error) { kfree(flow); goto err_out; @@ -546,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - err = tcf_block_get(&p->link.block, &p->link.filter_list); + err = tcf_block_get(&p->link.block, &p->link.filter_list, sch); if (err) return err; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index dcef97fa8047..c3b92d62190e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1566,7 +1566,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); 
if (err) { kfree(cl); return err; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 2d0e8d4bdc29..753dc7a77b60 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -412,7 +412,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) struct drr_sched *q = qdisc_priv(sch); int err; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; err = qdisc_class_hash_init(&q->clhash); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 2836c80c7aa5..fb4fb71c68cf 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) goto errout; - err = tcf_block_get(&p->block, &p->filter_list); + err = tcf_block_get(&p->block, &p->filter_list, sch); if (err) return err; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index de3b57ceca7b..3c40edeff1e8 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -481,7 +481,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) return err; } - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3f88b75488b0..a692184bd333 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1033,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); if (err) { kfree(cl); return err; @@ -1405,7 +1405,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; - err = tcf_block_get(&q->root.block, &q->root.filter_list); + err = tcf_block_get(&q->root.block, &q->root.filter_list, sch); if (err) return err; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c6d7ae81b41f..57be73c0e1d2 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1030,7 +1030,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -1393,7 +1393,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); if (err) { kfree(cl); goto failure; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 44de4ee51ce9..9ccc1b89b0d9 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -59,7 +59,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; - err = tcf_block_get(&q->block, &dev->ingress_cl_list); + err = tcf_block_get(&q->block, &dev->ingress_cl_list, sch); if (err) return err; @@ -153,11 +153,11 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; - err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list); + err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list, sch); if (err) return err; - err = tcf_block_get(&q->egress_block, &dev->egress_cl_list); + err = tcf_block_get(&q->egress_block, &dev->egress_cl_list, sch); if (err) return err; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index ff4fc3e0facd..31e0a284eeff 100644 
--- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -245,7 +245,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2dd6c68ae91e..95fad348c8d7 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -212,7 +212,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6ddfd4991108..8694c7b6d2b1 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1419,7 +1419,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) int i, j, err; u32 max_cl_shift, maxbudg_shift, max_classes; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index cc39e170b4aa..487d375f5a06 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -553,7 +553,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt) struct sfb_sched_data *q = qdisc_priv(sch); int err; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 74ea863b8240..123a53af2506 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -725,7 +725,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, (unsigned long)sch); - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; From 855319becbcffec6988a4e781a861b69a71c5b58 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:00:58 +0200 Subject: [PATCH 1009/2177] net: sched: store net pointer in block and introduce qdisc_net helper Store net pointer in the block structure. Along the way, introduce qdisc_net helper which allows to easily obtain net pointer for qdisc instance. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_sched.h | 7 +++++++ include/net/sch_generic.h | 1 + net/sched/cls_api.c | 1 + 3 files changed, 9 insertions(+) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 259bc191ba59..2d234af15f3e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -4,7 +4,9 @@ #include #include #include +#include #include +#include #include #define DEFAULT_TX_QUEUE_LEN 1000 @@ -146,4 +148,9 @@ static inline bool is_classid_clsact_egress(u32 classid) TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS); } +static inline struct net *qdisc_net(struct Qdisc *q) +{ + return dev_net(q->dev_queue->dev); +} + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index df4032ca1b7f..9b2cb91dc0d9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -270,6 +270,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + struct net *net; struct Qdisc *q; }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f7d3f1f539b7..856003caa3bb 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -257,6 +257,7 @@ int tcf_block_get(struct tcf_block **p_block, goto err_chain_create; } tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); + block->net = qdisc_net(q); block->q = q; *p_block = block; return 0; From 44186460c85a0121562db7cfef132d63c869118f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:00:59 +0200 Subject: [PATCH 1010/2177] net: sched: introduce tcf_block_q and tcf_block_dev helpers These helpers allows to get a q and netdev pointers for given block easily. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e6c9e1e4d711..7bed674ba29a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -24,6 +24,17 @@ void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); void tcf_block_put(struct tcf_block *block); + +static inline struct Qdisc *tcf_block_q(struct tcf_block *block) +{ + return block->q; +} + +static inline struct net_device *tcf_block_dev(struct tcf_block *block) +{ + return tcf_block_q(block)->dev_queue->dev; +} + int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -39,6 +50,16 @@ static inline void tcf_block_put(struct tcf_block *block) { } +static inline struct Qdisc *tcf_block_q(struct tcf_block *block) +{ + return NULL; +} + +static inline struct net_device *tcf_block_dev(struct tcf_block *block) +{ + return NULL; +} + static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { From 34e3759cf86a3e75463e34c1bb9691777406a175 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:01:00 +0200 Subject: [PATCH 1011/2177] net: sched: teach tcf_bind/unbind_filter to use block->q Whenever the block->q is set, it can be used instead of tp->q as it contains the same value. When it is not set, which can't happen now but it might happen with the follow-up shared blocks introduction, the class is not set in the result. That would lead to a class lookup instead of direct class pointer use for classful qdiscs. However, it is not planned to support classful qdisqs sharing filter blocks, so that may never happen. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 7bed674ba29a..49a143e0fe65 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -74,36 +74,43 @@ __cls_set_class(unsigned long *clp, unsigned long cl) } static inline unsigned long -cls_set_class(struct tcf_proto *tp, unsigned long *clp, - unsigned long cl) +cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl) { unsigned long old_cl; - - tcf_tree_lock(tp); + + sch_tree_lock(q); old_cl = __cls_set_class(clp, cl); - tcf_tree_unlock(tp); - + sch_tree_unlock(q); return old_cl; } static inline void tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base) { + struct Qdisc *q = tp->chain->block->q; unsigned long cl; - cl = tp->q->ops->cl_ops->bind_tcf(tp->q, base, r->classid); - cl = cls_set_class(tp, &r->class, cl); + /* Check q as it is not set for shared blocks. In that case, + * setting class is not supported. + */ + if (!q) + return; + cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); + cl = cls_set_class(q, &r->class, cl); if (cl) - tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + q->ops->cl_ops->unbind_tcf(q, cl); } static inline void tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) { + struct Qdisc *q = tp->chain->block->q; unsigned long cl; + if (!q) + return; if ((cl = __cls_set_class(&r->class, 0)) != 0) - tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + q->ops->cl_ops->unbind_tcf(q, cl); } struct tcf_exts { From c1954561cd262b8adf7908c5552fe9ad99f82f81 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:01:01 +0200 Subject: [PATCH 1012/2177] net: sched: ematch: obtain net pointer from blocks Instead of using tp->q, use block to get the net pointer. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/ematch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 03b677bc0700..1331a4c2d8ff 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp, struct tcf_ematch_hdr *em_hdr = nla_data(nla); int data_len = nla_len(nla) - sizeof(*em_hdr); void *data = (void *) em_hdr + sizeof(*em_hdr); - struct net *net = dev_net(qdisc_dev(tp->q)); + struct net *net = tp->chain->block->net; if (!TCF_EM_REL_VALID(em_hdr->flags)) goto errout; From 7fa9d974f3c2a016b9accb18f4ee2ed2a738585c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:01:02 +0200 Subject: [PATCH 1013/2177] net: sched: cls_u32: use block instead of q in tc_u_common tc_u_common is now per-q. With blocks, it has to be converted to be per-block. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/cls_u32.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 094d224411a9..b6d46065f661 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -93,7 +93,7 @@ struct tc_u_hnode { struct tc_u_common { struct tc_u_hnode __rcu *hlist; - struct Qdisc *q; + struct tcf_block *block; int refcnt; struct idr handle_idr; struct hlist_node hnode; @@ -335,11 +335,7 @@ static struct hlist_head *tc_u_common_hash; static unsigned int tc_u_hash(const struct tcf_proto *tp) { - struct net_device *dev = tp->q->dev_queue->dev; - u32 qhandle = tp->q->handle; - int ifindex = dev->ifindex; - - return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT); + return hash_64((u64) tp->chain->block, U32_HASH_SHIFT); } static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) @@ -349,7 +345,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) h = tc_u_hash(tp); hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { - if (tc->q == tp->q) + if (tc->block == tp->chain->block) return tc; } return NULL; @@ -378,7 +374,7 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } - tp_c->q = tp->q; + tp_c->block = tp->chain->block; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); From 1abf272022cf1d18469405f47b4ec49c6a3125db Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:01:03 +0200 Subject: [PATCH 1014/2177] net: sched: tcindex, fw, flow: use tcf_block_q helper to get struct Qdisc Use helper to get q pointer per block. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 7 +++++-- net/sched/cls_fw.c | 5 ++++- net/sched/cls_tcindex.c | 5 ++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2a3a60ec5b86..f3be6667a253 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -491,8 +491,11 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } - if (TC_H_MAJ(baseclass) == 0) - baseclass = TC_H_MAKE(tp->q->handle, baseclass); + if (TC_H_MAJ(baseclass) == 0) { + struct Qdisc *q = tcf_block_q(tp->chain->block); + + baseclass = TC_H_MAKE(q->handle, baseclass); + } if (TC_H_MIN(baseclass) == 0) baseclass = TC_H_MAKE(baseclass, 1); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 941245ad07fd..aa1e1f3bd20c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -28,6 +28,7 @@ #include #include #include +#include #define HTSIZE 256 @@ -83,9 +84,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, } } } else { + struct Qdisc *q = tcf_block_q(tp->chain->block); + /* Old method: classify the packet using its skb mark. 
*/ if (id && (TC_H_MAJ(id) == 0 || - !(TC_H_MAJ(id ^ tp->q->handle)))) { + !(TC_H_MAJ(id ^ q->handle)))) { res->classid = id; res->class = 0; return 0; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 14a7e08b2fa9..d732b5474a4d 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * Passing parameters to the root seems to be done more awkwardly than really @@ -90,9 +91,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, f = tcindex_lookup(p, key); if (!f) { + struct Qdisc *q = tcf_block_q(tp->chain->block); + if (!p->fall_through) return -1; - res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key); + res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key); res->class = 0; pr_debug("alg 0x%x\n", res->classid); return 0; From 74e3be6021d22df2ffcb691eae1affeb2bd0128e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:01:04 +0200 Subject: [PATCH 1015/2177] net: sched: use tcf_block_q helper to get q pointer for sch_tree_lock Use tcf_block_q helper to get q pointer to be used for direct call of sch_tree_lock/unlock instead of tcf_tree_lock/unlock. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 --- net/sched/sch_api.c | 6 ++++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9b2cb91dc0d9..0aea9e23e97a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -359,9 +359,6 @@ static inline void sch_tree_unlock(const struct Qdisc *q) spin_unlock_bh(qdisc_root_sleeping_lock(q)); } -#define tcf_tree_lock(tp) sch_tree_lock((tp)->q) -#define tcf_tree_unlock(tp) sch_tree_unlock((tp)->q) - extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index aa82116ed10c..a9ac912f1d67 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1661,9 +1661,11 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) struct tcf_bind_args *a = (void *)arg; if (tp->ops->bind_class) { - tcf_tree_lock(tp); + struct Qdisc *q = tcf_block_q(tp->chain->block); + + sch_tree_lock(q); tp->ops->bind_class(n, a->classid, a->cl); - tcf_tree_unlock(tp); + sch_tree_unlock(q); } return 0; } From a10fa20101ae48fed2102f8e84d98f5aac2e5c40 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2017 14:01:05 +0200 Subject: [PATCH 1016/2177] net: sched: propagate q and parent from caller down to tcf_fill_node The callers have this info, they will pass it down to tcf_fill_node. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/cls_api.c | 55 ++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 856003caa3bb..2e8e87fd9d97 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -420,8 +420,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, } static int tcf_fill_node(struct net *net, struct sk_buff *skb, - struct tcf_proto *tp, void *fh, u32 portid, - u32 seq, u16 flags, int event) + struct tcf_proto *tp, struct Qdisc *q, u32 parent, + void *fh, u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -434,8 +434,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; - tcm->tcm_parent = tp->classid; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; + tcm->tcm_parent = parent; tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) goto nla_put_failure; @@ -458,6 +458,7 @@ nla_put_failure: static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, + struct Qdisc *q, u32 parent, void *fh, int event, bool unicast) { struct sk_buff *skb; @@ -467,7 +468,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, + if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, n->nlmsg_flags, event) <= 0) { kfree_skb(skb); return -EINVAL; @@ -482,6 +483,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, + struct Qdisc *q, u32 parent, void *fh, bool unicast, bool *last) { struct sk_buff *skb; @@ -492,7 +494,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, + if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) { kfree_skb(skb); return -EINVAL; @@ -512,6 +514,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, } static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, + struct Qdisc *q, u32 parent, struct nlmsghdr *n, struct tcf_chain *chain, int event) { @@ -519,7 +522,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, for (tp = rtnl_dereference(chain->filter_chain); tp; tp = rtnl_dereference(tp->next)) - tfilter_notify(net, oskb, n, tp, 0, event, false); + tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false); } /* Add/change/delete/get a filter node */ @@ -638,7 +641,8 @@ replay: } if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { - tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); + tfilter_notify_chain(net, skb, q, parent, n, + chain, RTM_DELTFILTER); tcf_chain_flush(chain); err = 0; goto errout; @@ -685,7 +689,7 @@ replay: if (!fh) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, fh, + tfilter_notify(net, skb, n, tp, q, parent, fh, RTM_DELTFILTER, false); tcf_proto_destroy(tp); err = 0; @@ -710,8 +714,8 @@ replay: } break; case RTM_DELTFILTER: - err = tfilter_del_notify(net, skb, n, tp, fh, false, - &last); + err = tfilter_del_notify(net, skb, n, tp, q, parent, + 
fh, false, &last); if (err) goto errout; if (last) { @@ -720,7 +724,7 @@ replay: } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, fh, + err = tfilter_notify(net, skb, n, tp, q, parent, fh, RTM_NEWTFILTER, true); goto errout; default: @@ -734,7 +738,8 @@ replay: if (err == 0) { if (tp_created) tcf_chain_tp_insert(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); + tfilter_notify(net, skb, n, tp, q, parent, fh, + RTM_NEWTFILTER, false); } else { if (tp_created) tcf_proto_destroy(tp); @@ -753,6 +758,8 @@ struct tcf_dump_args { struct tcf_walker w; struct sk_buff *skb; struct netlink_callback *cb; + struct Qdisc *q; + u32 parent; }; static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) @@ -760,13 +767,14 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) struct tcf_dump_args *a = (void *)arg; struct net *net = sock_net(a->skb->sk); - return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, + return tcf_fill_node(net, a->skb, tp, a->q, a->parent, + n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } -static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, - struct netlink_callback *cb, +static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, + struct sk_buff *skb, struct netlink_callback *cb, long index_start, long *p_index) { struct net *net = sock_net(skb->sk); @@ -788,7 +796,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(net, skb, tp, 0, + if (tcf_fill_node(net, skb, tp, q, parent, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) @@ -801,6 +809,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, arg.w.fn = tcf_node_dump; arg.skb = skb; arg.cb = cb; + arg.q = q; + arg.parent = parent; arg.w.stop = 0; arg.w.skip = cb->args[1] - 1; arg.w.count = 0; @@ -826,6 +836,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; long index_start; long index; + u32 parent; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) @@ -839,10 +850,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (!dev) return skb->len; - if (!tcm->tcm_parent) + parent = tcm->tcm_parent; + if (!parent) { q = dev->qdisc; - else + parent = q->handle; + } else { q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); + } if (!q) goto out; cops = q->ops->cl_ops; @@ -866,7 +880,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; - if (!tcf_chain_dump(chain, skb, cb, index_start, &index)) + if (!tcf_chain_dump(chain, q, parent, skb, cb, + index_start, &index)) break; } From 9db9583839b760fc492a7b288edfe2213184a579 Mon Sep 17 00:00:00 2001 From: Abhijit Ayarekar Date: Fri, 13 Oct 2017 12:24:06 -0700 Subject: [PATCH 1017/2177] bpf: Add -target to clang switch while cross compiling. Update to llvm excludes assembly instructions. llvm git revision is below commit 65fad7c26569 ("bpf: add inline-asm support") This change will be part of llvm release 6.0 __ASM_SYSREG_H define is not required for native compile. -target switch includes appropriate target specific files while cross compiling Tested on x86 and arm64. 
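Editorial usage sketch, not part of the original patch: with CLANG_ARCH_ARGS in place, a cross build of the samples can be driven by the usual kbuild variables from the top of the kernel tree, assuming clang/llc and an aarch64 GNU cross toolchain are installed (the toolchain prefix below is illustrative):

    make samples/bpf/ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu-

Here CROSS_COMPILE selects the cross gcc this Makefile already uses as HOSTCC, and the added CLANG_ARCH_ARGS passes "-target arm64" to clang so the target-specific headers are picked up instead of relying on the __ASM_SYSREG_H define.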
Signed-off-by: Abhijit Ayarekar Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index ebc2ad69b62c..81f9fcd736b7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -180,6 +180,7 @@ CLANG ?= clang # Detect that we're cross compiling and use the cross compiler ifdef CROSS_COMPILE HOSTCC = $(CROSS_COMPILE)gcc +CLANG_ARCH_ARGS = -target $(ARCH) endif # Trick to allow make to be run from this directory @@ -229,9 +230,9 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h $(obj)/%.o: $(src)/%.c $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -I$(srctree)/tools/testing/selftests/bpf/ \ - -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ + -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ - -Wno-unknown-warning-option \ + -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ From 0e80193bd8c1d97654f1a2f45934e7372e9a512e Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 13 Oct 2017 15:01:08 -0700 Subject: [PATCH 1018/2177] ipv6: check fn before doing FIB6_SUBTREE(fn) In fib6_locate(), we need to first make sure fn is not NULL before doing FIB6_SUBTREE(fn) to avoid crash. This fixes the following static checker warning: net/ipv6/ip6_fib.c:1462 fib6_locate() warn: variable dereferenced before check 'fn' (see line 1459) net/ipv6/ip6_fib.c 1458 if (src_len) { 1459 struct fib6_node *subtree = FIB6_SUBTREE(fn); ^^^^^^^^^^^^^^^^ We shifted this dereference 1460 1461 WARN_ON(saddr == NULL); 1462 if (fn && subtree) ^^ before the check for NULL. 1463 fn = fib6_locate_1(subtree, saddr, src_len, 1464 offsetof(struct rt6_info, rt6i_src) Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Reported-by: Dan Carpenter Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c2ecd5ec638a..548af48212fc 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1456,13 +1456,16 @@ struct fib6_node *fib6_locate(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (src_len) { - struct fib6_node *subtree = FIB6_SUBTREE(fn); - WARN_ON(saddr == NULL); - if (fn && subtree) - fn = fib6_locate_1(subtree, saddr, src_len, + if (fn) { + struct fib6_node *subtree = FIB6_SUBTREE(fn); + + if (subtree) { + fn = fib6_locate_1(subtree, saddr, src_len, offsetof(struct rt6_info, rt6i_src), exact_match); + } + } } #endif From 0da4af00b2ed3dbe46788623a696c4169447eadc Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 13 Oct 2017 15:08:07 -0700 Subject: [PATCH 1019/2177] ipv6: only update __use and lastusetime once per jiffy at most In order to not dirty the cacheline too often, we try to only update dst->__use and dst->lastusetime at most once per jiffy. As dst->lastusetime is only used by ipv6 garbage collector, it should be good enough time resolution. And __use is only used in ipv6_route_seq_show() to show how many times a dst has been used. And as __use is not atomic_t right now, it does not show the precise number of usage times anyway. 
So we think it should be OK to only update it at most once per jiffy. According to my latest syn flood test on a machine with intel Xeon 6th gen processor and 2 10G mlx nics bonded together, each with 8 rx queues on 2 NUMA nodes: With this patch, the packet process rate increases from ~3.49Mpps to ~3.75Mpps with a 7% increase rate. Note: dst_use() is being renamed to dst_hold_and_use() to better specify the purpose of the function. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 19 ++++++++++--------- net/decnet/dn_route.c | 8 ++++---- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 204c19e25456..5047e8053d6c 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -255,17 +255,18 @@ static inline void dst_hold(struct dst_entry *dst) WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); } -static inline void dst_use(struct dst_entry *dst, unsigned long time) -{ - dst_hold(dst); - dst->__use++; - dst->lastuse = time; -} - static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) { - dst->__use++; - dst->lastuse = time; + if (time != dst->lastuse) { + dst->__use++; + dst->lastuse = time; + } +} + +static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time) +{ + dst_hold(dst); + dst_use_noref(dst, time); } static inline struct dst_entry *dst_clone(struct dst_entry *dst) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 0bd3afd01dd2..bff5ab88cdbb 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -338,7 +338,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - dst_use(&rth->dst, now); + dst_hold_and_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); dst_release_immediate(&rt->dst); @@ -351,7 +351,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - dst_use(&rt->dst, now); + dst_hold_and_use(&rt->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); *rp = rt; return 0; @@ -1258,7 +1258,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn * (flp->flowidn_mark == rt->fld.flowidn_mark) && dn_is_output_route(rt) && (rt->fld.flowidn_oif == flp->flowidn_oif)) { - dst_use(&rt->dst, jiffies); + dst_hold_and_use(&rt->dst, jiffies); rcu_read_unlock_bh(); *pprt = &rt->dst; return 0; @@ -1535,7 +1535,7 @@ static int dn_route_input(struct sk_buff *skb) (rt->fld.flowidn_oif == 0) && (rt->fld.flowidn_mark == skb->mark) && (rt->fld.flowidn_iif == cb->iif)) { - dst_use(&rt->dst, jiffies); + dst_hold_and_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); return 0; From 9185a610f8f7f1b4e4d28c9de27d1969cf58e0f1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 12 Oct 2017 18:40:02 -0400 Subject: [PATCH 1020/2177] tracing: bpf: Hide bpf trace events when they are not used All the trace events defined in include/trace/events/bpf.h are only used when CONFIG_BPF_SYSCALL is defined. But this file gets included by include/linux/bpf_trace.h which is included by the networking code with CREATE_TRACE_POINTS defined. If a trace event is created but not used it still has data structures and functions created for its use, even though nothing is using them. 
To not waste space, do not define the BPF trace events in bpf.h unless CONFIG_BPF_SYSCALL is defined. Signed-off-by: Steven Rostedt (VMware) Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/trace/events/bpf.h | 5 ++++- kernel/bpf/core.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h index 52c8425d144b..1fb58faa4a44 100644 --- a/include/trace/events/bpf.h +++ b/include/trace/events/bpf.h @@ -4,6 +4,9 @@ #if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_BPF_H +/* These are only used within the BPF_SYSCALL code */ +#ifdef CONFIG_BPF_SYSCALL + #include #include #include @@ -345,7 +348,7 @@ TRACE_EVENT(bpf_map_next_key, __print_hex(__get_dynamic_array(nxt), __entry->key_len), __entry->key_trunc ? " ..." : "") ); - +#endif /* CONFIG_BPF_SYSCALL */ #endif /* _TRACE_BPF_H */ #include diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 248961af2421..8e7c8bf2b687 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1580,5 +1580,8 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); +/* These are only used within the BPF_SYSCALL code */ +#ifdef CONFIG_BPF_SYSCALL EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type); EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu); +#endif From d01174fcd2c1ffefdd0554f847c4045a5c731591 Mon Sep 17 00:00:00 2001 From: Xue Liu Date: Tue, 10 Oct 2017 17:17:45 +0200 Subject: [PATCH 1021/2177] ieee802154: netlink: fix typo of the name of struct genl_ops Signed-off-by: Xue Liu Signed-off-by: Stefan Schmidt --- net/ieee802154/netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 6bde9e5a5503..96636e3b7aa9 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -89,7 +89,7 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) return genlmsg_reply(msg, info); } -static const struct genl_ops ieee8021154_ops[] = { +static const struct genl_ops ieee802154_ops[] = { /* see nl-phy.c */ IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy, ieee802154_dump_phy), @@ -137,8 +137,8 @@ struct genl_family nl802154_family __ro_after_init = { .version = 1, .maxattr = IEEE802154_ATTR_MAX, .module = THIS_MODULE, - .ops = ieee8021154_ops, - .n_ops = ARRAY_SIZE(ieee8021154_ops), + .ops = ieee802154_ops, + .n_ops = ARRAY_SIZE(ieee802154_ops), .mcgrps = ieee802154_mcgrps, .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), }; From 2a600d97cbb2a9311e6b42547d37e0eca9b9d6d6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 14 Oct 2017 17:04:40 +0300 Subject: [PATCH 1022/2177] pch_gbe: Switch to new PCI IRQ allocation API This removes custom flag handling. Signed-off-by: Andy Shevchenko Signed-off-by: David S. 
Miller --- .../net/ethernet/oki-semi/pch_gbe/pch_gbe.h | 3 +- .../ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 42 +++++++------------ 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h index 8d710a3b4db0..697e29dd4bd3 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h @@ -613,7 +613,6 @@ struct pch_gbe_privdata { * @rx_ring: Pointer of Rx descriptor ring structure * @rx_buffer_len: Receive buffer length * @tx_queue_len: Transmit queue length - * @have_msi: PCI MSI mode flag * @pch_gbe_privdata: PCI Device ID driver_data */ @@ -623,6 +622,7 @@ struct pch_gbe_adapter { atomic_t irq_sem; struct net_device *netdev; struct pci_dev *pdev; + int irq; struct net_device *polling_netdev; struct napi_struct napi; struct pch_gbe_hw hw; @@ -637,7 +637,6 @@ struct pch_gbe_adapter { struct pch_gbe_rx_ring *rx_ring; unsigned long rx_buffer_len; unsigned long tx_queue_len; - bool have_msi; bool rx_stop_flag; int hwts_tx_en; int hwts_rx_en; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 5ae9681a2da7..457ee80307ea 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -781,11 +781,8 @@ static void pch_gbe_free_irq(struct pch_gbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; - free_irq(adapter->pdev->irq, netdev); - if (adapter->have_msi) { - pci_disable_msi(adapter->pdev); - netdev_dbg(netdev, "call pci_disable_msi\n"); - } + free_irq(adapter->irq, netdev); + pci_free_irq_vectors(adapter->pdev); } /** @@ -799,7 +796,7 @@ static void pch_gbe_irq_disable(struct pch_gbe_adapter *adapter) atomic_inc(&adapter->irq_sem); iowrite32(0, &hw->reg->INT_EN); ioread32(&hw->reg->INT_ST); - synchronize_irq(adapter->pdev->irq); + synchronize_irq(adapter->irq); netdev_dbg(adapter->netdev, "INT_EN reg : 0x%08x\n", ioread32(&hw->reg->INT_EN)); @@ -1903,30 +1900,23 @@ static int pch_gbe_request_irq(struct pch_gbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; int err; - int flags; - flags = IRQF_SHARED; - adapter->have_msi = false; - err = pci_enable_msi(adapter->pdev); - netdev_dbg(netdev, "call pci_enable_msi\n"); - if (err) { - netdev_dbg(netdev, "call pci_enable_msi - Error: %d\n", err); - } else { - flags = 0; - adapter->have_msi = true; - } - err = request_irq(adapter->pdev->irq, &pch_gbe_intr, - flags, netdev->name, netdev); + err = pci_alloc_irq_vectors(adapter->pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (err < 0) + return err; + + adapter->irq = pci_irq_vector(adapter->pdev, 0); + + err = request_irq(adapter->irq, &pch_gbe_intr, IRQF_SHARED, + netdev->name, netdev); if (err) netdev_err(netdev, "Unable to allocate interrupt Error: %d\n", err); - netdev_dbg(netdev, - "adapter->have_msi : %d flags : 0x%04x return : 0x%04x\n", - adapter->have_msi, flags, err); + netdev_dbg(netdev, "have_msi : %d return : 0x%04x\n", + pci_dev_msi_enabled(adapter->pdev), err); return err; } - /** * pch_gbe_up - Up GbE network device * @adapter: Board private structure @@ -2399,9 +2389,9 @@ static void pch_gbe_netpoll(struct net_device *netdev) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); - disable_irq(adapter->pdev->irq); - pch_gbe_intr(adapter->pdev->irq, netdev); - enable_irq(adapter->pdev->irq); + disable_irq(adapter->irq); + pch_gbe_intr(adapter->irq, netdev); + 
enable_irq(adapter->irq); } #endif From bc28df6e8543d41abfb48d4a4c6d445638d6768c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 15 Oct 2017 13:22:10 -0500 Subject: [PATCH 1023/2177] net: dccp: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that for options.c file, I placed the "fall through" comment on its own line, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/dccp/input.c | 1 + net/dccp/options.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index fa6be9750bb4..d28d46bff6ab 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -534,6 +534,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, case DCCP_PKT_DATA: if (sk->sk_state == DCCP_RESPOND) break; + /* fall through */ case DCCP_PKT_DATAACK: case DCCP_PKT_ACK: /* diff --git a/net/dccp/options.c b/net/dccp/options.c index 51cdfc3bd8ca..4e40db017e19 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -227,8 +227,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, * Ack vectors are processed by the TX CCID if it is * interested. The RX CCID need not parse Ack Vectors, * since it is only interested in clearing old state. - * Fall through. */ + /* fall through */ case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, pkt_type, opt, value, len)) From 92c43fcafe9db18dc51cd2a44cdafd80a4bcb2b5 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 15 Oct 2017 22:00:52 +0100 Subject: [PATCH 1024/2177] hamradio: baycom_par: use new parport device model Modify baycom driver to use the new parallel port device model. Signed-off-by: Sudip Mukherjee Signed-off-by: David S. 
Miller --- drivers/net/hamradio/baycom_par.c | 48 ++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c index e1783832d304..1f7ceafd61ff 100644 --- a/drivers/net/hamradio/baycom_par.c +++ b/drivers/net/hamradio/baycom_par.c @@ -311,7 +311,9 @@ static void par96_wakeup(void *handle) static int par96_open(struct net_device *dev) { struct baycom_state *bc = netdev_priv(dev); + struct pardev_cb par_cb; struct parport *pp; + int i; if (!dev || !bc) return -ENXIO; @@ -332,8 +334,21 @@ static int par96_open(struct net_device *dev) } memset(&bc->modem, 0, sizeof(bc->modem)); bc->hdrv.par.bitrate = 9600; - bc->pdev = parport_register_device(pp, dev->name, NULL, par96_wakeup, - par96_interrupt, PARPORT_DEV_EXCL, dev); + memset(&par_cb, 0, sizeof(par_cb)); + par_cb.wakeup = par96_wakeup; + par_cb.irq_func = par96_interrupt; + par_cb.private = (void *)dev; + par_cb.flags = PARPORT_DEV_EXCL; + for (i = 0; i < NR_PORTS; i++) + if (baycom_device[i] == dev) + break; + + if (i == NR_PORTS) { + pr_err("%s: no device found\n", bc_drvname); + parport_put_port(pp); + return -ENODEV; + } + bc->pdev = parport_register_dev_model(pp, dev->name, &par_cb, i); parport_put_port(pp); if (!bc->pdev) { printk(KERN_ERR "baycom_par: cannot register parport at 0x%lx\n", dev->base_addr); @@ -490,12 +505,34 @@ MODULE_LICENSE("GPL"); /* --------------------------------------------------------------------- */ +static int baycom_par_probe(struct pardevice *par_dev) +{ + struct device_driver *drv = par_dev->dev.driver; + int len = strlen(drv->name); + + if (strncmp(par_dev->name, drv->name, len)) + return -ENODEV; + + return 0; +} + +static struct parport_driver baycom_par_driver = { + .name = "bcp", + .probe = baycom_par_probe, + .devmodel = true, +}; + static int __init init_baycompar(void) { - int i, found = 0; + int i, found = 0, ret; char set_hw = 1; printk(bc_drvinfo); + + ret = parport_register_driver(&baycom_par_driver); + if (ret) + return ret; + /* * register net devices */ @@ -524,8 +561,10 @@ static int __init init_baycompar(void) baycom_device[i] = dev; } - if (!found) + if (!found) { + parport_unregister_driver(&baycom_par_driver); return -ENXIO; + } return 0; } @@ -539,6 +578,7 @@ static void __exit cleanup_baycompar(void) if (dev) hdlcdrv_unregister(dev); } + parport_unregister_driver(&baycom_par_driver); } module_init(init_baycompar); From 00fb3a7c7c14dd28feb0372a8458267e058add66 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2017 13:32:36 +0200 Subject: [PATCH 1025/2177] net: systemport: add NET_DSA dependency The notifier cause a link error when NET_DSA is a loadable module: drivers/net/ethernet/broadcom/bcmsysport.o: In function `bcm_sysport_remove': bcmsysport.c:(.text+0x1582): undefined reference to `unregister_dsa_notifier' drivers/net/ethernet/broadcom/bcmsysport.o: In function `bcm_sysport_probe': bcmsysport.c:(.text+0x278d): undefined reference to `register_dsa_notifier' This adds a dependency that forces the systemport driver to be a loadable module as well when that happens, but otherwise allows it to be built normally when DSA is either built-in or completely disabled. Fixes: d156576362c0 ("net: systemport: Establish lower/upper queue mapping") Signed-off-by: Arnd Bergmann Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 67134ece1107..af75156919ed 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -184,6 +184,7 @@ config BGMAC_PLATFORM config SYSTEMPORT tristate "Broadcom SYSTEMPORT internal MAC support" depends on OF + depends on NET_DSA || !NET_DSA select MII select PHYLIB select FIXED_PHY From d85969f1a981b9cd57f5037ebcb9c6d385c0bc70 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 16 Oct 2017 14:33:21 +0100 Subject: [PATCH 1026/2177] tcp: cdg: make struct tcp_cdg static The structure tcp_cdg is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol 'tcp_cdg' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/ipv4/tcp_cdg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 66ac69f7bd19..06fbe102a425 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -389,7 +389,7 @@ static void tcp_cdg_release(struct sock *sk) kfree(ca->gradients); } -struct tcp_congestion_ops tcp_cdg __read_mostly = { +static struct tcp_congestion_ops tcp_cdg __read_mostly = { .cong_avoid = tcp_cdg_cong_avoid, .cwnd_event = tcp_cdg_cwnd_event, .pkts_acked = tcp_cdg_acked, From 070cbf5be7774dcf0ceca081c7321c6f2ae833a4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Oct 2017 15:44:35 +0200 Subject: [PATCH 1027/2177] rtnetlink: place link af dump into own helper next patch will rcu-ify rtnl af_ops, i.e. allow af_ops lookup and function calls with rcu read lock held instead of rtnl mutex. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 72 ++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6a09f3d575af..a49cad25e577 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1382,6 +1382,47 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb, return 0; } +static int rtnl_fill_link_af(struct sk_buff *skb, + const struct net_device *dev, + u32 ext_filter_mask) +{ + const struct rtnl_af_ops *af_ops; + struct nlattr *af_spec; + + af_spec = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af_spec) + return -EMSGSIZE; + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + struct nlattr *af; + int err; + + if (!af_ops->fill_link_af) + continue; + + af = nla_nest_start(skb, af_ops->family); + if (!af) + return -EMSGSIZE; + + err = af_ops->fill_link_af(skb, dev, ext_filter_mask); + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. 
+ */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + return -EMSGSIZE; + + nla_nest_end(skb, af); + } + + nla_nest_end(skb, af_spec); + return 0; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, @@ -1389,8 +1430,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, { struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct nlattr *af_spec; - struct rtnl_af_ops *af_ops; ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -1477,36 +1516,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) goto nla_put_failure; - if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) goto nla_put_failure; - list_for_each_entry(af_ops, &rtnl_af_ops, list) { - if (af_ops->fill_link_af) { - struct nlattr *af; - int err; - - if (!(af = nla_nest_start(skb, af_ops->family))) - goto nla_put_failure; - - err = af_ops->fill_link_af(skb, dev, ext_filter_mask); - - /* - * Caller may return ENODATA to indicate that there - * was no data to be dumped. This is not an error, it - * means we should trim the attribute header and - * continue. - */ - if (err == -ENODATA) - nla_nest_cancel(skb, af); - else if (err < 0) - goto nla_put_failure; - - nla_nest_end(skb, af); - } - } - - nla_nest_end(skb, af_spec); - nlmsg_end(skb, nlh); return 0; From 5fa85a09390c4a525cb4d06a0c4644b01a47976b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Oct 2017 15:44:36 +0200 Subject: [PATCH 1028/2177] net: core: rcu-ify rtnl af_ops rtnl af_ops currently rely on rtnl mutex: unregister (called from module exit functions) takes the rtnl mutex and all users that do af_ops lookup also take the rtnl mutex. IOW, parallel rmmod will block until doit() callback is done. As none of the af_ops implementation sleep we can use rcu instead. doit functions that need the af_ops can now use rcu instead of the rtnl mutex provided the mutex isn't needed for other reasons. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 62 ++++++++++++++++++++++++++++++++------------ net/ipv4/devinet.c | 4 +-- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a49cad25e577..20b550d07fe3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -453,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) { const struct rtnl_af_ops *ops; - list_for_each_entry(ops, &rtnl_af_ops, list) { + list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { if (ops->family == family) return ops; } @@ -470,7 +470,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) void rtnl_af_register(struct rtnl_af_ops *ops) { rtnl_lock(); - list_add_tail(&ops->list, &rtnl_af_ops); + list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); } EXPORT_SYMBOL_GPL(rtnl_af_register); @@ -482,8 +482,10 @@ EXPORT_SYMBOL_GPL(rtnl_af_register); void rtnl_af_unregister(struct rtnl_af_ops *ops) { rtnl_lock(); - list_del(&ops->list); + list_del_rcu(&ops->list); rtnl_unlock(); + + synchronize_rcu(); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); @@ -496,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev, /* IFLA_AF_SPEC */ size = nla_total_size(sizeof(struct nlattr)); - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_link_af_size) { /* AF_* + nested data */ size += nla_total_size(sizeof(struct nlattr)) + af_ops->get_link_af_size(dev, ext_filter_mask); } } + rcu_read_unlock(); return size; } @@ -1393,7 +1397,7 @@ static int rtnl_fill_link_af(struct sk_buff *skb, if (!af_spec) return -EMSGSIZE; - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { struct nlattr *af; int err; @@ -1516,12 +1520,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) goto nla_put_failure; + rcu_read_lock(); if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) - goto nla_put_failure; + goto nla_put_failure_rcu; + rcu_read_unlock(); nlmsg_end(skb, nlh); return 0; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -1783,17 +1791,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops; - if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + rcu_read_lock(); + af_ops = rtnl_af_lookup(nla_type(af)); + if (!af_ops) { + rcu_read_unlock(); return -EAFNOSUPPORT; + } - if (!af_ops->set_link_af) + if (!af_ops->set_link_af) { + rcu_read_unlock(); return -EOPNOTSUPP; + } if (af_ops->validate_link_af) { err = af_ops->validate_link_af(dev, af); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); return err; + } } + + rcu_read_unlock(); } } @@ -2251,13 +2269,18 @@ static int do_setlink(const struct sk_buff *skb, nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops; + rcu_read_lock(); + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) BUG(); err = af_ops->set_link_af(dev, af); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); goto errout; + } + rcu_read_unlock(); status |= DO_SETLINK_NOTIFY; } } @@ -4004,25 +4027,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (!attr) goto nla_put_failure; - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if 
(af_ops->fill_stats_af) { struct nlattr *af; int err; af = nla_nest_start(skb, af_ops->family); - if (!af) + if (!af) { + rcu_read_unlock(); goto nla_put_failure; - + } err = af_ops->fill_stats_af(skb, dev); - if (err == -ENODATA) + if (err == -ENODATA) { nla_nest_cancel(skb, af); - else if (err < 0) + } else if (err < 0) { + rcu_read_unlock(); goto nla_put_failure; + } nla_nest_end(skb, af); } } + rcu_read_unlock(); nla_nest_end(skb, attr); @@ -4091,7 +4119,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, /* for IFLA_STATS_AF_SPEC */ size += nla_total_size(0); - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_stats_af_size) { size += nla_total_size( af_ops->get_stats_af_size(dev)); @@ -4100,6 +4129,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, size += nla_total_size(0); } } + rcu_read_unlock(); } return size; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7ce22a2c07ce..6d9b072d903b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1757,7 +1757,7 @@ static int inet_validate_link_af(const struct net_device *dev, struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; - if (dev && !__in_dev_get_rtnl(dev)) + if (dev && !__in_dev_get_rcu(dev)) return -EAFNOSUPPORT; err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); @@ -1781,7 +1781,7 @@ static int inet_validate_link_af(const struct net_device *dev, static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) { - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; int rem; From 36c0a9dfc6613242ba1de012e2d15145cdaae805 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 16 Oct 2017 16:04:51 +0200 Subject: [PATCH 1029/2177] tipc: fix rebasing error In commit 2f487712b893 ("tipc: guarantee that group broadcast doesn't bypass group unicast") there was introduced a last-minute rebasing error that broke non-group communication. We fix this here. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 723dd6998684..870b9b8f877a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1052,6 +1052,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, return true; } case CONN_MANAGER: + skb_queue_tail(inputq, skb); return true; case GROUP_PROTOCOL: skb_queue_tail(mc_inputq, skb); From c30f5d012edf755959c44d71757fbf4648ad75a8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:35 +0200 Subject: [PATCH 1030/2177] mlxsw: spectrum: Move netdevice NB to struct mlxsw_sp So far, all netdevice notifications that the driver cared about were related to its own ports, and mlxsw_sp could be retrieved from the netdevice's private data. For IP-in-IP offloading however, the driver cares about events on foreign netdevices, and getting at mlxsw_sp or router data structures from the handler is inconvenient. Therefore move the netdevice notifier blocks from global scope to struct mlxsw_sp to allow retrieval from the notifier block pointer itself. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 23 +++++++++++++------ .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 + 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 321988ac57cc..83f9c2564f61 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3667,6 +3667,9 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr); + static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -3736,6 +3739,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_router_init; } + /* Initialize netdevice notifier after router is initialized, so that + * the event handler can use router structures. + */ + mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; + err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n"); + goto err_netdev_notifier; + } + err = mlxsw_sp_span_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); @@ -3769,6 +3782,8 @@ err_dpipe_init: err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); err_span_init: + unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); +err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: mlxsw_sp_afa_fini(mlxsw_sp); @@ -3795,6 +3810,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_dpipe_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); + unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -4501,10 +4517,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return notifier_from_errno(err); } -static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { - .notifier_call = mlxsw_sp_netdevice_event, -}; - static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .notifier_call = mlxsw_sp_inetaddr_event, .priority = 10, /* Must be called before FIB notifier block */ @@ -4532,7 +4544,6 @@ static int __init mlxsw_sp_module_init(void) { int err; - register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); @@ -4553,7 +4564,6 @@ err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); - unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; } @@ -4564,7 +4574,6 @@ static void __exit mlxsw_sp_module_exit(void) unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); - unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } module_init(mlxsw_sp_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 8e45183dc9bb..e1a0157c0b94 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -161,6 +161,7 @@ struct mlxsw_sp { struct { DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); } kvdl; + struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; struct { From 6698c168bf48cb85505d7f6e77f0091a83aa497e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:36 +0200 Subject: [PATCH 1031/2177] mlxsw: spectrum_router: Move mlxsw_sp_netdev_ipip_type() Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6a356f4b99a3..c5e574bf3e08 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1295,6 +1295,25 @@ mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, return NULL; } +static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev, + enum mlxsw_sp_ipip_type *p_type) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + enum mlxsw_sp_ipip_type ipipt; + + for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { + ipip_ops = router->ipip_ops_arr[ipipt]; + if (dev->type == ipip_ops->dev_type) { + if (p_type) + *p_type = ipipt; + return true; + } + } + return false; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -2785,25 +2804,6 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev, - enum mlxsw_sp_ipip_type *p_type) -{ - struct mlxsw_sp_router *router = mlxsw_sp->router; - const struct mlxsw_sp_ipip_ops *ipip_ops; - enum mlxsw_sp_ipip_type ipipt; - - for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { - ipip_ops = router->ipip_ops_arr[ipipt]; - if (dev->type == ipip_ops->dev_type) { - if (p_type) - *p_type = ipipt; - return true; - } - } - return false; -} - static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_ipip_type ipipt, struct mlxsw_sp_nexthop *nh, From 0063587d358733008423c80302cb7b077be8e237 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:37 +0200 Subject: [PATCH 1032/2177] mlxsw: spectrum: Support decap-only IP-in-IP tunnels Current code for offloading IP-in-IP tunneling assumes that there is no decap without encap. But that's never true for IPv6 overlays, and is not true for IPv4 ones either, if net.ipv4.conf.*.rp_filter is unset. To support decap-only tunnels, an IPIP entry is now created as soon as an offloadable tunneling device is created. When that netdevice is up'd, a decap route is looked up and possibly offloaded. Thus decap is not handled implicitly as part of mlxsw_sp_ipip_entry_get() call anymore, but needs to be done explicitly after the get, if desired. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 8 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 6 + .../ethernet/mellanox/mlxsw/spectrum_router.c | 105 ++++++++++++++++-- 3 files changed, 109 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 83f9c2564f61..c3ae650fbe5e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4497,13 +4497,17 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) return netif_is_l3_master(info->upper_dev); } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, +static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp *mlxsw_sp; int err = 0; - if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) + mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); + if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event); + else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e1a0157c0b94..a4f21afd7f00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -395,6 +395,12 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused, unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); +bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +int +mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c5e574bf3e08..db834220a2fe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1206,7 +1206,6 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, { u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); struct mlxsw_sp_router *router = mlxsw_sp->router; - struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; enum mlxsw_sp_l3proto ul_proto; union mlxsw_sp_l3addr saddr; @@ -1231,11 +1230,6 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(ipip_entry)) return ipip_entry; - decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); - if (decap_fib_entry) - mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, - decap_fib_entry); - list_add_tail(&ipip_entry->ipip_list_node, &mlxsw_sp->router->ipip_list); @@ -1250,8 +1244,6 @@ mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, { if (--ipip_entry->ref_count == 0) { list_del(&ipip_entry->ipip_list_node); - if (ipip_entry->decap_fib_entry) - mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); mlxsw_sp_ipip_entry_destroy(ipip_entry); } } @@ -1314,6 +1306,103 @@ static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, return 
false; } +bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (ipip_entry->ol_dev == ol_dev) + return ipip_entry; + + return NULL; +} + +static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_ipip_type ipipt; + + mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt); + if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, + MLXSW_SP_L3_PROTO_IPV4) || + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, + MLXSW_SP_L3_PROTO_IPV6)) { + ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); + if (IS_ERR(ipip_entry)) + return PTR_ERR(ipip_entry); + } + + return 0; +} + +static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); +} + +static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) { + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, + ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + } + + return 0; +} + +static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry && ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + unsigned long event) +{ + switch (event) { + case NETDEV_REGISTER: + return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev); + case NETDEV_UNREGISTER: + mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev); + return 0; + case NETDEV_UP: + return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev); + case NETDEV_DOWN: + mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev); + return 0; + } + return 0; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; From f63ce4e54a424d9f99bad2ba099c972a07eab517 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:38 +0200 Subject: [PATCH 1033/2177] mlxsw: spectrum: Support IPIP overlay VRF migration IPIP entries are created as soon as an offloadable device is created. That means that when such a device is later moved to a different VRF, the loopback device that backs the tunnel is wrong. Thus when an offloadable encapsulating netdevice moves from one VRF to another, make sure that the loopback is updated as necessary. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 3 +- .../ethernet/mellanox/mlxsw/spectrum_router.c | 47 ++++++++++++++++++- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c3ae650fbe5e..e1e11c726c16 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4506,7 +4506,7 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev)) - err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event); + err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event, ptr); else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); else if (mlxsw_sp_is_vrf_event(event, ptr)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a4f21afd7f00..28feb745a38a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -400,7 +400,8 @@ bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, - unsigned long event); + unsigned long event, + struct netdev_notifier_changeupper_info *info); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index db834220a2fe..082cf00eaadb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1384,9 +1384,49 @@ static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); } +static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_rif_ipip_lb *lb_rif; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!ipip_entry) + return 0; + + /* When a tunneling device is moved to a different VRF, we need to + * update the backing loopback. Since RIFs can't be edited, we need to + * destroy and recreate it. That might create a window of opportunity + * where RALUE and RATR registers end up referencing a RIF that's + * already gone. RATRs are handled by the RIF destroy, and to take care + * of RALUE, demote the decap route back. 
+ */ + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + + lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt, + ol_dev); + if (IS_ERR(lb_rif)) + return PTR_ERR(lb_rif); + mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); + ipip_entry->ol_lb = lb_rif; + + if (ol_dev->flags & IFF_UP) { + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, + ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + } + + return 0; +} + int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev, - unsigned long event) + unsigned long event, + struct netdev_notifier_changeupper_info *info) { switch (event) { case NETDEV_REGISTER: @@ -1399,6 +1439,11 @@ int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, case NETDEV_DOWN: mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev); return 0; + case NETDEV_CHANGEUPPER: + if (netif_is_l3_master(info->upper_dev)) + return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp, + ol_dev); + return 0; } return 0; } From 4cccb737d2fd0d78b939a97b5ac1831b9a27d4c0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:39 +0200 Subject: [PATCH 1034/2177] mlxsw: spectrum: Drop refcounting of IPIP entries Formerly, IPIP entries were created lazily by next hops that referenced an offloadable IP-in-IP netdevice. However now that they are created eagerly as a reaction to events on such netdevices, the reference counting is useless. Hence drop it. The routes whose next hops reference an offloaded IP-in-IP netdevice actually linger around a bit after their device is unregistered. However, mlxsw_sp_ipip_entry_destroy() also destroys the backing loopback, and mlxsw_sp_rif_destroy() transitively (via mlxsw_sp_nexthop_rif_gone_sync()) calls mlxsw_sp_nexthop_ipip_fini(), which unlinks the IPIP entry from a next hop. Thus no dangling pointers are left behind for the brief window after netdevice is gone, but routes not yet. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_ipip.h | 1 - .../ethernet/mellanox/mlxsw/spectrum_router.c | 49 ++++++++----------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 1c2db831d83b..6fb49129ce87 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -47,7 +47,6 @@ struct mlxsw_sp_ipip_entry { enum mlxsw_sp_ipip_type ipipt; struct net_device *ol_dev; /* Overlay. */ struct mlxsw_sp_rif_ipip_lb *ol_lb; - unsigned int ref_count; /* Number of next hops using the tunnel. 
*/ struct mlxsw_sp_fib_entry *decap_fib_entry; struct list_head ipip_list_node; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 082cf00eaadb..3330120f2f8e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1002,9 +1002,8 @@ err_ol_ipip_lb_create: } static void -mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry) { - WARN_ON(ipip_entry->ref_count > 0); mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); kfree(ipip_entry); } @@ -1200,9 +1199,9 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_ipip_entry * -mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_ipip_type ipipt, - struct net_device *ol_dev) +mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) { u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); struct mlxsw_sp_router *router = mlxsw_sp->router; @@ -1210,15 +1209,12 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_l3proto ul_proto; union mlxsw_sp_l3addr saddr; + /* The configuration where several tunnels have the same local address + * in the same underlay table needs special treatment in the HW. That is + * currently not implemented in the driver. + */ list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) { - if (ipip_entry->ol_dev == ol_dev) - goto inc_ref_count; - - /* The configuration where several tunnels have the same local - * address in the same underlay table needs special treatment in - * the HW. That is currently not implemented in the driver. 
- */ ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, @@ -1233,19 +1229,15 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, list_add_tail(&ipip_entry->ipip_list_node, &mlxsw_sp->router->ipip_list); -inc_ref_count: - ++ipip_entry->ref_count; return ipip_entry; } static void -mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) { - if (--ipip_entry->ref_count == 0) { - list_del(&ipip_entry->ipip_list_node); - mlxsw_sp_ipip_entry_destroy(ipip_entry); - } + list_del(&ipip_entry->ipip_list_node); + mlxsw_sp_ipip_entry_dealloc(ipip_entry); } static bool @@ -1338,7 +1330,8 @@ static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp, MLXSW_SP_L3_PROTO_IPV4) || router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6)) { - ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); + ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt, + ol_dev); if (IS_ERR(ipip_entry)) return PTR_ERR(ipip_entry); } @@ -1353,7 +1346,7 @@ static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp, ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); if (ipip_entry) - mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); + mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); } static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp, @@ -2939,16 +2932,15 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_ipip_type ipipt, struct mlxsw_sp_nexthop *nh, struct net_device *ol_dev) { if (!nh->nh_grp->gateway || nh->ipip_entry) return 0; - nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); - if (IS_ERR(nh->ipip_entry)) - return PTR_ERR(nh->ipip_entry); + nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!nh->ipip_entry) + return -ENOENT; __mlxsw_sp_nexthop_neigh_update(nh, false); return 0; @@ -2963,7 +2955,6 @@ static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, return; __mlxsw_sp_nexthop_neigh_update(nh, true); - mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); nh->ipip_entry = NULL; } @@ -3007,7 +2998,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV4)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); if (err) return err; mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); @@ -4269,7 +4260,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV6)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); if (err) return err; mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); From 6a4bc44b012cbc29c9d824be2c7ab9eac8ee6b6f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Oct 2017 09:31:47 +0200 Subject: [PATCH 1035/2177] batman-adv: Avoid spurious warnings from bat_v neigh_cmp implementation The neighbor compare API implementation for B.A.T.M.A.N. 
V checks whether the neigh_ifinfo for this neighbor on a specific interface exists. A warning is printed when it isn't found. But it is not called inside a lock which would prevent that this information is lost right before batadv_neigh_ifinfo_get. It must therefore be expected that batadv_v_neigh_(cmp|is_sob) might not be able to get the requested neigh_ifinfo. A WARN_ON for such a situation seems not to be appropriate because this will only flood the kernel logs. The warnings must therefore be removed. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 93ef1c06227e..341ceab8338d 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -19,7 +19,6 @@ #include "main.h" #include -#include #include #include #include @@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; @@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; From a7986ce1cb01816d461bdfc0d349c0b73026c6a8 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 14 Oct 2017 20:54:02 -0500 Subject: [PATCH 1036/2177] rtlwifi: Fix typo in if ... else if ... else construct The kbuild test robot reports two conditions with no effect (if == else). These are the result of copy and paste typographical errors. 
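The pattern the robot flags can be sketched with the standalone program below; it is an illustration only, with stand-in names rather than the rtlwifi rate-map code, showing why an else-if arm that assigns the same value as the final else arm has no effect, and how picking the intended MCS8 value restores it:

#include <stdio.h>

enum vht_mcs_support { MCS_SUPPORT_0_7, MCS_SUPPORT_0_8, MCS_SUPPORT_0_9 };

/* Buggy shape: the else-if arm duplicates the else arm, so it never
 * changes the outcome.
 */
static int pick_rate_buggy(enum vht_mcs_support cap)
{
	if (cap == MCS_SUPPORT_0_7)
		return 7;
	else if (cap == MCS_SUPPORT_0_8)
		return 9;	/* copy-and-paste slip: same as the else arm */
	else
		return 9;
}

/* Fixed shape: the else-if arm now selects the value it was meant to. */
static int pick_rate_fixed(enum vht_mcs_support cap)
{
	if (cap == MCS_SUPPORT_0_7)
		return 7;
	else if (cap == MCS_SUPPORT_0_8)
		return 8;
	else
		return 9;
}

int main(void)
{
	printf("buggy: %d, fixed: %d\n",
	       pick_rate_buggy(MCS_SUPPORT_0_8),
	       pick_rate_fixed(MCS_SUPPORT_0_8));
	return 0;
}
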
Signed-off-by: Larry Finger Cc: Ping-Ke Shih Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Cc: kbuild-all@01.org Cc: Julia Lawall Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 3a297c5551ed..0b34886321f1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -831,7 +831,7 @@ static u8 _rtl_get_vht_highest_n_rate(struct ieee80211_hw *hw, else if ((tx_mcs_map & 0x000c) >> 2 == IEEE80211_VHT_MCS_SUPPORT_0_8) hw_rate = - rtlpriv->cfg->maps[RTL_RC_VHT_RATE_2SS_MCS9]; + rtlpriv->cfg->maps[RTL_RC_VHT_RATE_2SS_MCS8]; else hw_rate = rtlpriv->cfg->maps[RTL_RC_VHT_RATE_2SS_MCS9]; @@ -843,7 +843,7 @@ static u8 _rtl_get_vht_highest_n_rate(struct ieee80211_hw *hw, else if ((tx_mcs_map & 0x0003) == IEEE80211_VHT_MCS_SUPPORT_0_8) hw_rate = - rtlpriv->cfg->maps[RTL_RC_VHT_RATE_1SS_MCS9]; + rtlpriv->cfg->maps[RTL_RC_VHT_RATE_1SS_MCS8]; else hw_rate = rtlpriv->cfg->maps[RTL_RC_VHT_RATE_1SS_MCS9]; From 66cc044249603e12e1dbba347f03bdbc9f171fdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 16 Oct 2017 14:54:32 +0200 Subject: [PATCH 1037/2177] bcma: use bcma_debug and pr_cont in MIPS driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using bcma_debug gives a device-specific prefix for messages and pr_cont is a common helper for continuing a line. Signed-off-by: Rafał Miłecki Acked-By: Hauke Mehrtens Signed-off-by: Kalle Valo --- drivers/bcma/driver_mips.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 89af807cf29c..5904ef1aa624 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -184,10 +184,11 @@ static void bcma_core_mips_print_irq(struct bcma_device *dev, unsigned int irq) { int i; static const char *irq_name[] = {"2(S)", "3", "4", "5", "6", "D", "I"}; - printk(KERN_DEBUG KBUILD_MODNAME ": core 0x%04x, irq :", dev->id.id); + + bcma_debug(dev->bus, "core 0x%04x, irq :", dev->id.id); for (i = 0; i <= 6; i++) - printk(" %s%s", irq_name[i], i == irq ? "*" : " "); - printk("\n"); + pr_cont(" %s%s", irq_name[i], i == irq ? "*" : " "); + pr_cont("\n"); } static void bcma_core_mips_dump_irq(struct bcma_bus *bus) From 8a5f2166a6288ee4b5a393f1ebc8cfb26b0510f0 Mon Sep 17 00:00:00 2001 From: Henrik Austad Date: Tue, 17 Oct 2017 12:10:10 +0200 Subject: [PATCH 1038/2177] net: export netdev_txq_to_tc to allow sch_mqprio to compile as module In commit 32302902ff09 ("mqprio: Reserve last 32 classid values for HW traffic classes and misc IDs") sch_mqprio started using netdev_txq_to_tc to find the correct tc instead of dev->tc_to_txq[] However, when mqprio is compiled as a module, it cannot resolve the symbol, leading to this error: ERROR: "netdev_txq_to_tc" [net/sched/sch_mqprio.ko] undefined! This adds an EXPORT_SYMBOL() since the other user in the kernel (netif_set_xps_queue) is also EXPORT_SYMBOL() (and not _GPL) or in a sysfs-callback. Cc: Alexander Duyck Cc: Jesus Sanchez-Palencia Cc: David S. Miller Signed-off-by: Henrik Austad Reviewed-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index fcddccb6be41..d2b20e73080e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2040,6 +2040,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) return 0; } +EXPORT_SYMBOL(netdev_txq_to_tc); #ifdef CONFIG_XPS static DEFINE_MUTEX(xps_map_mutex); From 1c142e1c639bcbcb5b5db210d8fa4d2ecef6037e Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:31 -0700 Subject: [PATCH 1039/2177] i40e: rename 'cmd' variables in ethtool interface After the switch to the new ethtool API, ethtool passes us ethtool_ksettings structs instead of ethtool_command structs, however we were still referring to them as 'cmd' variables. This renames them to 'ks' variables which makes the code easier to understand. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 145 +++++++++--------- 1 file changed, 74 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 72d5f2cdf419..06514a76ff91 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -378,12 +378,12 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, /** * i40e_get_settings_link_up - Get the Link settings for when link is up * @hw: hw structure - * @ecmd: ethtool command to fill in + * @ks: ethtool ksettings to fill in * @netdev: network interface device structure - * + * @pf: pointer to physical function struct **/ static void i40e_get_settings_link_up(struct i40e_hw *hw, - struct ethtool_link_ksettings *cmd, + struct ethtool_link_ksettings *ks, struct net_device *netdev, struct i40e_pf *pf) { @@ -394,9 +394,9 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, u32 supported, advertising; ethtool_convert_link_mode_to_legacy_u32(&supported, - cmd->link_modes.supported); + ks->link_modes.supported); ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); + ks->link_modes.advertising); /* Initialize supported and advertised settings based on phy settings */ switch (hw_link_info->phy_type) { @@ -528,48 +528,49 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, /* Set speed and duplex */ switch (link_speed) { case I40E_LINK_SPEED_40GB: - cmd->base.speed = SPEED_40000; + ks->base.speed = SPEED_40000; break; case I40E_LINK_SPEED_25GB: #ifdef SPEED_25000 - cmd->base.speed = SPEED_25000; + ks->base.speed = SPEED_25000; #else netdev_info(netdev, "Speed is 25G, display not supported by this version of ethtool.\n"); #endif break; case I40E_LINK_SPEED_20GB: - cmd->base.speed = SPEED_20000; + ks->base.speed = SPEED_20000; break; case I40E_LINK_SPEED_10GB: - cmd->base.speed = SPEED_10000; + ks->base.speed = SPEED_10000; break; case I40E_LINK_SPEED_1GB: - cmd->base.speed = SPEED_1000; + ks->base.speed = SPEED_1000; break; case I40E_LINK_SPEED_100MB: - cmd->base.speed = SPEED_100; + ks->base.speed = SPEED_100; break; default: break; } - cmd->base.duplex = DUPLEX_FULL; + ks->base.duplex = DUPLEX_FULL; - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.supported, supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.advertising, advertising); } /** * i40e_get_settings_link_down - Get 
the Link settings for when link is down * @hw: hw structure - * @ecmd: ethtool command to fill in + * @ks: ethtool ksettings to fill in + * @pf: pointer to physical function struct * * Reports link settings that can be determined when link is down **/ static void i40e_get_settings_link_down(struct i40e_hw *hw, - struct ethtool_link_ksettings *cmd, + struct ethtool_link_ksettings *ks, struct i40e_pf *pf) { u32 supported, advertising; @@ -579,25 +580,25 @@ static void i40e_get_settings_link_down(struct i40e_hw *hw, */ i40e_phy_type_to_ethtool(pf, &supported, &advertising); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.supported, supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.advertising, advertising); /* With no link speed and duplex are unknown */ - cmd->base.speed = SPEED_UNKNOWN; - cmd->base.duplex = DUPLEX_UNKNOWN; + ks->base.speed = SPEED_UNKNOWN; + ks->base.duplex = DUPLEX_UNKNOWN; } /** - * i40e_get_settings - Get Link Speed and Duplex settings + * i40e_get_link_ksettings - Get Link Speed and Duplex settings * @netdev: network interface device structure - * @ecmd: ethtool command + * @ks: ethtool ksettings * * Reports speed/duplex settings based on media_type **/ static int i40e_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) + struct ethtool_link_ksettings *ks) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; @@ -607,74 +608,74 @@ static int i40e_get_link_ksettings(struct net_device *netdev, u32 advertising; if (link_up) - i40e_get_settings_link_up(hw, cmd, netdev, pf); + i40e_get_settings_link_up(hw, ks, netdev, pf); else - i40e_get_settings_link_down(hw, cmd, pf); + i40e_get_settings_link_down(hw, ks, pf); /* Now set the settings that don't rely on link being up/down */ /* Set autoneg settings */ - cmd->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ? - AUTONEG_ENABLE : AUTONEG_DISABLE); + ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ? 
+ AUTONEG_ENABLE : AUTONEG_DISABLE); switch (hw->phy.media_type) { case I40E_MEDIA_TYPE_BACKPLANE: - ethtool_link_ksettings_add_link_mode(cmd, supported, + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(cmd, supported, + ethtool_link_ksettings_add_link_mode(ks, supported, Backplane); - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, advertising, Backplane); - cmd->base.port = PORT_NONE; + ks->base.port = PORT_NONE; break; case I40E_MEDIA_TYPE_BASET: - ethtool_link_ksettings_add_link_mode(cmd, supported, TP); - ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); - cmd->base.port = PORT_TP; + ethtool_link_ksettings_add_link_mode(ks, supported, TP); + ethtool_link_ksettings_add_link_mode(ks, advertising, TP); + ks->base.port = PORT_TP; break; case I40E_MEDIA_TYPE_DA: case I40E_MEDIA_TYPE_CX4: - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); - cmd->base.port = PORT_DA; + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE); + ks->base.port = PORT_DA; break; case I40E_MEDIA_TYPE_FIBER: - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - cmd->base.port = PORT_FIBRE; + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ks->base.port = PORT_FIBRE; break; case I40E_MEDIA_TYPE_UNKNOWN: default: - cmd->base.port = PORT_OTHER; + ks->base.port = PORT_OTHER; break; } /* Set flow control settings */ - ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(ks, supported, Pause); switch (hw->fc.requested_mode) { case I40E_FC_FULL: - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); break; case I40E_FC_TX_PAUSE: - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause); break; case I40E_FC_RX_PAUSE: - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause); break; default: ethtool_convert_link_mode_to_legacy_u32( - &advertising, cmd->link_modes.advertising); + &advertising, ks->link_modes.advertising); advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause); ethtool_convert_legacy_u32_to_link_mode( - cmd->link_modes.advertising, advertising); + ks->link_modes.advertising, advertising); break; } @@ -682,14 +683,14 @@ static int i40e_get_link_ksettings(struct net_device *netdev, } /** - * i40e_set_settings - Set Speed and Duplex + * i40e_set_link_ksettings - Set Speed and Duplex * @netdev: network interface device structure - * @ecmd: ethtool command + * @ks: ethtool ksettings * * Set speed/duplex per media_types advertised/forced **/ static int i40e_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) + const struct ethtool_link_ksettings *ks) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_aq_get_phy_abilities_resp abilities; @@ -697,8 +698,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev, struct i40e_pf *pf = np->vsi->back; struct i40e_vsi *vsi = np->vsi; struct i40e_hw 
*hw = &pf->hw; - struct ethtool_link_ksettings safe_cmd; - struct ethtool_link_ksettings copy_cmd; + struct ethtool_link_ksettings safe_ks; + struct ethtool_link_ksettings copy_ks; i40e_status status = 0; bool change = false; int timeout = 50; @@ -733,31 +734,31 @@ static int i40e_set_link_ksettings(struct net_device *netdev, return -EOPNOTSUPP; } - /* copy the cmd to copy_cmd to avoid modifying the origin */ - memcpy(©_cmd, cmd, sizeof(struct ethtool_link_ksettings)); + /* copy the ksettings to copy_ks to avoid modifying the origin */ + memcpy(©_ks, ks, sizeof(struct ethtool_link_ksettings)); /* get our own copy of the bits to check against */ - memset(&safe_cmd, 0, sizeof(struct ethtool_link_ksettings)); - i40e_get_link_ksettings(netdev, &safe_cmd); + memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings)); + i40e_get_link_ksettings(netdev, &safe_ks); - /* save autoneg and speed out of cmd */ - autoneg = cmd->base.autoneg; + /* save autoneg and speed out of ksettings */ + autoneg = ks->base.autoneg; ethtool_convert_link_mode_to_legacy_u32(&advertise, - cmd->link_modes.advertising); + ks->link_modes.advertising); /* set autoneg and speed back to what they currently are */ - copy_cmd.base.autoneg = safe_cmd.base.autoneg; + copy_ks.base.autoneg = safe_ks.base.autoneg; ethtool_convert_link_mode_to_legacy_u32( - &tmp, safe_cmd.link_modes.advertising); + &tmp, safe_ks.link_modes.advertising); ethtool_convert_legacy_u32_to_link_mode( - copy_cmd.link_modes.advertising, tmp); + copy_ks.link_modes.advertising, tmp); - copy_cmd.base.cmd = safe_cmd.base.cmd; + copy_ks.base.cmd = safe_ks.base.cmd; - /* If copy_cmd and safe_cmd are not the same now, then they are + /* If copy_ks and safe_ks are not the same now, then they are * trying to set something that we do not support */ - if (memcmp(©_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings))) + if (memcmp(©_ks, &safe_ks, sizeof(struct ethtool_link_ksettings))) return -EOPNOTSUPP; while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { @@ -786,8 +787,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* If autoneg was not already enabled */ if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) { /* If autoneg is not supported, return error */ - if (!ethtool_link_ksettings_test_link_mode( - &safe_cmd, supported, Autoneg)) { + if (!ethtool_link_ksettings_test_link_mode(&safe_ks, + supported, + Autoneg)) { netdev_info(netdev, "Autoneg not supported on this phy\n"); err = -EINVAL; goto done; @@ -803,8 +805,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* If autoneg is supported 10GBASE_T is the only PHY * that can disable it, so otherwise return error */ - if (ethtool_link_ksettings_test_link_mode( - &safe_cmd, supported, Autoneg) && + if (ethtool_link_ksettings_test_link_mode(&safe_ks, + supported, + Autoneg) && hw->phy.link_info.phy_type != I40E_PHY_TYPE_10GBASE_T) { netdev_info(netdev, "Autoneg cannot be disabled on this phy\n"); @@ -819,7 +822,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, } ethtool_convert_link_mode_to_legacy_u32(&tmp, - safe_cmd.link_modes.supported); + safe_ks.link_modes.supported); if (advertise & ~tmp) { err = -EINVAL; goto done; From c6faca730dc01391e08011945fab5e67b09cdd05 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:32 -0700 Subject: [PATCH 1040/2177] i40e: remove ifdef SPEED_25000 This 'ifdef' doesn't accomplish anything so remove it. 
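For reference, the helper-based pattern this change moves to can be condensed as below; the function and its no_pause flag are hypothetical, not i40e code, and only show the masks being zeroed before any bits are set and modes being removed with the del helper instead of round-tripping through legacy u32 masks:

#include <linux/types.h>
#include <linux/ethtool.h>

static void demo_fill_pause(struct ethtool_link_ksettings *ks, bool no_pause)
{
	/* Start from clean masks so stale bits cannot leak through. */
	ethtool_link_ksettings_zero_link_mode(ks, supported);
	ethtool_link_ksettings_zero_link_mode(ks, advertising);

	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
	ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
	ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause);

	/* Remove modes directly rather than converting to a legacy u32,
	 * masking bits out and converting back.
	 */
	if (no_pause) {
		ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
		ethtool_link_ksettings_del_link_mode(ks, advertising,
						     Asym_Pause);
	}
}
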
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 06514a76ff91..c250116e5e22 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -531,12 +531,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, ks->base.speed = SPEED_40000; break; case I40E_LINK_SPEED_25GB: -#ifdef SPEED_25000 ks->base.speed = SPEED_25000; -#else - netdev_info(netdev, - "Speed is 25G, display not supported by this version of ethtool.\n"); -#endif break; case I40E_LINK_SPEED_20GB: ks->base.speed = SPEED_20000; From 21675bdc214b34d2ce4e30396e9ff36f0e61ae93 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:33 -0700 Subject: [PATCH 1041/2177] i40e: add function header for i40e_get_rxfh Someone left this poor little function naked with no header. This dresses it up in a proper function header it deserves. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c250116e5e22..f4a70ef3f2e0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -3968,6 +3968,16 @@ static u32 i40e_get_rxfh_indir_size(struct net_device *netdev) return I40E_HLUT_ARRAY_SIZE; } +/** + * i40e_get_rxfh - get the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function + * + * Reads the indirection table directly from the hardware. Returns 0 on + * success. + **/ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { From 5f434994ba94f7f02c1f47a9dd13204d1fbc9686 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:34 -0700 Subject: [PATCH 1042/2177] i40e: fix clearing link masks in i40e_get_link_ksettings This fixes two issues in i40e_get_link_ksettings. It adds calls to ethtool_link_ksettings_zero_link_mode to make sure advertising and supported link masks are cleared before we start setting bits in them. This also replaces some funky bit manipulations with a much nicer call to ethtool_link_ksettings_del_link_mode when removing link modes. 
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index f4a70ef3f2e0..fe0b2327de5b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -600,7 +600,9 @@ static int i40e_get_link_ksettings(struct net_device *netdev, struct i40e_hw *hw = &pf->hw; struct i40e_link_status *hw_link_info = &hw->phy.link_info; bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP; - u32 advertising; + + ethtool_link_ksettings_zero_link_mode(ks, supported); + ethtool_link_ksettings_zero_link_mode(ks, advertising); if (link_up) i40e_get_settings_link_up(hw, ks, netdev, pf); @@ -664,13 +666,9 @@ static int i40e_get_link_ksettings(struct net_device *netdev, Asym_Pause); break; default: - ethtool_convert_link_mode_to_legacy_u32( - &advertising, ks->link_modes.advertising); - - advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause); - - ethtool_convert_legacy_u32_to_link_mode( - ks->link_modes.advertising, advertising); + ethtool_link_ksettings_del_link_mode(ks, advertising, Pause); + ethtool_link_ksettings_del_link_mode(ks, advertising, + Asym_Pause); break; } From 52e2d02e42e9a9ac299b61a1b1acbac06fe7949d Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:35 -0700 Subject: [PATCH 1043/2177] i40e: fix i40e_phy_type_to_ethtool function header The function header erroneously listed 'phy_types' as a parameter. The correct parameter is 'pf'. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index fe0b2327de5b..a137675c1426 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -253,7 +253,7 @@ static void i40e_partition_setting_complaint(struct i40e_pf *pf) /** * i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes - * @phy_types: PHY types to convert + * @pf: PF struct with phy_types * @supported: pointer to the ethtool supported variable to fill in * @advertising: pointer to the ethtool advertising variable to fill in * From 91a5c44722c077e30c5ee2b22c5a460d9694ea1d Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:36 -0700 Subject: [PATCH 1044/2177] i40e: fix comment typo Someone forgot a word in this comment and it's confusing without it. 
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a137675c1426..e40fb559dacb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -516,8 +516,8 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, } /* Now that we've worked out everything that could be supported by the - * current PHY type, get what is supported by the NVM and them to - * get what is truly supported + * current PHY type, get what is supported by the NVM and intersect + * them to get what is truly supported */ i40e_phy_type_to_ethtool(pf, &e_supported, &e_advertising); From a03af69f5c2813e3c76630d043716a3b685c9d30 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:37 -0700 Subject: [PATCH 1045/2177] i40e: fix whitespace issues in i40e_ethtool.c There's a number of minor incidental whitespace issues in this file. This addresses most of the ones I could find. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 44 ++++++++----------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index e40fb559dacb..89ab398a7d30 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -511,7 +511,8 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, break; default: /* if we got here and link is up something bad is afoot */ - netdev_info(netdev, "WARNING: Link is up but PHY type 0x%x is not recognized.\n", + netdev_info(netdev, + "WARNING: Link is up but PHY type 0x%x is not recognized.\n", hw_link_info->phy_type); } @@ -614,14 +615,12 @@ static int i40e_get_link_ksettings(struct net_device *netdev, ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ? 
AUTONEG_ENABLE : AUTONEG_DISABLE); + /* Set media type settings */ switch (hw->phy.media_type) { case I40E_MEDIA_TYPE_BACKPLANE: - ethtool_link_ksettings_add_link_mode(ks, supported, - Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - Backplane); - ethtool_link_ksettings_add_link_mode(ks, advertising, - Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, Backplane); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); ethtool_link_ksettings_add_link_mode(ks, advertising, Backplane); ks->base.port = PORT_NONE; @@ -652,16 +651,14 @@ static int i40e_get_link_ksettings(struct net_device *netdev, switch (hw->fc.requested_mode) { case I40E_FC_FULL: - ethtool_link_ksettings_add_link_mode(ks, advertising, - Pause); + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); break; case I40E_FC_TX_PAUSE: ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause); break; case I40E_FC_RX_PAUSE: - ethtool_link_ksettings_add_link_mode(ks, advertising, - Pause); + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause); break; @@ -708,17 +705,14 @@ static int i40e_set_link_ksettings(struct net_device *netdev, i40e_partition_setting_complaint(pf); return -EOPNOTSUPP; } - if (vsi != pf->vsi[pf->lan_vsi]) return -EOPNOTSUPP; - if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET && hw->phy.media_type != I40E_MEDIA_TYPE_FIBER && hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE && hw->phy.media_type != I40E_MEDIA_TYPE_DA && hw->phy.link_info.link_info & I40E_AQ_LINK_UP) return -EOPNOTSUPP; - if (hw->device_id == I40E_DEV_ID_KX_B || hw->device_id == I40E_DEV_ID_KX_C || hw->device_id == I40E_DEV_ID_20G_KR2 || @@ -844,7 +838,6 @@ static int i40e_set_link_ksettings(struct net_device *netdev, */ if (!config.link_speed) config.link_speed = abilities.link_speed; - if (change || (abilities.link_speed != config.link_speed)) { /* copy over the rest of the abilities */ config.phy_type = abilities.phy_type; @@ -872,7 +865,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* make the aq call */ status = i40e_aq_set_phy_config(hw, &config, NULL); if (status) { - netdev_info(netdev, "Set phy config failed, err %s aq_err %s\n", + netdev_info(netdev, + "Set phy config failed, err %s aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); err = -EAGAIN; @@ -881,7 +875,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev, status = i40e_update_link_info(hw); if (status) - netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n", + netdev_dbg(netdev, + "Updating link info failed with err %s aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); @@ -2072,14 +2067,13 @@ static int __i40e_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames_irq = vsi->work_limit; ec->rx_max_coalesced_frames_irq = vsi->work_limit; - /* rx and tx usecs has per queue value. If user doesn't specify the queue, - * return queue 0's value to represent. + /* rx and tx usecs has per queue value. If user doesn't specify the + * queue, return queue 0's value to represent. 
*/ - if (queue < 0) { + if (queue < 0) queue = 0; - } else if (queue >= vsi->num_queue_pairs) { + else if (queue >= vsi->num_queue_pairs) return -EINVAL; - } rx_ring = vsi->rx_rings[queue]; tx_ring = vsi->tx_rings[queue]; @@ -2093,7 +2087,6 @@ static int __i40e_get_coalesce(struct net_device *netdev, ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; - /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite * fits the original intent of the ethtool variable, @@ -2143,7 +2136,6 @@ static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, * * Change the ITR settings for a specific queue. **/ - static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct ethtool_coalesce *ec, int queue) @@ -2265,8 +2257,8 @@ static int __i40e_set_coalesce(struct net_device *netdev, vsi->int_rate_limit); } - /* rx and tx usecs has per queue value. If user doesn't specify the queue, - * apply to all queues. + /* rx and tx usecs has per queue value. If user doesn't specify the + * queue, apply to all queues. */ if (queue < 0) { for (i = 0; i < vsi->num_queue_pairs; i++) From 6987bd25e2be49ec0c25c6c15ba2bcb6327f9ed4 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:38 -0700 Subject: [PATCH 1046/2177] i40e: group autoneg PHY types together This separates the setting of autoneg in i40e_phy_types_to_ethtool into its own conditional. Doing this adds clarity as what PHYs support/advertise autoneg and makes it easier to add new PHY types in the future. This also fixes an issue on devices with CRT_RETIMER where advertising autoneg was being set, but supported autoneg was not. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 87 +++++++++---------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 89ab398a7d30..30deae77e745 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -268,9 +268,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, *advertising = 0x0; if (phy_types & I40E_CAP_PHY_TYPE_SGMII) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_1000baseT_Full; - *advertising |= ADVERTISED_Autoneg; + *supported |= SUPPORTED_1000baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) *advertising |= ADVERTISED_1000baseT_Full; if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { @@ -289,9 +287,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, phy_types & I40E_CAP_PHY_TYPE_10GBASE_T || phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR || phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_10000baseT_Full; - *advertising |= ADVERTISED_Autoneg; + *supported |= SUPPORTED_10000baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) *advertising |= ADVERTISED_10000baseT_Full; } @@ -301,16 +297,12 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, *supported |= SUPPORTED_40000baseCR4_Full; if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU || phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_40000baseCR4_Full; - *advertising |= ADVERTISED_Autoneg; + *supported |= SUPPORTED_40000baseCR4_Full; if 
(hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB) *advertising |= ADVERTISED_40000baseCR4_Full; } if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_100baseT_Full; - *advertising |= ADVERTISED_Autoneg; + *supported |= SUPPORTED_100baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) *advertising |= ADVERTISED_100baseT_Full; } @@ -318,9 +310,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX || phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX || phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_1000baseT_Full; - *advertising |= ADVERTISED_Autoneg; + *supported |= SUPPORTED_1000baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) *advertising |= ADVERTISED_1000baseT_Full; } @@ -329,47 +319,54 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4) *supported |= SUPPORTED_40000baseLR4_Full; if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) { - *supported |= SUPPORTED_40000baseKR4_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_40000baseKR4_Full | - ADVERTISED_Autoneg; + *supported |= SUPPORTED_40000baseKR4_Full; + *advertising |= ADVERTISED_40000baseKR4_Full; } if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) { - *supported |= SUPPORTED_20000baseKR2_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; + *supported |= SUPPORTED_20000baseKR2_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_20GB) *advertising |= ADVERTISED_20000baseKR2_Full; } - if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) { - if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) - *supported |= SUPPORTED_10000baseKR_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; - if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) - *advertising |= ADVERTISED_10000baseKR_Full; - } if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) { - *supported |= SUPPORTED_10000baseKX4_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; + *supported |= SUPPORTED_10000baseKX4_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) *advertising |= ADVERTISED_10000baseKX4_Full; } - if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) { - if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) - *supported |= SUPPORTED_1000baseKX_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; - if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) - *advertising |= ADVERTISED_1000baseKX_Full; + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR && + !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { + *supported |= SUPPORTED_10000baseKR_Full; + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + *advertising |= ADVERTISED_10000baseKR_Full; } - if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR || - phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR || + if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX && + !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { + *supported |= SUPPORTED_1000baseKX_Full; + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) + *advertising |= ADVERTISED_1000baseKX_Full; + } + /* Autoneg PHY types */ + if (phy_types & I40E_CAP_PHY_TYPE_SGMII || + phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4 || + phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU || + phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4 || phy_types & 
I40E_CAP_PHY_TYPE_25GBASE_SR || - phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) { + phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR || + phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_T || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_T || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX || + phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) { *supported |= SUPPORTED_Autoneg; *advertising |= ADVERTISED_Autoneg; } From 211b4c140a9de0a672a8f5c3cbaa3639ef507205 Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Thu, 5 Oct 2017 14:53:39 -0700 Subject: [PATCH 1047/2177] i40e: Add new PHY types for 25G AOC and ACC support This patch adds support for 25G Active Optical Cables (AOC) and Active Copper Cables (ACC) PHY types. Signed-off-by: Sudheer Mogilappagari Signed-off-by: Krzysztof Malek Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 4 ++++ drivers/net/ethernet/intel/i40e/i40e_common.c | 2 ++ drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 ++ drivers/net/ethernet/intel/i40e/i40e_type.h | 4 ++++ drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 4 ++++ 5 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index a8f65aed5421..6a5db1b33fa2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1771,6 +1771,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_25GBASE_AOC = 0x23, + I40E_PHY_TYPE_25GBASE_ACC = 0x24, I40E_PHY_TYPE_MAX, I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD, I40E_PHY_TYPE_EMPTY = 0xFE, @@ -1831,6 +1833,8 @@ struct i40e_aq_get_phy_abilities_resp { #define I40E_AQ_PHY_TYPE_EXT_25G_CR 0X02 #define I40E_AQ_PHY_TYPE_EXT_25G_SR 0x04 #define I40E_AQ_PHY_TYPE_EXT_25G_LR 0x08 +#define I40E_AQ_PHY_TYPE_EXT_25G_AOC 0x10 +#define I40E_AQ_PHY_TYPE_EXT_25G_ACC 0x20 u8 fec_cfg_curr_mod_ext_info; #define I40E_AQ_ENABLE_FEC_KR 0x01 #define I40E_AQ_ENABLE_FEC_RS 0x02 diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index aeb497258f20..8d0ee006606b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1180,6 +1180,8 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) case I40E_PHY_TYPE_40GBASE_AOC: case I40E_PHY_TYPE_10GBASE_AOC: case I40E_PHY_TYPE_25GBASE_CR: + case I40E_PHY_TYPE_25GBASE_AOC: + case I40E_PHY_TYPE_25GBASE_ACC: media = I40E_MEDIA_TYPE_DA; break; case I40E_PHY_TYPE_1000BASE_KX: diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 30deae77e745..a4210ccdaa5f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -502,6 +502,8 @@ static void i40e_get_settings_link_up(struct i40e_hw 
*hw, case I40E_PHY_TYPE_25GBASE_CR: case I40E_PHY_TYPE_25GBASE_SR: case I40E_PHY_TYPE_25GBASE_LR: + case I40E_PHY_TYPE_25GBASE_AOC: + case I40E_PHY_TYPE_25GBASE_ACC: supported = SUPPORTED_Autoneg; advertising = ADVERTISED_Autoneg; /* TODO: add speeds when ethtool is ready to support*/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 0410fcbdbb94..17a99b53acd9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -271,6 +271,10 @@ struct i40e_phy_info { I40E_PHY_TYPE_OFFSET) #define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \ I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_AOC BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC + \ + I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \ + I40E_PHY_TYPE_OFFSET) #define I40E_HW_CAP_MAX_GPIO 30 /* Capabilities of a PF or a VF or the whole device */ struct i40e_hw_capabilities { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 60c892f559b9..463e331a70a9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1767,6 +1767,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_25GBASE_AOC = 0x23, + I40E_PHY_TYPE_25GBASE_ACC = 0x24, I40E_PHY_TYPE_MAX, I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD, I40E_PHY_TYPE_EMPTY = 0xFE, @@ -1827,6 +1829,8 @@ struct i40e_aq_get_phy_abilities_resp { #define I40E_AQ_PHY_TYPE_EXT_25G_CR 0X02 #define I40E_AQ_PHY_TYPE_EXT_25G_SR 0x04 #define I40E_AQ_PHY_TYPE_EXT_25G_LR 0x08 +#define I40E_AQ_PHY_TYPE_EXT_25G_AOC 0x10 +#define I40E_AQ_PHY_TYPE_EXT_25G_ACC 0x20 u8 fec_cfg_curr_mod_ext_info; #define I40E_AQ_ENABLE_FEC_KR 0x01 #define I40E_AQ_ENABLE_FEC_RS 0x02 From 5a6cd6de76ae78b651e7c36eba8b1da465d65f06 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:40 -0700 Subject: [PATCH 1048/2177] ethtool: add ethtool_intersect_link_masks This function provides a way to intersect two link masks together to find the common ground between them. For example in i40e, the driver first generates link masks for what is supported by the PHY type. The driver then gets the link masks for what the NVM supports. The resulting intersection between them yields what can truly be supported. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/ethtool.h | 10 ++++++++++ net/core/ethtool.c | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4587a4c36923..c77fa3529e15 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -163,6 +163,16 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +/** + * ethtool_intersect_link_masks - Given two link masks, AND them together + * @dst: first mask and where result is stored + * @src: second mask to intersect with + * + * Given two link mode masks, AND them together and save the result in dst. 
+ */ +void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst, + struct ethtool_link_ksettings *src); + void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3228411ada0f..0c406306792a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -403,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } +/* Given two link masks, AND them together and save the result in dst. */ +void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst, + struct ethtool_link_ksettings *src) +{ + unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS); + unsigned int idx = 0; + + for (; idx < size; idx++) { + dst->link_modes.supported[idx] &= + src->link_modes.supported[idx]; + dst->link_modes.advertising[idx] &= + src->link_modes.advertising[idx]; + } +} +EXPORT_SYMBOL(ethtool_intersect_link_masks); + void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) { From 1eaae5198e0db1f523fa3432ded69247aa33bf20 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:41 -0700 Subject: [PATCH 1049/2177] i40e: convert i40e_phy_type_to_ethtool to new API We are still largely using the old ethtool API macros. This is problematic because eventually they will be removed and they only support 32 bits of PHY types. This overhauls i40e_phy_type_to_ethtool to use only the new API. Doing this also allows us to provide much better support for newer 25G and 10G PHY types which is included here as well. The remaining usages of the old ethtool API will be addressed in other patches in the series. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 204 ++++++++++++------ 1 file changed, 140 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a4210ccdaa5f..0cef8aa85c1d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -254,95 +254,180 @@ static void i40e_partition_setting_complaint(struct i40e_pf *pf) /** * i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes * @pf: PF struct with phy_types - * @supported: pointer to the ethtool supported variable to fill in - * @advertising: pointer to the ethtool advertising variable to fill in + * @ks: ethtool link ksettings struct to fill out * **/ -static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, - u32 *advertising) +static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, + struct ethtool_link_ksettings *ks) { struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info; u64 phy_types = pf->hw.phy.phy_types; - *supported = 0x0; - *advertising = 0x0; + ethtool_link_ksettings_zero_link_mode(ks, supported); + ethtool_link_ksettings_zero_link_mode(ks, advertising); if (phy_types & I40E_CAP_PHY_TYPE_SGMII) { - *supported |= SUPPORTED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - *advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { - *supported |= SUPPORTED_100baseT_Full; - *advertising |= ADVERTISED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 100baseT_Full); + 
ethtool_link_ksettings_add_link_mode(ks, advertising, + 100baseT_Full); } } if (phy_types & I40E_CAP_PHY_TYPE_XAUI || phy_types & I40E_CAP_PHY_TYPE_XFI || phy_types & I40E_CAP_PHY_TYPE_SFI || phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) - *supported |= SUPPORTED_10000baseT_Full; - if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_T || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) { - *supported |= SUPPORTED_10000baseT_Full; + phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - *advertising |= ADVERTISED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_T) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); } if (phy_types & I40E_CAP_PHY_TYPE_XLAUI || phy_types & I40E_CAP_PHY_TYPE_XLPPI || phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC) - *supported |= SUPPORTED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU || phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) { - *supported |= SUPPORTED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB) - *advertising |= ADVERTISED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 40000baseCR4_Full); } if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) { - *supported |= SUPPORTED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 100baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) - *advertising |= ADVERTISED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100baseT_Full); } - if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T || - phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX || - phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX || - phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) { - *supported |= SUPPORTED_1000baseT_Full; + if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - *advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); } if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4) - *supported |= SUPPORTED_40000baseSR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseSR4_Full); if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4) - *supported |= SUPPORTED_40000baseLR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseLR4_Full); if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) { - *supported |= SUPPORTED_40000baseKR4_Full; - *advertising |= ADVERTISED_40000baseKR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseLR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 40000baseLR4_Full); } if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) { - *supported |= SUPPORTED_20000baseKR2_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 20000baseKR2_Full); if (hw_link_info->requested_speeds & 
I40E_LINK_SPEED_20GB) - *advertising |= ADVERTISED_20000baseKR2_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 20000baseKR2_Full); } if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) { - *supported |= SUPPORTED_10000baseKX4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseKX4_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - *advertising |= ADVERTISED_10000baseKX4_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseKX4_Full); } if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR && !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { - *supported |= SUPPORTED_10000baseKR_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseKR_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - *advertising |= ADVERTISED_10000baseKR_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseKR_Full); } if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX && !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { - *supported |= SUPPORTED_1000baseKX_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseKX_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - *advertising |= ADVERTISED_1000baseKX_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseKX_Full); + } + /* need to add 25G PHY types */ + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseKR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseKR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseCR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseSR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseSR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseCR_Full); + } + /* need to add new 10G PHY types */ + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseCR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseCR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseSR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseSR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseLR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseLR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) { + 
ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseX_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseX_Full); } /* Autoneg PHY types */ if (phy_types & I40E_CAP_PHY_TYPE_SGMII || @@ -367,8 +452,10 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX || phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX || phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) { - *supported |= SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + Autoneg); } } @@ -385,9 +472,8 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, struct i40e_pf *pf) { struct i40e_link_status *hw_link_info = &hw->phy.link_info; + struct ethtool_link_ksettings cap_ksettings; u32 link_speed = hw_link_info->link_speed; - u32 e_advertising = 0x0; - u32 e_supported = 0x0; u32 supported, advertising; ethtool_convert_link_mode_to_legacy_u32(&supported, @@ -519,11 +605,13 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, * current PHY type, get what is supported by the NVM and intersect * them to get what is truly supported */ - i40e_phy_type_to_ethtool(pf, &e_supported, - &e_advertising); - - supported = supported & e_supported; - advertising = advertising & e_advertising; + ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.advertising, + advertising); + memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings)); + i40e_phy_type_to_ethtool(pf, &cap_ksettings); + ethtool_intersect_link_masks(ks, &cap_ksettings); /* Set speed and duplex */ switch (link_speed) { @@ -549,11 +637,6 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, break; } ks->base.duplex = DUPLEX_FULL; - - ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.advertising, - advertising); } /** @@ -568,17 +651,10 @@ static void i40e_get_settings_link_down(struct i40e_hw *hw, struct ethtool_link_ksettings *ks, struct i40e_pf *pf) { - u32 supported, advertising; - /* link is down and the driver needs to fall back on * supported phy types to figure out what info to display */ - i40e_phy_type_to_ethtool(pf, &supported, &advertising); - - ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.advertising, - advertising); + i40e_phy_type_to_ethtool(pf, ks); /* With no link speed and duplex are unknown */ ks->base.speed = SPEED_UNKNOWN; From 79f04a3aba91531a3b979f6ebd846367a664638f Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:42 -0700 Subject: [PATCH 1050/2177] i40e: convert i40e_get_settings_link_up to new API This removes references to old ethtool API macros and functions in i40e_get_settings_link_up as part of the process of converting to the new API. The new API also allows us to provide more explicit support for new 25G and 10G PHY types so some of the PHY types have been adjusted where necessary as well. 
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 187 ++++++++++++------ 1 file changed, 125 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 0cef8aa85c1d..913ba91fac6c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -474,125 +474,192 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, struct i40e_link_status *hw_link_info = &hw->phy.link_info; struct ethtool_link_ksettings cap_ksettings; u32 link_speed = hw_link_info->link_speed; - u32 supported, advertising; - - ethtool_convert_link_mode_to_legacy_u32(&supported, - ks->link_modes.supported); - ethtool_convert_link_mode_to_legacy_u32(&advertising, - ks->link_modes.advertising); /* Initialize supported and advertised settings based on phy settings */ switch (hw_link_info->phy_type) { case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: - supported = SUPPORTED_Autoneg | - SUPPORTED_40000baseCR4_Full; - advertising = ADVERTISED_Autoneg | - ADVERTISED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 40000baseCR4_Full); break; case I40E_PHY_TYPE_XLAUI: case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: - supported = SUPPORTED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); break; case I40E_PHY_TYPE_40GBASE_SR4: - supported = SUPPORTED_40000baseSR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseSR4_Full); break; case I40E_PHY_TYPE_40GBASE_LR4: - supported = SUPPORTED_40000baseLR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseLR4_Full); break; + case I40E_PHY_TYPE_25GBASE_SR: + case I40E_PHY_TYPE_25GBASE_LR: case I40E_PHY_TYPE_10GBASE_SR: case I40E_PHY_TYPE_10GBASE_LR: case I40E_PHY_TYPE_1000BASE_SX: case I40E_PHY_TYPE_1000BASE_LX: - supported = SUPPORTED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseLR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseLR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseX_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseX_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); if (hw_link_info->module_type[2] & I40E_MODULE_TYPE_1000BASE_SX || hw_link_info->module_type[2] & I40E_MODULE_TYPE_1000BASE_LX) { - supported |= SUPPORTED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode( + ks, advertising, 1000baseT_Full); } if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - advertising |= 
ADVERTISED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); break; case I40E_PHY_TYPE_10GBASE_T: case I40E_PHY_TYPE_1000BASE_T: case I40E_PHY_TYPE_100BASE_TX: - supported = SUPPORTED_Autoneg | - SUPPORTED_10000baseT_Full | - SUPPORTED_1000baseT_Full | - SUPPORTED_100baseT_Full; - advertising = ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - advertising |= ADVERTISED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) - advertising |= ADVERTISED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100baseT_Full); break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: - supported = SUPPORTED_Autoneg | - SUPPORTED_1000baseT_Full; - advertising = ADVERTISED_Autoneg | - ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); break; case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: - supported = SUPPORTED_Autoneg | - SUPPORTED_10000baseT_Full; - advertising = ADVERTISED_Autoneg | - ADVERTISED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: case I40E_PHY_TYPE_SFI: case I40E_PHY_TYPE_10GBASE_SFPP_CU: case I40E_PHY_TYPE_10GBASE_AOC: - supported = SUPPORTED_10000baseT_Full; - advertising = SUPPORTED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); break; case I40E_PHY_TYPE_SGMII: - supported = SUPPORTED_Autoneg | - SUPPORTED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { - supported |= SUPPORTED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 100baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) - advertising |= ADVERTISED_100baseT_Full; + ethtool_link_ksettings_add_link_mode( + ks, advertising, 100baseT_Full); } break; case I40E_PHY_TYPE_40GBASE_KR4: + case I40E_PHY_TYPE_25GBASE_KR: case I40E_PHY_TYPE_20GBASE_KR2: case I40E_PHY_TYPE_10GBASE_KR: case I40E_PHY_TYPE_10GBASE_KX4: case 
I40E_PHY_TYPE_1000BASE_KX: - supported |= SUPPORTED_40000baseKR4_Full | - SUPPORTED_20000baseKR2_Full | - SUPPORTED_10000baseKR_Full | - SUPPORTED_10000baseKX4_Full | - SUPPORTED_1000baseKX_Full | - SUPPORTED_Autoneg; - advertising |= ADVERTISED_40000baseKR4_Full | - ADVERTISED_20000baseKR2_Full | - ADVERTISED_10000baseKR_Full | - ADVERTISED_10000baseKX4_Full | - ADVERTISED_1000baseKX_Full | - ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseKR4_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 20000baseKR2_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseKX4_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseKX_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 40000baseKR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 20000baseKR2_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseKX4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseKX_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); break; - case I40E_PHY_TYPE_25GBASE_KR: case I40E_PHY_TYPE_25GBASE_CR: - case I40E_PHY_TYPE_25GBASE_SR: - case I40E_PHY_TYPE_25GBASE_LR: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseCR_Full); + break; case I40E_PHY_TYPE_25GBASE_AOC: case I40E_PHY_TYPE_25GBASE_ACC: - supported = SUPPORTED_Autoneg; - advertising = ADVERTISED_Autoneg; - /* TODO: add speeds when ethtool is ready to support*/ + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseCR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseCR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseCR_Full); break; default: /* if we got here and link is up something bad is afoot */ @@ -605,10 +672,6 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, * current PHY type, get what is supported by the NVM and intersect * them to get what is truly supported */ - ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(ks->link_modes.advertising, - advertising); memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings)); i40e_phy_type_to_ethtool(pf, &cap_ksettings); ethtool_intersect_link_masks(ks, &cap_ksettings); From 636b62d778302149216fad6aaa5e8d84c934a794 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:43 -0700 Subject: [PATCH 1051/2177] i40e: rename 'change' variable to 'autoneg_changed' This variable isn't actually very descriptive and makes the code a bit confusing as to what it is being used for. 
This patch enhances the variable with the longer name, 'autoneg_changed', which makes it clear we are concerned with autoneg changing in this context. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 913ba91fac6c..9c70555bf49c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -822,14 +822,14 @@ static int i40e_set_link_ksettings(struct net_device *netdev, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_aq_get_phy_abilities_resp abilities; + struct ethtool_link_ksettings safe_ks; + struct ethtool_link_ksettings copy_ks; struct i40e_aq_set_phy_config config; struct i40e_pf *pf = np->vsi->back; struct i40e_vsi *vsi = np->vsi; struct i40e_hw *hw = &pf->hw; - struct ethtool_link_ksettings safe_ks; - struct ethtool_link_ksettings copy_ks; + bool autoneg_changed = false; i40e_status status = 0; - bool change = false; int timeout = 50; int err = 0; u32 autoneg; @@ -922,7 +922,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* Autoneg is allowed to change */ config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_AN; - change = true; + autoneg_changed = true; } } else { /* If autoneg is currently enabled */ @@ -942,7 +942,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* Autoneg is allowed to change */ config.abilities = abilities.abilities & ~I40E_AQ_PHY_ENABLE_AN; - change = true; + autoneg_changed = true; } } @@ -976,7 +976,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, */ if (!config.link_speed) config.link_speed = abilities.link_speed; - if (change || (abilities.link_speed != config.link_speed)) { + if (autoneg_changed || abilities.link_speed != config.link_speed) { /* copy over the rest of the abilities */ config.phy_type = abilities.phy_type; config.phy_type_ext = abilities.phy_type_ext; From cee919959b6193325f34223370650402dee34e30 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:44 -0700 Subject: [PATCH 1052/2177] i40e: convert i40e_set_link_ksettings to new API This finishes off the conversion to the new ethtool API by removing the old macros being used in i40e_set_link_ksettings and replacing them with shiny new ones. This conversion also allows us to provide link speed support for new 25G and 10G macros which is included here as well. 
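Before the diff, a compressed sketch of the two ideas the conversion leans on: validating the requested advertising mask against the supported mask with bitmap_subset(), and probing individual modes with ethtool_link_ksettings_test_link_mode(). The function and the speed-flag placeholders below are hypothetical, not i40e definitions:

    #include <linux/bitmap.h>
    #include <linux/bitops.h>
    #include <linux/errno.h>
    #include <linux/ethtool.h>
    #include <linux/types.h>

    /* Illustrative only: map advertised link modes onto device speed flags. */
    static int example_map_advertising(const struct ethtool_link_ksettings *req,
                                       const struct ethtool_link_ksettings *hw,
                                       u8 *speed_flags)
    {
            /* every advertised mode must also be supported by the hardware */
            if (!bitmap_subset(req->link_modes.advertising,
                               hw->link_modes.supported,
                               __ETHTOOL_LINK_MODE_MASK_NBITS))
                    return -EINVAL;

            *speed_flags = 0;
            if (ethtool_link_ksettings_test_link_mode(req, advertising,
                                                      10000baseSR_Full))
                    *speed_flags |= BIT(0);    /* stand-in for a 10G flag */
            if (ethtool_link_ksettings_test_link_mode(req, advertising,
                                                      25000baseSR_Full))
                    *speed_flags |= BIT(1);    /* stand-in for a 25G flag */
            return 0;
    }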
Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 93 ++++++++++++------- 1 file changed, 57 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 9c70555bf49c..9eb618799a30 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -832,9 +832,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, i40e_status status = 0; int timeout = 50; int err = 0; - u32 autoneg; - u32 advertise; - u32 tmp; + u8 autoneg; /* Changing port settings is not supported if this isn't the * port's controlling PF @@ -862,28 +860,34 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* copy the ksettings to copy_ks to avoid modifying the origin */ memcpy(©_ks, ks, sizeof(struct ethtool_link_ksettings)); + /* save autoneg out of ksettings */ + autoneg = copy_ks.base.autoneg; + + memset(&safe_ks, 0, sizeof(safe_ks)); + /* Get link modes supported by hardware and check against modes + * requested by the user. Return an error if unsupported mode was set. + */ + i40e_phy_type_to_ethtool(pf, &safe_ks); + if (!bitmap_subset(copy_ks.link_modes.advertising, + safe_ks.link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS)) + return -EINVAL; + /* get our own copy of the bits to check against */ memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings)); + safe_ks.base.cmd = copy_ks.base.cmd; + safe_ks.base.link_mode_masks_nwords = + copy_ks.base.link_mode_masks_nwords; i40e_get_link_ksettings(netdev, &safe_ks); - /* save autoneg and speed out of ksettings */ - autoneg = ks->base.autoneg; - ethtool_convert_link_mode_to_legacy_u32(&advertise, - ks->link_modes.advertising); - - /* set autoneg and speed back to what they currently are */ + /* set autoneg back to what it currently is */ copy_ks.base.autoneg = safe_ks.base.autoneg; - ethtool_convert_link_mode_to_legacy_u32( - &tmp, safe_ks.link_modes.advertising); - ethtool_convert_legacy_u32_to_link_mode( - copy_ks.link_modes.advertising, tmp); - copy_ks.base.cmd = safe_ks.base.cmd; - - /* If copy_ks and safe_ks are not the same now, then they are - * trying to set something that we do not support + /* If copy_ks.base and safe_ks.base are not the same now, then they are + * trying to set something that we do not support. 
*/ - if (memcmp(©_ks, &safe_ks, sizeof(struct ethtool_link_ksettings))) + if (memcmp(©_ks.base, &safe_ks.base, + sizeof(struct ethtool_link_settings))) return -EOPNOTSUPP; while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { @@ -946,28 +950,45 @@ static int i40e_set_link_ksettings(struct net_device *netdev, } } - ethtool_convert_link_mode_to_legacy_u32(&tmp, - safe_ks.link_modes.supported); - if (advertise & ~tmp) { - err = -EINVAL; - goto done; - } - - if (advertise & ADVERTISED_100baseT_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 100baseT_Full)) config.link_speed |= I40E_LINK_SPEED_100MB; - if (advertise & ADVERTISED_1000baseT_Full || - advertise & ADVERTISED_1000baseKX_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseT_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseX_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseKX_Full)) config.link_speed |= I40E_LINK_SPEED_1GB; - if (advertise & ADVERTISED_10000baseT_Full || - advertise & ADVERTISED_10000baseKX4_Full || - advertise & ADVERTISED_10000baseKR_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseT_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseKX4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseKR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseCR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseSR_Full)) config.link_speed |= I40E_LINK_SPEED_10GB; - if (advertise & ADVERTISED_20000baseKR2_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 20000baseKR2_Full)) config.link_speed |= I40E_LINK_SPEED_20GB; - if (advertise & ADVERTISED_40000baseKR4_Full || - advertise & ADVERTISED_40000baseCR4_Full || - advertise & ADVERTISED_40000baseSR4_Full || - advertise & ADVERTISED_40000baseLR4_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 25000baseCR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 25000baseKR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 25000baseSR_Full)) + config.link_speed |= I40E_LINK_SPEED_25GB; + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 40000baseKR4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 40000baseCR4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 40000baseSR4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 40000baseLR4_Full)) config.link_speed |= I40E_LINK_SPEED_40GB; /* If speed didn't get set, set it to what it currently is. From 6c32e0d9fdd56a7af54512aff700e20d85563499 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Mon, 9 Oct 2017 15:48:45 -0700 Subject: [PATCH 1053/2177] i40e: fix u64 division usage Commit 52eb1ff93e98 ("i40e: Add support setting TC max bandwidth rates") and commit 1ea6f21ae530 ("i40e: Refactor VF BW rate limiting") add some needed functionality for TC bandwidth rate limiting. Unfortunately they introduce several usages of unsigned 64-bit division which needs to be handled special by the kernel to support all architectures. 
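As background, not part of the patch: on 32-bit architectures the kernel cannot use a plain '/' on u64 operands, so it provides helpers such as do_div() and div_u64() in linux/math64.h. A minimal sketch using the same divisors the driver uses:

    #include <linux/math64.h>
    #include <linux/types.h>

    /* Illustrative only: convert a tc rate in bytes/s to 50 Mbit/s credits. */
    static u64 example_rate_to_credits(u64 rate_bytes_per_sec)
    {
            u64 mbps = rate_bytes_per_sec;

            /* do_div() divides its first argument in place, returns remainder */
            do_div(mbps, 125000);            /* bytes/s -> Mbit/s */

            /* div_u64() is the expression form when the remainder is unused */
            return div_u64(mbps, 50);        /* 50 Mbit/s per BW credit */
    }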
Fixes: 52eb1ff93e98 ("i40e: Add support setting TC max bandwidth rates") Fixes: 1ea6f21ae530 ("i40e: Refactor VF BW rate limiting") Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 3 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 58 ++++++++++++++------- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 266e1dc5e786..eb017763646d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -130,7 +130,8 @@ /* BW rate limiting */ #define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */ -#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ +#define I40E_BW_MBPS_DIVISOR 125000 /* rate / (1000000 / 8) Mbps */ +#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ /* driver state flags */ enum i40e_state_t { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index bb31d53c4923..1252aaf92fd3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5442,6 +5442,7 @@ int i40e_get_link_speed(struct i40e_vsi *vsi) int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) { struct i40e_pf *pf = vsi->back; + u64 credits = 0; int speed = 0; int ret = 0; @@ -5459,8 +5460,9 @@ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) } /* Tx rate credits are in values of 50Mbps, 0 is disabled */ - ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, - max_tx_rate / I40E_BW_CREDIT_DIVISOR, + credits = max_tx_rate; + do_div(credits, I40E_BW_CREDIT_DIVISOR); + ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits, I40E_MAX_BW_INACTIVE_ACCUM, NULL); if (ret) dev_err(&pf->pdev->dev, @@ -6063,13 +6065,17 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, /* configure VSI for BW limit */ if (ch->max_tx_rate) { + u64 credits = ch->max_tx_rate; + if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate)) return -EINVAL; + do_div(credits, I40E_BW_CREDIT_DIVISOR); dev_dbg(&pf->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", ch->max_tx_rate, - ch->max_tx_rate / I40E_BW_CREDIT_DIVISOR, ch->seid); + credits, + ch->seid); } /* in case of VF, this will be main SRIOV VSI */ @@ -6090,6 +6096,7 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, static int i40e_configure_queue_channels(struct i40e_vsi *vsi) { struct i40e_channel *ch; + u64 max_rate = 0; int ret = 0, i; /* Create app vsi with the TCs. 
Main VSI with TC0 is already set up */ @@ -6110,8 +6117,9 @@ static int i40e_configure_queue_channels(struct i40e_vsi *vsi) /* Bandwidth limit through tc interface is in bytes/s, * change to Mbit/s */ - ch->max_tx_rate = - vsi->mqprio_qopt.max_rate[i] / (1000000 / 8); + max_rate = vsi->mqprio_qopt.max_rate[i]; + do_div(max_rate, I40E_BW_MBPS_DIVISOR); + ch->max_tx_rate = max_rate; list_add_tail(&ch->list, &vsi->ch_list); @@ -6540,6 +6548,7 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, struct tc_mqprio_qopt_offload *mqprio_qopt) { u64 sum_max_rate = 0; + u64 max_rate = 0; int i; if (mqprio_qopt->qopt.offset[0] != 0 || @@ -6554,7 +6563,9 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, "Invalid min tx rate (greater than 0) specified\n"); return -EINVAL; } - sum_max_rate += (mqprio_qopt->max_rate[i] / (1000000 / 8)); + max_rate = mqprio_qopt->max_rate[i]; + do_div(max_rate, I40E_BW_MBPS_DIVISOR); + sum_max_rate += max_rate; if (i >= mqprio_qopt->qopt.num_tc - 1) break; @@ -6698,14 +6709,18 @@ config_tc: if (pf->flags & I40E_FLAG_TC_MQPRIO) { if (vsi->mqprio_qopt.max_rate[0]) { - u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0] / - (1000000 / 8); + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + + do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (!ret) { + u64 credits = max_tx_rate; + + do_div(credits, I40E_BW_CREDIT_DIVISOR); dev_dbg(&vsi->back->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", max_tx_rate, - max_tx_rate / I40E_BW_CREDIT_DIVISOR, + credits, vsi->seid); } else { need_reset = true; @@ -8166,14 +8181,17 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) return ret; } if (ch->max_tx_rate) { + u64 credits = ch->max_tx_rate; + if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate)) return -EINVAL; + do_div(credits, I40E_BW_CREDIT_DIVISOR); dev_dbg(&vsi->back->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", ch->max_tx_rate, - ch->max_tx_rate / I40E_BW_CREDIT_DIVISOR, + credits, ch->seid); } } @@ -8446,17 +8464,21 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } if (vsi->mqprio_qopt.max_rate[0]) { - u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0] / (1000000 / 8); + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + u64 credits = 0; + do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); - if (!ret) - dev_dbg(&vsi->back->pdev->dev, - "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", - max_tx_rate, - max_tx_rate / I40E_BW_CREDIT_DIVISOR, - vsi->seid); - else + if (ret) goto end_unlock; + + credits = max_tx_rate; + do_div(credits, I40E_BW_CREDIT_DIVISOR); + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + max_tx_rate, + credits, + vsi->seid); } /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs From dfdddd92a5781bb4bbd176a5c85b7244580a8efe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Sep 2017 12:24:51 +0200 Subject: [PATCH 1054/2177] iwlwifi: mvm: allocate reorder buffer according to need Now that we may have up to 256 entries per reorder buffer, and possibly up to 16 queues, we can use a LOT of memory for this (64k for each station). Allocate it according to what we need, which is of course much less for HT stations (only 16k at a max of 16 queues). 
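A simplified sketch of the sizing scheme described above may help before the detailed diff; the structure and function names here are illustrative stand-ins, not the driver's own:

    #include <linux/cache.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    /* Illustrative only: per-queue entry slices carved from one allocation. */
    struct example_entry {
            unsigned long reorder_time;
            /* the real entry also embeds a frame queue head */
    };

    struct example_baid_data {
            u16 entries_per_queue;           /* padded up to a cache line */
            struct example_entry entries[];  /* num_queues * entries_per_queue */
    };

    static struct example_baid_data *
    example_alloc(unsigned int num_queues, unsigned int buf_size)
    {
            size_t per_queue = buf_size * sizeof(struct example_entry);
            struct example_baid_data *data;

            /* pad each queue's slice to avoid cache-line sharing */
            per_queue = ALIGN(per_queue, SMP_CACHE_BYTES);

            data = kzalloc(sizeof(*data) + num_queues * per_queue, GFP_KERNEL);
            if (!data)
                    return NULL;

            data->entries_per_queue = per_queue / sizeof(struct example_entry);
            return data;
    }

    static struct example_entry *
    example_queue_entries(struct example_baid_data *data, unsigned int queue)
    {
            return &data->entries[queue * data->entries_per_queue];
    }

Allocating buf_size entries per queue instead of the 256-entry maximum is where the memory saving for HT stations comes from.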
However, this comes at the expense of complicating the code a bit to calculate the right entry structure to use for each frame. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 39 ++++++++++++++++--- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 30 +++++++++----- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 38 ++++++++++++++++-- 3 files changed, 90 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index bf25c3ce7c95..52a2f49132e7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -590,8 +590,6 @@ enum iwl_mvm_tdls_cs_state { * @last_amsdu: track last ASMDU SN for duplication detection * @last_sub_index: track ASMDU sub frame index for duplication detection * @tid: the tid - * @entries: list of skbs stored - * @reorder_time: time the packet was stored in the reorder buffer * @reorder_timer: timer for frames are in the reorder buffer. For AMSDU * it is the time of last received sub-frame * @removed: prevent timer re-arming @@ -608,8 +606,6 @@ struct iwl_mvm_reorder_buffer { u16 last_amsdu; u8 last_sub_index; u8 tid; - struct sk_buff_head entries[IEEE80211_MAX_AMPDU_BUF]; - unsigned long reorder_time[IEEE80211_MAX_AMPDU_BUF]; struct timer_list reorder_timer; bool removed; bool valid; @@ -617,16 +613,39 @@ struct iwl_mvm_reorder_buffer { struct iwl_mvm *mvm; } ____cacheline_aligned_in_smp; +/** + * struct _iwl_mvm_reorder_buf_entry - reorder buffer entry per-queue/per-seqno + * @frames: list of skbs stored + * @reorder_time: time the packet was stored in the reorder buffer + */ +struct _iwl_mvm_reorder_buf_entry { + struct sk_buff_head frames; + unsigned long reorder_time; +}; + +/* make this indirection to get the aligned thing */ +struct iwl_mvm_reorder_buf_entry { + struct _iwl_mvm_reorder_buf_entry e; +} +#ifndef __CHECKER__ +/* sparse doesn't like this construct: "bad integer constant expression" */ +__aligned(roundup_pow_of_two(sizeof(struct _iwl_mvm_reorder_buf_entry))) +#endif +; + /** * struct iwl_mvm_baid_data - BA session data * @sta_id: station id * @tid: tid of the session * @baid baid of the session * @timeout: the timeout set in the addba request + * @entries_per_queue: # of buffers per queue, this actually gets + * aligned up to avoid cache line sharing between queues * @last_rx: last rx jiffies, updated only if timeout passed from last update * @session_timer: timer to check if BA session expired, runs at 2 * timeout * @mvm: mvm pointer, needed for timer context * @reorder_buf: reorder buffer, allocated per queue + * @reorder_buf_data: data */ struct iwl_mvm_baid_data { struct rcu_head rcu_head; @@ -634,12 +653,22 @@ struct iwl_mvm_baid_data { u8 tid; u8 baid; u16 timeout; + u16 entries_per_queue; unsigned long last_rx; struct timer_list session_timer; struct iwl_mvm *mvm; - struct iwl_mvm_reorder_buffer reorder_buf[]; + struct iwl_mvm_reorder_buffer reorder_buf[IWL_MAX_RX_HW_QUEUES]; + struct iwl_mvm_reorder_buf_entry entries[]; }; +static inline struct iwl_mvm_baid_data * +iwl_mvm_baid_data_from_reorder_buf(struct iwl_mvm_reorder_buffer *buf) +{ + return (void *)((u8 *)buf - + offsetof(struct iwl_mvm_baid_data, reorder_buf) - + sizeof(*buf) * buf->queue); +} + /* * enum iwl_mvm_queue_status - queue status * @IWL_MVM_QUEUE_FREE: the queue is not allocated nor reserved diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c 
b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 5e679859f948..bb1a1ac9f6ed 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -412,6 +412,11 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, struct iwl_mvm_reorder_buffer *reorder_buf, u16 nssn) { + struct iwl_mvm_baid_data *baid_data = + iwl_mvm_baid_data_from_reorder_buf(reorder_buf); + struct iwl_mvm_reorder_buf_entry *entries = + &baid_data->entries[reorder_buf->queue * + baid_data->entries_per_queue]; u16 ssn = reorder_buf->head_sn; lockdep_assert_held(&reorder_buf->lock); @@ -422,7 +427,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, while (iwl_mvm_is_sn_less(ssn, nssn, reorder_buf->buf_size)) { int index = ssn % reorder_buf->buf_size; - struct sk_buff_head *skb_list = &reorder_buf->entries[index]; + struct sk_buff_head *skb_list = &entries[index].e.frames; struct sk_buff *skb; ssn = ieee80211_sn_inc(ssn); @@ -445,11 +450,11 @@ set_timer: if (reorder_buf->num_stored && !reorder_buf->removed) { u16 index = reorder_buf->head_sn % reorder_buf->buf_size; - while (skb_queue_empty(&reorder_buf->entries[index])) + while (skb_queue_empty(&entries[index].e.frames)) index = (index + 1) % reorder_buf->buf_size; /* modify timer to match next frame's expiration time */ mod_timer(&reorder_buf->reorder_timer, - reorder_buf->reorder_time[index] + 1 + + entries[index].e.reorder_time + 1 + RX_REORDER_BUF_TIMEOUT_MQ); } else { del_timer(&reorder_buf->reorder_timer); @@ -459,6 +464,10 @@ set_timer: void iwl_mvm_reorder_timer_expired(unsigned long data) { struct iwl_mvm_reorder_buffer *buf = (void *)data; + struct iwl_mvm_baid_data *baid_data = + iwl_mvm_baid_data_from_reorder_buf(buf); + struct iwl_mvm_reorder_buf_entry *entries = + &baid_data->entries[buf->queue * baid_data->entries_per_queue]; int i; u16 sn = 0, index = 0; bool expired = false; @@ -474,7 +483,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) for (i = 0; i < buf->buf_size ; i++) { index = (buf->head_sn + i) % buf->buf_size; - if (skb_queue_empty(&buf->entries[index])) { + if (skb_queue_empty(&entries[index].e.frames)) { /* * If there is a hole and the next frame didn't expire * we want to break and not advance SN @@ -482,7 +491,8 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) cont = false; continue; } - if (!cont && !time_after(jiffies, buf->reorder_time[index] + + if (!cont && + !time_after(jiffies, entries[index].e.reorder_time + RX_REORDER_BUF_TIMEOUT_MQ)) break; @@ -515,7 +525,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) * accordingly to this frame. */ mod_timer(&buf->reorder_timer, - buf->reorder_time[index] + + entries[index].e.reorder_time + 1 + RX_REORDER_BUF_TIMEOUT_MQ); } spin_unlock(&buf->lock); @@ -610,6 +620,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, u8 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; u8 sub_frame_idx = desc->amsdu_info & IWL_RX_MPDU_AMSDU_SUBFRAME_IDX_MASK; + struct iwl_mvm_reorder_buf_entry *entries; int index; u16 nssn, sn; u8 baid; @@ -660,6 +671,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, IWL_RX_MPDU_REORDER_SN_SHIFT; buffer = &baid_data->reorder_buf[queue]; + entries = &baid_data->entries[queue * baid_data->entries_per_queue]; spin_lock_bh(&buffer->lock); @@ -716,7 +728,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, * If it is the same SN then if the subframe index is incrementing it * is the same AMSDU - otherwise it is a retransmission. 
*/ - tail = skb_peek_tail(&buffer->entries[index]); + tail = skb_peek_tail(&entries[index].e.frames); if (tail && !amsdu) goto drop; else if (tail && (sn != buffer->last_amsdu || @@ -724,9 +736,9 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, goto drop; /* put in reorder buffer */ - __skb_queue_tail(&buffer->entries[index], skb); + __skb_queue_tail(&entries[index].e.frames, skb); buffer->num_stored++; - buffer->reorder_time[index] = jiffies; + entries[index].e.reorder_time = jiffies; if (amsdu) { buffer->last_amsdu = sn; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 282424f40c43..43e18e75ef42 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2104,6 +2104,8 @@ static void iwl_mvm_free_reorder(struct iwl_mvm *mvm, int j; struct iwl_mvm_reorder_buffer *reorder_buf = &data->reorder_buf[i]; + struct iwl_mvm_reorder_buf_entry *entries = + &data->entries[i * data->entries_per_queue]; spin_lock_bh(&reorder_buf->lock); if (likely(!reorder_buf->num_stored)) { @@ -2119,7 +2121,7 @@ static void iwl_mvm_free_reorder(struct iwl_mvm *mvm, WARN_ON(1); for (j = 0; j < reorder_buf->buf_size; j++) - __skb_queue_purge(&reorder_buf->entries[j]); + __skb_queue_purge(&entries[j].e.frames); /* * Prevent timer re-arm. This prevents a very far fetched case * where we timed out on the notification. There may be prior @@ -2144,6 +2146,8 @@ static void iwl_mvm_init_reorder_buffer(struct iwl_mvm *mvm, for (i = 0; i < mvm->trans->num_rx_queues; i++) { struct iwl_mvm_reorder_buffer *reorder_buf = &data->reorder_buf[i]; + struct iwl_mvm_reorder_buf_entry *entries = + &data->entries[i * data->entries_per_queue]; int j; reorder_buf->num_stored = 0; @@ -2161,7 +2165,7 @@ static void iwl_mvm_init_reorder_buffer(struct iwl_mvm *mvm, reorder_buf->tid = data->tid; reorder_buf->valid = false; for (j = 0; j < reorder_buf->buf_size; j++) - __skb_queue_head_init(&reorder_buf->entries[j]); + __skb_queue_head_init(&entries[j].e.frames); } } @@ -2182,16 +2186,44 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, } if (iwl_mvm_has_new_rx_api(mvm) && start) { + u16 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]); + + /* sparse doesn't like the __align() so don't check */ +#ifndef __CHECKER__ + /* + * The division below will be OK if either the cache line size + * can be divided by the entry size (ALIGN will round up) or if + * if the entry size can be divided by the cache line size, in + * which case the ALIGN() will do nothing. + */ + BUILD_BUG_ON(SMP_CACHE_BYTES % sizeof(baid_data->entries[0]) && + sizeof(baid_data->entries[0]) % SMP_CACHE_BYTES); +#endif + + /* + * Upward align the reorder buffer size to fill an entire cache + * line for each queue, to avoid sharing cache lines between + * different queues. + */ + reorder_buf_size = ALIGN(reorder_buf_size, SMP_CACHE_BYTES); + /* * Allocate here so if allocation fails we can bail out early * before starting the BA session in the firmware */ baid_data = kzalloc(sizeof(*baid_data) + mvm->trans->num_rx_queues * - sizeof(baid_data->reorder_buf[0]), + reorder_buf_size, GFP_KERNEL); if (!baid_data) return -ENOMEM; + + /* + * This division is why we need the above BUILD_BUG_ON(), + * if that doesn't hold then this will not be right. 
+ */ + baid_data->entries_per_queue = + reorder_buf_size / sizeof(baid_data->entries[0]); } cmd.mac_id_n_color = cpu_to_le32(mvm_sta->mac_id_n_color); From 3f1c4c58068757da94b07e0914321b443501e20f Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 2 Oct 2017 12:07:59 +0300 Subject: [PATCH 1055/2177] iwlwifi: mvm: remove duplicated fields in mvm reorder buffer The reason station id and tid fields are both in baid data and in the reorder buffer per queue is that we couldn't access the baid_data in the reorder timer functions. Now that we do some pointer math and access it anyway, those fields can be removed. This save some space and some code. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 4 ---- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 7 ++++--- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 6 +----- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 52a2f49132e7..e34b3eb8e08b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -585,11 +585,9 @@ enum iwl_mvm_tdls_cs_state { * @head_sn: reorder window head sn * @num_stored: number of mpdus stored in the buffer * @buf_size: the reorder buffer size as set by the last addba request - * @sta_id: sta id of this reorder buffer * @queue: queue of this reorder buffer * @last_amsdu: track last ASMDU SN for duplication detection * @last_sub_index: track ASMDU sub frame index for duplication detection - * @tid: the tid * @reorder_timer: timer for frames are in the reorder buffer. For AMSDU * it is the time of last received sub-frame * @removed: prevent timer re-arming @@ -601,11 +599,9 @@ struct iwl_mvm_reorder_buffer { u16 head_sn; u16 num_stored; u8 buf_size; - u8 sta_id; int queue; u16 last_amsdu; u8 last_sub_index; - u8 tid; struct timer_list reorder_timer; bool removed; bool valid; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index bb1a1ac9f6ed..4230b56bd52c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -505,17 +505,18 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) if (expired) { struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; + u8 sta_id = baid_data->sta_id; rcu_read_lock(); - sta = rcu_dereference(buf->mvm->fw_id_to_mac_id[buf->sta_id]); + sta = rcu_dereference(buf->mvm->fw_id_to_mac_id[sta_id]); mvmsta = iwl_mvm_sta_from_mac80211(sta); /* SN is set to the last expired frame + 1 */ IWL_DEBUG_HT(buf->mvm, "Releasing expired frames for sta %u, sn %d\n", - buf->sta_id, sn); + sta_id, sn); iwl_mvm_event_frame_timeout_callback(buf->mvm, mvmsta->vif, - sta, buf->tid); + sta, baid_data->tid); iwl_mvm_release_frames(buf->mvm, sta, NULL, buf, sn); rcu_read_unlock(); } else { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 43e18e75ef42..23787cc9c89e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2137,7 +2137,6 @@ static void iwl_mvm_free_reorder(struct iwl_mvm *mvm, } static void iwl_mvm_init_reorder_buffer(struct iwl_mvm *mvm, - u32 sta_id, struct iwl_mvm_baid_data *data, u16 ssn, u8 buf_size) { @@ -2161,8 +2160,6 @@ static void iwl_mvm_init_reorder_buffer(struct iwl_mvm *mvm, spin_lock_init(&reorder_buf->lock); reorder_buf->mvm = mvm; 
reorder_buf->queue = i; - reorder_buf->sta_id = sta_id; - reorder_buf->tid = data->tid; reorder_buf->valid = false; for (j = 0; j < reorder_buf->buf_size; j++) __skb_queue_head_init(&entries[j].e.frames); @@ -2294,8 +2291,7 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, mod_timer(&baid_data->session_timer, TU_TO_EXP_TIME(timeout * 2)); - iwl_mvm_init_reorder_buffer(mvm, mvm_sta->sta_id, - baid_data, ssn, buf_size); + iwl_mvm_init_reorder_buffer(mvm, baid_data, ssn, buf_size); /* * protect the BA data with RCU to cover a case where our * internal RX sync mechanism will timeout (not that it's From 76f4a85e1ddd1876d52c226d60dad15f29f6d9d7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 2 Oct 2017 13:43:27 +0200 Subject: [PATCH 1056/2177] iwlwifi: mvm: pass baid_data to iwl_mvm_release_frames() All callers of iwl_mvm_release_frames() already have the baid_data pointer, so we don't need to (re)calculate it inside the function. Just pass it instead. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 4230b56bd52c..b84756dc9d6c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -409,11 +409,10 @@ static bool iwl_mvm_is_sn_less(u16 sn1, u16 sn2, u16 buffer_size) static void iwl_mvm_release_frames(struct iwl_mvm *mvm, struct ieee80211_sta *sta, struct napi_struct *napi, + struct iwl_mvm_baid_data *baid_data, struct iwl_mvm_reorder_buffer *reorder_buf, u16 nssn) { - struct iwl_mvm_baid_data *baid_data = - iwl_mvm_baid_data_from_reorder_buf(reorder_buf); struct iwl_mvm_reorder_buf_entry *entries = &baid_data->entries[reorder_buf->queue * baid_data->entries_per_queue]; @@ -517,7 +516,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) sta_id, sn); iwl_mvm_event_frame_timeout_callback(buf->mvm, mvmsta->vif, sta, baid_data->tid); - iwl_mvm_release_frames(buf->mvm, sta, NULL, buf, sn); + iwl_mvm_release_frames(buf->mvm, sta, NULL, baid_data, buf, sn); rcu_read_unlock(); } else { /* @@ -557,7 +556,7 @@ static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue, /* release all frames that are in the reorder buffer to the stack */ spin_lock_bh(&reorder_buf->lock); - iwl_mvm_release_frames(mvm, sta, NULL, reorder_buf, + iwl_mvm_release_frames(mvm, sta, NULL, ba_data, reorder_buf, ieee80211_sn_add(reorder_buf->head_sn, reorder_buf->buf_size)); spin_unlock_bh(&reorder_buf->lock); @@ -685,7 +684,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, } if (ieee80211_is_back_req(hdr->frame_control)) { - iwl_mvm_release_frames(mvm, sta, napi, buffer, nssn); + iwl_mvm_release_frames(mvm, sta, napi, baid_data, buffer, nssn); goto drop; } @@ -701,7 +700,8 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, !ieee80211_sn_less(sn, buffer->head_sn + buffer->buf_size)) { u16 min_sn = ieee80211_sn_less(sn, nssn) ? sn : nssn; - iwl_mvm_release_frames(mvm, sta, napi, buffer, min_sn); + iwl_mvm_release_frames(mvm, sta, napi, baid_data, buffer, + min_sn); } /* drop any oudated packets */ @@ -758,7 +758,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, * release notification with up to date NSSN. 
*/ if (!amsdu || last_subframe) - iwl_mvm_release_frames(mvm, sta, napi, buffer, nssn); + iwl_mvm_release_frames(mvm, sta, napi, baid_data, buffer, nssn); spin_unlock_bh(&buffer->lock); return true; @@ -1078,7 +1078,7 @@ void iwl_mvm_rx_frame_release(struct iwl_mvm *mvm, struct napi_struct *napi, reorder_buf = &ba_data->reorder_buf[queue]; spin_lock_bh(&reorder_buf->lock); - iwl_mvm_release_frames(mvm, sta, napi, reorder_buf, + iwl_mvm_release_frames(mvm, sta, napi, ba_data, reorder_buf, le16_to_cpu(release->nssn)); spin_unlock_bh(&reorder_buf->lock); From 0e1be40a45d767a07c1fb50ebfba273368f57484 Mon Sep 17 00:00:00 2001 From: Beni Lev Date: Thu, 28 Sep 2017 12:10:55 +0300 Subject: [PATCH 1057/2177] iwlwifi: mvm: allow reading UMAC error data from SMEM in A000 devices Currently, UMAC error data reading is restricted to DCCM. A000 NICs use SMEM for this data. Signed-off-by: Beni Lev Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 2da1b088ac01..4ade688db63b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -459,7 +459,8 @@ static void iwl_mvm_dump_umac_error_log(struct iwl_mvm *mvm) base = mvm->umac_error_event_table; - if (base < 0x800000) { + if (base < (mvm->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000 ? + 0x400000 : 0x800000)) { IWL_ERR(mvm, "Not valid error log pointer 0x%08X for %s uCode\n", base, From fb5b28469d2a5be83aae0c754a9a69bb37fec6ff Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 2 Oct 2017 15:44:20 +0300 Subject: [PATCH 1058/2177] iwlwifi: mvm: move umac_error_event_table validity check to where it's set There's no point in checking the validity of the umac_error_event_table pointer every time we generate a dump. It's cleaner to do so when we read the value, namely when we receive the alive data. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 13 +++++++++++-- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 18 ++++-------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index f476882291ae..70d0505a176e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -200,10 +200,19 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, mvm->umac_error_event_table = le32_to_cpu(umac->error_info_addr); + if (mvm->umac_error_event_table < + (mvm->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000 ? + 0x400000 : 0x800000)) + IWL_ERR(mvm, + "Not valid error log pointer 0x%08X for %s uCode\n", + mvm->umac_error_event_table, + (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT) ? 
+ "Init" : "RT"); + else + mvm->support_umac_log = true; + alive_data->scd_base_addr = le32_to_cpu(lmac1->scd_base_ptr); alive_data->valid = status == IWL_ALIVE_STATUS_OK; - if (mvm->umac_error_event_table) - mvm->support_umac_log = true; IWL_DEBUG_FW(mvm, "Alive ucode status 0x%04x revision 0x%01X 0x%01X\n", diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 4ade688db63b..d46115e2d69e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -455,21 +455,12 @@ static void iwl_mvm_dump_umac_error_log(struct iwl_mvm *mvm) { struct iwl_trans *trans = mvm->trans; struct iwl_umac_error_event_table table; - u32 base; - base = mvm->umac_error_event_table; - - if (base < (mvm->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000 ? - 0x400000 : 0x800000)) { - IWL_ERR(mvm, - "Not valid error log pointer 0x%08X for %s uCode\n", - base, - (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT) - ? "Init" : "RT"); + if (!mvm->support_umac_log) return; - } - iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table)); + iwl_trans_read_mem_bytes(trans, mvm->umac_error_event_table, &table, + sizeof(table)); if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { IWL_ERR(trans, "Start IWL Error Log Dump:\n"); @@ -609,8 +600,7 @@ void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm) if (mvm->error_event_table[1]) iwl_mvm_dump_lmac_error_log(mvm, mvm->error_event_table[1]); - if (mvm->support_umac_log) - iwl_mvm_dump_umac_error_log(mvm); + iwl_mvm_dump_umac_error_log(mvm); } int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id, u8 minq, u8 maxq) From 3485e76e73495382f953d3a6dd45c00c7c404e4a Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 2 Oct 2017 16:12:23 +0300 Subject: [PATCH 1059/2177] iwlwifi: define minimum valid address for umac_error_event_table in cfg We now have two different minimum valid values for umac_error_event_table. To avoid hardcoding the minimum value in the driver, add a value to cfg where it can be read from. 
Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/8000.c | 3 ++- drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 3 ++- drivers/net/wireless/intel/iwlwifi/cfg/a000.c | 3 ++- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 17 +++++++++++------ 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c index c2a5936ccede..1dce74afcd75 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c @@ -165,7 +165,8 @@ static const struct iwl_tt_params iwl8000_tt_params = { .thermal_params = &iwl8000_tt_params, \ .apmg_not_supported = true, \ .nvm_type = IWL_NVM_EXT, \ - .dbgc_supported = true + .dbgc_supported = true, \ + .min_umac_error_event_table = 0x800000 #define IWL_DEVICE_8000 \ IWL_DEVICE_8000_COMMON, \ diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index e8b5ff42f5a8..af7c4f36b66f 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -149,7 +149,8 @@ static const struct iwl_tt_params iwl9000_tt_params = { .mac_addr_from_csr = true, \ .rf_id = true, \ .nvm_type = IWL_NVM_EXT, \ - .dbgc_supported = true + .dbgc_supported = true, \ + .min_umac_error_event_table = 0x800000 const struct iwl_cfg iwl9160_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9160", diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c index c911d55fde49..ea8206515171 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c @@ -135,7 +135,8 @@ static const struct iwl_ht_params iwl_a000_ht_params = { .gen2 = true, \ .nvm_type = IWL_NVM_EXT, \ .dbgc_supported = true, \ - .tx_cmd_queue_size = 32 + .tx_cmd_queue_size = 32, \ + .min_umac_error_event_table = 0x400000 const struct iwl_cfg iwla000_2ac_cfg_hr = { .name = "Intel(R) Dual Band Wireless AC a000", diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 0347a03b6f1b..86a796025750 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -396,6 +396,7 @@ struct iwl_cfg { u8 max_vht_ampdu_exponent; u8 ucode_api_max; u8 ucode_api_min; + u32 min_umac_error_event_table; }; /* diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 70d0505a176e..0296df625cd5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -176,6 +176,7 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, struct iwl_lmac_alive *lmac1; struct iwl_lmac_alive *lmac2 = NULL; u16 status; + u32 umac_error_event_table; if (iwl_rx_packet_payload_len(pkt) == sizeof(*palive)) { palive = (void *)pkt->data; @@ -198,18 +199,22 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, mvm->sf_space.addr = le32_to_cpu(lmac1->st_fwrd_addr); mvm->sf_space.size = le32_to_cpu(lmac1->st_fwrd_size); - mvm->umac_error_event_table = le32_to_cpu(umac->error_info_addr); + umac_error_event_table = le32_to_cpu(umac->error_info_addr); - if (mvm->umac_error_event_table < - (mvm->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000 ? 
- 0x400000 : 0x800000)) + if (!umac_error_event_table) { + mvm->support_umac_log = false; + } else if (umac_error_event_table >= + mvm->trans->cfg->min_umac_error_event_table) { + mvm->support_umac_log = true; + mvm->umac_error_event_table = umac_error_event_table; + } else { IWL_ERR(mvm, "Not valid error log pointer 0x%08X for %s uCode\n", mvm->umac_error_event_table, (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT) ? "Init" : "RT"); - else - mvm->support_umac_log = true; + mvm->support_umac_log = false; + } alive_data->scd_base_addr = le32_to_cpu(lmac1->scd_base_ptr); alive_data->valid = status == IWL_ALIVE_STATUS_OK; From 41fd2fec56db2564f02532ed7244e1f69193b4ad Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Mon, 9 Oct 2017 11:01:33 +0300 Subject: [PATCH 1060/2177] iwlwifi: mvm: add missing lq_color In the compressed BA notif, the driver didn't parse out the LQ color, so statistics for the rates tried were always thrown out. Add it so it gets correctly used. While at it, fix the name of the relevant field in the struct. Fixes: c46e7724bfe9 ("iwlwifi: mvm: support new BA notification response") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/tx.h | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h index 14ad9fb895f9..f5d5ba7e37ec 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h @@ -710,7 +710,7 @@ enum iwl_mvm_ba_resp_flags { * @reduced_txp: power reduced according to TPC. This is the actual value and * not a copy from the LQ command. Thus, if not the first rate was used * for Tx-ing then this value will be set to 0 by FW. 
- * @initial_rate: TLC rate info, initial rate index, TLC table color + * @tlc_rate_info: TLC rate info, initial rate index, TLC table color * @retry_cnt: retry count * @query_byte_cnt: SCD query byte count * @query_frame_cnt: SCD query frame count @@ -730,7 +730,7 @@ struct iwl_mvm_compressed_ba_notif { __le32 flags; u8 sta_id; u8 reduced_txp; - u8 initial_rate; + u8 tlc_rate_info; u8 retry_cnt; __le32 query_byte_cnt; __le16 query_frame_cnt; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 6f2e2af23219..00a0efab20e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1746,6 +1746,7 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) if (iwl_mvm_has_new_tx_api(mvm)) { struct iwl_mvm_compressed_ba_notif *ba_res = (void *)pkt->data; + u8 lq_color = TX_RES_RATE_TABLE_COL_GET(ba_res->tlc_rate_info); int i; sta_id = ba_res->sta_id; @@ -1759,11 +1760,18 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) if (!le16_to_cpu(ba_res->tfd_cnt)) goto out; + rcu_read_lock(); + + mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id); + if (!mvmsta) + goto out_unlock; + /* Free per TID */ for (i = 0; i < le16_to_cpu(ba_res->tfd_cnt); i++) { struct iwl_mvm_compressed_ba_tfd *ba_tfd = &ba_res->tfd[i]; + mvmsta->tid_data[i].lq_color = lq_color; iwl_mvm_tx_reclaim(mvm, sta_id, ba_tfd->tid, (int)(le16_to_cpu(ba_tfd->q_num)), le16_to_cpu(ba_tfd->tfd_index), @@ -1771,6 +1779,8 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) le32_to_cpu(ba_res->tx_rate)); } +out_unlock: + rcu_read_unlock(); out: IWL_DEBUG_TX_REPLY(mvm, "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n", From 1105a337375258515ed09b92a83fd7bfd6775958 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 12 Oct 2017 11:20:50 +0300 Subject: [PATCH 1061/2177] iwlwifi: pcie: sort IDs for the 9000 series for easier comparisons It's hard to find values that are missing in the list, so sorting the values and comparing them makes it much easier. To simplify this task, sort the devices in the list. 
Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index ce7254ec0514..aa3c07192624 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -512,65 +512,65 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x0012, iwl8275_2ac_cfg)}, /* 9000 Series */ - {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, - {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, - {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0034, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x003C, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0214, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0230, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0234, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0238, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x023C, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1030, iwl9560_2ac_cfg)}, - 
{IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0034, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x003C, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0038, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x003C, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA370, 0x003C, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x003C, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0034, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0034, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0038, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x003C, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x003C, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg_hr_cdb)}, From 3c798a45318e098e9937b0fee1e0cf986174fbbe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Oct 2017 11:54:03 +0200 Subject: [PATCH 1062/2177] iwlwifi: pcie: remove set but not used variable tcph This variable is never used, so remove the code to set it. After this, the variable 'iph' also has the same fate. 
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 79e4c73a9709..16b345f54ff0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -289,8 +289,7 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, struct sk_buff *csum_skb = NULL; unsigned int tb_len; dma_addr_t tb_phys; - struct tcphdr *tcph; - u8 *iph, *subf_hdrs_start = hdr_page->pos; + u8 *subf_hdrs_start = hdr_page->pos; total_len -= data_left; @@ -312,8 +311,6 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, * as MAC header. */ tso_build_hdr(skb, hdr_page->pos, &tso, data_left, !total_len); - iph = hdr_page->pos + 8; - tcph = (void *)(iph + ip_hdrlen); hdr_page->pos += snap_ip_tcp_hdrlen; From a68f4a27f55f1d54e35c270aff89383da4b1b656 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Oct 2017 11:36:39 +0100 Subject: [PATCH 1063/2177] rxrpc: Support service upgrade from a kernel service Provide support for a kernel service to make use of the service upgrade facility. This involves: (1) Pass an upgrade request flag to rxrpc_kernel_begin_call(). (2) Make rxrpc_kernel_recv_data() return the call's current service ID so that the caller can detect service upgrade and see what the service was upgraded to. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 17 +++++++++++++++-- fs/afs/internal.h | 1 + fs/afs/rxrpc.c | 11 +++++++---- include/net/af_rxrpc.h | 5 +++-- net/rxrpc/af_rxrpc.c | 5 ++++- net/rxrpc/recvmsg.c | 5 ++++- 6 files changed, 34 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 810620153a44..9fb61a6bc7cf 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -782,7 +782,9 @@ The kernel interface functions are as follows: struct key *key, unsigned long user_call_ID, s64 tx_total_len, - gfp_t gfp); + gfp_t gfp, + rxrpc_notify_rx_t notify_rx, + bool upgrade); This allocates the infrastructure to make a new RxRPC call and assigns call and connection numbers. The call will be made on the UDP port that @@ -803,6 +805,13 @@ The kernel interface functions are as follows: allows the kernel to encrypt directly to the packet buffers, thereby saving a copy. The value may not be less than -1. + notify_rx is a pointer to a function to be called when events such as + incoming data packets or remote aborts happen. + + upgrade should be set to true if a client operation should request that + the server upgrade the service to a better one. The resultant service ID + is returned by rxrpc_kernel_recv_data(). + If this function is successful, an opaque reference to the RxRPC call is returned. The caller now holds a reference on this and it must be properly ended. @@ -850,7 +859,8 @@ The kernel interface functions are as follows: size_t size, size_t *_offset, bool want_more, - u32 *_abort) + u32 *_abort, + u16 *_service) This is used to receive data from either the reply part of a client call or the request part of a service call. buf and size specify how much @@ -873,6 +883,9 @@ The kernel interface functions are as follows: If a remote ABORT is detected, the abort code received will be stored in *_abort and ECONNABORTED will be returned. 
+ The service ID that the call ended up with is returned into *_service. + This can be used to see if a call got a service upgrade. + (*) Abort a call. void rxrpc_kernel_abort_call(struct socket *sock, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 82e16556afea..3f03f7888302 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -100,6 +100,7 @@ struct afs_call { bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ + bool upgrade; /* T to request service upgrade */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ u32 operation_ID; /* operation ID for an incoming call */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 0bf191f0dbaf..172a4f9747ac 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -387,7 +387,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, tx_total_len, gfp, (async ? afs_wake_up_async_call : - afs_wake_up_call_waiter)); + afs_wake_up_call_waiter), + call->upgrade); call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -443,7 +444,7 @@ error_do_abort: abort_code = 0; offset = 0; rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, - false, &abort_code); + false, &abort_code, &call->service_id); ret = call->type->abort_to_error(abort_code); } error_kill_call: @@ -471,7 +472,8 @@ static void afs_deliver_to_call(struct afs_call *call) size_t offset = 0; ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, NULL, 0, &offset, false, - &call->abort_code); + &call->abort_code, + &call->service_id); trace_afs_recv_data(call, 0, offset, false, ret); if (ret == -EINPROGRESS || ret == -EAGAIN) @@ -851,7 +853,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, buf, count, &call->offset, - want_more, &call->abort_code); + want_more, &call->abort_code, + &call->service_id); trace_afs_recv_data(call, count, call->offset, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 3ac79150291f..820dd365a08e 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -49,12 +49,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, unsigned long, s64, gfp_t, - rxrpc_notify_rx_t); + rxrpc_notify_rx_t, + bool); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t, rxrpc_notify_end_tx_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, - void *, size_t, size_t *, bool, u32 *); + void *, size_t, size_t *, bool, u32 *, u16 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fb17552fd292..481f7dc90ba2 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -265,6 +265,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @tx_total_len: Total length of data to transmit during the call (or -1) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue + * @upgrade: Request service upgrade for call * * Allow a kernel service to begin a call on the nominated socket. 
This just * sets up all the internal tracking structures and allocates connection and @@ -279,7 +280,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, unsigned long user_call_ID, s64 tx_total_len, gfp_t gfp, - rxrpc_notify_rx_t notify_rx) + rxrpc_notify_rx_t notify_rx, + bool upgrade) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -304,6 +306,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.key = key; cp.security_level = 0; cp.exclusive = false; + cp.upgrade = upgrade; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, gfp); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index bdece21f313d..e4937b3f3685 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -607,6 +607,7 @@ wait_error: * @_offset: The running offset into the buffer. * @want_more: True if more data is expected to be read * @_abort: Where the abort code is stored if -ECONNABORTED is returned + * @_service: Where to store the actual service ID (may be upgraded) * * Allow a kernel service to receive data and pick up information about the * state of a call. Returns 0 if got what was asked for and there's more @@ -624,7 +625,7 @@ wait_error: */ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, void *buf, size_t size, size_t *_offset, - bool want_more, u32 *_abort) + bool want_more, u32 *_abort, u16 *_service) { struct iov_iter iter; struct kvec iov; @@ -680,6 +681,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, read_phase_complete: ret = 1; out: + if (_service) + *_service = call->service_id; mutex_unlock(&call->user_mutex); _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); return ret; From f4d15fb6f99af9b99f688bd87579137be44f85ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Oct 2017 11:07:31 +0100 Subject: [PATCH 1064/2177] rxrpc: Provide functions for allowing cleaner handling of signals Provide a couple of functions to allow cleaner handling of signals in a kernel service. They are: (1) rxrpc_kernel_get_rtt() This allows the kernel service to find out the RTT time for a call, so as to better judge how large a timeout to employ. Note, though, that whilst this returns a value in nanoseconds, the timeouts can only actually be in jiffies. (2) rxrpc_kernel_check_life() This returns a number that is updated when ACKs are received from the peer (notably including PING RESPONSE ACKs which we can elicit by sending PING ACKs to see if the call still exists on the server). The caller should compare the numbers of two calls to see if the call is still alive. These can be used to provide an extending timeout rather than returning immediately in the case that a signal occurs that would otherwise abort an RPC operation. The timeout would be extended if the server is still responsive and the call is still apparently alive on the server. For most operations this isn't that necessary - but for FS.StoreData it is: OpenAFS writes the data to storage as it comes in without making a backup, so if we immediately abort it when partially complete on a CTRL+C, say, we have no idea of the state of the file after the abort. 
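The caller-side pattern these two helpers enable looks roughly as follows; this is a condensed sketch of the AFS wait loop changed later in this series, with done() standing in as a hypothetical completion test and the waitqueue handling left out:

    #include <linux/errno.h>
    #include <linux/jiffies.h>
    #include <linux/net.h>
    #include <linux/sched/signal.h>
    #include <net/af_rxrpc.h>

    static int sketch_wait_for_call(struct socket *sock, struct rxrpc_call *call,
                                    bool (*done)(struct rxrpc_call *))
    {
            signed long rtt2, timeout;
            u32 life, last_life;
            int ret = 0;

            /* Budget roughly two round trips of inactivity before giving up. */
            rtt2 = nsecs_to_jiffies64(rxrpc_kernel_get_rtt(sock, call)) * 2;
            if (rtt2 < 2)
                    rtt2 = 2;
            timeout = rtt2;
            last_life = rxrpc_kernel_check_life(sock, call);

            for (;;) {
                    set_current_state(TASK_UNINTERRUPTIBLE);

                    if (done(call))
                            break;

                    life = rxrpc_kernel_check_life(sock, call);
                    if (timeout == 0 && life == last_life &&
                        signal_pending(current)) {
                            ret = -ERESTARTSYS; /* no ACKs for 2*RTT: give up */
                            break;
                    }
                    if (life != last_life) {
                            timeout = rtt2;     /* peer made progress, extend */
                            last_life = life;
                    }

                    timeout = schedule_timeout(timeout);
            }

            __set_current_state(TASK_RUNNING);
            return ret;
    }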
Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 24 ++++++++++++++++++++++++ include/net/af_rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 19 +++++++++++++++++++ net/rxrpc/peer_object.c | 13 +++++++++++++ 4 files changed, 58 insertions(+) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 9fb61a6bc7cf..1fb5c553aedd 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1033,6 +1033,30 @@ The kernel interface functions are as follows: It returns 0 if the call was requeued and an error otherwise. + (*) Get call RTT. + + u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call); + + Get the RTT time to the peer in use by a call. The value returned is in + nanoseconds. + + (*) Check call still alive. + + u32 rxrpc_kernel_check_life(struct socket *sock, + struct rxrpc_call *call); + + This returns a number that is updated when ACKs are received from the peer + (notably including PING RESPONSE ACKs which we can elicit by sending PING + ACKs to see if the call still exists on the server). The caller should + compare the numbers of two calls to see if the call is still alive after + waiting for a suitable interval. + + This allows the caller to work out if the server is still contactable and + if the call is still alive on the server whilst waiting for the server to + process a client operation. + + This function may transmit a PING ACK. + ======================= CONFIGURABLE PARAMETERS diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 820dd365a08e..2b3a6eec4570 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -61,6 +61,7 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); +u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); @@ -68,5 +69,6 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *, struct key *); int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); +u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 481f7dc90ba2..73c980e26581 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -339,6 +339,25 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) } EXPORT_SYMBOL(rxrpc_kernel_end_call); +/** + * rxrpc_kernel_check_life - Check to see whether a call is still alive + * @sock: The socket the call is on + * @call: The call to check + * + * Allow a kernel service to find out whether a call is still alive - ie. we're + * getting ACKs from the server. Returns a number representing the life state + * which can be compared to that returned by a previous call. + * + * If this is a client call, ping ACKs will be sent to the server to find out + * whether it's still responsive and whether the call is still alive on the + * server. 
+ */ +u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call) +{ + return call->acks_latest; +} +EXPORT_SYMBOL(rxrpc_kernel_check_life); + /** * rxrpc_kernel_check_call - Check a call's state * @sock: The socket the call is on diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 5787f97f5330..d02a99f37f5f 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, *_srx = call->peer->srx; } EXPORT_SYMBOL(rxrpc_kernel_get_peer); + +/** + * rxrpc_kernel_get_rtt - Get a call's peer RTT + * @sock: The socket on which the call is in progress. + * @call: The call to query + * + * Get the call's peer RTT. + */ +u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call) +{ + return call->peer->rtt; +} +EXPORT_SYMBOL(rxrpc_kernel_get_rtt); From bc5e3a546d553e5223851fc199e69040eb70f68b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Oct 2017 11:07:31 +0100 Subject: [PATCH 1065/2177] rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals Make AF_RXRPC accept MSG_WAITALL as a flag to sendmsg() to tell it to ignore signals whilst loading up the message queue, provided progress is being made in emptying the queue at the other side. Progress is defined as the base of the transmit window having being advanced within 2 RTT periods. If the period is exceeded with no progress, sendmsg() will return anyway, indicating how much data has been copied, if any. Once the supplied buffer is entirely decanted, the sendmsg() will return. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 12 ++++ fs/afs/rxrpc.c | 31 +++++++-- net/rxrpc/sendmsg.c | 107 ++++++++++++++++++++++------- 3 files changed, 119 insertions(+), 31 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 1fb5c553aedd..b5407163d53b 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -280,6 +280,18 @@ Interaction with the user of the RxRPC socket: nominated by a socket option. +Notes on sendmsg: + + (*) MSG_WAITALL can be set to tell sendmsg to ignore signals if the peer is + making progress at accepting packets within a reasonable time such that we + manage to queue up all the data for transmission. This requires the + client to accept at least one packet per 2*RTT time period. + + If this isn't set, sendmsg() will return immediately, either returning + EINTR/ERESTARTSYS if nothing was consumed or returning the amount of data + consumed. + + Notes on recvmsg: (*) If there's a sequence of data messages belonging to a particular call on diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 172a4f9747ac..bb1e2caa1720 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -407,7 +407,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_flags = (call->send_pages ? MSG_MORE : 0); + msg.msg_flags = MSG_WAITALL | (call->send_pages ? 
MSG_MORE : 0); /* We have to change the state *before* sending the last packet as * rxrpc might give us the reply before it returns from sending the @@ -538,15 +538,26 @@ call_complete: */ static int afs_wait_for_call_to_complete(struct afs_call *call) { + signed long rtt2, timeout; int ret; + u64 rtt; + u32 life, last_life; DECLARE_WAITQUEUE(myself, current); _enter(""); + rtt = rxrpc_kernel_get_rtt(afs_socket, call->rxcall); + rtt2 = nsecs_to_jiffies64(rtt) * 2; + if (rtt2 < 2) + rtt2 = 2; + + timeout = rtt2; + last_life = rxrpc_kernel_check_life(afs_socket, call->rxcall); + add_wait_queue(&call->waitq, &myself); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); /* deliver any messages that are in the queue */ if (call->state < AFS_CALL_COMPLETE && call->need_attention) { @@ -556,10 +567,20 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } - if (call->state == AFS_CALL_COMPLETE || - signal_pending(current)) + if (call->state == AFS_CALL_COMPLETE) break; - schedule(); + + life = rxrpc_kernel_check_life(afs_socket, call->rxcall); + if (timeout == 0 && + life == last_life && signal_pending(current)) + break; + + if (life != last_life) { + timeout = rtt2; + last_life = life; + } + + timeout = schedule_timeout(timeout); } remove_wait_queue(&call->waitq, &myself); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 9ea6f972767e..2d9edc656ca3 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -37,13 +37,87 @@ struct rxrpc_send_params { bool upgrade; /* If the connection is upgradeable */ }; +/* + * Wait for space to appear in the Tx queue or a signal to occur. + */ +static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (call->tx_top - call->tx_hard_ack < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + if (signal_pending(current)) + return sock_intr_errno(*timeo); + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + mutex_unlock(&call->user_mutex); + *timeo = schedule_timeout(*timeo); + if (mutex_lock_interruptible(&call->user_mutex) < 0) + return sock_intr_errno(*timeo); + } +} + +/* + * Wait for space to appear in the Tx queue uninterruptibly, but with + * a timeout of 2*RTT if no progress was made and a signal occurred. 
+ */ +static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, + struct rxrpc_call *call) +{ + rxrpc_seq_t tx_start, tx_win; + signed long rtt2, timeout; + u64 rtt; + + rtt = READ_ONCE(call->peer->rtt); + rtt2 = nsecs_to_jiffies64(rtt) * 2; + if (rtt2 < 1) + rtt2 = 1; + + timeout = rtt2; + tx_start = READ_ONCE(call->tx_hard_ack); + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + + tx_win = READ_ONCE(call->tx_hard_ack); + if (call->tx_top - tx_win < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + if (timeout == 0 && + tx_win == tx_start && signal_pending(current)) + return -EINTR; + + if (tx_win != tx_start) { + timeout = rtt2; + tx_start = tx_win; + } + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + timeout = schedule_timeout(timeout); + } +} + /* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked */ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, struct rxrpc_call *call, - long *timeo) + long *timeo, + bool waitall) { DECLARE_WAITQUEUE(myself, current); int ret; @@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, add_wait_queue(&call->waitq, &myself); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - ret = 0; - if (call->tx_top - call->tx_hard_ack < - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra)) - break; - if (call->state >= RXRPC_CALL_COMPLETE) { - ret = call->error; - break; - } - if (signal_pending(current)) { - ret = sock_intr_errno(*timeo); - break; - } - - trace_rxrpc_transmit(call, rxrpc_transmit_wait); - mutex_unlock(&call->user_mutex); - *timeo = schedule_timeout(*timeo); - if (mutex_lock_interruptible(&call->user_mutex) < 0) { - ret = sock_intr_errno(*timeo); - break; - } - } + if (waitall) + ret = rxrpc_wait_for_tx_window_nonintr(rx, call); + else + ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); remove_wait_queue(&call->waitq, &myself); set_current_state(TASK_RUNNING); @@ -254,7 +308,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; ret = rxrpc_wait_for_tx_window(rx, call, - &timeo); + &timeo, + msg->msg_flags & MSG_WAITALL); if (ret < 0) goto maybe_error; } From 4b70c62b9eafcee0505b440732d2e00c50f3085d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 13 Oct 2017 12:16:38 +0800 Subject: [PATCH 1066/2177] net: ftgmac100: Request clock and set speed According to the ASPEED datasheet, gigabit speeds require a clock of 100MHz or higher. Other speeds require 25MHz or higher. This patch configures a 100MHz clock if the system has a direct-attached PHY, or 25MHz if the system is running NC-SI which is limited to 100MHz. There appear to be no other upstream users of the FTGMAC100 driver it is hard to know the clocking requirements of other platforms. Therefore a conservative approach was taken with enabling clocks. If the platform is not ASPEED, both requesting the clock and configuring the speed is skipped. Signed-off-by: Joel Stanley Tested-by: Andrew Jeffery Signed-off-by: David S. 
Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 9ed8e4b81530..78db8e62a83f 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -59,6 +60,9 @@ /* Min number of tx ring entries before stopping queue */ #define TX_THRESHOLD (MAX_SKB_FRAGS + 1) +#define FTGMAC_100MHZ 100000000 +#define FTGMAC_25MHZ 25000000 + struct ftgmac100 { /* Registers */ struct resource *res; @@ -96,6 +100,7 @@ struct ftgmac100 { struct napi_struct napi; struct work_struct reset_task; struct mii_bus *mii_bus; + struct clk *clk; /* Link management */ int cur_speed; @@ -1734,6 +1739,22 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd) nd->link_up ? "up" : "down"); } +static void ftgmac100_setup_clk(struct ftgmac100 *priv) +{ + priv->clk = devm_clk_get(priv->dev, NULL); + if (IS_ERR(priv->clk)) + return; + + clk_prepare_enable(priv->clk); + + /* Aspeed specifies a 100MHz clock is required for up to + * 1000Mbit link speeds. As NCSI is limited to 100Mbit, 25MHz + * is sufficient + */ + clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ : + FTGMAC_100MHZ); +} + static int ftgmac100_probe(struct platform_device *pdev) { struct resource *res; @@ -1830,6 +1851,9 @@ static int ftgmac100_probe(struct platform_device *pdev) goto err_setup_mdio; } + if (priv->is_aspeed) + ftgmac100_setup_clk(priv); + /* Default ring sizes */ priv->rx_q_entries = priv->new_rx_q_entries = DEF_RX_QUEUE_ENTRIES; priv->tx_q_entries = priv->new_tx_q_entries = DEF_TX_QUEUE_ENTRIES; @@ -1883,6 +1907,8 @@ static int ftgmac100_remove(struct platform_device *pdev) unregister_netdev(netdev); + clk_disable_unprepare(priv->clk); + /* There's a small chance the reset task will have been re-queued, * during stop, make sure it's gone before we free the structure. */ From 6710e1126934d8b4372b4d2f9ae1646cd3f151bf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:28 +0200 Subject: [PATCH 1067/2177] bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP The 'cpumap' is primarily used as a backend map for XDP BPF helper call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'. This patch implement the main part of the map. It is not connected to the XDP redirect system yet, and no SKB allocation are done yet. The main concern in this patch is to ensure the datapath can run without any locking. This adds complexity to the setup and tear-down procedure, which assumptions are extra carefully documented in the code comments. 
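For orientation, the BPF-program side will eventually look roughly like the sketch below once later patches in this series wire the map type into bpf_redirect_map()/XDP_REDIRECT (this patch by itself still rejects such use in the verifier). The 4-byte key is the destination CPU index and the 4-byte value is the ptr_ring queue size, matching the checks in cpu_map_alloc(); SEC() and struct bpf_map_def are assumed to come from the samples' bpf_helpers.h of this era.

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") cpu_map = {
            .type        = BPF_MAP_TYPE_CPUMAP,
            .key_size    = sizeof(__u32),   /* destination CPU index */
            .value_size  = sizeof(__u32),   /* qsize of the per-CPU ptr_ring */
            .max_entries = 64,
    };

    SEC("xdp_redirect_cpu")
    int xdp_redirect_cpu_prog(struct xdp_md *ctx)
    {
            __u32 dest_cpu = 2;             /* illustrative fixed destination CPU */

            return bpf_redirect_map(&cpu_map, dest_cpu, 0);
    }

    char _license[] SEC("license") = "GPL";

User space would then populate the map with bpf_map_update_elem(), storing the desired queue size for each CPU it wants to redirect to.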
V2: - make sure array isn't larger than NR_CPUS - make sure CPUs added is a valid possible CPU V3: fix nitpicks from Jakub Kicinski V5: - Restrict map allocation to root / CAP_SYS_ADMIN - WARN_ON_ONCE if queue is not empty on tear-down - Return -EPERM on memlock limit instead of -ENOMEM - Error code in __cpu_map_entry_alloc() also handle ptr_ring_cleanup() - Moved cpu_map_enqueue() to next patch V6: all notice by Daniel Borkmann - Fix err return code in cpu_map_alloc() introduced in V5 - Move cpu_possible() check after max_entries boundary check - Forbid usage initially in check_map_func_compatibility() V7: - Fix alloc error path spotted by Daniel Borkmann - Did stress test adding+removing CPUs from the map concurrently - Fixed refcnt issue on cpu_map_entry, kthread started too soon - Make sure packets are flushed during tear-down, involved use of rcu_barrier() and kthread_run only exit after queue is empty - Fix alloc error path in __cpu_map_entry_alloc() for ptr_ring V8: - Nitpicking comments and gramma by Edward Cree - Fix missing semi-colon introduced in V7 due to rebasing - Move struct bpf_cpu_map_entry members cpu+map_id to tracepoint patch Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/Makefile | 1 + kernel/bpf/cpumap.c | 560 +++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 8 +- kernel/bpf/verifier.c | 5 + tools/include/uapi/linux/bpf.h | 1 + 7 files changed, 576 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/cpumap.c diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 6f1a567667b8..814c1081a4a9 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -41,4 +41,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) #ifdef CONFIG_STREAM_PARSER BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6db9e1d679cd..4303fb6c3817 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -112,6 +112,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_CPUMAP, }; enum bpf_prog_type { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 53fb09f92e3f..e597daae6120 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list obj-$(CONFIG_BPF_SYSCALL) += disasm.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o +obj-$(CONFIG_BPF_SYSCALL) += cpumap.o ifeq ($(CONFIG_STREAM_PARSER),y) obj-$(CONFIG_BPF_SYSCALL) += sockmap.o endif diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c new file mode 100644 index 000000000000..e1e25ddba038 --- /dev/null +++ b/kernel/bpf/cpumap.c @@ -0,0 +1,560 @@ +/* bpf/cpumap.c + * + * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * Released under terms in GPL version 2. See COPYING. + */ + +/* The 'cpumap' is primarily used as a backend map for XDP BPF helper + * call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'. + * + * Unlike devmap which redirects XDP frames out another NIC device, + * this map type redirects raw XDP frames to another CPU. The remote + * CPU will do SKB-allocation and call the normal network stack. 
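 *
 * Illustrative BPF-program-side usage, following the conventions of the
 * samples/bpf programs (map and section names below are hypothetical; the
 * actual redirect wiring lands in a follow-on patch):
 *
 *	struct bpf_map_def SEC("maps") cpu_map = {
 *		.type        = BPF_MAP_TYPE_CPUMAP,
 *		.key_size    = sizeof(u32),	(key   = CPU number)
 *		.value_size  = sizeof(u32),	(value = queue size)
 *		.max_entries = 64,
 *	};
 *
 *	SEC("xdp_demo")
 *	int xdp_redirect_to_cpu2(struct xdp_md *ctx)
 *	{
 *		return bpf_redirect_map(&cpu_map, 2, 0);
 *	}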
+ * + * This is a scalability and isolation mechanism, that allow + * separating the early driver network XDP layer, from the rest of the + * netstack, and assigning dedicated CPUs for this stage. This + * basically allows for 10G wirespeed pre-filtering via bpf. + */ +#include +#include +#include + +#include +#include +#include +#include + +/* General idea: XDP packets getting XDP redirected to another CPU, + * will maximum be stored/queued for one driver ->poll() call. It is + * guaranteed that setting flush bit and flush operation happen on + * same CPU. Thus, cpu_map_flush operation can deduct via this_cpu_ptr() + * which queue in bpf_cpu_map_entry contains packets. + */ + +#define CPU_MAP_BULK_SIZE 8 /* 8 == one cacheline on 64-bit archs */ +struct xdp_bulk_queue { + void *q[CPU_MAP_BULK_SIZE]; + unsigned int count; +}; + +/* Struct for every remote "destination" CPU in map */ +struct bpf_cpu_map_entry { + u32 qsize; /* Queue size placeholder for map lookup */ + + /* XDP can run multiple RX-ring queues, need __percpu enqueue store */ + struct xdp_bulk_queue __percpu *bulkq; + + /* Queue with potential multi-producers, and single-consumer kthread */ + struct ptr_ring *queue; + struct task_struct *kthread; + struct work_struct kthread_stop_wq; + + atomic_t refcnt; /* Control when this struct can be free'ed */ + struct rcu_head rcu; +}; + +struct bpf_cpu_map { + struct bpf_map map; + /* Below members specific for map type */ + struct bpf_cpu_map_entry **cpu_map; + unsigned long __percpu *flush_needed; +}; + +static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, + struct xdp_bulk_queue *bq); + +static u64 cpu_map_bitmap_size(const union bpf_attr *attr) +{ + return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); +} + +static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) +{ + struct bpf_cpu_map *cmap; + int err = -ENOMEM; + u64 cost; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || + attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) + return ERR_PTR(-EINVAL); + + cmap = kzalloc(sizeof(*cmap), GFP_USER); + if (!cmap) + return ERR_PTR(-ENOMEM); + + /* mandatory map attributes */ + cmap->map.map_type = attr->map_type; + cmap->map.key_size = attr->key_size; + cmap->map.value_size = attr->value_size; + cmap->map.max_entries = attr->max_entries; + cmap->map.map_flags = attr->map_flags; + cmap->map.numa_node = bpf_map_attr_numa_node(attr); + + /* Pre-limit array size based on NR_CPUS, not final CPU check */ + if (cmap->map.max_entries > NR_CPUS) { + err = -E2BIG; + goto free_cmap; + } + + /* make sure page count doesn't overflow */ + cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *); + cost += cpu_map_bitmap_size(attr) * num_possible_cpus(); + if (cost >= U32_MAX - PAGE_SIZE) + goto free_cmap; + cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + /* Notice returns -EPERM on if map size is larger than memlock limit */ + ret = bpf_map_precharge_memlock(cmap->map.pages); + if (ret) { + err = ret; + goto free_cmap; + } + + /* A per cpu bitfield with a bit per possible CPU in map */ + cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr), + __alignof__(unsigned long)); + if (!cmap->flush_needed) + goto free_cmap; + + /* Alloc array for possible remote "destination" CPUs */ + cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * + sizeof(struct bpf_cpu_map_entry *), + cmap->map.numa_node); + if (!cmap->cpu_map) + 
goto free_percpu; + + return &cmap->map; +free_percpu: + free_percpu(cmap->flush_needed); +free_cmap: + kfree(cmap); + return ERR_PTR(err); +} + +void __cpu_map_queue_destructor(void *ptr) +{ + /* The tear-down procedure should have made sure that queue is + * empty. See __cpu_map_entry_replace() and work-queue + * invoked cpu_map_kthread_stop(). Catch any broken behaviour + * gracefully and warn once. + */ + if (WARN_ON_ONCE(ptr)) + page_frag_free(ptr); +} + +static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) +{ + if (atomic_dec_and_test(&rcpu->refcnt)) { + /* The queue should be empty at this point */ + ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor); + kfree(rcpu->queue); + kfree(rcpu); + } +} + +static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) +{ + atomic_inc(&rcpu->refcnt); +} + +/* called from workqueue, to workaround syscall using preempt_disable */ +static void cpu_map_kthread_stop(struct work_struct *work) +{ + struct bpf_cpu_map_entry *rcpu; + + rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); + + /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier, + * as it waits until all in-flight call_rcu() callbacks complete. + */ + rcu_barrier(); + + /* kthread_stop will wake_up_process and wait for it to complete */ + kthread_stop(rcpu->kthread); +} + +static int cpu_map_kthread_run(void *data) +{ + struct bpf_cpu_map_entry *rcpu = data; + + set_current_state(TASK_INTERRUPTIBLE); + + /* When kthread gives stop order, then rcpu have been disconnected + * from map, thus no new packets can enter. Remaining in-flight + * per CPU stored packets are flushed to this queue. Wait honoring + * kthread_stop signal until queue is empty. + */ + while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { + struct xdp_pkt *xdp_pkt; + + schedule(); + /* Do work */ + while ((xdp_pkt = ptr_ring_consume(rcpu->queue))) { + /* For now just "refcnt-free" */ + page_frag_free(xdp_pkt); + } + __set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + put_cpu_map_entry(rcpu); + return 0; +} + +struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id) +{ + gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN; + struct bpf_cpu_map_entry *rcpu; + int numa, err; + + /* Have map->numa_node, but choose node of redirect target CPU */ + numa = cpu_to_node(cpu); + + rcpu = kzalloc_node(sizeof(*rcpu), gfp, numa); + if (!rcpu) + return NULL; + + /* Alloc percpu bulkq */ + rcpu->bulkq = __alloc_percpu_gfp(sizeof(*rcpu->bulkq), + sizeof(void *), gfp); + if (!rcpu->bulkq) + goto free_rcu; + + /* Alloc queue */ + rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa); + if (!rcpu->queue) + goto free_bulkq; + + err = ptr_ring_init(rcpu->queue, qsize, gfp); + if (err) + goto free_queue; + + rcpu->qsize = qsize; + + /* Setup kthread */ + rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa, + "cpumap/%d/map:%d", cpu, map_id); + if (IS_ERR(rcpu->kthread)) + goto free_ptr_ring; + + get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */ + get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */ + + /* Make sure kthread runs on a single CPU */ + kthread_bind(rcpu->kthread, cpu); + wake_up_process(rcpu->kthread); + + return rcpu; + +free_ptr_ring: + ptr_ring_cleanup(rcpu->queue, NULL); +free_queue: + kfree(rcpu->queue); +free_bulkq: + free_percpu(rcpu->bulkq); +free_rcu: + kfree(rcpu); + return NULL; +} + +void __cpu_map_entry_free(struct rcu_head *rcu) +{ + struct bpf_cpu_map_entry *rcpu; + int 
cpu; + + /* This cpu_map_entry have been disconnected from map and one + * RCU graze-period have elapsed. Thus, XDP cannot queue any + * new packets and cannot change/set flush_needed that can + * find this entry. + */ + rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu); + + /* Flush remaining packets in percpu bulkq */ + for_each_online_cpu(cpu) { + struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu); + + /* No concurrent bq_enqueue can run at this point */ + bq_flush_to_queue(rcpu, bq); + } + free_percpu(rcpu->bulkq); + /* Cannot kthread_stop() here, last put free rcpu resources */ + put_cpu_map_entry(rcpu); +} + +/* After xchg pointer to bpf_cpu_map_entry, use the call_rcu() to + * ensure any driver rcu critical sections have completed, but this + * does not guarantee a flush has happened yet. Because driver side + * rcu_read_lock/unlock only protects the running XDP program. The + * atomic xchg and NULL-ptr check in __cpu_map_flush() makes sure a + * pending flush op doesn't fail. + * + * The bpf_cpu_map_entry is still used by the kthread, and there can + * still be pending packets (in queue and percpu bulkq). A refcnt + * makes sure to last user (kthread_stop vs. call_rcu) free memory + * resources. + * + * The rcu callback __cpu_map_entry_free flush remaining packets in + * percpu bulkq to queue. Due to caller map_delete_elem() disable + * preemption, cannot call kthread_stop() to make sure queue is empty. + * Instead a work_queue is started for stopping kthread, + * cpu_map_kthread_stop, which waits for an RCU graze period before + * stopping kthread, emptying the queue. + */ +void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, + u32 key_cpu, struct bpf_cpu_map_entry *rcpu) +{ + struct bpf_cpu_map_entry *old_rcpu; + + old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu); + if (old_rcpu) { + call_rcu(&old_rcpu->rcu, __cpu_map_entry_free); + INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop); + schedule_work(&old_rcpu->kthread_stop_wq); + } +} + +int cpu_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + u32 key_cpu = *(u32 *)key; + + if (key_cpu >= map->max_entries) + return -EINVAL; + + /* notice caller map_delete_elem() use preempt_disable() */ + __cpu_map_entry_replace(cmap, key_cpu, NULL); + return 0; +} + +int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + struct bpf_cpu_map_entry *rcpu; + + /* Array index key correspond to CPU number */ + u32 key_cpu = *(u32 *)key; + /* Value is the queue size */ + u32 qsize = *(u32 *)value; + + if (unlikely(map_flags > BPF_EXIST)) + return -EINVAL; + if (unlikely(key_cpu >= cmap->map.max_entries)) + return -E2BIG; + if (unlikely(map_flags == BPF_NOEXIST)) + return -EEXIST; + if (unlikely(qsize > 16384)) /* sanity limit on qsize */ + return -EOVERFLOW; + + /* Make sure CPU is a valid possible cpu */ + if (!cpu_possible(key_cpu)) + return -ENODEV; + + if (qsize == 0) { + rcpu = NULL; /* Same as deleting */ + } else { + /* Updating qsize cause re-allocation of bpf_cpu_map_entry */ + rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id); + if (!rcpu) + return -ENOMEM; + } + rcu_read_lock(); + __cpu_map_entry_replace(cmap, key_cpu, rcpu); + rcu_read_unlock(); + return 0; +} + +void cpu_map_free(struct bpf_map *map) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + int cpu; + u32 i; + + /* At this point 
bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, + * so the bpf programs (can be more than one that used this map) were + * disconnected from events. Wait for outstanding critical sections in + * these programs to complete. The rcu critical section only guarantees + * no further "XDP/bpf-side" reads against bpf_cpu_map->cpu_map. + * It does __not__ ensure pending flush operations (if any) are + * complete. + */ + synchronize_rcu(); + + /* To ensure all pending flush operations have completed wait for flush + * bitmap to indicate all flush_needed bits to be zero on _all_ cpus. + * Because the above synchronize_rcu() ensures the map is disconnected + * from the program we can assume no new bits will be set. + */ + for_each_online_cpu(cpu) { + unsigned long *bitmap = per_cpu_ptr(cmap->flush_needed, cpu); + + while (!bitmap_empty(bitmap, cmap->map.max_entries)) + cond_resched(); + } + + /* For cpu_map the remote CPUs can still be using the entries + * (struct bpf_cpu_map_entry). + */ + for (i = 0; i < cmap->map.max_entries; i++) { + struct bpf_cpu_map_entry *rcpu; + + rcpu = READ_ONCE(cmap->cpu_map[i]); + if (!rcpu) + continue; + + /* bq flush and cleanup happens after RCU graze-period */ + __cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */ + } + free_percpu(cmap->flush_needed); + bpf_map_area_free(cmap->cpu_map); + kfree(cmap); +} + +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + struct bpf_cpu_map_entry *rcpu; + + if (key >= map->max_entries) + return NULL; + + rcpu = READ_ONCE(cmap->cpu_map[key]); + return rcpu; +} + +static void *cpu_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_cpu_map_entry *rcpu = + __cpu_map_lookup_elem(map, *(u32 *)key); + + return rcpu ? &rcpu->qsize : NULL; +} + +static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + u32 index = key ? *(u32 *)key : U32_MAX; + u32 *next = next_key; + + if (index >= cmap->map.max_entries) { + *next = 0; + return 0; + } + + if (index == cmap->map.max_entries - 1) + return -ENOENT; + *next = index + 1; + return 0; +} + +const struct bpf_map_ops cpu_map_ops = { + .map_alloc = cpu_map_alloc, + .map_free = cpu_map_free, + .map_delete_elem = cpu_map_delete_elem, + .map_update_elem = cpu_map_update_elem, + .map_lookup_elem = cpu_map_lookup_elem, + .map_get_next_key = cpu_map_get_next_key, +}; + +static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, + struct xdp_bulk_queue *bq) +{ + struct ptr_ring *q; + int i; + + if (unlikely(!bq->count)) + return 0; + + q = rcpu->queue; + spin_lock(&q->producer_lock); + + for (i = 0; i < bq->count; i++) { + void *xdp_pkt = bq->q[i]; + int err; + + err = __ptr_ring_produce(q, xdp_pkt); + if (err) { + /* Free xdp_pkt */ + page_frag_free(xdp_pkt); + } + } + bq->count = 0; + spin_unlock(&q->producer_lock); + + return 0; +} + +/* Notice: Will change in later patch */ +struct xdp_pkt { + void *data; + u16 len; + u16 headroom; +}; + +/* Runs under RCU-read-side, plus in softirq under NAPI protection. + * Thus, safe percpu variable access. 
+ */ +int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt) +{ + struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); + + if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) + bq_flush_to_queue(rcpu, bq); + + /* Notice, xdp_buff/page MUST be queued here, long enough for + * driver to code invoking us to finished, due to driver + * (e.g. ixgbe) recycle tricks based on page-refcnt. + * + * Thus, incoming xdp_pkt is always queued here (else we race + * with another CPU on page-refcnt and remaining driver code). + * Queue time is very short, as driver will invoke flush + * operation, when completing napi->poll call. + */ + bq->q[bq->count++] = xdp_pkt; + return 0; +} + +void __cpu_map_insert_ctx(struct bpf_map *map, u32 bit) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed); + + __set_bit(bit, bitmap); +} + +void __cpu_map_flush(struct bpf_map *map) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed); + u32 bit; + + /* The napi->poll softirq makes sure __cpu_map_insert_ctx() + * and __cpu_map_flush() happen on same CPU. Thus, the percpu + * bitmap indicate which percpu bulkq have packets. + */ + for_each_set_bit(bit, bitmap, map->max_entries) { + struct bpf_cpu_map_entry *rcpu = READ_ONCE(cmap->cpu_map[bit]); + struct xdp_bulk_queue *bq; + + /* This is possible if entry is removed by user space + * between xdp redirect and flush op. + */ + if (unlikely(!rcpu)) + continue; + + __clear_bit(bit, bitmap); + + /* Flush all frames in bulkq to real queue */ + bq = this_cpu_ptr(rcpu->bulkq); + bq_flush_to_queue(rcpu, bq); + + /* If already running, costs spin_lock_irqsave + smb_mb */ + wake_up_process(rcpu->kthread); + } +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d124e702e040..54fba06942f5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -592,6 +592,12 @@ static int map_update_elem(union bpf_attr *attr) if (copy_from_user(value, uvalue, value_size) != 0) goto free_value; + /* Need to create a kthread, thus must support schedule */ + if (map->map_type == BPF_MAP_TYPE_CPUMAP) { + err = map->ops->map_update_elem(map, key, value, attr->flags); + goto out; + } + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from * inside bpf map update or delete otherwise deadlocks are possible */ @@ -622,7 +628,7 @@ static int map_update_elem(union bpf_attr *attr) } __this_cpu_dec(bpf_prog_active); preempt_enable(); - +out: if (!err) trace_bpf_map_update_elem(map, ufd, key, value); free_value: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9755279d94cb..cefa64be9a2f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1444,6 +1444,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (func_id != BPF_FUNC_redirect_map) goto error; break; + /* Restrict bpf side of cpumap, open when use-cases appear */ + case BPF_MAP_TYPE_CPUMAP: + if (func_id != BPF_FUNC_redirect_map) + goto error; + break; case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: if (func_id != BPF_FUNC_map_lookup_elem) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index fb4fb81ce5b0..fa93033dc521 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -112,6 +112,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_CPUMAP, }; enum bpf_prog_type 
{ From 9c270af37bb62e708e3e4415d653ce73e713df02 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:34 +0200 Subject: [PATCH 1068/2177] bpf: XDP_REDIRECT enable use of cpumap This patch connects cpumap to the xdp_do_redirect_map infrastructure. Still no SKB allocation are done yet. The XDP frames are transferred to the other CPU, but they are simply refcnt decremented on the remote CPU. This served as a good benchmark for measuring the overhead of remote refcnt decrement. If driver page recycle cache is not efficient then this, exposes a bottleneck in the page allocator. A shout-out to MST's ptr_ring, which is the secret behind is being so efficient to transfer memory pointers between CPUs, without constantly bouncing cache-lines between CPUs. V3: Handle !CONFIG_BPF_SYSCALL pointed out by kbuild test robot. V4: Make Generic-XDP aware of cpumap type, but don't allow redirect yet, as implementation require a separate upstream discussion. V5: - Fix a maybe-uninitialized pointed out by kbuild test robot. - Restrict bpf-prog side access to cpumap, open when use-cases appear - Implement cpu_map_enqueue() as a more simple void pointer enqueue V6: - Allow cpumap type for usage in helper bpf_redirect_map, general bpf-prog side restriction moved to earlier patch. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/bpf.h | 31 +++++++- include/trace/events/xdp.h | 10 ++- kernel/bpf/cpumap.c | 22 +++++- kernel/bpf/verifier.c | 3 +- net/core/filter.c | 148 ++++++++++++++++++++++++++++--------- 5 files changed, 176 insertions(+), 38 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4373125de1f3..6d4dd844828a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -355,6 +355,13 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); +void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); +void __cpu_map_flush(struct bpf_map *map); +struct xdp_buff; +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, + struct net_device *dev_rx); + /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) { @@ -362,7 +369,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) attr->numa_node : NUMA_NO_NODE; } -#else +#else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { return ERR_PTR(-EOPNOTSUPP); @@ -425,6 +432,28 @@ static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) static inline void __dev_map_flush(struct bpf_map *map) { } + +static inline +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) +{ + return NULL; +} + +static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index) +{ +} + +static inline void __cpu_map_flush(struct bpf_map *map) +{ +} + +struct xdp_buff; +static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, + struct xdp_buff *xdp, + struct net_device *dev_rx) +{ + return 0; +} #endif /* CONFIG_BPF_SYSCALL */ #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 4e16c43fba10..eb2ece96c1a2 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -136,12 +136,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, 
xdp_redirect_map_err, __entry->map_id, __entry->map_index) ); +#define devmap_ifindex(fwd, map) \ + (!fwd ? 0 : \ + (!map ? 0 : \ + ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ + ((struct net_device *)fwd)->ifindex : 0))) + #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ - trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \ + trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ 0, map, idx) #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ - trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \ + trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \ err, map, idx) #endif /* _TRACE_XDP_H */ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index e1e25ddba038..768da6a2c265 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -500,7 +500,7 @@ struct xdp_pkt { /* Runs under RCU-read-side, plus in softirq under NAPI protection. * Thus, safe percpu variable access. */ -int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt) +static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt) { struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); @@ -520,6 +520,26 @@ int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt) return 0; } +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, + struct net_device *dev_rx) +{ + struct xdp_pkt *xdp_pkt; + int headroom; + + /* For now this is just used as a void pointer to data_hard_start. + * Followup patch will generalize this. + */ + xdp_pkt = xdp->data_hard_start; + + /* Fake writing into xdp_pkt->data to measure overhead */ + headroom = xdp->data - xdp->data_hard_start; + if (headroom < sizeof(*xdp_pkt)) + xdp_pkt->data = xdp->data; + + bq_enqueue(rcpu, xdp_pkt); + return 0; +} + void __cpu_map_insert_ctx(struct bpf_map *map, u32 bit) { struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cefa64be9a2f..e4d5136725a2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1486,7 +1486,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, goto error; break; case BPF_FUNC_redirect_map: - if (map->map_type != BPF_MAP_TYPE_DEVMAP) + if (map->map_type != BPF_MAP_TYPE_DEVMAP && + map->map_type != BPF_MAP_TYPE_CPUMAP) goto error; break; case BPF_FUNC_sk_redirect_map: diff --git a/net/core/filter.c b/net/core/filter.c index 140fa9f9c0f4..4d88e0665c41 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2526,10 +2526,36 @@ static int __bpf_tx_xdp(struct net_device *dev, err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); if (err) return err; - if (map) + dev->netdev_ops->ndo_xdp_flush(dev); + return 0; +} + +static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, + struct bpf_map *map, + struct xdp_buff *xdp, + u32 index) +{ + int err; + + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + struct net_device *dev = fwd; + + if (!dev->netdev_ops->ndo_xdp_xmit) + return -EOPNOTSUPP; + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + if (err) + return err; __dev_map_insert_ctx(map, index); - else - dev->netdev_ops->ndo_xdp_flush(dev); + + } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { + struct bpf_cpu_map_entry *rcpu = fwd; + + err = cpu_map_enqueue(rcpu, xdp, dev_rx); + if (err) + return err; + __cpu_map_insert_ctx(map, index); + } return 0; } @@ -2539,11 +2565,33 @@ void xdp_do_flush_map(void) struct bpf_map *map = ri->map_to_flush; ri->map_to_flush = NULL; - if (map) - __dev_map_flush(map); + if 
(map) { + switch (map->map_type) { + case BPF_MAP_TYPE_DEVMAP: + __dev_map_flush(map); + break; + case BPF_MAP_TYPE_CPUMAP: + __cpu_map_flush(map); + break; + default: + break; + } + } } EXPORT_SYMBOL_GPL(xdp_do_flush_map); +static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) +{ + switch (map->map_type) { + case BPF_MAP_TYPE_DEVMAP: + return __dev_map_lookup_elem(map, index); + case BPF_MAP_TYPE_CPUMAP: + return __cpu_map_lookup_elem(map, index); + default: + return NULL; + } +} + static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, unsigned long aux) { @@ -2556,8 +2604,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, struct redirect_info *ri = this_cpu_ptr(&redirect_info); unsigned long map_owner = ri->map_owner; struct bpf_map *map = ri->map; - struct net_device *fwd = NULL; u32 index = ri->ifindex; + void *fwd = NULL; int err; ri->ifindex = 0; @@ -2570,7 +2618,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, goto err; } - fwd = __dev_map_lookup_elem(map, index); + fwd = __xdp_map_lookup_elem(map, index); if (!fwd) { err = -EINVAL; goto err; @@ -2578,7 +2626,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, if (ri->map_to_flush && ri->map_to_flush != map) xdp_do_flush_map(); - err = __bpf_tx_xdp(fwd, map, xdp, index); + err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index); if (unlikely(err)) goto err; @@ -2620,54 +2668,88 @@ err: } EXPORT_SYMBOL_GPL(xdp_do_redirect); -int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog) +static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) +{ + unsigned int len; + + if (unlikely(!(fwd->flags & IFF_UP))) + return -ENETDOWN; + + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; + if (skb->len > len) + return -EMSGSIZE; + + return 0; +} + +int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, + struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); unsigned long map_owner = ri->map_owner; struct bpf_map *map = ri->map; struct net_device *fwd = NULL; u32 index = ri->ifindex; - unsigned int len; int err = 0; ri->ifindex = 0; ri->map = NULL; ri->map_owner = 0; - if (map) { - if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { - err = -EFAULT; - map = NULL; - goto err; - } - fwd = __dev_map_lookup_elem(map, index); - } else { - fwd = dev_get_by_index_rcu(dev_net(dev), index); + if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { + err = -EFAULT; + map = NULL; + goto err; } + fwd = __xdp_map_lookup_elem(map, index); if (unlikely(!fwd)) { err = -EINVAL; goto err; } - if (unlikely(!(fwd->flags & IFF_UP))) { - err = -ENETDOWN; + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + goto err; + skb->dev = fwd; + } else { + /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ + err = -EBADRQC; goto err; } - len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; - if (skb->len > len) { - err = -EMSGSIZE; - goto err; - } - - skb->dev = fwd; - map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index) - : _trace_xdp_redirect(dev, xdp_prog, index); + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); return 0; err: - map ? 
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err) - : _trace_xdp_redirect_err(dev, xdp_prog, index, err); + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); + return err; +} + +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + u32 index = ri->ifindex; + struct net_device *fwd; + int err = 0; + + if (ri->map) + return xdp_do_generic_redirect_map(dev, skb, xdp_prog); + + ri->ifindex = 0; + fwd = dev_get_by_index_rcu(dev_net(dev), index); + if (unlikely(!fwd)) { + err = -EINVAL; + goto err; + } + + if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + goto err; + + skb->dev = fwd; + _trace_xdp_redirect(dev, xdp_prog, index); + return 0; +err: + _trace_xdp_redirect_err(dev, xdp_prog, index, err); return err; } EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); From 1c601d829ab0d7ac3ac44853f83db2206afe67fc Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:39 +0200 Subject: [PATCH 1069/2177] bpf: cpumap xdp_buff to skb conversion and allocation This patch makes cpumap functional, by adding SKB allocation and invoking the network stack on the dequeuing CPU. For constructing the SKB on the remote CPU, the xdp_buff in converted into a struct xdp_pkt, and it mapped into the top headroom of the packet, to avoid allocating separate mem. For now, struct xdp_pkt is just a cpumap internal data structure, with info carried between enqueue to dequeue. If a driver doesn't have enough headroom it is simply dropped, with return code -EOVERFLOW. This will be picked up the xdp tracepoint infrastructure, to allow users to catch this. V2: take into account xdp->data_meta V4: - Drop busypoll tricks, keeping it more simple. - Skip RPS and Generic-XDP-recursive-reinjection, suggested by Alexei V5: correct RCU read protection around __netif_receive_skb_core. V6: Setting TASK_RUNNING vs TASK_INTERRUPTIBLE based on talk with Rik van Riel Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + kernel/bpf/cpumap.c | 152 ++++++++++++++++++++++++++++++++------ net/core/dev.c | 27 +++++++ 3 files changed, 158 insertions(+), 22 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 31bb3010c69b..bf014afcb914 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3260,6 +3260,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); +int netif_receive_skb_core(struct sk_buff *skb); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 768da6a2c265..ee7adf4352dd 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -25,6 +25,9 @@ #include #include +#include /* netif_receive_skb_core */ +#include /* eth_type_trans */ + /* General idea: XDP packets getting XDP redirected to another CPU, * will maximum be stored/queued for one driver ->poll() call. 
It is * guaranteed that setting flush bit and flush operation happen on @@ -179,6 +182,92 @@ static void cpu_map_kthread_stop(struct work_struct *work) kthread_stop(rcpu->kthread); } +/* For now, xdp_pkt is a cpumap internal data structure, with info + * carried between enqueue to dequeue. It is mapped into the top + * headroom of the packet, to avoid allocating separate mem. + */ +struct xdp_pkt { + void *data; + u16 len; + u16 headroom; + u16 metasize; + struct net_device *dev_rx; +}; + +/* Convert xdp_buff to xdp_pkt */ +static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp) +{ + struct xdp_pkt *xdp_pkt; + int metasize; + int headroom; + + /* Assure headroom is available for storing info */ + headroom = xdp->data - xdp->data_hard_start; + metasize = xdp->data - xdp->data_meta; + metasize = metasize > 0 ? metasize : 0; + if ((headroom - metasize) < sizeof(*xdp_pkt)) + return NULL; + + /* Store info in top of packet */ + xdp_pkt = xdp->data_hard_start; + + xdp_pkt->data = xdp->data; + xdp_pkt->len = xdp->data_end - xdp->data; + xdp_pkt->headroom = headroom - sizeof(*xdp_pkt); + xdp_pkt->metasize = metasize; + + return xdp_pkt; +} + +struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, + struct xdp_pkt *xdp_pkt) +{ + unsigned int frame_size; + void *pkt_data_start; + struct sk_buff *skb; + + /* build_skb need to place skb_shared_info after SKB end, and + * also want to know the memory "truesize". Thus, need to + * know the memory frame size backing xdp_buff. + * + * XDP was designed to have PAGE_SIZE frames, but this + * assumption is not longer true with ixgbe and i40e. It + * would be preferred to set frame_size to 2048 or 4096 + * depending on the driver. + * frame_size = 2048; + * frame_len = frame_size - sizeof(*xdp_pkt); + * + * Instead, with info avail, skb_shared_info in placed after + * packet len. This, unfortunately fakes the truesize. + * Another disadvantage of this approach, the skb_shared_info + * is not at a fixed memory location, with mixed length + * packets, which is bad for cache-line hotness. + */ + frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + pkt_data_start = xdp_pkt->data - xdp_pkt->headroom; + skb = build_skb(pkt_data_start, frame_size); + if (!skb) + return NULL; + + skb_reserve(skb, xdp_pkt->headroom); + __skb_put(skb, xdp_pkt->len); + if (xdp_pkt->metasize) + skb_metadata_set(skb, xdp_pkt->metasize); + + /* Essential SKB info: protocol and skb->dev */ + skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx); + + /* Optional SKB info, currently missing: + * - HW checksum info (skb->ip_summed) + * - HW RX hash (skb_set_hash) + * - RX ring dev queue index (skb_record_rx_queue) + */ + + return skb; +} + static int cpu_map_kthread_run(void *data) { struct bpf_cpu_map_entry *rcpu = data; @@ -191,15 +280,45 @@ static int cpu_map_kthread_run(void *data) * kthread_stop signal until queue is empty. 
*/ while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { + unsigned int processed = 0, drops = 0; struct xdp_pkt *xdp_pkt; - schedule(); - /* Do work */ - while ((xdp_pkt = ptr_ring_consume(rcpu->queue))) { - /* For now just "refcnt-free" */ - page_frag_free(xdp_pkt); + /* Release CPU reschedule checks */ + if (__ptr_ring_empty(rcpu->queue)) { + __set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } else { + cond_resched(); } - __set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_RUNNING); + + /* Process packets in rcpu->queue */ + local_bh_disable(); + /* + * The bpf_cpu_map_entry is single consumer, with this + * kthread CPU pinned. Lockless access to ptr_ring + * consume side valid as no-resize allowed of queue. + */ + while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) { + struct sk_buff *skb; + int ret; + + skb = cpu_map_build_skb(rcpu, xdp_pkt); + if (!skb) { + page_frag_free(xdp_pkt); + continue; + } + + /* Inject into network stack */ + ret = netif_receive_skb_core(skb); + if (ret == NET_RX_DROP) + drops++; + + /* Limit BH-disable period */ + if (++processed == 8) + break; + } + local_bh_enable(); /* resched point, may call do_softirq() */ } __set_current_state(TASK_RUNNING); @@ -490,13 +609,6 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, return 0; } -/* Notice: Will change in later patch */ -struct xdp_pkt { - void *data; - u16 len; - u16 headroom; -}; - /* Runs under RCU-read-side, plus in softirq under NAPI protection. * Thus, safe percpu variable access. */ @@ -524,17 +636,13 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx) { struct xdp_pkt *xdp_pkt; - int headroom; - /* For now this is just used as a void pointer to data_hard_start. - * Followup patch will generalize this. - */ - xdp_pkt = xdp->data_hard_start; + xdp_pkt = convert_to_xdp_pkt(xdp); + if (!xdp_pkt) + return -EOVERFLOW; - /* Fake writing into xdp_pkt->data to measure overhead */ - headroom = xdp->data - xdp->data_hard_start; - if (headroom < sizeof(*xdp_pkt)) - xdp_pkt->data = xdp->data; + /* Info needed when constructing SKB on remote CPU */ + xdp_pkt->dev_rx = dev_rx; bq_enqueue(rcpu, xdp_pkt); return 0; diff --git a/net/core/dev.c b/net/core/dev.c index d2b20e73080e..cf5894f0e6eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4492,6 +4492,33 @@ out: return ret; } +/** + * netif_receive_skb_core - special purpose version of netif_receive_skb + * @skb: buffer to process + * + * More direct receive version of netif_receive_skb(). It should + * only be used by callers that have a need to skip RPS and Generic XDP. + * Caller must also take care of handling if (page_is_)pfmemalloc. + * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb_core(struct sk_buff *skb) +{ + int ret; + + rcu_read_lock(); + ret = __netif_receive_skb_core(skb, false); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(netif_receive_skb_core); + static int __netif_receive_skb(struct sk_buff *skb) { int ret; From f9419f7bd7a5318b636a941a0214c5cdfa6f6530 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:44 +0200 Subject: [PATCH 1070/2177] bpf: cpumap add tracepoints This adds two tracepoint to the cpumap. One for the enqueue side trace_xdp_cpumap_enqueue() and one for the kthread dequeue side trace_xdp_cpumap_kthread(). 
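A minimal sketch of a dequeue-side consumer, assuming the bpf_helpers.h conventions used in samples/bpf and the ctx layout implied by the TP_STRUCT__entry below (struct, map and section names here are illustrative):

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

/* Mirrors TP_STRUCT__entry of xdp_cpumap_kthread; the first 8 bytes of a
 * tracepoint ctx are not accessible from bpf code.
 */
struct cpumap_kthread_ctx {
	__u64 __pad;		// offset:0;  size:8
	int map_id;		// offset:8;  size:4; signed:1
	__u32 act;		// offset:12; size:4; signed:0
	int cpu;		// offset:16; size:4; signed:1
	unsigned int drops;	// offset:20; size:4; signed:0
	unsigned int processed;	// offset:24; size:4; signed:0
	int sched;		// offset:28; size:4; signed:1
};

struct kthread_stats {
	__u64 processed;
	__u64 dropped;
};

struct bpf_map_def SEC("maps") kthread_cnt = {
	.type        = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(struct kthread_stats),
	.max_entries = 1,
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct kthread_stats *s;
	__u32 key = 0;

	s = bpf_map_lookup_elem(&kthread_cnt, &key);
	if (!s)
		return 0;

	/* Accumulate per-bulk counters reported by the cpumap kthread */
	s->processed += ctx->processed;
	s->dropped   += ctx->drops;
	return 0;
}

char _license[] SEC("license") = "GPL";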
To mitigate the tracepoint overhead, these are invoked during the enqueue/dequeue bulking phases, thus amortizing the cost. The obvious use-cases are for debugging and monitoring. The non-intuitive use-case is using these as a feedback loop to know the system load. One can imagine auto-scaling by reducing, adding or activating more worker CPUs on demand. V4: tracepoint remove time_limit info, instead add sched info V8: intro struct bpf_cpu_map_entry members cpu+map_id in this patch Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 70 ++++++++++++++++++++++++++++++++++++++ kernel/bpf/cpumap.c | 24 ++++++++++--- 2 files changed, 89 insertions(+), 5 deletions(-) diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index eb2ece96c1a2..0c8dec61987e 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -150,6 +150,76 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \ err, map, idx) +TRACE_EVENT(xdp_cpumap_kthread, + + TP_PROTO(int map_id, unsigned int processed, unsigned int drops, + int sched), + + TP_ARGS(map_id, processed, drops, sched), + + TP_STRUCT__entry( + __field(int, map_id) + __field(u32, act) + __field(int, cpu) + __field(unsigned int, drops) + __field(unsigned int, processed) + __field(int, sched) + ), + + TP_fast_assign( + __entry->map_id = map_id; + __entry->act = XDP_REDIRECT; + __entry->cpu = smp_processor_id(); + __entry->drops = drops; + __entry->processed = processed; + __entry->sched = sched; + ), + + TP_printk("kthread" + " cpu=%d map_id=%d action=%s" + " processed=%u drops=%u" + " sched=%d", + __entry->cpu, __entry->map_id, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->processed, __entry->drops, + __entry->sched) +); + +TRACE_EVENT(xdp_cpumap_enqueue, + + TP_PROTO(int map_id, unsigned int processed, unsigned int drops, + int to_cpu), + + TP_ARGS(map_id, processed, drops, to_cpu), + + TP_STRUCT__entry( + __field(int, map_id) + __field(u32, act) + __field(int, cpu) + __field(unsigned int, drops) + __field(unsigned int, processed) + __field(int, to_cpu) + ), + + TP_fast_assign( + __entry->map_id = map_id; + __entry->act = XDP_REDIRECT; + __entry->cpu = smp_processor_id(); + __entry->drops = drops; + __entry->processed = processed; + __entry->to_cpu = to_cpu; + ), + + TP_printk("enqueue" + " cpu=%d map_id=%d action=%s" + " processed=%u drops=%u" + " to_cpu=%d", + __entry->cpu, __entry->map_id, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->processed, __entry->drops, + __entry->to_cpu) +); + #endif /* _TRACE_XDP_H */ #include diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index ee7adf4352dd..b4358d84ddf1 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -24,6 +24,7 @@ #include #include #include +#include #include /* netif_receive_skb_core */ #include /* eth_type_trans */ @@ -43,6 +44,8 @@ struct xdp_bulk_queue { /* Struct for every remote "destination" CPU in map */ struct bpf_cpu_map_entry { + u32 cpu; /* kthread CPU and map index */ + int map_id; /* Back reference to map */ u32 qsize; /* Queue size placeholder for map lookup */ /* XDP can run multiple RX-ring queues, need __percpu enqueue store */ @@ -280,15 +283,16 @@ static int cpu_map_kthread_run(void *data) * kthread_stop signal until queue is empty. 
*/ while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { - unsigned int processed = 0, drops = 0; + unsigned int processed = 0, drops = 0, sched = 0; struct xdp_pkt *xdp_pkt; /* Release CPU reschedule checks */ if (__ptr_ring_empty(rcpu->queue)) { __set_current_state(TASK_INTERRUPTIBLE); schedule(); + sched = 1; } else { - cond_resched(); + sched = cond_resched(); } __set_current_state(TASK_RUNNING); @@ -318,6 +322,9 @@ static int cpu_map_kthread_run(void *data) if (++processed == 8) break; } + /* Feedback loop via tracepoint */ + trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched); + local_bh_enable(); /* resched point, may call do_softirq() */ } __set_current_state(TASK_RUNNING); @@ -354,7 +361,9 @@ struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id) if (err) goto free_queue; - rcpu->qsize = qsize; + rcpu->cpu = cpu; + rcpu->map_id = map_id; + rcpu->qsize = qsize; /* Setup kthread */ rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa, @@ -584,6 +593,8 @@ const struct bpf_map_ops cpu_map_ops = { static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, struct xdp_bulk_queue *bq) { + unsigned int processed = 0, drops = 0; + const int to_cpu = rcpu->cpu; struct ptr_ring *q; int i; @@ -599,13 +610,16 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, err = __ptr_ring_produce(q, xdp_pkt); if (err) { - /* Free xdp_pkt */ - page_frag_free(xdp_pkt); + drops++; + page_frag_free(xdp_pkt); /* Free xdp_pkt */ } + processed++; } bq->count = 0; spin_unlock(&q->producer_lock); + /* Feedback loop via tracepoints */ + trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu); return 0; } From fad3917e361b115f776563366415ffb2fc706bf1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:49 +0200 Subject: [PATCH 1071/2177] samples/bpf: add cpumap sample program xdp_redirect_cpu This sample program show how to use cpumap and the associated tracepoints. It provides command line stats, which shows how the XDP-RX process, cpumap-enqueue and cpumap kthread dequeue is cooperating on a per CPU basis. It also utilize the xdp_exception and xdp_redirect_err transpoints to allow users quickly to identify setup issues. One issue with ixgbe driver is that the driver reset the link when loading XDP. This reset the procfs smp_affinity settings. Thus, after loading the program, these must be reconfigured. The easiest workaround it to reduce the RX-queue to e.g. two via: # ethtool --set-channels ixgbe1 combined 2 And then add CPUs above 0 and 1, like: # xdp_redirect_cpu --dev ixgbe1 --prog 2 --cpu 2 --cpu 3 --cpu 4 Another issue with ixgbe is that the page recycle mechanism is tied to the RX-ring size. And the default setting of 512 elements is too small. This is the same issue with regular devmap XDP_REDIRECT. To overcome this I've been using 1024 rx-ring size: # ethtool -G ixgbe1 rx 1024 tx 1024 V3: - whitespace cleanups - bpf tracepoint cannot access top part of struct V4: - report on kthread sched events, according to tracepoint change - report average bulk enqueue size V5: - bpf_map_lookup_elem on cpumap not allowed from bpf_prog use separate map to mark CPUs not available V6: - correct kthread sched summary output V7: - Added a --stress-mode for concurrently changing underlying cpumap Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. 
Miller --- samples/bpf/Makefile | 4 + samples/bpf/xdp_redirect_cpu_kern.c | 609 ++++++++++++++++++++++++ samples/bpf/xdp_redirect_cpu_user.c | 697 ++++++++++++++++++++++++++++ 3 files changed, 1310 insertions(+) create mode 100644 samples/bpf/xdp_redirect_cpu_kern.c create mode 100644 samples/bpf/xdp_redirect_cpu_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 81f9fcd736b7..3534ccfb5920 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example hostprogs-y += load_sock_ops hostprogs-y += xdp_redirect hostprogs-y += xdp_redirect_map +hostprogs-y += xdp_redirect_cpu hostprogs-y += xdp_monitor hostprogs-y += syscall_tp @@ -84,6 +85,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o +xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o @@ -129,6 +131,7 @@ always += tcp_iw_kern.o always += tcp_clamp_kern.o always += xdp_redirect_kern.o always += xdp_redirect_map_kern.o +always += xdp_redirect_cpu_kern.o always += xdp_monitor_kern.o always += syscall_tp_kern.o @@ -169,6 +172,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf HOSTLOADLIBES_test_map_in_map += -lelf HOSTLOADLIBES_xdp_redirect += -lelf HOSTLOADLIBES_xdp_redirect_map += -lelf +HOSTLOADLIBES_xdp_redirect_cpu += -lelf HOSTLOADLIBES_xdp_monitor += -lelf HOSTLOADLIBES_syscall_tp += -lelf diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c new file mode 100644 index 000000000000..303e9e7161f3 --- /dev/null +++ b/samples/bpf/xdp_redirect_cpu_kern.c @@ -0,0 +1,609 @@ +/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP) + * + * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "bpf_helpers.h" + +#define MAX_CPUS 12 /* WARNING - sync with _user.c */ + +/* Special map type that can XDP_REDIRECT frames to another CPU */ +struct bpf_map_def SEC("maps") cpu_map = { + .type = BPF_MAP_TYPE_CPUMAP, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = MAX_CPUS, +}; + +/* Common stats data record to keep userspace more simple */ +struct datarec { + __u64 processed; + __u64 dropped; + __u64 issue; +}; + +/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success + * feedback. Redirect TX errors can be caught via a tracepoint. 
+ */ +struct bpf_map_def SEC("maps") rx_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 1, +}; + +/* Used by trace point */ +struct bpf_map_def SEC("maps") redirect_err_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 2, + /* TODO: have entries for all possible errno's */ +}; + +/* Used by trace point */ +struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = MAX_CPUS, +}; + +/* Used by trace point */ +struct bpf_map_def SEC("maps") cpumap_kthread_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 1, +}; + +/* Set of maps controlling available CPU, and for iterating through + * selectable redirect CPUs. + */ +struct bpf_map_def SEC("maps") cpus_available = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = MAX_CPUS, +}; +struct bpf_map_def SEC("maps") cpus_count = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; +struct bpf_map_def SEC("maps") cpus_iterator = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; + +/* Used by trace point */ +struct bpf_map_def SEC("maps") exception_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 1, +}; + +/* Helper parse functions */ + +/* Parse Ethernet layer 2, extract network layer 3 offset and protocol + * + * Returns false on error and non-supported ether-type + */ +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +static __always_inline +bool parse_eth(struct ethhdr *eth, void *data_end, + u16 *eth_proto, u64 *l3_offset) +{ + u16 eth_type; + u64 offset; + + offset = sizeof(*eth); + if ((void *)eth + offset > data_end) + return false; + + eth_type = eth->h_proto; + + /* Skip non 802.3 Ethertypes */ + if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN)) + return false; + + /* Handle VLAN tagged packet */ + if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) { + struct vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + offset += sizeof(*vlan_hdr); + if ((void *)eth + offset > data_end) + return false; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + } + /* TODO: Handle double VLAN tagged packet */ + + *eth_proto = ntohs(eth_type); + *l3_offset = offset; + return true; +} + +static __always_inline +u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + nh_off; + struct udphdr *udph; + u16 dport; + + if (iph + 1 > data_end) + return 0; + if (!(iph->protocol == IPPROTO_UDP)) + return 0; + + udph = (void *)(iph + 1); + if (udph + 1 > data_end) + return 0; + + dport = ntohs(udph->dest); + return dport; +} + +static __always_inline +int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static __always_inline +int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off) +{ + void *data_end 
= (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp_cpu_map0") +int xdp_prognum0_no_touch(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct datarec *rec; + u32 *cpu_selected; + u32 cpu_dest; + u32 key = 0; + + /* Only use first entry in cpus_available */ + cpu_selected = bpf_map_lookup_elem(&cpus_available, &key); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + /* Count RX packet in map */ + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_ABORTED; + rec->processed++; + + if (cpu_dest >= MAX_CPUS) { + rec->issue++; + return XDP_ABORTED; + } + + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp_cpu_map1_touch_data") +int xdp_prognum1_touch_data(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + struct datarec *rec; + u32 *cpu_selected; + u32 cpu_dest; + u16 eth_type; + u32 key = 0; + + /* Only use first entry in cpus_available */ + cpu_selected = bpf_map_lookup_elem(&cpus_available, &key); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + /* Validate packet length is minimum Eth header size */ + if (eth + 1 > data_end) + return XDP_ABORTED; + + /* Count RX packet in map */ + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_ABORTED; + rec->processed++; + + /* Read packet data, and use it (drop non 802.3 Ethertypes) */ + eth_type = eth->h_proto; + if (ntohs(eth_type) < ETH_P_802_3_MIN) { + rec->dropped++; + return XDP_DROP; + } + + if (cpu_dest >= MAX_CPUS) { + rec->issue++; + return XDP_ABORTED; + } + + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp_cpu_map2_round_robin") +int xdp_prognum2_round_robin(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + struct datarec *rec; + u32 cpu_dest; + u32 *cpu_lookup; + u32 key0 = 0; + + u32 *cpu_selected; + u32 *cpu_iterator; + u32 *cpu_max; + u32 cpu_idx; + + cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); + if (!cpu_max) + return XDP_ABORTED; + + cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0); + if (!cpu_iterator) + return XDP_ABORTED; + cpu_idx = *cpu_iterator; + + *cpu_iterator += 1; + if (*cpu_iterator == *cpu_max) + *cpu_iterator = 0; + + cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + /* Count RX packet in map */ + rec = bpf_map_lookup_elem(&rx_cnt, &key0); + if (!rec) + return XDP_ABORTED; + rec->processed++; + + if (cpu_dest >= MAX_CPUS) { + rec->issue++; + return XDP_ABORTED; + } + + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp_cpu_map3_proto_separate") +int xdp_prognum3_proto_separate(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + u8 ip_proto = IPPROTO_UDP; + struct datarec *rec; + u16 eth_proto = 0; + u64 l3_offset = 0; + u32 cpu_dest = 0; + u32 cpu_idx = 0; + u32 *cpu_lookup; + u32 key = 0; + + /* Count RX packet in map */ + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_ABORTED; + rec->processed++; + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) + return XDP_PASS; /* Just skip */ + + /* Extract L4 
protocol */ + switch (eth_proto) { + case ETH_P_IP: + ip_proto = get_proto_ipv4(ctx, l3_offset); + break; + case ETH_P_IPV6: + ip_proto = get_proto_ipv6(ctx, l3_offset); + break; + case ETH_P_ARP: + cpu_idx = 0; /* ARP packet handled on separate CPU */ + break; + default: + cpu_idx = 0; + } + + /* Choose CPU based on L4 protocol */ + switch (ip_proto) { + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + cpu_idx = 2; + break; + case IPPROTO_TCP: + cpu_idx = 0; + break; + case IPPROTO_UDP: + cpu_idx = 1; + break; + default: + cpu_idx = 0; + } + + cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_lookup) + return XDP_ABORTED; + cpu_dest = *cpu_lookup; + + if (cpu_dest >= MAX_CPUS) { + rec->issue++; + return XDP_ABORTED; + } + + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp_cpu_map4_ddos_filter_pktgen") +int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + u8 ip_proto = IPPROTO_UDP; + struct datarec *rec; + u16 eth_proto = 0; + u64 l3_offset = 0; + u32 cpu_dest = 0; + u32 cpu_idx = 0; + u16 dest_port; + u32 *cpu_lookup; + u32 key = 0; + + /* Count RX packet in map */ + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_ABORTED; + rec->processed++; + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) + return XDP_PASS; /* Just skip */ + + /* Extract L4 protocol */ + switch (eth_proto) { + case ETH_P_IP: + ip_proto = get_proto_ipv4(ctx, l3_offset); + break; + case ETH_P_IPV6: + ip_proto = get_proto_ipv6(ctx, l3_offset); + break; + case ETH_P_ARP: + cpu_idx = 0; /* ARP packet handled on separate CPU */ + break; + default: + cpu_idx = 0; + } + + /* Choose CPU based on L4 protocol */ + switch (ip_proto) { + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + cpu_idx = 2; + break; + case IPPROTO_TCP: + cpu_idx = 0; + break; + case IPPROTO_UDP: + cpu_idx = 1; + /* DDoS filter UDP port 9 (pktgen) */ + dest_port = get_dest_port_ipv4_udp(ctx, l3_offset); + if (dest_port == 9) { + if (rec) + rec->dropped++; + return XDP_DROP; + } + break; + default: + cpu_idx = 0; + } + + cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_lookup) + return XDP_ABORTED; + cpu_dest = *cpu_lookup; + + if (cpu_dest >= MAX_CPUS) { + rec->issue++; + return XDP_ABORTED; + } + + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + + +char _license[] SEC("license") = "GPL"; + +/*** Trace point code ***/ + +/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct xdp_redirect_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int prog_id; // offset:8; size:4; signed:1; + u32 act; // offset:12 size:4; signed:0; + int ifindex; // offset:16 size:4; signed:1; + int err; // offset:20 size:4; signed:1; + int to_ifindex; // offset:24 size:4; signed:1; + u32 map_id; // offset:28 size:4; signed:0; + int map_index; // offset:32 size:4; signed:1; +}; // offset:36 + +enum { + XDP_REDIRECT_SUCCESS = 0, + XDP_REDIRECT_ERROR = 1 +}; + +static __always_inline +int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx) +{ + u32 key = XDP_REDIRECT_ERROR; + struct datarec *rec; + int err = ctx->err; + + if (!err) + key = XDP_REDIRECT_SUCCESS; + + rec = bpf_map_lookup_elem(&redirect_err_cnt, &key); + if (!rec) + return 0; + rec->dropped += 1; + + return 0; /* Indicate event was filtered (no further processing)*/ + /* + * Returning 1 here would allow e.g. 
a perf-record tracepoint + * to see and record these events, but it doesn't work well + * in-practice as stopping perf-record also unload this + * bpf_prog. Plus, there is additional overhead of doing so. + */ +} + +SEC("tracepoint/xdp/xdp_redirect_err") +int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx) +{ + return xdp_redirect_collect_stat(ctx); +} + +SEC("tracepoint/xdp/xdp_redirect_map_err") +int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx) +{ + return xdp_redirect_collect_stat(ctx); +} + +/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct xdp_exception_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int prog_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int ifindex; // offset:16; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_exception") +int trace_xdp_exception(struct xdp_exception_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&exception_cnt, &key); + if (!rec) + return 1; + rec->dropped += 1; + + return 0; +} + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct cpumap_enqueue_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int to_cpu; // offset:28; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_cpumap_enqueue") +int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) +{ + u32 to_cpu = ctx->to_cpu; + struct datarec *rec; + + if (to_cpu >= MAX_CPUS) + return 1; + + rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); + if (!rec) + return 0; + rec->processed += ctx->processed; + rec->dropped += ctx->drops; + + /* Record bulk events, then userspace can calc average bulk size */ + if (ctx->processed > 0) + rec->issue += 1; + + /* Inception: It's possible to detect overload situations, via + * this tracepoint. This can be used for creating a feedback + * loop to XDP, which can take appropriate actions to mitigate + * this overload situation. 
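A minimal sketch of such a feedback action, as the user-space loader of this sample could implement it: rewrite one slot of the cpus_available array so that packets assigned to that slot are steered to a spare CPU instead. The helper below is illustrative only; the slot/CPU choice and the assumption that the spare CPU already has a cpumap entry are not part of this sample.

static int rebalance_slot(int cpus_available_fd, __u32 slot, __u32 spare_cpu)
{
	/* spare_cpu must already have a cpumap entry (queue allocated),
	 * otherwise redirects to it fail and show up as errors.
	 */
	return bpf_map_update_elem(cpus_available_fd, &slot, &spare_cpu, 0);
}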
+ */ + return 0; +} + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct cpumap_kthread_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int sched; // offset:28; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_cpumap_kthread") +int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); + if (!rec) + return 0; + rec->processed += ctx->processed; + rec->dropped += ctx->drops; + + /* Count times kthread yielded CPU via schedule call */ + if (ctx->sched) + rec->issue++; + + return 0; +} diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c new file mode 100644 index 000000000000..35fec9fecb57 --- /dev/null +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -0,0 +1,697 @@ +/* GPLv2 Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. + */ +static const char *__doc__ = + " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\""; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define MAX_CPUS 12 /* WARNING - sync with _kern.c */ + +/* How many xdp_progs are defined in _kern.c */ +#define MAX_PROG 5 + +/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead + * use bpf/libbpf.h), but cannot as (currently) needed for XDP + * attaching to a device via set_link_xdp_fd() + */ +#include "libbpf.h" +#include "bpf_load.h" + +#include "bpf_util.h" + +static int ifindex = -1; +static char ifname_buf[IF_NAMESIZE]; +static char *ifname; + +static __u32 xdp_flags; + +/* Exit return codes */ +#define EXIT_OK 0 +#define EXIT_FAIL 1 +#define EXIT_FAIL_OPTION 2 +#define EXIT_FAIL_XDP 3 +#define EXIT_FAIL_BPF 4 +#define EXIT_FAIL_MEM 5 + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"dev", required_argument, NULL, 'd' }, + {"skb-mode", no_argument, NULL, 'S' }, + {"debug", no_argument, NULL, 'D' }, + {"sec", required_argument, NULL, 's' }, + {"prognum", required_argument, NULL, 'p' }, + {"qsize", required_argument, NULL, 'q' }, + {"cpu", required_argument, NULL, 'c' }, + {"stress-mode", no_argument, NULL, 'x' }, + {"no-separators", no_argument, NULL, 'z' }, + {0, 0, NULL, 0 } +}; + +static void int_exit(int sig) +{ + fprintf(stderr, + "Interrupted: Removing XDP program on ifindex:%d device:%s\n", + ifindex, ifname); + if (ifindex > -1) + set_link_xdp_fd(ifindex, -1, xdp_flags); + exit(EXIT_OK); +} + +static void usage(char *argv[]) +{ + int i; + + printf("\nDOCUMENTATION:\n%s\n", __doc__); + printf("\n"); + printf(" Usage: %s (options-see-below)\n", argv[0]); + printf(" Listing options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-12s", long_options[i].name); + if (long_options[i].flag != NULL) + printf(" flag (internal value:%d)", + *long_options[i].flag); + else + printf(" short-option: -%c", + long_options[i].val); + printf("\n"); + } + printf("\n"); +} + +/* gettime returns the current time of day in nanoseconds. 
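As a worked instance of the rate math this timestamp feeds (numbers are examples only): two gettime() samples taken 2.0 seconds apart give a period of 2.0e9 ns, i.e. 2.0 s after the division by NANOSEC_PER_SEC in calc_period() below, and if 5,000,000 packets were processed between the samples, calc_pps() reports 5,000,000 / 2.0 = 2,500,000 pps.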
+ * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC) + * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE) + */ +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ +static __u64 gettime(void) +{ + struct timespec t; + int res; + + res = clock_gettime(CLOCK_MONOTONIC, &t); + if (res < 0) { + fprintf(stderr, "Error with gettimeofday! (%i)\n", res); + exit(EXIT_FAIL); + } + return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; +} + +/* Common stats data record shared with _kern.c */ +struct datarec { + __u64 processed; + __u64 dropped; + __u64 issue; +}; +struct record { + __u64 timestamp; + struct datarec total; + struct datarec *cpu; +}; +struct stats_record { + struct record rx_cnt; + struct record redir_err; + struct record kthread; + struct record exception; + struct record enq[MAX_CPUS]; +}; + +static bool map_collect_percpu(int fd, __u32 key, struct record *rec) +{ + /* For percpu maps, userspace gets a value per possible CPU */ + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct datarec values[nr_cpus]; + __u64 sum_processed = 0; + __u64 sum_dropped = 0; + __u64 sum_issue = 0; + int i; + + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + return false; + } + /* Get time as close as possible to reading map contents */ + rec->timestamp = gettime(); + + /* Record and sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + rec->cpu[i].processed = values[i].processed; + sum_processed += values[i].processed; + rec->cpu[i].dropped = values[i].dropped; + sum_dropped += values[i].dropped; + rec->cpu[i].issue = values[i].issue; + sum_issue += values[i].issue; + } + rec->total.processed = sum_processed; + rec->total.dropped = sum_dropped; + rec->total.issue = sum_issue; + return true; +} + +static struct datarec *alloc_record_per_cpu(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct datarec *array; + size_t size; + + size = sizeof(struct datarec) * nr_cpus; + array = malloc(size); + memset(array, 0, size); + if (!array) { + fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); + exit(EXIT_FAIL_MEM); + } + return array; +} + +static struct stats_record *alloc_stats_record(void) +{ + struct stats_record *rec; + int i; + + rec = malloc(sizeof(*rec)); + memset(rec, 0, sizeof(*rec)); + if (!rec) { + fprintf(stderr, "Mem alloc error\n"); + exit(EXIT_FAIL_MEM); + } + rec->rx_cnt.cpu = alloc_record_per_cpu(); + rec->redir_err.cpu = alloc_record_per_cpu(); + rec->kthread.cpu = alloc_record_per_cpu(); + rec->exception.cpu = alloc_record_per_cpu(); + for (i = 0; i < MAX_CPUS; i++) + rec->enq[i].cpu = alloc_record_per_cpu(); + + return rec; +} + +static void free_stats_record(struct stats_record *r) +{ + int i; + + for (i = 0; i < MAX_CPUS; i++) + free(r->enq[i].cpu); + free(r->exception.cpu); + free(r->kthread.cpu); + free(r->redir_err.cpu); + free(r->rx_cnt.cpu); + free(r); +} + +static double calc_period(struct record *r, struct record *p) +{ + double period_ = 0; + __u64 period = 0; + + period = r->timestamp - p->timestamp; + if (period > 0) + period_ = ((double) period / NANOSEC_PER_SEC); + + return period_; +} + +static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->processed - p->processed; + pps = packets / period_; + } + return pps; +} + +static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + 
packets = r->dropped - p->dropped; + pps = packets / period_; + } + return pps; +} + +static __u64 calc_errs_pps(struct datarec *r, + struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->issue - p->issue; + pps = packets / period_; + } + return pps; +} + +static void stats_print(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int prog_num) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + double pps = 0, drop = 0, err = 0; + struct record *rec, *prev; + int to_cpu; + double t; + int i; + + /* Header */ + printf("Running XDP/eBPF prog_num:%d\n", prog_num); + printf("%-15s %-7s %-14s %-11s %-9s\n", + "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info"); + + /* XDP rx_cnt */ + { + char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n"; + char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n"; + char *errstr = ""; + + rec = &stats_rec->rx_cnt; + prev = &stats_prev->rx_cnt; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (err > 0) + errstr = "cpu-dest/err"; + if (pps > 0) + printf(fmt_rx, "XDP-RX", + i, pps, drop, err, errstr); + } + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + printf(fm2_rx, "XDP-RX", "total", pps, drop); + } + + /* cpumap enqueue stats */ + for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) { + char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n"; + char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n"; + char *errstr = ""; + + rec = &stats_rec->enq[to_cpu]; + prev = &stats_prev->enq[to_cpu]; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (err > 0) { + errstr = "bulk-average"; + err = pps / err; /* calc average bulk size */ + } + if (pps > 0) + printf(fmt, "cpumap-enqueue", + i, to_cpu, pps, drop, err, errstr); + } + pps = calc_pps(&rec->total, &prev->total, t); + if (pps > 0) { + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + if (err > 0) { + errstr = "bulk-average"; + err = pps / err; /* calc average bulk size */ + } + printf(fm2, "cpumap-enqueue", + "sum", to_cpu, pps, drop, err, errstr); + } + } + + /* cpumap kthread stats */ + { + char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n"; + char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n"; + char *e_str = ""; + + rec = &stats_rec->kthread; + prev = &stats_prev->kthread; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (err > 0) + e_str = "sched"; + if (pps > 0) + printf(fmt_k, "cpumap_kthread", + i, pps, drop, err, e_str); + } + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + if (err > 0) + e_str = "sched-sum"; + printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str); + } + + /* XDP redirect err tracepoints (very unlikely) */ + { + char *fmt_err = "%-15s %-7d %'-14.0f 
%'-11.0f\n"; + char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n"; + + rec = &stats_rec->redir_err; + prev = &stats_prev->redir_err; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + if (pps > 0) + printf(fmt_err, "redirect_err", i, pps, drop); + } + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + printf(fm2_err, "redirect_err", "total", pps, drop); + } + + /* XDP general exception tracepoints */ + { + char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n"; + char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n"; + + rec = &stats_rec->exception; + prev = &stats_prev->exception; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + if (pps > 0) + printf(fmt_err, "xdp_exception", i, pps, drop); + } + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + printf(fm2_err, "xdp_exception", "total", pps, drop); + } + + printf("\n"); + fflush(stdout); +} + +static void stats_collect(struct stats_record *rec) +{ + int fd, i; + + fd = map_fd[1]; /* map: rx_cnt */ + map_collect_percpu(fd, 0, &rec->rx_cnt); + + fd = map_fd[2]; /* map: redirect_err_cnt */ + map_collect_percpu(fd, 1, &rec->redir_err); + + fd = map_fd[3]; /* map: cpumap_enqueue_cnt */ + for (i = 0; i < MAX_CPUS; i++) + map_collect_percpu(fd, i, &rec->enq[i]); + + fd = map_fd[4]; /* map: cpumap_kthread_cnt */ + map_collect_percpu(fd, 0, &rec->kthread); + + fd = map_fd[8]; /* map: exception_cnt */ + map_collect_percpu(fd, 0, &rec->exception); +} + + +/* Pointer swap trick */ +static inline void swap(struct stats_record **a, struct stats_record **b) +{ + struct stats_record *tmp; + + tmp = *a; + *a = *b; + *b = tmp; +} + +static int create_cpu_entry(__u32 cpu, __u32 queue_size, + __u32 avail_idx, bool new) +{ + __u32 curr_cpus_count = 0; + __u32 key = 0; + int ret; + + /* Add a CPU entry to cpumap, as this allocate a cpu entry in + * the kernel for the cpu. + */ + ret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0); + if (ret) { + fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); + exit(EXIT_FAIL_BPF); + } + + /* Inform bpf_prog's that a new CPU is available to select + * from via some control maps. + */ + /* map_fd[5] = cpus_available */ + ret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0); + if (ret) { + fprintf(stderr, "Add to avail CPUs failed\n"); + exit(EXIT_FAIL_BPF); + } + + /* When not replacing/updating existing entry, bump the count */ + /* map_fd[6] = cpus_count */ + ret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count); + if (ret) { + fprintf(stderr, "Failed reading curr cpus_count\n"); + exit(EXIT_FAIL_BPF); + } + if (new) { + curr_cpus_count++; + ret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0); + if (ret) { + fprintf(stderr, "Failed write curr cpus_count\n"); + exit(EXIT_FAIL_BPF); + } + } + /* map_fd[7] = cpus_iterator */ + printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n", + new ? "Add-new":"Replace", cpu, avail_idx, + queue_size, curr_cpus_count); + + return 0; +} + +/* CPUs are zero-indexed. 
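As a rough illustration of what create_cpu_entry() leaves in the control maps (CPU numbers and queue size are example values only), adding CPUs 2 and 4 with 192-entry queues would be:

	create_cpu_entry(2, 192, 0, true);	/* cpu_map[2] = 192, cpus_available[0] = 2, cpus_count = 1 */
	create_cpu_entry(4, 192, 1, true);	/* cpu_map[4] = 192, cpus_available[1] = 4, cpus_count = 2 */

That is, the cpumap itself holds the per-CPU queue size, cpus_available maps a slot index to a destination CPU, and cpus_count tells the round-robin program how many slots are valid.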
Thus, add a special sentinel default value + * in map cpus_available to mark CPU index'es not configured + */ +static void mark_cpus_unavailable(void) +{ + __u32 invalid_cpu = MAX_CPUS; + int ret, i; + + for (i = 0; i < MAX_CPUS; i++) { + /* map_fd[5] = cpus_available */ + ret = bpf_map_update_elem(map_fd[5], &i, &invalid_cpu, 0); + if (ret) { + fprintf(stderr, "Failed marking CPU unavailable\n"); + exit(EXIT_FAIL_BPF); + } + } +} + +/* Stress cpumap management code by concurrently changing underlying cpumap */ +static void stress_cpumap(void) +{ + /* Changing qsize will cause kernel to free and alloc a new + * bpf_cpu_map_entry, with an associated/complicated tear-down + * procedure. + */ + create_cpu_entry(1, 1024, 0, false); + create_cpu_entry(1, 128, 0, false); + create_cpu_entry(1, 16000, 0, false); +} + +static void stats_poll(int interval, bool use_separators, int prog_num, + bool stress_mode) +{ + struct stats_record *record, *prev; + + record = alloc_stats_record(); + prev = alloc_stats_record(); + stats_collect(record); + + /* Trick to pretty printf with thousands separators use %' */ + if (use_separators) + setlocale(LC_NUMERIC, "en_US"); + + while (1) { + swap(&prev, &record); + stats_collect(record); + stats_print(record, prev, prog_num); + sleep(interval); + if (stress_mode) + stress_cpumap(); + } + + free_stats_record(record); + free_stats_record(prev); +} + +int main(int argc, char **argv) +{ + struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; + bool use_separators = true; + bool stress_mode = false; + char filename[256]; + bool debug = false; + int added_cpus = 0; + int longindex = 0; + int interval = 2; + int prog_num = 0; + int add_cpu = -1; + __u32 qsize; + int opt; + + /* Notice: choosing he queue size is very important with the + * ixgbe driver, because it's driver page recycling trick is + * dependend on pages being returned quickly. The number of + * out-standing packets in the system must be less-than 2x + * RX-ring size. 
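To put numbers on that rule (example figures only): with two destination CPUs and the 192-entry qsize chosen just below, at most 2 * 192 = 384 packets can sit in cpumap queues at once, which stays well under twice a common 512-entry RX ring (1024). A typical invocation of this sample might then be ./xdp_redirect_cpu --dev eth3 --prognum 2 --cpu 2 --cpu 4 --qsize 192, where eth3 is a placeholder interface name and prognum 2 selects the round-robin program defined in the _kern.c part above.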
+ */ + qsize = 128+64; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + + if (load_bpf_file(filename)) { + fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf); + return EXIT_FAIL; + } + + if (!prog_fd[0]) { + fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno)); + return EXIT_FAIL; + } + + mark_cpus_unavailable(); + + /* Parse commands line args */ + while ((opt = getopt_long(argc, argv, "hSd:", + long_options, &longindex)) != -1) { + switch (opt) { + case 'd': + if (strlen(optarg) >= IF_NAMESIZE) { + fprintf(stderr, "ERR: --dev name too long\n"); + goto error; + } + ifname = (char *)&ifname_buf; + strncpy(ifname, optarg, IF_NAMESIZE); + ifindex = if_nametoindex(ifname); + if (ifindex == 0) { + fprintf(stderr, + "ERR: --dev name unknown err(%d):%s\n", + errno, strerror(errno)); + goto error; + } + break; + case 's': + interval = atoi(optarg); + break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'D': + debug = true; + break; + case 'x': + stress_mode = true; + break; + case 'z': + use_separators = false; + break; + case 'p': + /* Selecting eBPF prog to load */ + prog_num = atoi(optarg); + if (prog_num < 0 || prog_num >= MAX_PROG) { + fprintf(stderr, + "--prognum too large err(%d):%s\n", + errno, strerror(errno)); + goto error; + } + break; + case 'c': + /* Add multiple CPUs */ + add_cpu = strtoul(optarg, NULL, 0); + if (add_cpu >= MAX_CPUS) { + fprintf(stderr, + "--cpu nr too large for cpumap err(%d):%s\n", + errno, strerror(errno)); + goto error; + } + create_cpu_entry(add_cpu, qsize, added_cpus, true); + added_cpus++; + break; + case 'q': + qsize = atoi(optarg); + break; + case 'h': + error: + default: + usage(argv); + return EXIT_FAIL_OPTION; + } + } + /* Required option */ + if (ifindex == -1) { + fprintf(stderr, "ERR: required option --dev missing\n"); + usage(argv); + return EXIT_FAIL_OPTION; + } + /* Required option */ + if (add_cpu == -1) { + fprintf(stderr, "ERR: required option --cpu missing\n"); + fprintf(stderr, " Specify multiple --cpu option to add more\n"); + usage(argv); + return EXIT_FAIL_OPTION; + } + + /* Remove XDP program when program is interrupted */ + signal(SIGINT, int_exit); + + if (set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) { + fprintf(stderr, "link set xdp fd failed\n"); + return EXIT_FAIL_XDP; + } + + if (debug) { + printf("Debug-mode reading trace pipe (fix #define DEBUG)\n"); + read_trace_pipe(); + } + + stats_poll(interval, use_separators, prog_num, stress_mode); + return EXIT_OK; +} From a2084f5650624edd0805dc78260d097df4f38eb6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Oct 2017 16:57:49 +0200 Subject: [PATCH 1072/2177] netlink: use NETLINK_CB(in_skb).sk instead of looking it up When netlink_ack() reports an allocation error to the sending socket, there's no need to look up the sending socket since it's available in the SKB's CB. Use that instead of going to the trouble of looking it up. Note that the pointer is only available since Eric Biederman's commit 3fbc290540a1 ("netlink: Make the sending netlink socket availabe in NETLINK_CB") which is far newer than the original lookup code (Oct 2003) (though the field was called 'ssk' in that commit and only got renamed to 'sk' later, I'd actually argue 'ssk' was better - or perhaps it should've been 'source_sk' - since there are so many different 'sk's involved.) Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f34750691c5c..336d9c6dcad9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2336,16 +2336,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); if (!skb) { - struct sock *sk; - - sk = netlink_lookup(sock_net(in_skb->sk), - in_skb->sk->sk_protocol, - NETLINK_CB(in_skb).portid); - if (sk) { - sk->sk_err = ENOBUFS; - sk->sk_error_report(sk); - sock_put(sk); - } + NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; + NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk); return; } From a5b930e0598dc46184af0282c23c75b2167d9384 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:13 -0400 Subject: [PATCH 1073/2177] net: dsa: use port's cpu_dp when creating a slave When dsa_slave_create is called, the related port already has a CPU port assigned to it, available in its cpu_dp member. Use it instead of the unique tree cpu_dp. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 45f4ea845c07..c6f4829645bf 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1117,16 +1117,13 @@ int dsa_slave_resume(struct net_device *slave_dev) int dsa_slave_create(struct dsa_port *port, const char *name) { struct dsa_notifier_register_info rinfo = { }; + struct dsa_port *cpu_dp = port->cpu_dp; + struct net_device *master = cpu_dp->netdev; struct dsa_switch *ds = port->ds; - struct net_device *master; struct net_device *slave_dev; struct dsa_slave_priv *p; - struct dsa_port *cpu_dp; int ret; - cpu_dp = ds->dst->cpu_dp; - master = cpu_dp->netdev; - if (!ds->num_tx_queues) ds->num_tx_queues = 1; From 6158eaa7a717b469b1b0c0ae6d79910737686279 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:14 -0400 Subject: [PATCH 1074/2177] net: dsa: add slave notify helper Both DSA slave create and destroy functions call call_dsa_notifiers with respectively DSA_PORT_REGISTER and DSA_PORT_UNREGISTER and the same dsa_notifier_register_info structure. Wrap this in a dsa_slave_notify helper so prevent cluttering these functions. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index c6f4829645bf..f31737256f69 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1114,9 +1114,23 @@ int dsa_slave_resume(struct net_device *slave_dev) return 0; } +static void dsa_slave_notify(struct net_device *dev, unsigned long val) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = dsa_master_netdev(p); + struct dsa_port *dp = p->dp; + struct dsa_notifier_register_info rinfo = { + .switch_number = dp->ds->index, + .port_number = dp->index, + .master = master, + .info.dev = dev, + }; + + call_dsa_notifiers(val, dev, &rinfo.info); +} + int dsa_slave_create(struct dsa_port *port, const char *name) { - struct dsa_notifier_register_info rinfo = { }; struct dsa_port *cpu_dp = port->cpu_dp; struct net_device *master = cpu_dp->netdev; struct dsa_switch *ds = port->ds; @@ -1175,11 +1189,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name) goto out_free; } - rinfo.info.dev = slave_dev; - rinfo.master = master; - rinfo.port_number = p->dp->index; - rinfo.switch_number = p->dp->ds->index; - call_dsa_notifiers(DSA_PORT_REGISTER, slave_dev, &rinfo.info); + dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); ret = register_netdev(slave_dev); if (ret) { @@ -1204,7 +1214,6 @@ out_free: void dsa_slave_destroy(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); - struct dsa_notifier_register_info rinfo = { }; struct device_node *port_dn; port_dn = p->dp->dn; @@ -1216,11 +1225,7 @@ void dsa_slave_destroy(struct net_device *slave_dev) if (of_phy_is_fixed_link(port_dn)) of_phy_deregister_fixed_link(port_dn); } - rinfo.info.dev = slave_dev; - rinfo.master = p->dp->cpu_dp->netdev; - rinfo.port_number = p->dp->index; - rinfo.switch_number = p->dp->ds->index; - call_dsa_notifiers(DSA_PORT_UNREGISTER, slave_dev, &rinfo.info); + dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); unregister_netdev(slave_dev); free_percpu(p->stats64); free_netdev(slave_dev); From d945097bb19501bd95790d8220d4eeb418b6ebb2 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:15 -0400 Subject: [PATCH 1075/2177] net: dsa: add slave to port helper Many portions of DSA core code require to get the dsa_port structure corresponding to a slave net_device. For this purpose, introduce a dsa_slave_to_port() helper. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa_priv.h | 7 ++ net/dsa/legacy.c | 6 +- net/dsa/slave.c | 167 +++++++++++++++++++++--------------------- net/dsa/tag_brcm.c | 9 +-- net/dsa/tag_dsa.c | 10 +-- net/dsa/tag_edsa.c | 10 +-- net/dsa/tag_ksz.c | 4 +- net/dsa/tag_lan9303.c | 4 +- net/dsa/tag_mtk.c | 4 +- net/dsa/tag_qca.c | 5 +- net/dsa/tag_trailer.c | 4 +- 11 files changed, 115 insertions(+), 115 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 2850077cc9cc..569a4929b4c9 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -169,6 +169,13 @@ int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + return p->dp; +} + /* switch.c */ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index b0fefbffe082..cc28c6f792a3 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -740,8 +740,7 @@ int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], const unsigned char *addr, u16 vid, u16 flags) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); return dsa_port_fdb_add(dp, addr, vid); } @@ -750,8 +749,7 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); return dsa_port_fdb_del(dp, addr, vid); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f31737256f69..894602c88b09 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -72,8 +72,8 @@ static int dsa_slave_get_iflink(const struct net_device *dev) static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; struct net_device *master = dsa_master_netdev(p); + struct dsa_port *dp = dsa_slave_to_port(dev); int err; if (!(master->flags & IFF_UP)) @@ -122,7 +122,7 @@ static int dsa_slave_close(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = dsa_master_netdev(p); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); if (dev->phydev) phy_stop(dev->phydev); @@ -246,14 +246,13 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx) { + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_dump_ctx dump = { .dev = dev, .skb = skb, .cb = cb, .idx = *idx, }; - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; int err; err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump); @@ -274,8 +273,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); int ret; switch (attr->id) { @@ -301,8 +299,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); int err; /* For the prepare phase, ensure the full 
set of changes is feasable in @@ -329,8 +326,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, static int dsa_slave_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); int err; switch (obj->id) { @@ -351,8 +347,8 @@ static int dsa_slave_port_obj_del(struct net_device *dev, static int dsa_slave_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: @@ -431,11 +427,11 @@ static void dsa_slave_get_drvinfo(struct net_device *dev, static int dsa_slave_get_regs_len(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->get_regs_len) - return ds->ops->get_regs_len(ds, p->dp->index); + return ds->ops->get_regs_len(ds, dp->index); return -EOPNOTSUPP; } @@ -443,11 +439,11 @@ static int dsa_slave_get_regs_len(struct net_device *dev) static void dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->get_regs) - ds->ops->get_regs(ds, p->dp->index, regs, _p); + ds->ops->get_regs(ds, dp->index, regs, _p); } static u32 dsa_slave_get_link(struct net_device *dev) @@ -462,8 +458,8 @@ static u32 dsa_slave_get_link(struct net_device *dev) static int dsa_slave_get_eeprom_len(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->cd && ds->cd->eeprom_len) return ds->cd->eeprom_len; @@ -477,8 +473,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) static int dsa_slave_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->get_eeprom) return ds->ops->get_eeprom(ds, eeprom, data); @@ -489,8 +485,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev, static int dsa_slave_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->set_eeprom) return ds->ops->set_eeprom(ds, eeprom, data); @@ -501,8 +497,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev, static void dsa_slave_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (stringset == ETH_SS_STATS) { int len = ETH_GSTRING_LEN; @@ -512,7 +508,7 @@ static void dsa_slave_get_strings(struct net_device *dev, strncpy(data + 2 * len, "rx_packets", len); strncpy(data + 3 * len, "rx_bytes", len); if (ds->ops->get_strings) - ds->ops->get_strings(ds, p->dp->index, data + 4 * len); + 
ds->ops->get_strings(ds, dp->index, data + 4 * len); } } @@ -520,8 +516,9 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) { + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_switch *ds = dp->ds; struct pcpu_sw_netstats *s; unsigned int start; int i; @@ -543,13 +540,13 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, data[3] += rx_bytes; } if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4); + ds->ops->get_ethtool_stats(ds, dp->index, data + 4); } static int dsa_slave_get_sset_count(struct net_device *dev, int sset) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (sset == ETH_SS_STATS) { int count; @@ -566,29 +563,29 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->get_wol) - ds->ops->get_wol(ds, p->dp->index, w); + ds->ops->get_wol(ds, dp->index, w); } static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; int ret = -EOPNOTSUPP; if (ds->ops->set_wol) - ret = ds->ops->set_wol(ds, p->dp->index, w); + ret = ds->ops->set_wol(ds, dp->index, w); return ret; } static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; int ret; /* Port's PHY and MAC both need to be EEE capable */ @@ -598,7 +595,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!ds->ops->set_mac_eee) return -EOPNOTSUPP; - ret = ds->ops->set_mac_eee(ds, p->dp->index, e); + ret = ds->ops->set_mac_eee(ds, dp->index, e); if (ret) return ret; @@ -613,8 +610,8 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; int ret; /* Port's PHY and MAC both need to be EEE capable */ @@ -624,7 +621,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) if (!ds->ops->get_mac_eee) return -EOPNOTSUPP; - ret = ds->ops->get_mac_eee(ds, p->dp->index, e); + ret = ds->ops->get_mac_eee(ds, dp->index, e); if (ret) return ret; @@ -676,9 +673,9 @@ static void dsa_slave_poll_controller(struct net_device *dev) static int dsa_slave_get_phys_port_name(struct net_device *dev, char *name, size_t len) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); - if (snprintf(name, len, "p%d", p->dp->index) >= len) + if (snprintf(name, len, "p%d", dp->index) >= len) return -EINVAL; return 0; @@ -701,14 +698,15 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, struct tc_cls_matchall_offload *cls, bool ingress) { + 
struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_mall_tc_entry *mall_tc_entry; __be16 protocol = cls->common.protocol; - struct dsa_switch *ds = p->dp->ds; struct net *net = dev_net(dev); - struct dsa_slave_priv *to_p; + struct dsa_switch *ds = dp->ds; struct net_device *to_dev; const struct tc_action *a; + struct dsa_port *to_dp; int err = -EOPNOTSUPP; LIST_HEAD(actions); int ifindex; @@ -741,13 +739,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, mall_tc_entry->type = DSA_PORT_MALL_MIRROR; mirror = &mall_tc_entry->mirror; - to_p = netdev_priv(to_dev); + to_dp = dsa_slave_to_port(to_dev); - mirror->to_local_port = to_p->dp->index; + mirror->to_local_port = to_dp->index; mirror->ingress = ingress; - err = ds->ops->port_mirror_add(ds, p->dp->index, mirror, - ingress); + err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress); if (err) { kfree(mall_tc_entry); return err; @@ -762,9 +759,9 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, static void dsa_slave_del_cls_matchall(struct net_device *dev, struct tc_cls_matchall_offload *cls) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_mall_tc_entry *mall_tc_entry; - struct dsa_switch *ds = p->dp->ds; + struct dsa_switch *ds = dp->ds; if (!ds->ops->port_mirror_del) return; @@ -777,8 +774,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, switch (mall_tc_entry->type) { case DSA_PORT_MALL_MIRROR: - ds->ops->port_mirror_del(ds, p->dp->index, - &mall_tc_entry->mirror); + ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror); break; default: WARN_ON(1); @@ -855,25 +851,25 @@ static void dsa_slave_get_stats64(struct net_device *dev, static int dsa_slave_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc, u32 *rule_locs) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (!ds->ops->get_rxnfc) return -EOPNOTSUPP; - return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs); + return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs); } static int dsa_slave_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (!ds->ops->set_rxnfc) return -EOPNOTSUPP; - return ds->ops->set_rxnfc(ds, p->dp->index, nfc); + return ds->ops->set_rxnfc(ds, dp->index, nfc); } static const struct ethtool_ops dsa_slave_ethtool_ops = { @@ -933,8 +929,9 @@ static struct device_type dsa_type = { static void dsa_slave_adjust_link(struct net_device *dev) { + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_switch *ds = dp->ds; unsigned int status_changed = 0; if (p->old_link != dev->phydev->link) { @@ -953,7 +950,7 @@ static void dsa_slave_adjust_link(struct net_device *dev) } if (ds->ops->adjust_link && status_changed) - ds->ops->adjust_link(ds, p->dp->index, dev->phydev); + ds->ops->adjust_link(ds, dp->index, dev->phydev); if (status_changed) phy_print_status(dev->phydev); @@ -962,14 +959,14 @@ static void dsa_slave_adjust_link(struct net_device *dev) static int dsa_slave_fixed_link_update(struct net_device *dev, struct fixed_phy_status *status) { - struct dsa_slave_priv *p; struct dsa_switch *ds; + struct 
dsa_port *dp; if (dev) { - p = netdev_priv(dev); - ds = p->dp->ds; + dp = dsa_slave_to_port(dev); + ds = dp->ds; if (ds->ops->fixed_link_update) - ds->ops->fixed_link_update(ds, p->dp->index, status); + ds->ops->fixed_link_update(ds, dp->index, status); } return 0; @@ -978,8 +975,9 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, /* slave device setup *******************************************************/ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) { + struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_switch *ds = dp->ds; slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr); if (!slave_dev->phydev) { @@ -997,14 +995,15 @@ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) static int dsa_slave_phy_setup(struct net_device *slave_dev) { + struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); - struct dsa_switch *ds = p->dp->ds; - struct device_node *phy_dn, *port_dn; + struct device_node *port_dn = dp->dn; + struct dsa_switch *ds = dp->ds; + struct device_node *phy_dn; bool phy_is_fixed = false; u32 phy_flags = 0; int mode, ret; - port_dn = p->dp->dn; mode = of_get_phy_mode(port_dn); if (mode < 0) mode = PHY_INTERFACE_MODE_NA; @@ -1025,7 +1024,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) } if (ds->ops->get_phy_flags) - phy_flags = ds->ops->get_phy_flags(ds, p->dp->index); + phy_flags = ds->ops->get_phy_flags(ds, dp->index); if (phy_dn) { int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); @@ -1061,10 +1060,10 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) * MDIO bus instead */ if (!slave_dev->phydev) { - ret = dsa_slave_phy_connect(slave_dev, p->dp->index); + ret = dsa_slave_phy_connect(slave_dev, dp->index); if (ret) { netdev_err(slave_dev, "failed to connect to port %d: %d\n", - p->dp->index, ret); + dp->index, ret); if (phy_is_fixed) of_phy_deregister_fixed_link(port_dn); return ret; @@ -1118,7 +1117,7 @@ static void dsa_slave_notify(struct net_device *dev, unsigned long val) { struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = dsa_master_netdev(p); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_notifier_register_info rinfo = { .switch_number = dp->ds->index, .port_number = dp->index, @@ -1202,8 +1201,8 @@ int dsa_slave_create(struct dsa_port *port, const char *name) out_phy: phy_disconnect(slave_dev->phydev); - if (of_phy_is_fixed_link(p->dp->dn)) - of_phy_deregister_fixed_link(p->dp->dn); + if (of_phy_is_fixed_link(port->dn)) + of_phy_deregister_fixed_link(port->dn); out_free: free_percpu(p->stats64); free_netdev(slave_dev); @@ -1213,10 +1212,9 @@ out_free: void dsa_slave_destroy(struct net_device *slave_dev) { + struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); - struct device_node *port_dn; - - port_dn = p->dp->dn; + struct device_node *port_dn = dp->dn; netif_carrier_off(slave_dev); if (slave_dev->phydev) { @@ -1239,8 +1237,7 @@ static bool dsa_slave_dev_check(struct net_device *dev) static int dsa_slave_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); int err = NOTIFY_DONE; if (netif_is_bridge_master(info->upper_dev)) { @@ -1283,14 +1280,14 
@@ static void dsa_slave_switchdev_event_work(struct work_struct *work) container_of(work, struct dsa_switchdev_event_work, work); struct net_device *dev = switchdev_work->dev; struct switchdev_notifier_fdb_info *fdb_info; - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); int err; rtnl_lock(); switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - err = dsa_port_fdb_add(p->dp, fdb_info->addr, fdb_info->vid); + err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid); if (err) { netdev_dbg(dev, "fdb add failed err=%d\n", err); break; @@ -1301,7 +1298,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - err = dsa_port_fdb_del(p->dp, fdb_info->addr, fdb_info->vid); + err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid); if (err) { netdev_dbg(dev, "fdb del failed err=%d\n", err); dev_close(dev); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index cc4f472fbd77..2a0fa9f9f7ca 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -61,7 +61,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u16 queue = skb_get_queue_mapping(skb); u8 *brcm_tag; @@ -82,15 +82,14 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT); brcm_tag[1] = 0; brcm_tag[2] = 0; - if (p->dp->index == 8) + if (dp->index == 8) brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; - brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK; + brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK; /* Now tell the master network device about the desired output queue * as well */ - skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(p->dp->index, - queue)); + skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue)); return skb; } diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index c77218f173d1..fe3c9700a8c8 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -18,7 +18,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u8 *dsa_header; /* @@ -34,8 +34,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) * Construct tagged FROM_CPU DSA tag from 802.1q tag. */ dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x60 | p->dp->ds->index; - dsa_header[1] = p->dp->index << 3; + dsa_header[0] = 0x60 | dp->ds->index; + dsa_header[1] = dp->index << 3; /* * Move CFI field from byte 2 to byte 1. @@ -55,8 +55,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) * Construct untagged FROM_CPU DSA tag. 
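A worked instance of this tag layout (switch and port numbers are example values only): for source switch index 0 and source port 3, the untagged FROM_CPU header built just below comes out as

	u8 example_dsa_tag[4] = { 0x40 | 0, 3 << 3, 0x00, 0x00 };	/* 0x40 0x18 0x00 0x00 */

while the tagged FROM_CPU variant above puts 0x60 | switch index in byte 0 and derives the remaining bytes from the frame's original 802.1q tag.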
*/ dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x40 | p->dp->ds->index; - dsa_header[1] = p->dp->index << 3; + dsa_header[0] = 0x40 | dp->ds->index; + dsa_header[1] = dp->index << 3; dsa_header[2] = 0x00; dsa_header[3] = 0x00; } diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 0b83cbe0c9e8..c683e240bafc 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -19,7 +19,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u8 *edsa_header; /* @@ -43,8 +43,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) edsa_header[1] = ETH_P_EDSA & 0xff; edsa_header[2] = 0x00; edsa_header[3] = 0x00; - edsa_header[4] = 0x60 | p->dp->ds->index; - edsa_header[5] = p->dp->index << 3; + edsa_header[4] = 0x60 | dp->ds->index; + edsa_header[5] = dp->index << 3; /* * Move CFI field from byte 6 to byte 5. @@ -68,8 +68,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) edsa_header[1] = ETH_P_EDSA & 0xff; edsa_header[2] = 0x00; edsa_header[3] = 0x00; - edsa_header[4] = 0x40 | p->dp->ds->index; - edsa_header[5] = p->dp->index << 3; + edsa_header[4] = 0x40 | dp->ds->index; + edsa_header[5] = dp->index << 3; edsa_header[6] = 0x00; edsa_header[7] = 0x00; } diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index b241c990cde0..66e443fd7675 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -34,7 +34,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); struct sk_buff *nskb; int padlen; u8 *tag; @@ -72,7 +72,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); tag[0] = 0; - tag[1] = 1 << p->dp->index; /* destination port */ + tag[1] = 1 << dp->index; /* destination port */ return nskb; } diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 4f211e56c81f..c709adb5efd9 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -42,7 +42,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u16 *lan9303_tag; /* insert a special VLAN tag between the MAC addresses @@ -62,7 +62,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN); lan9303_tag[0] = htons(ETH_P_8021Q); - lan9303_tag[1] = htons(p->dp->index | BIT(4)); + lan9303_tag[1] = htons(dp->index | BIT(4)); return skb; } diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 968586c5d40e..5abebbfd7060 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -23,7 +23,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u8 *mtk_tag; if (skb_cow_head(skb, MTK_HDR_LEN) < 0) @@ -36,7 +36,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, /* Build the tag after the MAC Source Address */ mtk_tag = skb->data + 2 * ETH_ALEN; mtk_tag[0] = 0; - mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; mtk_tag[2] = 0; mtk_tag[3] = 0; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 8d33d9ebf910..b0274b6781cb 100644 --- a/net/dsa/tag_qca.c 
+++ b/net/dsa/tag_qca.c @@ -38,7 +38,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u16 *phdr, hdr; dev->stats.tx_packets++; @@ -54,8 +54,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) /* Set the version field, and set destination port information */ hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | - QCA_HDR_XMIT_FROM_CPU | - BIT(p->dp->index); + QCA_HDR_XMIT_FROM_CPU | BIT(dp->index); *phdr = htons(hdr); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 61668be267f5..8b205bb5f521 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -16,7 +16,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); struct sk_buff *nskb; int padlen; u8 *trailer; @@ -48,7 +48,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) trailer = skb_put(nskb, 4); trailer[0] = 0x80; - trailer[1] = 1 << p->dp->index; + trailer[1] = 1 << dp->index; trailer[2] = 0x10; trailer[3] = 0x00; From d0006b002208936d36af8e4dce1f6dfeebb2dfba Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:16 -0400 Subject: [PATCH 1076/2177] net: dsa: add slave to master helper Many part of the DSA slave code require to get the master device assigned to a slave device. Remove dsa_master_netdev() in favor of a dsa_slave_to_master() helper which does that. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 13 ++++++++----- net/dsa/slave.c | 26 +++++++++----------------- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 569a4929b4c9..eedda1f1b099 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -176,6 +176,14 @@ static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) return p->dp; } +static inline struct net_device * +dsa_slave_to_master(const struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + return dp->cpu_dp->netdev; +} + /* switch.c */ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); @@ -204,9 +212,4 @@ extern const struct dsa_device_ops qca_netdev_ops; /* tag_trailer.c */ extern const struct dsa_device_ops trailer_netdev_ops; -static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p) -{ - return p->dp->cpu_dp->netdev; -} - #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 894602c88b09..b72e07503a40 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -64,15 +64,12 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) /* slave device handling ****************************************************/ static int dsa_slave_get_iflink(const struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - - return dsa_master_netdev(p)->ifindex; + return dsa_slave_to_master(dev)->ifindex; } static int dsa_slave_open(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); int err; @@ -120,8 +117,7 @@ out: static int dsa_slave_close(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct 
net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); if (dev->phydev) @@ -144,8 +140,7 @@ static int dsa_slave_close(struct net_device *dev) static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); if (change & IFF_ALLMULTI) dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); @@ -155,8 +150,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) static void dsa_slave_set_rx_mode(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); dev_mc_sync(master, dev); dev_uc_sync(master, dev); @@ -164,8 +158,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); struct sockaddr *addr = a; int err; @@ -409,7 +402,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) /* Queue the SKB for transmission on the parent interface, but * do not modify its EtherType */ - nskb->dev = dsa_master_netdev(p); + nskb->dev = dsa_slave_to_master(dev); dev_queue_xmit(nskb); return NETDEV_TX_OK; @@ -632,8 +625,8 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) static int dsa_slave_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { + struct net_device *master = dsa_slave_to_master(dev); struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); struct netpoll *netpoll; int err = 0; @@ -1115,8 +1108,7 @@ int dsa_slave_resume(struct net_device *slave_dev) static void dsa_slave_notify(struct net_device *dev, unsigned long val) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_notifier_register_info rinfo = { .switch_number = dp->ds->index, From 2231c43b560403675217f52204b18c1c59c0ee76 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:17 -0400 Subject: [PATCH 1077/2177] net: dsa: rename dsa_master_get_slave The dsa_master_get_slave is slightly confusing since the idiomatic "get" term often suggests reference counting, in symmetry to "put". Rename it to dsa_master_find_slave to make the look up operation clear. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa_priv.h | 4 ++-- net/dsa/tag_brcm.c | 2 +- net/dsa/tag_dsa.c | 2 +- net/dsa/tag_edsa.c | 2 +- net/dsa/tag_ksz.c | 2 +- net/dsa/tag_lan9303.c | 2 +- net/dsa/tag_mtk.c | 2 +- net/dsa/tag_qca.c | 2 +- net/dsa/tag_trailer.c | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index eedda1f1b099..623c22b75e81 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -113,8 +113,8 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int dsa_master_ethtool_setup(struct net_device *dev); void dsa_master_ethtool_restore(struct net_device *dev); -static inline struct net_device *dsa_master_get_slave(struct net_device *dev, - int device, int port) +static inline struct net_device *dsa_master_find_slave(struct net_device *dev, + int device, int port) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 2a0fa9f9f7ca..9e082bae3cb0 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -119,7 +119,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Locate which port this is coming from */ source_port = brcm_tag[3] & BRCM_EG_PID_MASK; - skb->dev = dsa_master_get_slave(dev, 0, source_port); + skb->dev = dsa_master_find_slave(dev, 0, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index fe3c9700a8c8..dbbcdafed8c3 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -91,7 +91,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; - skb->dev = dsa_master_get_slave(dev, source_device, source_port); + skb->dev = dsa_master_find_slave(dev, source_device, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index c683e240bafc..f38a626b3a05 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -104,7 +104,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, source_device = edsa_header[0] & 0x1f; source_port = (edsa_header[1] >> 3) & 0x1f; - skb->dev = dsa_master_get_slave(dev, source_device, source_port); + skb->dev = dsa_master_find_slave(dev, source_device, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 66e443fd7675..0f62effad88f 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -87,7 +87,7 @@ static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev, source_port = tag[0] & 7; - skb->dev = dsa_master_get_slave(dev, 0, source_port); + skb->dev = dsa_master_find_slave(dev, 0, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index c709adb5efd9..57519597c6fc 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -94,7 +94,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, source_port = ntohs(lan9303_tag[1]) & 0x3; - skb->dev = dsa_master_get_slave(dev, 0, source_port); + skb->dev = dsa_master_find_slave(dev, 0, source_port); if (!skb->dev) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); return NULL; diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 5abebbfd7060..8475434af7d5 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -69,7 +69,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Get source port 
information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); - skb->dev = dsa_master_get_slave(dev, 0, port); + skb->dev = dsa_master_find_slave(dev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index b0274b6781cb..613f4ee97771 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -91,7 +91,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Get source port information */ port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); - skb->dev = dsa_master_get_slave(dev, 0, port); + skb->dev = dsa_master_find_slave(dev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 8b205bb5f521..7d20e1f3de28 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -71,7 +71,7 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, source_port = trailer[1] & 7; - skb->dev = dsa_master_get_slave(dev, 0, source_port); + skb->dev = dsa_master_find_slave(dev, 0, source_port); if (!skb->dev) return NULL; From f8b8b1cd5aadd221742b45eb0ee3c8a80abf036a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:18 -0400 Subject: [PATCH 1078/2177] net: dsa: split dsa_port's netdev member The dsa_port structure has a "netdev" member, which can be used for either the master device, or the slave device, depending on its type. It is true that today, CPU port are not exposed to userspace, thus the port's netdev member can be used to point to its master interface. But it is still slightly confusing, so split it into more explicit "master" and "slave" members inside an anonymous union. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 6 +++--- drivers/net/dsa/mt7530.c | 2 +- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- include/net/dsa.h | 9 ++++++++- net/dsa/dsa.c | 6 +++--- net/dsa/dsa2.c | 20 ++++++++++---------- net/dsa/dsa_priv.h | 4 ++-- net/dsa/legacy.c | 14 +++++++------- net/dsa/slave.c | 6 +++--- 9 files changed, 38 insertions(+), 31 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 32025b990437..b43c063b9634 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -601,7 +601,7 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, * state machine and make it go in PHY_FORCING state instead. 
*/ if (!status->link) - netif_carrier_off(ds->ports[port].netdev); + netif_carrier_off(ds->ports[port].slave); status->duplex = 1; } else { status->link = 1; @@ -690,7 +690,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = ds->ports[port].cpu_dp->netdev; + struct net_device *p = ds->ports[port].cpu_dp->master; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_wolinfo pwol; @@ -713,7 +713,7 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = ds->ports[port].cpu_dp->netdev; + struct net_device *p = ds->ports[port].cpu_dp->master; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = ds->ports[port].cpu_dp->index; struct ethtool_wolinfo pwol; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 034241696ce2..fea2e665d0cb 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -933,7 +933,7 @@ mt7530_setup(struct dsa_switch *ds) * controller also is the container for two GMACs nodes representing * as two netdev instances. */ - dn = ds->ports[MT7530_CPU_PORT].netdev->dev.of_node->parent; + dn = ds->ports[MT7530_CPU_PORT].master->dev.of_node->parent; priv->ethernet = syscon_node_to_regmap(dn); if (IS_ERR(priv->ethernet)) return PTR_ERR(priv->ethernet); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 76cf383dcf90..88b47fa09b41 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1137,7 +1137,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) continue; - if (!ds->ports[port].netdev) + if (!ds->ports[port].slave) continue; if (vlan.member[i] == diff --git a/include/net/dsa.h b/include/net/dsa.h index 2746741f74cf..6ed1a17ed1bd 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -164,6 +164,14 @@ struct dsa_mall_tc_entry { struct dsa_port { + /* A CPU port is physically connected to a master device. + * A user port exposed to userspace has a slave device. 
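/* [Editor's note: illustrative sketch, not part of the patch text around it.
 * It restates the anonymous-union idea the include/net/dsa.h hunk introduces:
 * "master" and "slave" are two names for the same pointer slot, so each call
 * site can use the name that matches the port type. The example_port type and
 * helper below are hypothetical.]
 */
#include <linux/types.h>        /* for bool */

struct net_device;              /* only used as a pointer in this sketch */

struct example_port {
        union {
                struct net_device *master;      /* CPU port: host Ethernet device */
                struct net_device *slave;       /* user port: exposed net_device */
        };
};

static bool example_same_slot(struct example_port *port, struct net_device *dev)
{
        port->master = dev;             /* writes the single shared slot... */
        return port->slave == dev;      /* ...so the other name reads back the same */
}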
+ */ + union { + struct net_device *master; + struct net_device *slave; + }; + /* CPU port tagging operations used by master or slave devices */ const struct dsa_device_ops *tag_ops; @@ -176,7 +184,6 @@ struct dsa_port { unsigned int index; const char *name; struct dsa_port *cpu_dp; - struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; u8 stp_state; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 832c659ff993..a3abf7a7b9a2 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -201,7 +201,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_PM_SLEEP static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { - return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; + return ds->enabled_port_mask & (1 << p) && ds->ports[p].slave; } int dsa_switch_suspend(struct dsa_switch *ds) @@ -213,7 +213,7 @@ int dsa_switch_suspend(struct dsa_switch *ds) if (!dsa_is_port_initialized(ds, i)) continue; - ret = dsa_slave_suspend(ds->ports[i].netdev); + ret = dsa_slave_suspend(ds->ports[i].slave); if (ret) return ret; } @@ -240,7 +240,7 @@ int dsa_switch_resume(struct dsa_switch *ds) if (!dsa_is_port_initialized(ds, i)) continue; - ret = dsa_slave_resume(ds->ports[i].netdev); + ret = dsa_slave_resume(ds->ports[i].slave); if (ret) return ret; } diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 6ac9e11d385c..9e8b8aab049d 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -279,7 +279,7 @@ static int dsa_user_port_apply(struct dsa_port *port) if (err) { dev_warn(ds->dev, "Failed to create slave %d: %d\n", port->index, err); - port->netdev = NULL; + port->slave = NULL; return err; } @@ -289,7 +289,7 @@ static int dsa_user_port_apply(struct dsa_port *port) if (err) return err; - devlink_port_type_eth_set(&port->devlink_port, port->netdev); + devlink_port_type_eth_set(&port->devlink_port, port->slave); return 0; } @@ -297,9 +297,9 @@ static int dsa_user_port_apply(struct dsa_port *port) static void dsa_user_port_unapply(struct dsa_port *port) { devlink_port_unregister(&port->devlink_port); - if (port->netdev) { - dsa_slave_destroy(port->netdev); - port->netdev = NULL; + if (port->slave) { + dsa_slave_destroy(port->slave); + port->slave = NULL; port->ds->enabled_port_mask &= ~(1 << port->index); } } @@ -432,9 +432,9 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) * sent to the tag format's receive function. 
*/ wmb(); - dst->cpu_dp->netdev->dsa_ptr = dst->cpu_dp; + dst->cpu_dp->master->dsa_ptr = dst->cpu_dp; - err = dsa_master_ethtool_setup(dst->cpu_dp->netdev); + err = dsa_master_ethtool_setup(dst->cpu_dp->master); if (err) return err; @@ -451,9 +451,9 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) if (!dst->applied) return; - dsa_master_ethtool_restore(dst->cpu_dp->netdev); + dsa_master_ethtool_restore(dst->cpu_dp->master); - dst->cpu_dp->netdev->dsa_ptr = NULL; + dst->cpu_dp->master->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype * field, make sure that all packets from this point get sent @@ -499,7 +499,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, if (!dst->cpu_dp) { dst->cpu_dp = port; - dst->cpu_dp->netdev = ethernet_dev; + dst->cpu_dp->master = ethernet_dev; } /* Initialize cpu_port_mask now for drv->setup() diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 623c22b75e81..1e9914062d0b 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -130,7 +130,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, if (port < 0 || port >= ds->num_ports) return NULL; - return ds->ports[port].netdev; + return ds->ports[port].slave; } /* port.c */ @@ -181,7 +181,7 @@ dsa_slave_to_master(const struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - return dp->cpu_dp->netdev; + return dp->cpu_dp->master; } /* switch.c */ diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index cc28c6f792a3..b6c88fd33d4f 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -120,7 +120,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, return -EINVAL; } dst->cpu_dp = &ds->ports[i]; - dst->cpu_dp->netdev = master; + dst->cpu_dp->master = master; ds->cpu_port_mask |= 1 << i; } else if (!strcmp(name, "dsa")) { ds->dsa_port_mask |= 1 << i; @@ -261,10 +261,10 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (!(ds->enabled_port_mask & (1 << port))) continue; - if (!ds->ports[port].netdev) + if (!ds->ports[port].slave) continue; - dsa_slave_destroy(ds->ports[port].netdev); + dsa_slave_destroy(ds->ports[port].slave); } /* Disable configuration of the CPU and DSA ports */ @@ -601,7 +601,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = dst->cpu_dp; - return dsa_master_ethtool_setup(dst->cpu_dp->netdev); + return dsa_master_ethtool_setup(dst->cpu_dp->master); } static int dsa_probe(struct platform_device *pdev) @@ -666,9 +666,9 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - dsa_master_ethtool_restore(dst->cpu_dp->netdev); + dsa_master_ethtool_restore(dst->cpu_dp->master); - dst->cpu_dp->netdev->dsa_ptr = NULL; + dst->cpu_dp->master->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype * field, make sure that all packets from this point get sent @@ -683,7 +683,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) dsa_switch_destroy(ds); } - dev_put(dst->cpu_dp->netdev); + dev_put(dst->cpu_dp->master); } static int dsa_remove(struct platform_device *pdev) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index b72e07503a40..6906de0f0050 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1123,7 +1123,7 @@ static void dsa_slave_notify(struct net_device *dev, unsigned long val) int dsa_slave_create(struct dsa_port *port, const char *name) { struct dsa_port *cpu_dp = port->cpu_dp; - struct net_device *master = cpu_dp->netdev; + struct net_device *master = cpu_dp->master; struct 
dsa_switch *ds = port->ds; struct net_device *slave_dev; struct dsa_slave_priv *p; @@ -1170,7 +1170,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name) p->old_link = -1; p->old_duplex = -1; - port->netdev = slave_dev; + port->slave = slave_dev; netif_carrier_off(slave_dev); @@ -1198,7 +1198,7 @@ out_phy: out_free: free_percpu(p->stats64); free_netdev(slave_dev); - port->netdev = NULL; + port->slave = NULL; return ret; } From c8652c83bc84ac8db44060ced0036de7628aa5e5 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 16 Oct 2017 11:12:19 -0400 Subject: [PATCH 1079/2177] net: dsa: add dsa_to_port helper The dsa_port structure is part of DSA core data and must only be updated by the later. It is OK and sometimes necessary for the DSA drivers to access this data, but this has to be read only. For that purpose, add a dsa_to_port() helper which returns a const pointer to a dsa_port structure which must be used by DSA drivers from now on instead of digging into ds->ports[] themselves. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 4 ++-- drivers/net/dsa/lan9303-core.c | 2 +- drivers/net/dsa/mt7530.c | 4 ++-- drivers/net/dsa/mv88e6060.c | 2 +- drivers/net/dsa/mv88e6xxx/chip.c | 8 ++++---- drivers/net/dsa/qca8k.c | 4 ++-- include/net/dsa.h | 5 +++++ 7 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index d4ce092def83..b48cf0487b43 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1354,7 +1354,7 @@ int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br) b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan); b53_for_each_port(dev, i) { - if (ds->ports[i].bridge_dev != br) + if (dsa_to_port(ds, i)->bridge_dev != br) continue; /* Add this local port to the remote port VLAN control @@ -1390,7 +1390,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br) b53_for_each_port(dev, i) { /* Don't touch the remaining ports */ - if (ds->ports[i].bridge_dev != br) + if (dsa_to_port(ds, i)->bridge_dev != br) continue; b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(i), ®); diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index fecfe1fe67ea..09a748327fc6 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -863,7 +863,7 @@ static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, struct lan9303 *chip = ds->priv; dev_dbg(chip->dev, "%s(port %d)\n", __func__, port); - if (ds->ports[1].bridge_dev == ds->ports[2].bridge_dev) { + if (dsa_to_port(ds, 1)->bridge_dev == dsa_to_port(ds, 2)->bridge_dev) { lan9303_bridge_ports(chip); chip->is_bridged = true; /* unleash stp_state_set() */ } diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index fea2e665d0cb..21431be2831e 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -782,7 +782,7 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port, * and not being setup until the port becomes enabled. */ if (ds->enabled_port_mask & BIT(i) && i != port) { - if (ds->ports[i].bridge_dev != bridge) + if (dsa_to_port(ds, i)->bridge_dev != bridge) continue; if (priv->ports[i].enable) mt7530_set(priv, MT7530_PCR_P(i), @@ -819,7 +819,7 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, * is kept and not being setup until the port becomes enabled. 
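/* [Editor's note: usage sketch, not part of the patch text around it.
 * dsa_to_port(), added by this patch to include/net/dsa.h, returns a const
 * pointer, so a driver can read core-owned port state such as bridge_dev but
 * cannot write it. The example_port_bridged() helper is hypothetical.]
 */
#include <net/dsa.h>

static bool example_port_bridged(struct dsa_switch *ds, int port,
                                 struct net_device *br)
{
        const struct dsa_port *dp = dsa_to_port(ds, port);

        /* read-only access: dp points to const data, so writes would not compile */
        return dp->bridge_dev == br;
}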
*/ if (ds->enabled_port_mask & BIT(i) && i != port) { - if (ds->ports[i].bridge_dev != bridge) + if (dsa_to_port(ds, i)->bridge_dev != bridge) continue; if (priv->ports[i].enable) mt7530_clear(priv, MT7530_PCR_P(i), diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 6173be889d95..f78b9e13be1c 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -177,7 +177,7 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) ((p & 0xf) << PORT_VLAN_MAP_DBNUM_SHIFT) | (dsa_is_cpu_port(ds, p) ? ds->enabled_port_mask : - BIT(ds->ports[p].cpu_dp->index))); + BIT(dsa_to_port(ds, p)->cpu_dp->index))); /* Port Association Vector: when learning source addresses * of packets, add the address to the address database using diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 88b47fa09b41..677d6902807e 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -851,7 +851,7 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port) for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) if (dsa_is_cpu_port(chip->ds, i) || dsa_is_dsa_port(chip->ds, i) || - (br && chip->ds->ports[i].bridge_dev == br)) + (br && dsa_to_port(chip->ds, i)->bridge_dev == br)) pvlan |= BIT(i); return pvlan; @@ -1144,16 +1144,16 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_NON_MEMBER) continue; - if (ds->ports[i].bridge_dev == + if (dsa_to_port(ds, i)->bridge_dev == ds->ports[port].bridge_dev) break; /* same bridge, check next VLAN */ - if (!ds->ports[i].bridge_dev) + if (!dsa_to_port(ds, i)->bridge_dev) continue; dev_err(ds->dev, "p%d: hw VLAN %d already used by %s\n", port, vlan.vid, - netdev_name(ds->ports[i].bridge_dev)); + netdev_name(dsa_to_port(ds, i)->bridge_dev)); err = -EOPNOTSUPP; goto unlock; } diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 82f09711ac1a..d1b0b1fb632f 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -700,7 +700,7 @@ qca8k_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *br) int i; for (i = 1; i < QCA8K_NUM_PORTS; i++) { - if (ds->ports[i].bridge_dev != br) + if (dsa_to_port(ds, i)->bridge_dev != br) continue; /* Add this port to the portvlan mask of the other ports * in the bridge @@ -725,7 +725,7 @@ qca8k_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br) int i; for (i = 1; i < QCA8K_NUM_PORTS; i++) { - if (ds->ports[i].bridge_dev != br) + if (dsa_to_port(ds, i)->bridge_dev != br) continue; /* Remove this port to the portvlan mask of the other ports * in the bridge diff --git a/include/net/dsa.h b/include/net/dsa.h index 6ed1a17ed1bd..38961ef91d3d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -269,6 +269,11 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); } +static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) +{ + return &ds->ports[p]; +} + static inline u8 dsa_upstream_port(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; From eb4ddaf474285a4c6986f4a1c3205bdb0bed2da9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:45 -0700 Subject: [PATCH 1080/2177] net/decnet: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer 
explicitly. Cc: "David S. Miller" Cc: Johannes Berg Cc: David Ahern Cc: linux-decnet-user@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/net/dn.h | 7 ------- include/net/dn_nsp.h | 1 - net/decnet/af_decnet.c | 4 ---- net/decnet/dn_dev.c | 12 +++++------- net/decnet/dn_nsp_out.c | 11 ----------- 5 files changed, 5 insertions(+), 30 deletions(-) diff --git a/include/net/dn.h b/include/net/dn.h index 913b73d239f5..4394f7d5cfe8 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -122,13 +122,6 @@ struct dn_scp /* Session Control Port */ unsigned long keepalive; void (*keepalive_fxn)(struct sock *sk); - /* - * This stuff is for the fast timer for delayed acks - */ - struct timer_list delack_timer; - int delack_pending; - void (*delack_fxn)(struct sock *sk); - }; static inline struct dn_scp *DN_SK(struct sock *sk) diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 3a3e33d18456..413a15e5339c 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -17,7 +17,6 @@ void dn_nsp_send_data_ack(struct sock *sk); void dn_nsp_send_oth_ack(struct sock *sk); -void dn_nsp_delayed_ack(struct sock *sk); void dn_send_conn_ack(struct sock *sk); void dn_send_conn_conf(struct sock *sk, gfp_t gfp); void dn_nsp_send_disc(struct sock *sk, unsigned char type, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 73a0399dc7a2..d4c9a8bbad3e 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -533,10 +533,6 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf scp->keepalive = 10 * HZ; scp->keepalive_fxn = dn_keepalive; - init_timer(&scp->delack_timer); - scp->delack_pending = 0; - scp->delack_fxn = dn_nsp_delayed_ack; - dn_start_slow_timer(sk); out: return sk; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4d339de56862..92dbaa3f1eae 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1038,14 +1038,14 @@ static void dn_eth_down(struct net_device *dev) static void dn_dev_set_timer(struct net_device *dev); -static void dn_dev_timer_func(unsigned long arg) +static void dn_dev_timer_func(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; - struct dn_dev *dn_db; + struct dn_dev *dn_db = from_timer(dn_db, t, timer); + struct net_device *dev; struct dn_ifaddr *ifa; rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); + dev = dn_db->dev; if (dn_db->t3 <= dn_db->parms.t2) { if (dn_db->parms.timer3) { for (ifa = rcu_dereference(dn_db->ifa_list); @@ -1070,8 +1070,6 @@ static void dn_dev_set_timer(struct net_device *dev) if (dn_db->parms.t2 > dn_db->parms.t3) dn_db->parms.t2 = dn_db->parms.t3; - dn_db->timer.data = (unsigned long)dev; - dn_db->timer.function = dn_dev_timer_func; dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ); add_timer(&dn_db->timer); @@ -1100,7 +1098,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) rcu_assign_pointer(dev->dn_ptr, dn_db); dn_db->dev = dev; - init_timer(&dn_db->timer); + timer_setup(&dn_db->timer, dn_dev_timer_func, 0); dn_db->uptime = jiffies; diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 66f035e476ea..e50a4adfcf7e 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -491,17 +491,6 @@ void dn_send_conn_ack (struct sock *sk) dn_nsp_send(skb); } -void dn_nsp_delayed_ack(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->ackxmt_oth != scp->numoth_rcv) - dn_nsp_send_oth_ack(sk); - - if (scp->ackxmt_dat != 
scp->numdat_rcv) - dn_nsp_send_data_ack(sk); -} - static int dn_nsp_retrans_conn_conf(struct sock *sk) { struct dn_scp *scp = DN_SK(sk); From 83a37b3292f4aca799b355179ad6fbdd78a08e10 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:46 -0700 Subject: [PATCH 1081/2177] net/lapb: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Hans Liljestrand Cc: "Reshetova, Elena" Cc: linux-x25@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/lapb/lapb_iface.c | 4 ++-- net/lapb/lapb_timer.c | 18 ++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index e15314e3b464..db6e0afe3a20 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -127,8 +127,8 @@ static struct lapb_cb *lapb_create_cb(void) skb_queue_head_init(&lapb->write_queue); skb_queue_head_init(&lapb->ack_queue); - init_timer(&lapb->t1timer); - init_timer(&lapb->t2timer); + timer_setup(&lapb->t1timer, NULL, 0); + timer_setup(&lapb->t2timer, NULL, 0); lapb->t1 = LAPB_DEFAULT_T1; lapb->t2 = LAPB_DEFAULT_T2; diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 1a5535bc3b8d..8bb469cb3abe 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -35,15 +35,14 @@ #include #include -static void lapb_t1timer_expiry(unsigned long); -static void lapb_t2timer_expiry(unsigned long); +static void lapb_t1timer_expiry(struct timer_list *); +static void lapb_t2timer_expiry(struct timer_list *); void lapb_start_t1timer(struct lapb_cb *lapb) { del_timer(&lapb->t1timer); - lapb->t1timer.data = (unsigned long)lapb; - lapb->t1timer.function = &lapb_t1timer_expiry; + lapb->t1timer.function = (TIMER_FUNC_TYPE)lapb_t1timer_expiry; lapb->t1timer.expires = jiffies + lapb->t1; add_timer(&lapb->t1timer); @@ -53,8 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb) { del_timer(&lapb->t2timer); - lapb->t2timer.data = (unsigned long)lapb; - lapb->t2timer.function = &lapb_t2timer_expiry; + lapb->t2timer.function = (TIMER_FUNC_TYPE)lapb_t2timer_expiry; lapb->t2timer.expires = jiffies + lapb->t2; add_timer(&lapb->t2timer); @@ -75,9 +73,9 @@ int lapb_t1timer_running(struct lapb_cb *lapb) return timer_pending(&lapb->t1timer); } -static void lapb_t2timer_expiry(unsigned long param) +static void lapb_t2timer_expiry(struct timer_list *t) { - struct lapb_cb *lapb = (struct lapb_cb *)param; + struct lapb_cb *lapb = from_timer(lapb, t, t2timer); if (lapb->condition & LAPB_ACK_PENDING_CONDITION) { lapb->condition &= ~LAPB_ACK_PENDING_CONDITION; @@ -85,9 +83,9 @@ static void lapb_t2timer_expiry(unsigned long param) } } -static void lapb_t1timer_expiry(unsigned long param) +static void lapb_t1timer_expiry(struct timer_list *t) { - struct lapb_cb *lapb = (struct lapb_cb *)param; + struct lapb_cb *lapb = from_timer(lapb, t, t1timer); switch (lapb->state) { From 4966babd904d7f8e9e20735f3637a98fd7ca538c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:47 -0700 Subject: [PATCH 1082/2177] net/rose: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Ralf Baechle Cc: "David S. 
Miller" Cc: linux-hams@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/rose/af_rose.c | 17 +++++++++-------- net/rose/rose_link.c | 16 +++++++--------- net/rose/rose_loopback.c | 9 +++------ net/rose/rose_route.c | 8 ++++---- net/rose/rose_timer.c | 30 +++++++++++++----------------- 5 files changed, 36 insertions(+), 44 deletions(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4a9729257023..6a5c4992cf61 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *); /* * Handler for deferred kills. */ -static void rose_destroy_timer(unsigned long data) +static void rose_destroy_timer(struct timer_list *t) { - rose_destroy_socket((struct sock *)data); + struct sock *sk = from_timer(sk, t, sk_timer); + + rose_destroy_socket(sk); } /* @@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ - setup_timer(&sk->sk_timer, rose_destroy_timer, - (unsigned long)sk); + timer_setup(&sk->sk_timer, rose_destroy_timer, 0); sk->sk_timer.expires = jiffies + 10 * HZ; add_timer(&sk->sk_timer); } else @@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol, sock->ops = &rose_proto_ops; sk->sk_protocol = protocol; - init_timer(&rose->timer); - init_timer(&rose->idletimer); + timer_setup(&rose->timer, NULL, 0); + timer_setup(&rose->idletimer, NULL, 0); rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout); rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout); @@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk) sk->sk_state = TCP_ESTABLISHED; sock_copy_flags(sk, osk); - init_timer(&rose->timer); - init_timer(&rose->idletimer); + timer_setup(&rose->timer, NULL, 0); + timer_setup(&rose->idletimer, NULL, 0); orose = rose_sk(osk); rose->t1 = orose->t1; diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index c76638cc2cd5..cda4c6678ef1 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -27,8 +27,8 @@ #include #include -static void rose_ftimer_expiry(unsigned long); -static void rose_t0timer_expiry(unsigned long); +static void rose_ftimer_expiry(struct timer_list *); +static void rose_t0timer_expiry(struct timer_list *); static void rose_transmit_restart_confirmation(struct rose_neigh *neigh); static void rose_transmit_restart_request(struct rose_neigh *neigh); @@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh) { del_timer(&neigh->ftimer); - neigh->ftimer.data = (unsigned long)neigh; - neigh->ftimer.function = &rose_ftimer_expiry; + neigh->ftimer.function = (TIMER_FUNC_TYPE)rose_ftimer_expiry; neigh->ftimer.expires = jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout); @@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh) { del_timer(&neigh->t0timer); - neigh->t0timer.data = (unsigned long)neigh; - neigh->t0timer.function = &rose_t0timer_expiry; + neigh->t0timer.function = (TIMER_FUNC_TYPE)rose_t0timer_expiry; neigh->t0timer.expires = jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout); @@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh) return timer_pending(&neigh->t0timer); } -static void rose_ftimer_expiry(unsigned long param) +static void rose_ftimer_expiry(struct timer_list *t) { } -static void rose_t0timer_expiry(unsigned long param) +static void rose_t0timer_expiry(struct timer_list *t) { - struct rose_neigh *neigh = (struct rose_neigh *)param; 
+ struct rose_neigh *neigh = from_timer(neigh, t, t0timer); rose_transmit_restart_request(neigh); diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 344456206b70..7af4f99c4a93 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -19,12 +19,13 @@ static struct sk_buff_head loopback_queue; static struct timer_list loopback_timer; static void rose_set_loopback_timer(void); +static void rose_loopback_timer(struct timer_list *unused); void rose_loopback_init(void) { skb_queue_head_init(&loopback_queue); - init_timer(&loopback_timer); + timer_setup(&loopback_timer, rose_loopback_timer, 0); } static int rose_loopback_running(void) @@ -50,20 +51,16 @@ int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) return 1; } -static void rose_loopback_timer(unsigned long); static void rose_set_loopback_timer(void) { del_timer(&loopback_timer); - loopback_timer.data = 0; - loopback_timer.function = &rose_loopback_timer; loopback_timer.expires = jiffies + 10; - add_timer(&loopback_timer); } -static void rose_loopback_timer(unsigned long param) +static void rose_loopback_timer(struct timer_list *unused) { struct sk_buff *skb; struct net_device *dev; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 452bbb38d943..65921cd10323 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, skb_queue_head_init(&rose_neigh->queue); - init_timer(&rose_neigh->ftimer); - init_timer(&rose_neigh->t0timer); + timer_setup(&rose_neigh->ftimer, NULL, 0); + timer_setup(&rose_neigh->t0timer, NULL, 0); if (rose_route->ndigis != 0) { rose_neigh->digipeat = @@ -390,8 +390,8 @@ void rose_add_loopback_neigh(void) skb_queue_head_init(&sn->queue); - init_timer(&sn->ftimer); - init_timer(&sn->t0timer); + timer_setup(&sn->ftimer, NULL, 0); + timer_setup(&sn->t0timer, NULL, 0); spin_lock_bh(&rose_neigh_list_lock); sn->next = rose_neigh_list; diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index bc5469d6d9cb..3b89d66f15bb 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -29,8 +29,8 @@ #include static void rose_heartbeat_expiry(unsigned long); -static void rose_timer_expiry(unsigned long); -static void rose_idletimer_expiry(unsigned long); +static void rose_timer_expiry(struct timer_list *); +static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { @@ -49,8 +49,7 @@ void rose_start_t1timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; add_timer(&rose->timer); @@ -62,8 +61,7 @@ void rose_start_t2timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; add_timer(&rose->timer); @@ -75,8 +73,7 @@ void rose_start_t3timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; add_timer(&rose->timer); @@ -88,8 +85,7 @@ void rose_start_hbtimer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + 
rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; add_timer(&rose->timer); @@ -102,8 +98,7 @@ void rose_start_idletimer(struct sock *sk) del_timer(&rose->idletimer); if (rose->idle > 0) { - rose->idletimer.data = (unsigned long)sk; - rose->idletimer.function = &rose_idletimer_expiry; + rose->idletimer.function = (TIMER_FUNC_TYPE)rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; add_timer(&rose->idletimer); @@ -163,10 +158,10 @@ static void rose_heartbeat_expiry(unsigned long param) bh_unlock_sock(sk); } -static void rose_timer_expiry(unsigned long param) +static void rose_timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct rose_sock *rose = rose_sk(sk); + struct rose_sock *rose = from_timer(rose, t, timer); + struct sock *sk = &rose->sock; bh_lock_sock(sk); switch (rose->state) { @@ -192,9 +187,10 @@ static void rose_timer_expiry(unsigned long param) bh_unlock_sock(sk); } -static void rose_idletimer_expiry(unsigned long param) +static void rose_idletimer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct rose_sock *rose = from_timer(rose, t, idletimer); + struct sock *sk = &rose->sock; bh_lock_sock(sk); rose_clear_queues(sk); From 0010e3f8b3537b8e7c8a8e7249f9d184e92df1a5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:50 -0700 Subject: [PATCH 1083/2177] net/ti/tlan: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Samuel Chessman Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/tlan.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index c8d53d8c83ee..8f53d762fbc4 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -172,7 +172,8 @@ static u32 tlan_handle_tx_eoc(struct net_device *, u16); static u32 tlan_handle_status_check(struct net_device *, u16); static u32 tlan_handle_rx_eoc(struct net_device *, u16); -static void tlan_timer(unsigned long); +static void tlan_timer(struct timer_list *t); +static void tlan_phy_monitor(struct timer_list *t); static void tlan_reset_lists(struct net_device *); static void tlan_free_lists(struct net_device *); @@ -190,7 +191,6 @@ static void tlan_phy_power_up(struct net_device *); static void tlan_phy_reset(struct net_device *); static void tlan_phy_start_link(struct net_device *); static void tlan_phy_finish_auto_neg(struct net_device *); -static void tlan_phy_monitor(unsigned long); /* static int tlan_phy_nop(struct net_device *); @@ -254,11 +254,10 @@ tlan_set_timer(struct net_device *dev, u32 ticks, u32 type) spin_unlock_irqrestore(&priv->lock, flags); return; } - priv->timer.function = tlan_timer; + priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer; if (!in_irq()) spin_unlock_irqrestore(&priv->lock, flags); - priv->timer.data = (unsigned long) dev; priv->timer_set_at = jiffies; priv->timer_type = type; mod_timer(&priv->timer, jiffies + ticks); @@ -926,8 +925,8 @@ static int tlan_open(struct net_device *dev) return err; } - init_timer(&priv->timer); - init_timer(&priv->media_timer); + timer_setup(&priv->timer, NULL, 0); + timer_setup(&priv->media_timer, tlan_phy_monitor, 0); tlan_start(dev); @@ -1426,8 +1425,7 
@@ static u32 tlan_handle_tx_eof(struct net_device *dev, u16 host_int) tlan_dio_write8(dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT); if (priv->timer.function == NULL) { - priv->timer.function = tlan_timer; - priv->timer.data = (unsigned long) dev; + priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer; priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY; priv->timer_set_at = jiffies; priv->timer_type = TLAN_TIMER_ACTIVITY; @@ -1578,8 +1576,7 @@ drop_and_reuse: tlan_dio_write8(dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT); if (priv->timer.function == NULL) { - priv->timer.function = tlan_timer; - priv->timer.data = (unsigned long) dev; + priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer; priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY; priv->timer_set_at = jiffies; priv->timer_type = TLAN_TIMER_ACTIVITY; @@ -1836,10 +1833,10 @@ ThunderLAN driver timer function * **************************************************************/ -static void tlan_timer(unsigned long data) +static void tlan_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct tlan_priv *priv = netdev_priv(dev); + struct tlan_priv *priv = from_timer(priv, t, timer); + struct net_device *dev = priv->dev; u32 elapsed; unsigned long flags = 0; @@ -1872,7 +1869,6 @@ static void tlan_timer(unsigned long data) tlan_dio_write8(dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK); } else { - priv->timer.function = tlan_timer; priv->timer.expires = priv->timer_set_at + TLAN_TIMER_ACT_DELAY; spin_unlock_irqrestore(&priv->lock, flags); @@ -2317,8 +2313,6 @@ tlan_finish_reset(struct net_device *dev) } else netdev_info(dev, "Link active\n"); /* Enabling link beat monitoring */ - priv->media_timer.function = tlan_phy_monitor; - priv->media_timer.data = (unsigned long) dev; priv->media_timer.expires = jiffies + HZ; add_timer(&priv->media_timer); } @@ -2763,10 +2757,10 @@ static void tlan_phy_finish_auto_neg(struct net_device *dev) * *******************************************************************/ -static void tlan_phy_monitor(unsigned long data) +static void tlan_phy_monitor(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct tlan_priv *priv = netdev_priv(dev); + struct tlan_priv *priv = from_timer(priv, t, media_timer); + struct net_device *dev = priv->dev; u16 phy; u16 phy_status; From 2183c1a61ccc952a836a533c88288289affa2a59 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:51 -0700 Subject: [PATCH 1084/2177] net/usb/usbnet: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Since the callback is called from both a timer and a tasklet, adjust the tasklet to pass the timer address too. When tasklets have their .data field removed, this can be refactored to call a central function after resolving the correct container_of() for a separate callback function for timer and tasklet. Cc: Oliver Neukum Cc: netdev@vger.kernel.org Cc: linux-usb@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Oliver Neukum Signed-off-by: David S. 
Miller --- drivers/net/usb/usbnet.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 6510e5cc1817..80348b6a8646 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1509,9 +1509,9 @@ err: // tasklet (work deferred from completions, in_irq) or timer -static void usbnet_bh (unsigned long param) +static void usbnet_bh (struct timer_list *t) { - struct usbnet *dev = (struct usbnet *) param; + struct usbnet *dev = from_timer(dev, t, delay); struct sk_buff *skb; struct skb_data *entry; @@ -1694,13 +1694,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); skb_queue_head_init(&dev->rxq_pause); - dev->bh.func = usbnet_bh; - dev->bh.data = (unsigned long) dev; + dev->bh.func = (void (*)(unsigned long))usbnet_bh; + dev->bh.data = (unsigned long)&dev->delay; INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); - dev->delay.function = usbnet_bh; - dev->delay.data = (unsigned long) dev; - init_timer (&dev->delay); + timer_setup(&dev->delay, usbnet_bh, 0); mutex_init (&dev->phy_mutex); mutex_init(&dev->interrupt_mutex); dev->interrupt_count = 0; From d8eb7e262d0d438b23e69d836175b9cec3e4bb90 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:52 -0700 Subject: [PATCH 1085/2177] net/wireless/ray_cs: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/wireless/ray_cs.c | 53 ++++++++++++++++------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 170cd504e8ff..d8afcdfca1ed 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -92,7 +92,7 @@ static const struct iw_handler_def ray_handler_def; /***** Prototypes for raylink functions **************************************/ static void authenticate(ray_dev_t *local); static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type); -static void authenticate_timeout(u_long); +static void authenticate_timeout(struct timer_list *t); static int get_free_ccs(ray_dev_t *local); static int get_free_tx_ccs(ray_dev_t *local); static void init_startup_params(ray_dev_t *local); @@ -102,7 +102,7 @@ static int ray_init(struct net_device *dev); static int interrupt_ecf(ray_dev_t *local, int ccs); static void ray_reset(struct net_device *dev); static void ray_update_parm(struct net_device *dev, UCHAR objid, UCHAR *value, int len); -static void verify_dl_startup(u_long); +static void verify_dl_startup(struct timer_list *t); /* Prototypes for interrpt time functions **********************************/ static irqreturn_t ray_interrupt(int reg, void *dev_id); @@ -120,9 +120,8 @@ static void associate(ray_dev_t *local); /* Card command functions */ static int dl_startup_params(struct net_device *dev); -static void join_net(u_long local); -static void start_net(u_long local); -/* void start_net(ray_dev_t *local); */ +static void join_net(struct timer_list *t); +static void start_net(struct timer_list *t); /*===========================================================================*/ /* Parameters that can be set with 'insmod' */ @@ -323,7 +322,7 @@ static int ray_probe(struct pcmcia_device *p_dev) dev_dbg(&p_dev->dev, "ray_cs ray_attach calling ether_setup.)\n"); netif_stop_queue(dev); - init_timer(&local->timer); + timer_setup(&local->timer, NULL, 0); this_device = p_dev; return ray_config(p_dev); @@ -570,8 +569,7 @@ static int dl_startup_params(struct net_device *dev) local->card_status = CARD_DL_PARAM; /* Start kernel timer to wait for dl startup to complete. 
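/* [Editor's note: background sketch, not part of the patch text around it.
 * from_timer() is a container_of() wrapper: given the struct timer_list *
 * handed to the new-style callback, it recovers the structure that embeds the
 * timer, which is why the old ".data = (long)local" assignments are deleted
 * in these hunks. The example_dev type below is hypothetical.]
 */
#include <linux/timer.h>

struct example_dev {
        struct timer_list timer;        /* embedded timer, as in ray_dev_t */
        int card_status;
};

static void example_timeout(struct timer_list *t)
{
        /* expands to container_of(t, struct example_dev, timer) */
        struct example_dev *dev = from_timer(dev, t, timer);

        dev->card_status = 0;           /* full device context is available again */
}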
*/ local->timer.expires = jiffies + HZ / 2; - local->timer.data = (long)local; - local->timer.function = verify_dl_startup; + local->timer.function = (TIMER_FUNC_TYPE)verify_dl_startup; add_timer(&local->timer); dev_dbg(&link->dev, "ray_cs dl_startup_params started timer for verify_dl_startup\n"); @@ -641,9 +639,9 @@ static void init_startup_params(ray_dev_t *local) } /* init_startup_params */ /*===========================================================================*/ -static void verify_dl_startup(u_long data) +static void verify_dl_startup(struct timer_list *t) { - ray_dev_t *local = (ray_dev_t *) data; + ray_dev_t *local = from_timer(local, t, timer); struct ccs __iomem *pccs = ccs_base(local) + local->dl_param_ccs; UCHAR status; struct pcmcia_device *link = local->finder; @@ -676,16 +674,16 @@ static void verify_dl_startup(u_long data) return; } if (local->sparm.b4.a_network_type == ADHOC) - start_net((u_long) local); + start_net(&local->timer); else - join_net((u_long) local); + join_net(&local->timer); } /* end verify_dl_startup */ /*===========================================================================*/ /* Command card to start a network */ -static void start_net(u_long data) +static void start_net(struct timer_list *t) { - ray_dev_t *local = (ray_dev_t *) data; + ray_dev_t *local = from_timer(local, t, timer); struct ccs __iomem *pccs; int ccsindex; struct pcmcia_device *link = local->finder; @@ -710,9 +708,9 @@ static void start_net(u_long data) /*===========================================================================*/ /* Command card to join a network */ -static void join_net(u_long data) +static void join_net(struct timer_list *t) { - ray_dev_t *local = (ray_dev_t *) data; + ray_dev_t *local = from_timer(local, t, timer); struct ccs __iomem *pccs; int ccsindex; @@ -1639,13 +1637,13 @@ static int get_free_ccs(ray_dev_t *local) } /* get_free_ccs */ /*===========================================================================*/ -static void authenticate_timeout(u_long data) +static void authenticate_timeout(struct timer_list *t) { - ray_dev_t *local = (ray_dev_t *) data; + ray_dev_t *local = from_timer(local, t, timer); del_timer(&local->timer); printk(KERN_INFO "ray_cs Authentication with access point failed" " - timeout\n"); - join_net((u_long) local); + join_net(&local->timer); } /*===========================================================================*/ @@ -1945,17 +1943,16 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) del_timer(&local->timer); local->timer.expires = jiffies + HZ * 5; - local->timer.data = (long)local; if (status == CCS_START_NETWORK) { dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" start failed\n", memtmp); - local->timer.function = start_net; + local->timer.function = (TIMER_FUNC_TYPE)start_net; } else { dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" join failed\n", memtmp); - local->timer.function = join_net; + local->timer.function = (TIMER_FUNC_TYPE)join_net; } add_timer(&local->timer); } @@ -1967,7 +1964,7 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) } else { dev_dbg(&link->dev, "ray_cs association failed,\n"); local->card_status = CARD_ASSOC_FAILED; - join_net((u_long) local); + join_net(&local->timer); } break; case CCS_TX_REQUEST: @@ -2420,12 +2417,11 @@ static void authenticate(ray_dev_t *local) del_timer(&local->timer); if (build_auth_frame(local, local->bss_id, OPEN_AUTH_REQUEST)) { - local->timer.function = join_net; + local->timer.function = (TIMER_FUNC_TYPE)join_net; } else { - 
local->timer.function = authenticate_timeout; + local->timer.function = (TIMER_FUNC_TYPE)authenticate_timeout; } local->timer.expires = jiffies + HZ * 2; - local->timer.data = (long)local; add_timer(&local->timer); local->authentication_state = AWAITING_RESPONSE; } /* end authenticate */ @@ -2468,7 +2464,7 @@ static void rx_authenticate(ray_dev_t *local, struct rcs __iomem *prcs, } else { pr_debug("Authentication refused\n"); local->card_status = CARD_AUTH_REFUSED; - join_net((u_long) local); + join_net(&local->timer); local->authentication_state = UNAUTHENTICATED; } @@ -2506,8 +2502,7 @@ static void associate(ray_dev_t *local) del_timer(&local->timer); local->timer.expires = jiffies + HZ * 2; - local->timer.data = (long)local; - local->timer.function = join_net; + local->timer.function = (TIMER_FUNC_TYPE)join_net; add_timer(&local->timer); local->card_status = CARD_ASSOC_FAILED; return; From 5e8b824d91b43baa3125ab941e42137ef458e72f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:54 -0700 Subject: [PATCH 1086/2177] isdn/hisax: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Karsten Keil Cc: Geliang Tang Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/isdn/hisax/amd7930_fn.c | 6 +++--- drivers/isdn/hisax/arcofi.c | 6 +++--- drivers/isdn/hisax/diva.c | 9 ++++----- drivers/isdn/hisax/elsa.c | 10 +++++----- drivers/isdn/hisax/fsm.c | 7 +++---- drivers/isdn/hisax/hfc4s8s_l1.c | 6 +++--- drivers/isdn/hisax/hfc_2bds0.c | 4 ++-- drivers/isdn/hisax/hfc_pci.c | 9 +++++---- drivers/isdn/hisax/hfc_sx.c | 9 +++++---- drivers/isdn/hisax/hfc_usb.c | 10 ++++++---- drivers/isdn/hisax/hfcscard.c | 5 +++-- drivers/isdn/hisax/icc.c | 6 +++--- drivers/isdn/hisax/ipacx.c | 8 ++++---- drivers/isdn/hisax/isac.c | 6 +++--- drivers/isdn/hisax/isar.c | 9 ++++----- drivers/isdn/hisax/isdnl3.c | 6 +++--- drivers/isdn/hisax/saphir.c | 7 +++---- drivers/isdn/hisax/teleint.c | 5 +++-- drivers/isdn/hisax/w6692.c | 7 +++---- 19 files changed, 68 insertions(+), 67 deletions(-) diff --git a/drivers/isdn/hisax/amd7930_fn.c b/drivers/isdn/hisax/amd7930_fn.c index dcf4c2a9fcea..77debda2221b 100644 --- a/drivers/isdn/hisax/amd7930_fn.c +++ b/drivers/isdn/hisax/amd7930_fn.c @@ -398,7 +398,6 @@ Amd7930_fill_Dfifo(struct IsdnCardState *cs) debugl1(cs, "Amd7930: fill_Dfifo dbusytimer running"); del_timer(&cs->dbusytimer); } - init_timer(&cs->dbusytimer); cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ) / 1000); add_timer(&cs->dbusytimer); @@ -686,8 +685,9 @@ DC_Close_Amd7930(struct IsdnCardState *cs) { static void -dbusy_timer_handler(struct IsdnCardState *cs) +dbusy_timer_handler(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, dbusytimer); u_long flags; struct PStack *stptr; WORD dtcr, der; @@ -790,5 +790,5 @@ void Amd7930_init(struct IsdnCardState *cs) void setup_Amd7930(struct IsdnCardState *cs) { INIT_WORK(&cs->tqueue, Amd7930_bh); - setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs); + timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0); } diff --git a/drivers/isdn/hisax/arcofi.c b/drivers/isdn/hisax/arcofi.c index 9826bad49e2c..2f784f96d439 100644 --- a/drivers/isdn/hisax/arcofi.c +++ b/drivers/isdn/hisax/arcofi.c @@ -23,7 +23,6 @@ add_arcofi_timer(struct IsdnCardState *cs) { if 
(test_and_set_bit(FLG_ARCOFI_TIMER, &cs->HW_Flags)) { del_timer(&cs->dc.isac.arcofitimer); } - init_timer(&cs->dc.isac.arcofitimer); cs->dc.isac.arcofitimer.expires = jiffies + ((ARCOFI_TIMER_VALUE * HZ) / 1000); add_timer(&cs->dc.isac.arcofitimer); } @@ -112,7 +111,8 @@ arcofi_fsm(struct IsdnCardState *cs, int event, void *data) { } static void -arcofi_timer(struct IsdnCardState *cs) { +arcofi_timer(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, dc.isac.arcofitimer); arcofi_fsm(cs, ARCOFI_TIMEOUT, NULL); } @@ -125,7 +125,7 @@ clear_arcofi(struct IsdnCardState *cs) { void init_arcofi(struct IsdnCardState *cs) { - setup_timer(&cs->dc.isac.arcofitimer, (void *)arcofi_timer, (long)cs); + timer_setup(&cs->dc.isac.arcofitimer, arcofi_timer, 0); init_waitqueue_head(&cs->dc.isac.arcofi_wait); test_and_set_bit(HW_ARCOFI, &cs->HW_Flags); } diff --git a/drivers/isdn/hisax/diva.c b/drivers/isdn/hisax/diva.c index 3fc94e7741ae..38bdd3f7b960 100644 --- a/drivers/isdn/hisax/diva.c +++ b/drivers/isdn/hisax/diva.c @@ -798,8 +798,9 @@ reset_diva(struct IsdnCardState *cs) #define DIVA_ASSIGN 1 static void -diva_led_handler(struct IsdnCardState *cs) +diva_led_handler(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, hw.diva.tl); int blink = 0; if ((cs->subtyp == DIVA_IPAC_ISA) || @@ -828,7 +829,6 @@ diva_led_handler(struct IsdnCardState *cs) byteout(cs->hw.diva.ctrl, cs->hw.diva.ctrl_reg); if (blink) { - init_timer(&cs->hw.diva.tl); cs->hw.diva.tl.expires = jiffies + ((blink * HZ) / 1000); add_timer(&cs->hw.diva.tl); } @@ -900,7 +900,7 @@ Diva_card_msg(struct IsdnCardState *cs, int mt, void *arg) (cs->subtyp != DIVA_IPAC_PCI) && (cs->subtyp != DIVA_IPACX_PCI)) { spin_lock_irqsave(&cs->lock, flags); - diva_led_handler(cs); + diva_led_handler(&cs->hw.diva.tl); spin_unlock_irqrestore(&cs->lock, flags); } return (0); @@ -978,8 +978,7 @@ static int setup_diva_common(struct IsdnCardState *cs) printk(KERN_INFO "Diva: IPACX Design Id: %x\n", MemReadISAC_IPACX(cs, IPACX_ID) & 0x3F); } else { /* DIVA 2.0 */ - setup_timer(&cs->hw.diva.tl, (void *)diva_led_handler, - (long)cs); + timer_setup(&cs->hw.diva.tl, diva_led_handler, 0); cs->readisac = &ReadISAC; cs->writeisac = &WriteISAC; cs->readisacfifo = &ReadISACfifo; diff --git a/drivers/isdn/hisax/elsa.c b/drivers/isdn/hisax/elsa.c index 03bc5d504e22..b21c05820f44 100644 --- a/drivers/isdn/hisax/elsa.c +++ b/drivers/isdn/hisax/elsa.c @@ -606,8 +606,9 @@ check_arcofi(struct IsdnCardState *cs) #endif /* ARCOFI_USE */ static void -elsa_led_handler(struct IsdnCardState *cs) +elsa_led_handler(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, hw.elsa.tl); int blink = 0; if (cs->subtyp == ELSA_PCMCIA || cs->subtyp == ELSA_PCMCIA_IPAC) @@ -640,7 +641,6 @@ elsa_led_handler(struct IsdnCardState *cs) } else byteout(cs->hw.elsa.ctrl, cs->hw.elsa.ctrl_reg); if (blink) { - init_timer(&cs->hw.elsa.tl); cs->hw.elsa.tl.expires = jiffies + ((blink * HZ) / 1000); add_timer(&cs->hw.elsa.tl); } @@ -715,7 +715,7 @@ Elsa_card_msg(struct IsdnCardState *cs, int mt, void *arg) init_modem(cs); } #endif - elsa_led_handler(cs); + elsa_led_handler(&cs->hw.elsa.tl); return (ret); case (MDL_REMOVE | REQUEST): cs->hw.elsa.status &= 0; @@ -767,7 +767,7 @@ Elsa_card_msg(struct IsdnCardState *cs, int mt, void *arg) else cs->hw.elsa.status &= ~ELSA_BAD_PWR; } - elsa_led_handler(cs); + elsa_led_handler(&cs->hw.elsa.tl); return (ret); } @@ -1147,7 +1147,7 @@ static int setup_elsa_common(struct IsdnCard *card) init_arcofi(cs); #endif 
setup_isac(cs); - setup_timer(&cs->hw.elsa.tl, (void *)elsa_led_handler, (long)cs); + timer_setup(&cs->hw.elsa.tl, elsa_led_handler, 0); /* Teste Timer */ if (cs->hw.elsa.timer) { byteout(cs->hw.elsa.trig, 0xff); diff --git a/drivers/isdn/hisax/fsm.c b/drivers/isdn/hisax/fsm.c index d63266fa8cbd..3e020ec0f65e 100644 --- a/drivers/isdn/hisax/fsm.c +++ b/drivers/isdn/hisax/fsm.c @@ -85,8 +85,9 @@ FsmChangeState(struct FsmInst *fi, int newstate) } static void -FsmExpireTimer(struct FsmTimer *ft) +FsmExpireTimer(struct timer_list *t) { + struct FsmTimer *ft = from_timer(ft, t, tl); #if FSM_TIMER_DEBUG if (ft->fi->debug) ft->fi->printdebug(ft->fi, "FsmExpireTimer %lx", (long) ft); @@ -102,7 +103,7 @@ FsmInitTimer(struct FsmInst *fi, struct FsmTimer *ft) if (ft->fi->debug) ft->fi->printdebug(ft->fi, "FsmInitTimer %lx", (long) ft); #endif - setup_timer(&ft->tl, (void *)FsmExpireTimer, (long)ft); + timer_setup(&ft->tl, FsmExpireTimer, 0); } void @@ -131,7 +132,6 @@ FsmAddTimer(struct FsmTimer *ft, ft->fi->printdebug(ft->fi, "FsmAddTimer already active!"); return -1; } - init_timer(&ft->tl); ft->event = event; ft->arg = arg; ft->tl.expires = jiffies + (millisec * HZ) / 1000; @@ -152,7 +152,6 @@ FsmRestartTimer(struct FsmTimer *ft, if (timer_pending(&ft->tl)) del_timer(&ft->tl); - init_timer(&ft->tl); ft->event = event; ft->arg = arg; ft->tl.expires = jiffies + (millisec * HZ) / 1000; diff --git a/drivers/isdn/hisax/hfc4s8s_l1.c b/drivers/isdn/hisax/hfc4s8s_l1.c index 9090cc1e1f29..e9bb8fb67ad0 100644 --- a/drivers/isdn/hisax/hfc4s8s_l1.c +++ b/drivers/isdn/hisax/hfc4s8s_l1.c @@ -591,8 +591,9 @@ bch_l2l1(struct hisax_if *ifc, int pr, void *arg) /* layer 1 timer function */ /**************************/ static void -hfc_l1_timer(struct hfc4s8s_l1 *l1) +hfc_l1_timer(struct timer_list *t) { + struct hfc4s8s_l1 *l1 = from_timer(l1, t, l1_timer); u_long flags; if (!l1->enabled) @@ -1396,8 +1397,7 @@ setup_instance(hfc4s8s_hw *hw) l1p = hw->l1 + i; spin_lock_init(&l1p->lock); l1p->hw = hw; - setup_timer(&l1p->l1_timer, (void *)hfc_l1_timer, - (long)(l1p)); + timer_setup(&l1p->l1_timer, hfc_l1_timer, 0); l1p->st_num = i; skb_queue_head_init(&l1p->d_tx_queue); l1p->d_if.ifc.priv = hw->l1 + i; diff --git a/drivers/isdn/hisax/hfc_2bds0.c b/drivers/isdn/hisax/hfc_2bds0.c index ad8597a1a07e..86b82172e992 100644 --- a/drivers/isdn/hisax/hfc_2bds0.c +++ b/drivers/isdn/hisax/hfc_2bds0.c @@ -1014,7 +1014,7 @@ setstack_hfcd(struct PStack *st, struct IsdnCardState *cs) } static void -hfc_dbusy_timer(struct IsdnCardState *cs) +hfc_dbusy_timer(struct timer_list *t) { } @@ -1073,6 +1073,6 @@ set_cs_func(struct IsdnCardState *cs) cs->writeisacfifo = &dummyf; cs->BC_Read_Reg = &ReadReg; cs->BC_Write_Reg = &WriteReg; - setup_timer(&cs->dbusytimer, (void *)hfc_dbusy_timer, (long)cs); + timer_setup(&cs->dbusytimer, hfc_dbusy_timer, 0); INIT_WORK(&cs->tqueue, hfcd_bh); } diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index f9ca35cc32b1..8e5b03161b2f 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -165,8 +165,9 @@ reset_hfcpci(struct IsdnCardState *cs) /* Timer function called when kernel timer expires */ /***************************************************/ static void -hfcpci_Timer(struct IsdnCardState *cs) +hfcpci_Timer(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, hw.hfcpci.timer); cs->hw.hfcpci.timer.expires = jiffies + 75; /* WD RESET */ /* WriteReg(cs, HFCD_DATA, HFCD_CTMT, cs->hw.hfcpci.ctmt | 0x80); @@ -1095,7 +1096,7 @@ 
hfcpci_interrupt(int intno, void *dev_id) /* timer callback for D-chan busy resolution. Currently no function */ /********************************************************************/ static void -hfcpci_dbusy_timer(struct IsdnCardState *cs) +hfcpci_dbusy_timer(struct timer_list *t) { } @@ -1582,7 +1583,7 @@ inithfcpci(struct IsdnCardState *cs) cs->bcs[1].BC_SetStack = setstack_2b; cs->bcs[0].BC_Close = close_hfcpci; cs->bcs[1].BC_Close = close_hfcpci; - setup_timer(&cs->dbusytimer, (void *)hfcpci_dbusy_timer, (long)cs); + timer_setup(&cs->dbusytimer, hfcpci_dbusy_timer, 0); mode_hfcpci(cs->bcs, 0, 0); mode_hfcpci(cs->bcs + 1, 0, 1); } @@ -1744,7 +1745,7 @@ setup_hfcpci(struct IsdnCard *card) cs->BC_Write_Reg = NULL; cs->irq_func = &hfcpci_interrupt; cs->irq_flags |= IRQF_SHARED; - setup_timer(&cs->hw.hfcpci.timer, (void *)hfcpci_Timer, (long)cs); + timer_setup(&cs->hw.hfcpci.timer, hfcpci_Timer, 0); cs->cardmsg = &hfcpci_card_msg; cs->auxcmd = &hfcpci_auxcmd; diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c index 3aef8e1a90e4..d925f579bc80 100644 --- a/drivers/isdn/hisax/hfc_sx.c +++ b/drivers/isdn/hisax/hfc_sx.c @@ -418,8 +418,9 @@ reset_hfcsx(struct IsdnCardState *cs) /* Timer function called when kernel timer expires */ /***************************************************/ static void -hfcsx_Timer(struct IsdnCardState *cs) +hfcsx_Timer(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, hw.hfcsx.timer); cs->hw.hfcsx.timer.expires = jiffies + 75; /* WD RESET */ /* WriteReg(cs, HFCD_DATA, HFCD_CTMT, cs->hw.hfcsx.ctmt | 0x80); @@ -860,7 +861,7 @@ hfcsx_interrupt(int intno, void *dev_id) /* timer callback for D-chan busy resolution. Currently no function */ /********************************************************************/ static void -hfcsx_dbusy_timer(struct IsdnCardState *cs) +hfcsx_dbusy_timer(struct timer_list *t) { } @@ -1495,7 +1496,7 @@ int setup_hfcsx(struct IsdnCard *card) } else return (0); /* no valid card type */ - setup_timer(&cs->dbusytimer, (void *)hfcsx_dbusy_timer, (long)cs); + timer_setup(&cs->dbusytimer, hfcsx_dbusy_timer, 0); INIT_WORK(&cs->tqueue, hfcsx_bh); cs->readisac = NULL; cs->writeisac = NULL; @@ -1507,7 +1508,7 @@ int setup_hfcsx(struct IsdnCard *card) cs->hw.hfcsx.b_fifo_size = 0; /* fifo size still unknown */ cs->hw.hfcsx.cirm = ccd_sp_irqtab[cs->irq & 0xF]; /* RAM not evaluated */ - setup_timer(&cs->hw.hfcsx.timer, (void *)hfcsx_Timer, (long)cs); + timer_setup(&cs->hw.hfcsx.timer, hfcsx_Timer, 0); reset_hfcsx(cs); cs->cardmsg = &hfcsx_card_msg; diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index e8212185d386..97ecb3073045 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -343,8 +343,9 @@ handle_led(hfcusb_data *hfc, int event) /* ISDN l1 timer T3 expires */ static void -l1_timer_expire_t3(hfcusb_data *hfc) +l1_timer_expire_t3(struct timer_list *t) { + hfcusb_data *hfc = from_timer(hfc, t, t3_timer); hfc->d_if.ifc.l1l2(&hfc->d_if.ifc, PH_DEACTIVATE | INDICATION, NULL); @@ -360,8 +361,9 @@ l1_timer_expire_t3(hfcusb_data *hfc) /* ISDN l1 timer T4 expires */ static void -l1_timer_expire_t4(hfcusb_data *hfc) +l1_timer_expire_t4(struct timer_list *t) { + hfcusb_data *hfc = from_timer(hfc, t, t4_timer); hfc->d_if.ifc.l1l2(&hfc->d_if.ifc, PH_DEACTIVATE | INDICATION, NULL); @@ -1165,10 +1167,10 @@ hfc_usb_init(hfcusb_data *hfc) hfc->old_led_state = 0; /* init the t3 timer */ - setup_timer(&hfc->t3_timer, (void *)l1_timer_expire_t3, (long)hfc); + 
timer_setup(&hfc->t3_timer, l1_timer_expire_t3, 0); /* init the t4 timer */ - setup_timer(&hfc->t4_timer, (void *)l1_timer_expire_t4, (long)hfc); + timer_setup(&hfc->t4_timer, l1_timer_expire_t4, 0); /* init the background machinery for control requests */ hfc->ctrl_read.bRequestType = 0xc0; diff --git a/drivers/isdn/hisax/hfcscard.c b/drivers/isdn/hisax/hfcscard.c index 467287096918..380bbeda9c74 100644 --- a/drivers/isdn/hisax/hfcscard.c +++ b/drivers/isdn/hisax/hfcscard.c @@ -41,8 +41,9 @@ hfcs_interrupt(int intno, void *dev_id) } static void -hfcs_Timer(struct IsdnCardState *cs) +hfcs_Timer(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, hw.hfcD.timer); cs->hw.hfcD.timer.expires = jiffies + 75; /* WD RESET */ /* WriteReg(cs, HFCD_DATA, HFCD_CTMT, cs->hw.hfcD.ctmt | 0x80); @@ -253,7 +254,7 @@ int setup_hfcs(struct IsdnCard *card) outb(0x57, cs->hw.hfcD.addr | 1); } set_cs_func(cs); - setup_timer(&cs->hw.hfcD.timer, (void *)hfcs_Timer, (long)cs); + timer_setup(&cs->hw.hfcD.timer, hfcs_Timer, 0); cs->cardmsg = &hfcs_card_msg; cs->irq_func = &hfcs_interrupt; return (1); diff --git a/drivers/isdn/hisax/icc.c b/drivers/isdn/hisax/icc.c index 8d1804572b32..831dd1bb81ef 100644 --- a/drivers/isdn/hisax/icc.c +++ b/drivers/isdn/hisax/icc.c @@ -168,7 +168,6 @@ icc_fill_fifo(struct IsdnCardState *cs) debugl1(cs, "icc_fill_fifo dbusytimer running"); del_timer(&cs->dbusytimer); } - init_timer(&cs->dbusytimer); cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000); add_timer(&cs->dbusytimer); if (cs->debug & L1_DEB_ISAC_FIFO) { @@ -580,8 +579,9 @@ DC_Close_icc(struct IsdnCardState *cs) { } static void -dbusy_timer_handler(struct IsdnCardState *cs) +dbusy_timer_handler(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, dbusytimer); struct PStack *stptr; int rbch, star; @@ -676,5 +676,5 @@ clear_pending_icc_ints(struct IsdnCardState *cs) void setup_icc(struct IsdnCardState *cs) { INIT_WORK(&cs->tqueue, icc_bh); - setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs); + timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0); } diff --git a/drivers/isdn/hisax/ipacx.c b/drivers/isdn/hisax/ipacx.c index c426b4fea28a..c7086c1534bd 100644 --- a/drivers/isdn/hisax/ipacx.c +++ b/drivers/isdn/hisax/ipacx.c @@ -35,7 +35,7 @@ static void ph_command(struct IsdnCardState *cs, unsigned int command); static inline void cic_int(struct IsdnCardState *cs); static void dch_l2l1(struct PStack *st, int pr, void *arg); -static void dbusy_timer_handler(struct IsdnCardState *cs); +static void dbusy_timer_handler(struct timer_list *t); static void dch_empty_fifo(struct IsdnCardState *cs, int count); static void dch_fill_fifo(struct IsdnCardState *cs); static inline void dch_int(struct IsdnCardState *cs); @@ -198,8 +198,9 @@ dch_l2l1(struct PStack *st, int pr, void *arg) //---------------------------------------------------------- //---------------------------------------------------------- static void -dbusy_timer_handler(struct IsdnCardState *cs) +dbusy_timer_handler(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, dbusytimer); struct PStack *st; int rbchd, stard; @@ -298,7 +299,6 @@ dch_fill_fifo(struct IsdnCardState *cs) debugl1(cs, "dch_fill_fifo dbusytimer running"); del_timer(&cs->dbusytimer); } - init_timer(&cs->dbusytimer); cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000); add_timer(&cs->dbusytimer); @@ -424,7 +424,7 @@ dch_init(struct IsdnCardState *cs) cs->setstack_d = dch_setstack; - 
setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs); + timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0); cs->writeisac(cs, IPACX_TR_CONF0, 0x00); // clear LDD cs->writeisac(cs, IPACX_TR_CONF2, 0x00); // enable transmitter diff --git a/drivers/isdn/hisax/isac.c b/drivers/isdn/hisax/isac.c index ea965f29a555..bd40e0671ded 100644 --- a/drivers/isdn/hisax/isac.c +++ b/drivers/isdn/hisax/isac.c @@ -171,7 +171,6 @@ isac_fill_fifo(struct IsdnCardState *cs) debugl1(cs, "isac_fill_fifo dbusytimer running"); del_timer(&cs->dbusytimer); } - init_timer(&cs->dbusytimer); cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000); add_timer(&cs->dbusytimer); if (cs->debug & L1_DEB_ISAC_FIFO) { @@ -584,8 +583,9 @@ DC_Close_isac(struct IsdnCardState *cs) } static void -dbusy_timer_handler(struct IsdnCardState *cs) +dbusy_timer_handler(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, dbusytimer); struct PStack *stptr; int rbch, star; @@ -677,5 +677,5 @@ void clear_pending_isac_ints(struct IsdnCardState *cs) void setup_isac(struct IsdnCardState *cs) { INIT_WORK(&cs->tqueue, isac_bh); - setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs); + timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0); } diff --git a/drivers/isdn/hisax/isar.c b/drivers/isdn/hisax/isar.c index 98b4b67ea337..d01ff116797b 100644 --- a/drivers/isdn/hisax/isar.c +++ b/drivers/isdn/hisax/isar.c @@ -1267,7 +1267,8 @@ isar_int_main(struct IsdnCardState *cs) } static void -ftimer_handler(struct BCState *bcs) { +ftimer_handler(struct timer_list *t) { + struct BCState *bcs = from_timer(bcs, t, hw.isar.ftimer); if (bcs->cs->debug) debugl1(bcs->cs, "ftimer flags %04lx", bcs->Flag); @@ -1902,8 +1903,6 @@ void initisar(struct IsdnCardState *cs) cs->bcs[1].BC_SetStack = setstack_isar; cs->bcs[0].BC_Close = close_isarstate; cs->bcs[1].BC_Close = close_isarstate; - setup_timer(&cs->bcs[0].hw.isar.ftimer, (void *)ftimer_handler, - (long)&cs->bcs[0]); - setup_timer(&cs->bcs[1].hw.isar.ftimer, (void *)ftimer_handler, - (long)&cs->bcs[1]); + timer_setup(&cs->bcs[0].hw.isar.ftimer, ftimer_handler, 0); + timer_setup(&cs->bcs[1].hw.isar.ftimer, ftimer_handler, 0); } diff --git a/drivers/isdn/hisax/isdnl3.c b/drivers/isdn/hisax/isdnl3.c index 569ce52c567b..bb3f9ec62749 100644 --- a/drivers/isdn/hisax/isdnl3.c +++ b/drivers/isdn/hisax/isdnl3.c @@ -160,8 +160,9 @@ newl3state(struct l3_process *pc, int state) } static void -L3ExpireTimer(struct L3Timer *t) +L3ExpireTimer(struct timer_list *timer) { + struct L3Timer *t = from_timer(t, timer, tl); t->pc->st->lli.l4l3(t->pc->st, t->event, t->pc); } @@ -169,7 +170,7 @@ void L3InitTimer(struct l3_process *pc, struct L3Timer *t) { t->pc = pc; - setup_timer(&t->tl, (void *)L3ExpireTimer, (long)t); + timer_setup(&t->tl, L3ExpireTimer, 0); } void @@ -186,7 +187,6 @@ L3AddTimer(struct L3Timer *t, printk(KERN_WARNING "L3AddTimer: timer already active!\n"); return -1; } - init_timer(&t->tl); t->event = event; t->tl.expires = jiffies + (millisec * HZ) / 1000; add_timer(&t->tl); diff --git a/drivers/isdn/hisax/saphir.c b/drivers/isdn/hisax/saphir.c index 6b2d0eccdd56..db906cb37a3f 100644 --- a/drivers/isdn/hisax/saphir.c +++ b/drivers/isdn/hisax/saphir.c @@ -159,8 +159,9 @@ Start_ISAC: } static void -SaphirWatchDog(struct IsdnCardState *cs) +SaphirWatchDog(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, hw.saphir.timer); u_long flags; spin_lock_irqsave(&cs->lock, flags); @@ -268,9 +269,7 @@ int setup_saphir(struct IsdnCard *card) cs->irq, 
cs->hw.saphir.cfg_reg); setup_isac(cs); - cs->hw.saphir.timer.function = (void *) SaphirWatchDog; - cs->hw.saphir.timer.data = (long) cs; - init_timer(&cs->hw.saphir.timer); + timer_setup(&cs->hw.saphir.timer, SaphirWatchDog, 0); cs->hw.saphir.timer.expires = jiffies + 4 * HZ; add_timer(&cs->hw.saphir.timer); if (saphir_reset(cs)) { diff --git a/drivers/isdn/hisax/teleint.c b/drivers/isdn/hisax/teleint.c index 950399f066ef..247aa33076b1 100644 --- a/drivers/isdn/hisax/teleint.c +++ b/drivers/isdn/hisax/teleint.c @@ -179,8 +179,9 @@ Start_ISAC: } static void -TeleInt_Timer(struct IsdnCardState *cs) +TeleInt_Timer(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, hw.hfc.timer); int stat = 0; u_long flags; @@ -278,7 +279,7 @@ int setup_TeleInt(struct IsdnCard *card) cs->bcs[0].hw.hfc.send = NULL; cs->bcs[1].hw.hfc.send = NULL; cs->hw.hfc.fifosize = 7 * 1024 + 512; - setup_timer(&cs->hw.hfc.timer, (void *)TeleInt_Timer, (long)cs); + timer_setup(&cs->hw.hfc.timer, TeleInt_Timer, 0); if (!request_region(cs->hw.hfc.addr, 2, "TeleInt isdn")) { printk(KERN_WARNING "HiSax: TeleInt config port %x-%x already in use\n", diff --git a/drivers/isdn/hisax/w6692.c b/drivers/isdn/hisax/w6692.c index 6f6733b7c1e4..c4be1644f5bb 100644 --- a/drivers/isdn/hisax/w6692.c +++ b/drivers/isdn/hisax/w6692.c @@ -188,7 +188,6 @@ W6692_fill_fifo(struct IsdnCardState *cs) debugl1(cs, "W6692_fill_fifo dbusytimer running"); del_timer(&cs->dbusytimer); } - init_timer(&cs->dbusytimer); cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ) / 1000); add_timer(&cs->dbusytimer); if (cs->debug & L1_DEB_ISAC_FIFO) { @@ -684,8 +683,9 @@ DC_Close_W6692(struct IsdnCardState *cs) } static void -dbusy_timer_handler(struct IsdnCardState *cs) +dbusy_timer_handler(struct timer_list *t) { + struct IsdnCardState *cs = from_timer(cs, t, dbusytimer); struct PStack *stptr; int rbch, star; u_long flags; @@ -904,8 +904,7 @@ static void initW6692(struct IsdnCardState *cs, int part) if (part & 1) { cs->setstack_d = setstack_W6692; cs->DC_Close = DC_Close_W6692; - setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, - (long)cs); + timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0); resetW6692(cs); ph_command(cs, W_L1CMD_RST); cs->dc.w6692.ph_state = W_L1CMD_RST; From 8e763de0b91d9cdc68a80c4e5efd545f5fb644de Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:55 -0700 Subject: [PATCH 1087/2177] net/hamradio/6pack: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Andreas Koensgen Cc: linux-hams@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 97fe8dfb602d..bbc7b7808a31 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -136,9 +136,9 @@ static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char); * Note that in case of DAMA operation, the data is not sent here. 
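 *
 * The conversion below follows the same timer_setup()/from_timer() shape as
 * the preceding patches; sketched with placeholder names (demo_priv and
 * tx_timer are not part of this driver), the notable point is that a
 * callback which used to be invoked directly with its cast data argument is
 * now handed the timer address instead:
 *
 *	static void demo_xmit_on_air(struct timer_list *t)
 *	{
 *		struct demo_priv *p = from_timer(p, t, tx_timer);
 *		...
 *	}
 *
 *	demo_xmit_on_air(&p->tx_timer);    instead of  demo_xmit_on_air((unsigned long)p);
 *
 * which is exactly the shape of the sp_encaps() call-site change further
 * down, and of the electra_cf_timer(&cf->timer) call in the pcmcia patch
 * later in this series.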
*/ -static void sp_xmit_on_air(unsigned long channel) +static void sp_xmit_on_air(struct timer_list *t) { - struct sixpack *sp = (struct sixpack *) channel; + struct sixpack *sp = from_timer(sp, t, tx_t); int actual, when = sp->slottime; static unsigned char random; @@ -229,7 +229,7 @@ static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len) sp->xleft = count; sp->xhead = sp->xbuff; sp->status2 = count; - sp_xmit_on_air((unsigned long)sp); + sp_xmit_on_air(&sp->tx_t); } return; @@ -500,9 +500,9 @@ static inline void tnc_set_sync_state(struct sixpack *sp, int new_tnc_state) __tnc_set_sync_state(sp, new_tnc_state); } -static void resync_tnc(unsigned long channel) +static void resync_tnc(struct timer_list *t) { - struct sixpack *sp = (struct sixpack *) channel; + struct sixpack *sp = from_timer(sp, t, resync_t); static char resync_cmd = 0xe8; /* clear any data that might have been received */ @@ -526,8 +526,6 @@ static void resync_tnc(unsigned long channel) /* Start resync timer again -- the TNC might be still absent */ del_timer(&sp->resync_t); - sp->resync_t.data = (unsigned long) sp; - sp->resync_t.function = resync_tnc; sp->resync_t.expires = jiffies + SIXP_RESYNC_TIMEOUT; add_timer(&sp->resync_t); } @@ -541,8 +539,6 @@ static inline int tnc_init(struct sixpack *sp) sp->tty->ops->write(sp->tty, &inbyte, 1); del_timer(&sp->resync_t); - sp->resync_t.data = (unsigned long) sp; - sp->resync_t.function = resync_tnc; sp->resync_t.expires = jiffies + SIXP_RESYNC_TIMEOUT; add_timer(&sp->resync_t); @@ -623,9 +619,9 @@ static int sixpack_open(struct tty_struct *tty) netif_start_queue(dev); - setup_timer(&sp->tx_t, sp_xmit_on_air, (unsigned long)sp); + timer_setup(&sp->tx_t, sp_xmit_on_air, 0); - init_timer(&sp->resync_t); + timer_setup(&sp->resync_t, resync_tnc, 0); spin_unlock_bh(&sp->lock); @@ -926,8 +922,6 @@ static void decode_prio_command(struct sixpack *sp, unsigned char cmd) if (sp->tnc_state == TNC_IN_SYNC) { del_timer(&sp->resync_t); - sp->resync_t.data = (unsigned long) sp; - sp->resync_t.function = resync_tnc; sp->resync_t.expires = jiffies + SIXP_INIT_RESYNC_TIMEOUT; add_timer(&sp->resync_t); } From c3aed70953c39274bb55fe98e9b8344af1a7af75 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:56 -0700 Subject: [PATCH 1088/2177] xfrm: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() helper to pass the timer pointer explicitly. Cc: Steffen Klassert Cc: Herbert Xu Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f06253969972..4838329bb43a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -57,7 +57,7 @@ static __read_mostly seqcount_t xfrm_policy_hash_generation; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); -static void xfrm_policy_queue_process(unsigned long arg); +static void xfrm_policy_queue_process(struct timer_list *t); static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -179,9 +179,9 @@ static inline unsigned long make_jiffies(long secs) return secs*HZ; } -static void xfrm_policy_timer(unsigned long data) +static void xfrm_policy_timer(struct timer_list *t) { - struct xfrm_policy *xp = (struct xfrm_policy *)data; + struct xfrm_policy *xp = from_timer(xp, t, timer); unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -267,10 +267,9 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) rwlock_init(&policy->lock); refcount_set(&policy->refcnt, 1); skb_queue_head_init(&policy->polq.hold_queue); - setup_timer(&policy->timer, xfrm_policy_timer, - (unsigned long)policy); - setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, - (unsigned long)policy); + timer_setup(&policy->timer, xfrm_policy_timer, 0); + timer_setup(&policy->polq.hold_timer, + xfrm_policy_queue_process, 0); } return policy; } @@ -1852,12 +1851,12 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, return xdst; } -static void xfrm_policy_queue_process(unsigned long arg) +static void xfrm_policy_queue_process(struct timer_list *t) { struct sk_buff *skb; struct sock *sk; struct dst_entry *dst; - struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer); struct net *net = xp_net(pol); struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; From eb8c6b5b4402581a805a5d8a736a7058c4c5abb7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:57 -0700 Subject: [PATCH 1089/2177] ethernet/broadcom: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() helper to pass the timer pointer explicitly. Cc: Florian Fainelli Cc: bcm-kernel-feedback-list@broadcom.com Cc: "David S. Miller" Cc: Arnd Bergmann Cc: Jarod Wilson Cc: netdev@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 24 ++++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index f6bc13fe8a99..d9346e2ac720 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -295,16 +295,13 @@ static int bcm_enet_refill_rx(struct net_device *dev) /* * timer callback to defer refill rx queue in case we're OOM */ -static void bcm_enet_refill_rx_timer(unsigned long data) +static void bcm_enet_refill_rx_timer(struct timer_list *t) { - struct net_device *dev; - struct bcm_enet_priv *priv; - - dev = (struct net_device *)data; - priv = netdev_priv(dev); + struct bcm_enet_priv *priv = from_timer(priv, t, rx_timeout); + struct net_device *dev = priv->net_dev; spin_lock(&priv->rx_lock); - bcm_enet_refill_rx((struct net_device *)data); + bcm_enet_refill_rx(dev); spin_unlock(&priv->rx_lock); } @@ -1860,8 +1857,7 @@ static int bcm_enet_probe(struct platform_device *pdev) spin_lock_init(&priv->rx_lock); /* init rx timeout (used for oom) */ - setup_timer(&priv->rx_timeout, bcm_enet_refill_rx_timer, - (unsigned long)dev); + timer_setup(&priv->rx_timeout, bcm_enet_refill_rx_timer, 0); /* init the mib update lock&work */ mutex_init(&priv->mib_update_lock); @@ -2015,9 +2011,9 @@ static inline int bcm_enet_port_is_rgmii(int portid) /* * enet sw PHY polling */ -static void swphy_poll_timer(unsigned long data) +static void swphy_poll_timer(struct timer_list *t) { - struct bcm_enet_priv *priv = (struct bcm_enet_priv *)data; + struct bcm_enet_priv *priv = from_timer(priv, t, swphy_poll); unsigned int i; for (i = 0; i < priv->num_ports; i++) { @@ -2326,7 +2322,7 @@ static int bcm_enetsw_open(struct net_device *dev) } /* start phy polling timer */ - setup_timer(&priv->swphy_poll, swphy_poll_timer, (unsigned long)priv); + timer_setup(&priv->swphy_poll, swphy_poll_timer, 0); mod_timer(&priv->swphy_poll, jiffies); return 0; @@ -2743,9 +2739,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) spin_lock_init(&priv->rx_lock); /* init rx timeout (used for oom) */ - init_timer(&priv->rx_timeout); - priv->rx_timeout.function = bcm_enet_refill_rx_timer; - priv->rx_timeout.data = (unsigned long)dev; + timer_setup(&priv->rx_timeout, bcm_enet_refill_rx_timer, 0); /* register netdevice */ dev->netdev_ops = &bcm_enetsw_ops; From 41fce7034bf39721a30d7e6f2cb479808d7aea78 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:58 -0700 Subject: [PATCH 1090/2177] net: tulip: de2104x: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: "yuval.shaia@oracle.com" Cc: Tobias Klauser Cc: Jarod Wilson Cc: Philippe Reynes Cc: netdev@vger.kernel.org Cc: linux-parisc@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Tobias Klauser Signed-off-by: David S. 
Miller --- drivers/net/ethernet/dec/tulip/de2104x.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index c87b8cc42963..13430f75496c 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -333,8 +333,8 @@ static void de_set_rx_mode (struct net_device *dev); static void de_tx (struct de_private *de); static void de_clean_rings (struct de_private *de); static void de_media_interrupt (struct de_private *de, u32 status); -static void de21040_media_timer (unsigned long data); -static void de21041_media_timer (unsigned long data); +static void de21040_media_timer (struct timer_list *t); +static void de21041_media_timer (struct timer_list *t); static unsigned int de_ok_to_advertise (struct de_private *de, u32 new_media); @@ -959,9 +959,9 @@ static void de_next_media (struct de_private *de, const u32 *media, } } -static void de21040_media_timer (unsigned long data) +static void de21040_media_timer (struct timer_list *t) { - struct de_private *de = (struct de_private *) data; + struct de_private *de = from_timer(de, t, media_timer); struct net_device *dev = de->dev; u32 status = dr32(SIAStatus); unsigned int carrier; @@ -1040,9 +1040,9 @@ static unsigned int de_ok_to_advertise (struct de_private *de, u32 new_media) return 1; } -static void de21041_media_timer (unsigned long data) +static void de21041_media_timer (struct timer_list *t) { - struct de_private *de = (struct de_private *) data; + struct de_private *de = from_timer(de, t, media_timer); struct net_device *dev = de->dev; u32 status = dr32(SIAStatus); unsigned int carrier; @@ -1999,12 +1999,9 @@ static int de_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) de->msg_enable = (debug < 0 ? DE_DEF_MSG_ENABLE : debug); de->board_idx = board_idx; spin_lock_init (&de->lock); - init_timer(&de->media_timer); - if (de->de21040) - de->media_timer.function = de21040_media_timer; - else - de->media_timer.function = de21041_media_timer; - de->media_timer.data = (unsigned long) de; + timer_setup(&de->media_timer, + de->de21040 ? de21040_media_timer : de21041_media_timer, + 0); netif_carrier_off(dev); From 80c5a20b537996f848c61ddb8a6734c66b65abfd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:28:59 -0700 Subject: [PATCH 1091/2177] pcmcia/electra_cf: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Michael Ellerman Cc: linux-pcmcia@lists.infradead.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/pcmcia/electra_cf.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c index c6fe2a4a7a6a..9671ded549f0 100644 --- a/drivers/pcmcia/electra_cf.c +++ b/drivers/pcmcia/electra_cf.c @@ -79,9 +79,9 @@ static int electra_cf_ss_init(struct pcmcia_socket *s) } /* the timer is primarily to kick this socket's pccardd */ -static void electra_cf_timer(unsigned long _cf) +static void electra_cf_timer(struct timer_list *t) { - struct electra_cf_socket *cf = (void *) _cf; + struct electra_cf_socket *cf = from_timer(cf, t, timer); int present = electra_cf_present(cf); if (present != cf->present) { @@ -95,7 +95,9 @@ static void electra_cf_timer(unsigned long _cf) static irqreturn_t electra_cf_irq(int irq, void *_cf) { - electra_cf_timer((unsigned long)_cf); + struct electra_cf_socket *cf = _cf; + + electra_cf_timer(&cf->timer); return IRQ_HANDLED; } @@ -206,7 +208,7 @@ static int electra_cf_probe(struct platform_device *ofdev) if (!cf) return -ENOMEM; - setup_timer(&cf->timer, electra_cf_timer, (unsigned long)cf); + timer_setup(&cf->timer, electra_cf_timer, 0); cf->irq = 0; cf->ofdev = ofdev; @@ -305,7 +307,7 @@ static int electra_cf_probe(struct platform_device *ofdev) cf->mem_phys, io.start, cf->irq); cf->active = 1; - electra_cf_timer((unsigned long)cf); + electra_cf_timer(&cf->timer); return 0; fail3: From abec4be3ee68c8572adb39da68fbfd86e63daa84 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:00 -0700 Subject: [PATCH 1092/2177] net: ethernet: stmmac: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Giuseppe Cavallaro Signed-off-by: David S. 
Miller --- .../ethernet/stmicro/stmmac/altr_tse_pcs.c | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index 6a9c954492f2..8b50afcdb52d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -118,10 +118,9 @@ int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs) return ret; } -static void pcs_link_timer_callback(unsigned long data) +static void pcs_link_timer_callback(struct tse_pcs *pcs) { u16 val = 0; - struct tse_pcs *pcs = (struct tse_pcs *)data; void __iomem *tse_pcs_base = pcs->tse_pcs_base; void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; @@ -138,12 +137,11 @@ static void pcs_link_timer_callback(unsigned long data) } } -static void auto_nego_timer_callback(unsigned long data) +static void auto_nego_timer_callback(struct tse_pcs *pcs) { u16 val = 0; u16 speed = 0; u16 duplex = 0; - struct tse_pcs *pcs = (struct tse_pcs *)data; void __iomem *tse_pcs_base = pcs->tse_pcs_base; void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; @@ -201,14 +199,14 @@ static void auto_nego_timer_callback(unsigned long data) } } -static void aneg_link_timer_callback(unsigned long data) +static void aneg_link_timer_callback(struct timer_list *t) { - struct tse_pcs *pcs = (struct tse_pcs *)data; + struct tse_pcs *pcs = from_timer(pcs, t, aneg_link_timer); if (pcs->autoneg == AUTONEG_ENABLE) - auto_nego_timer_callback(data); + auto_nego_timer_callback(pcs); else if (pcs->autoneg == AUTONEG_DISABLE) - pcs_link_timer_callback(data); + pcs_link_timer_callback(pcs); } void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, @@ -237,8 +235,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, tse_pcs_reset(tse_pcs_base, pcs); - setup_timer(&pcs->aneg_link_timer, - aneg_link_timer_callback, (unsigned long)pcs); + timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback, + 0); mod_timer(&pcs->aneg_link_timer, jiffies + msecs_to_jiffies(AUTONEGO_LINK_TIMER)); } else if (phy_dev->autoneg == AUTONEG_DISABLE) { @@ -270,8 +268,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, tse_pcs_reset(tse_pcs_base, pcs); - setup_timer(&pcs->aneg_link_timer, - aneg_link_timer_callback, (unsigned long)pcs); + timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback, + 0); mod_timer(&pcs->aneg_link_timer, jiffies + msecs_to_jiffies(AUTONEGO_LINK_TIMER)); } From d3e99b2d19b06030593761c658b648fd6427eaa1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:01 -0700 Subject: [PATCH 1093/2177] net/cw1200: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Solomon Peachy Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/wireless/st/cw1200/pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c index d2202ae92bdd..ded23df1ac1d 100644 --- a/drivers/net/wireless/st/cw1200/pm.c +++ b/drivers/net/wireless/st/cw1200/pm.c @@ -91,7 +91,7 @@ struct cw1200_suspend_state { u8 prev_ps_mode; }; -static void cw1200_pm_stay_awake_tmo(unsigned long arg) +static void cw1200_pm_stay_awake_tmo(struct timer_list *unused) { /* XXX what's the point of this ? */ } @@ -101,8 +101,7 @@ int cw1200_pm_init(struct cw1200_pm_state *pm, { spin_lock_init(&pm->lock); - setup_timer(&pm->stay_awake, cw1200_pm_stay_awake_tmo, - (unsigned long)pm); + timer_setup(&pm->stay_awake, cw1200_pm_stay_awake_tmo, 0); return 0; } From 5a3a8962035277210098a6b498680769a8eb36b6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:02 -0700 Subject: [PATCH 1094/2177] net: vxge: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Jon Mason Cc: "David S. Miller" Cc: Miroslav Lichvar Cc: Jarod Wilson Cc: Eric Dumazet Cc: stephen hemminger Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/neterion/vxge/vxge-main.c | 12 ++++++------ drivers/net/ethernet/neterion/vxge/vxge-main.h | 8 +++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 50ea69d88480..5d5b9855e24e 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -2597,9 +2597,9 @@ INTA_MODE: return VXGE_HW_OK; } -static void vxge_poll_vp_reset(unsigned long data) +static void vxge_poll_vp_reset(struct timer_list *t) { - struct vxgedev *vdev = (struct vxgedev *)data; + struct vxgedev *vdev = from_timer(vdev, t, vp_reset_timer); int i, j = 0; for (i = 0; i < vdev->no_of_vpath; i++) { @@ -2616,9 +2616,9 @@ static void vxge_poll_vp_reset(unsigned long data) mod_timer(&vdev->vp_reset_timer, jiffies + HZ / 2); } -static void vxge_poll_vp_lockup(unsigned long data) +static void vxge_poll_vp_lockup(struct timer_list *t) { - struct vxgedev *vdev = (struct vxgedev *)data; + struct vxgedev *vdev = from_timer(vdev, t, vp_lockup_timer); enum vxge_hw_status status = VXGE_HW_OK; struct vxge_vpath *vpath; struct vxge_ring *ring; @@ -2858,12 +2858,12 @@ static int vxge_open(struct net_device *dev) vdev->config.rx_pause_enable); if (vdev->vp_reset_timer.function == NULL) - vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, vdev, + vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, HZ / 2); /* There is no need to check for RxD leak and RxD lookup on Titan1A */ if (vdev->titan1 && vdev->vp_lockup_timer.function == NULL) - vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, vdev, + vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, HZ / 2); set_bit(__VXGE_STATE_CARD_UP, &vdev->state); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h index 3a79d93b8445..59a57ff5e96a 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h @@ -417,12 +417,10 @@ struct vxge_tx_priv { module_param(p, int, 0) static inline -void vxge_os_timer(struct timer_list *timer, 
void (*func)(unsigned long data), - struct vxgedev *vdev, unsigned long timeout) +void vxge_os_timer(struct timer_list *timer, void (*func)(struct timer_list *), + unsigned long timeout) { - init_timer(timer); - timer->function = func; - timer->data = (unsigned long)vdev; + timer_setup(timer, func, 0); mod_timer(timer, jiffies + timeout); } From 847f03ee16cd4dbe1f15fe09989f52300e43ca10 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:03 -0700 Subject: [PATCH 1095/2177] drivers/atm/suni: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Passes NULL timer when doing non- timer call. Cc: Chas Williams <3chas3@gmail.com> Cc: linux-atm-general@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/atm/suni.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c index b0363149b2fd..b8825f2d79e0 100644 --- a/drivers/atm/suni.c +++ b/drivers/atm/suni.c @@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(sunis_lock); if (atomic_read(&stats->s) < 0) atomic_set(&stats->s,INT_MAX); -static void suni_hz(unsigned long from_timer) +static void suni_hz(struct timer_list *timer) { struct suni_priv *walk; struct atm_dev *dev; @@ -85,7 +85,7 @@ static void suni_hz(unsigned long from_timer) ((GET(TACP_TCC) & 0xff) << 8) | ((GET(TACP_TCCM) & 7) << 16)); } - if (from_timer) mod_timer(&poll_timer,jiffies+HZ); + if (timer) mod_timer(&poll_timer,jiffies+HZ); } @@ -322,13 +322,11 @@ static int suni_start(struct atm_dev *dev) printk(KERN_WARNING "%s(itf %d): no signal\n",dev->type, dev->number); PRIV(dev)->loop_mode = ATM_LM_NONE; - suni_hz(0); /* clear SUNI counters */ + suni_hz(NULL); /* clear SUNI counters */ (void) fetch_stats(dev,NULL,1); /* clear kernel counters */ if (first) { - init_timer(&poll_timer); + timer_setup(&poll_timer, suni_hz, 0); poll_timer.expires = jiffies+HZ; - poll_timer.function = suni_hz; - poll_timer.data = 1; #if 0 printk(KERN_DEBUG "[u] p=0x%lx,n=0x%lx\n",(unsigned long) poll_timer.list.prev, (unsigned long) poll_timer.list.next); From 3d2ceaa63b81f41cd95e8954555130cc801c4452 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:04 -0700 Subject: [PATCH 1096/2177] atm: idt77252: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. This required adding a pointer back to vc_map, and adjusting the locking around removal a bit. Cc: Chas Williams <3chas3@gmail.com> Cc: linux-atm-general@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/atm/idt77252.c | 21 ++++++++++++--------- drivers/atm/idt77252.h | 3 +++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 47f3c4ae0594..0e3b9c44c808 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -2073,21 +2073,19 @@ idt77252_rate_logindex(struct idt77252_dev *card, int pcr) } static void -idt77252_est_timer(unsigned long data) +idt77252_est_timer(struct timer_list *t) { - struct vc_map *vc = (struct vc_map *)data; + struct rate_estimator *est = from_timer(est, t, timer); + struct vc_map *vc = est->vc; struct idt77252_dev *card = vc->card; - struct rate_estimator *est; unsigned long flags; u32 rate, cps; u64 ncells; u8 lacr; spin_lock_irqsave(&vc->lock, flags); - est = vc->estimator; - if (!est) + if (!vc->estimator) goto out; - ncells = est->cells; rate = ((u32)(ncells - est->last_cells)) << (7 - est->interval); @@ -2126,10 +2124,11 @@ idt77252_init_est(struct vc_map *vc, int pcr) est->maxcps = pcr < 0 ? -pcr : pcr; est->cps = est->maxcps; est->avcps = est->cps << 5; + est->vc = vc; est->interval = 2; /* XXX: make this configurable */ est->ewma_log = 2; /* XXX: make this configurable */ - setup_timer(&est->timer, idt77252_est_timer, (unsigned long)vc); + timer_setup(&est->timer, idt77252_est_timer, 0); mod_timer(&est->timer, jiffies + ((HZ / 4) << est->interval)); return est; @@ -2209,16 +2208,20 @@ static int idt77252_init_ubr(struct idt77252_dev *card, struct vc_map *vc, struct atm_vcc *vcc, struct atm_qos *qos) { + struct rate_estimator *est = NULL; unsigned long flags; int tcr; spin_lock_irqsave(&vc->lock, flags); if (vc->estimator) { - del_timer(&vc->estimator->timer); - kfree(vc->estimator); + est = vc->estimator; vc->estimator = NULL; } spin_unlock_irqrestore(&vc->lock, flags); + if (est) { + del_timer_sync(&est->timer); + kfree(est); + } tcr = atm_pcr_goal(&qos->txtp); if (tcr == 0) diff --git a/drivers/atm/idt77252.h b/drivers/atm/idt77252.h index 3a82cc23a053..9339197d701c 100644 --- a/drivers/atm/idt77252.h +++ b/drivers/atm/idt77252.h @@ -184,6 +184,8 @@ struct aal1 { unsigned char sequence; }; +struct vc_map; + struct rate_estimator { struct timer_list timer; unsigned int interval; @@ -193,6 +195,7 @@ struct rate_estimator { long avcps; u32 cps; u32 maxcps; + struct vc_map *vc; }; struct vc_map { From a8c22a2bbc67d001479696c5696ae11c84116701 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:05 -0700 Subject: [PATCH 1097/2177] net: tulip: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: David Howells Cc: Jarod Wilson Cc: Stephen Hemminger Cc: Johannes Berg Cc: Eric Dumazet Cc: Philippe Reynes Cc: "yuval.shaia@oracle.com" Cc: netdev@vger.kernel.org Cc: linux-parisc@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/ethernet/dec/tulip/de4x5.c | 12 ++++++------ drivers/net/ethernet/dec/tulip/dmfe.c | 10 +++++----- drivers/net/ethernet/dec/tulip/interrupt.c | 6 +++--- drivers/net/ethernet/dec/tulip/pnic.c | 6 +++--- drivers/net/ethernet/dec/tulip/pnic2.c | 6 +++--- drivers/net/ethernet/dec/tulip/timer.c | 12 ++++++------ drivers/net/ethernet/dec/tulip/tulip.h | 12 ++++++------ drivers/net/ethernet/dec/tulip/tulip_core.c | 14 ++++++-------- drivers/net/ethernet/dec/tulip/uli526x.c | 10 +++++----- drivers/net/ethernet/dec/tulip/winbond-840.c | 10 +++++----- 10 files changed, 48 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 299812e92db7..a31b4df3e7ff 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -912,7 +912,7 @@ static int de4x5_init(struct net_device *dev); static int de4x5_sw_reset(struct net_device *dev); static int de4x5_rx(struct net_device *dev); static int de4x5_tx(struct net_device *dev); -static void de4x5_ast(struct net_device *dev); +static void de4x5_ast(struct timer_list *t); static int de4x5_txur(struct net_device *dev); static int de4x5_rx_ovfc(struct net_device *dev); @@ -1147,8 +1147,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) lp->timeout = -1; lp->gendev = gendev; spin_lock_init(&lp->lock); - setup_timer(&lp->timer, (void (*)(unsigned long))de4x5_ast, - (unsigned long)dev); + timer_setup(&lp->timer, de4x5_ast, 0); de4x5_parse_params(dev); /* @@ -1741,9 +1740,10 @@ de4x5_tx(struct net_device *dev) } static void -de4x5_ast(struct net_device *dev) +de4x5_ast(struct timer_list *t) { - struct de4x5_private *lp = netdev_priv(dev); + struct de4x5_private *lp = from_timer(lp, t, timer); + struct net_device *dev = dev_get_drvdata(lp->gendev); int next_tick = DE4X5_AUTOSENSE_MS; int dt; @@ -2369,7 +2369,7 @@ autoconf_media(struct net_device *dev) lp->media = INIT; lp->tcount = 0; - de4x5_ast(dev); + de4x5_ast(&lp->timer); return lp->media; } diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 6585f737d08b..17ef7a28873d 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -331,7 +331,7 @@ static void dmfe_phy_write_1bit(void __iomem *, u32); static u16 dmfe_phy_read_1bit(void __iomem *); static u8 dmfe_sense_speed(struct dmfe_board_info *); static void dmfe_process_mode(struct dmfe_board_info *); -static void dmfe_timer(unsigned long); +static void dmfe_timer(struct timer_list *); static inline u32 cal_CRC(unsigned char *, unsigned int, u8); static void dmfe_rx_packet(struct net_device *, struct dmfe_board_info *); static void dmfe_free_tx_pkt(struct net_device *, struct dmfe_board_info *); @@ -596,7 +596,7 @@ static int dmfe_open(struct net_device *dev) netif_wake_queue(dev); /* set and active a timer process */ - setup_timer(&db->timer, dmfe_timer, (unsigned long)dev); + timer_setup(&db->timer, dmfe_timer, 0); db->timer.expires = DMFE_TIMER_WUT + HZ * 2; add_timer(&db->timer); @@ -1128,10 +1128,10 @@ static const struct ethtool_ops netdev_ethtool_ops = { * Dynamic media sense, allocate Rx buffer... 
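 *
 * A recurring detail in this tulip series, sketched with placeholder names
 * (demo_info, pdev and timer stand in for the per-driver equivalents): the
 * timer is embedded in the driver-private structure, so from_timer() now
 * yields the private data first and the net_device has to be recovered from
 * it, reversing the old flow where dev was the cast data argument and the
 * private data came from netdev_priv(dev):
 *
 *	static void demo_media_timer(struct timer_list *t)
 *	{
 *		struct demo_info *db = from_timer(db, t, timer);
 *		struct net_device *dev = pci_get_drvdata(db->pdev);
 *		...
 *	}
 *
 * Drivers that already keep a device pointer in their private state use it
 * directly instead, as with the tp->dev accesses further below.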
*/ -static void dmfe_timer(unsigned long data) +static void dmfe_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct dmfe_board_info *db = netdev_priv(dev); + struct dmfe_board_info *db = from_timer(db, t, timer); + struct net_device *dev = pci_get_drvdata(db->pdev); void __iomem *ioaddr = db->ioaddr; u32 tmp_cr8; unsigned char tmp_cr12; diff --git a/drivers/net/ethernet/dec/tulip/interrupt.c b/drivers/net/ethernet/dec/tulip/interrupt.c index 8df80880ecaa..c1ca0765d56d 100644 --- a/drivers/net/ethernet/dec/tulip/interrupt.c +++ b/drivers/net/ethernet/dec/tulip/interrupt.c @@ -102,10 +102,10 @@ int tulip_refill_rx(struct net_device *dev) #ifdef CONFIG_TULIP_NAPI -void oom_timer(unsigned long data) +void oom_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, oom_timer); + napi_schedule(&tp->napi); } diff --git a/drivers/net/ethernet/dec/tulip/pnic.c b/drivers/net/ethernet/dec/tulip/pnic.c index 7bcccf5cac7a..3fb39e32e1b4 100644 --- a/drivers/net/ethernet/dec/tulip/pnic.c +++ b/drivers/net/ethernet/dec/tulip/pnic.c @@ -84,10 +84,10 @@ void pnic_lnk_change(struct net_device *dev, int csr5) } } -void pnic_timer(unsigned long data) +void pnic_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; void __iomem *ioaddr = tp->base_addr; int next_tick = 60*HZ; diff --git a/drivers/net/ethernet/dec/tulip/pnic2.c b/drivers/net/ethernet/dec/tulip/pnic2.c index 5895fc43f6e0..412adaa7fdf8 100644 --- a/drivers/net/ethernet/dec/tulip/pnic2.c +++ b/drivers/net/ethernet/dec/tulip/pnic2.c @@ -76,10 +76,10 @@ #include -void pnic2_timer(unsigned long data) +void pnic2_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; void __iomem *ioaddr = tp->base_addr; int next_tick = 60*HZ; diff --git a/drivers/net/ethernet/dec/tulip/timer.c b/drivers/net/ethernet/dec/tulip/timer.c index 523d9dde50a2..642e9dfc5451 100644 --- a/drivers/net/ethernet/dec/tulip/timer.c +++ b/drivers/net/ethernet/dec/tulip/timer.c @@ -137,10 +137,10 @@ void tulip_media_task(struct work_struct *work) } -void mxic_timer(unsigned long data) +void mxic_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; void __iomem *ioaddr = tp->base_addr; int next_tick = 60*HZ; @@ -154,10 +154,10 @@ void mxic_timer(unsigned long data) } -void comet_timer(unsigned long data) +void comet_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; int next_tick = 2*HZ; if (tulip_debug > 1) diff --git a/drivers/net/ethernet/dec/tulip/tulip.h b/drivers/net/ethernet/dec/tulip/tulip.h index 06660dbc44b7..b458140aeaef 100644 --- a/drivers/net/ethernet/dec/tulip/tulip.h +++ b/drivers/net/ethernet/dec/tulip/tulip.h @@ -43,7 +43,7 @@ struct tulip_chip_table { int io_size; int valid_intrs; /* CSR7 interrupt enable settings */ int flags; - void 
(*media_timer) (unsigned long); + void (*media_timer) (struct timer_list *); work_func_t media_task; }; @@ -476,7 +476,7 @@ void t21142_lnk_change(struct net_device *dev, int csr5); /* PNIC2.c */ void pnic2_lnk_change(struct net_device *dev, int csr5); -void pnic2_timer(unsigned long data); +void pnic2_timer(struct timer_list *t); void pnic2_start_nway(struct net_device *dev); void pnic2_lnk_change(struct net_device *dev, int csr5); @@ -504,19 +504,19 @@ void tulip_find_mii (struct net_device *dev, int board_idx); /* pnic.c */ void pnic_do_nway(struct net_device *dev); void pnic_lnk_change(struct net_device *dev, int csr5); -void pnic_timer(unsigned long data); +void pnic_timer(struct timer_list *t); /* timer.c */ void tulip_media_task(struct work_struct *work); -void mxic_timer(unsigned long data); -void comet_timer(unsigned long data); +void mxic_timer(struct timer_list *t); +void comet_timer(struct timer_list *t); /* tulip_core.c */ extern int tulip_debug; extern const char * const medianame[]; extern const char tulip_media_cap[]; extern const struct tulip_chip_table tulip_tbl[]; -void oom_timer(unsigned long data); +void oom_timer(struct timer_list *t); extern u8 t21040_csr13[]; static inline void tulip_start_rxtx(struct tulip_private *tp) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 851b6d1f5a42..00d02a0967d0 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -123,10 +123,10 @@ int tulip_debug = TULIP_DEBUG; int tulip_debug = 1; #endif -static void tulip_timer(unsigned long data) +static void tulip_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; if (netif_running(dev)) schedule_work(&tp->media_work); @@ -505,7 +505,7 @@ media_picked: tp->timer.expires = RUN_AT(next_tick); add_timer(&tp->timer); #ifdef CONFIG_TULIP_NAPI - setup_timer(&tp->oom_timer, oom_timer, (unsigned long)dev); + timer_setup(&tp->oom_timer, oom_timer, 0); #endif } @@ -780,8 +780,7 @@ static void tulip_down (struct net_device *dev) spin_unlock_irqrestore (&tp->lock, flags); - setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer, - (unsigned long)dev); + timer_setup(&tp->timer, tulip_tbl[tp->chip_id].media_timer, 0); dev->if_port = tp->saved_if_port; @@ -1470,8 +1469,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->csr0 = csr0; spin_lock_init(&tp->lock); spin_lock_init(&tp->mii_lock); - setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer, - (unsigned long)dev); + timer_setup(&tp->timer, tulip_tbl[tp->chip_id].media_timer, 0); INIT_WORK(&tp->media_work, tulip_tbl[tp->chip_id].media_task); diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index 5fbbc0caba99..488a744084c9 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -241,7 +241,7 @@ static void phy_write_1bit(struct uli526x_board_info *db, u32); static u16 phy_read_1bit(struct uli526x_board_info *db); static u8 uli526x_sense_speed(struct uli526x_board_info *); static void uli526x_process_mode(struct uli526x_board_info *); -static void uli526x_timer(unsigned long); +static void uli526x_timer(struct timer_list *t); static void uli526x_rx_packet(struct net_device *, struct uli526x_board_info *); static void 
uli526x_free_tx_pkt(struct net_device *, struct uli526x_board_info *); static void uli526x_reuse_skb(struct uli526x_board_info *, struct sk_buff *); @@ -491,7 +491,7 @@ static int uli526x_open(struct net_device *dev) netif_wake_queue(dev); /* set and active a timer process */ - setup_timer(&db->timer, uli526x_timer, (unsigned long)dev); + timer_setup(&db->timer, uli526x_timer, 0); db->timer.expires = ULI526X_TIMER_WUT + HZ * 2; add_timer(&db->timer); @@ -1021,10 +1021,10 @@ static const struct ethtool_ops netdev_ethtool_ops = { * Dynamic media sense, allocate Rx buffer... */ -static void uli526x_timer(unsigned long data) +static void uli526x_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct uli526x_board_info *db = netdev_priv(dev); + struct uli526x_board_info *db = from_timer(db, t, timer); + struct net_device *dev = pci_get_drvdata(db->pdev); struct uli_phy_ops *phy = &db->phy; void __iomem *ioaddr = db->ioaddr; unsigned long flags; diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 6f88d687b6d2..70cb2d689c2c 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -327,7 +327,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int netdev_open(struct net_device *dev); static int update_link(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void init_rxtx_rings(struct net_device *dev); static void free_rxtx_rings(struct netdev_private *np); static void init_registers(struct net_device *dev); @@ -655,7 +655,7 @@ static int netdev_open(struct net_device *dev) netdev_dbg(dev, "Done netdev_open()\n"); /* Set the timer to check for link beat. */ - setup_timer(&np->timer, netdev_timer, (unsigned long)dev); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = jiffies + 1*HZ; add_timer(&np->timer); return 0; @@ -772,10 +772,10 @@ static inline void update_csr6(struct net_device *dev, int new) np->mii_if.full_duplex = 1; } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = pci_get_drvdata(np->pci_dev); void __iomem *ioaddr = np->base_addr; if (debug > 2) From 1fccb565e8b09e54467d41111f6faf08fcc9c3c1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:06 -0700 Subject: [PATCH 1098/2177] net: can: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Oliver Hartkopp Cc: Marc Kleine-Budde Cc: "David S. Miller" Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- net/can/af_can.c | 4 ++-- net/can/af_can.h | 2 +- net/can/proc.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 88edac0f3e36..1f75e11ac35a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -882,8 +882,8 @@ static int can_pernet_init(struct net *net) if (IS_ENABLED(CONFIG_PROC_FS)) { /* the statistics are updated every second (timer triggered) */ if (stats_timer) { - setup_timer(&net->can.can_stattimer, can_stat_update, - (unsigned long)net); + timer_setup(&net->can.can_stattimer, can_stat_update, + 0); mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ)); } diff --git a/net/can/af_can.h b/net/can/af_can.h index d0ef45bb2a72..eca6463c6213 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -113,6 +113,6 @@ struct s_pstats { /* function prototypes for the CAN networklayer procfs (proc.c) */ void can_init_proc(struct net *net); void can_remove_proc(struct net *net); -void can_stat_update(unsigned long data); +void can_stat_update(struct timer_list *t); #endif /* AF_CAN_H */ diff --git a/net/can/proc.c b/net/can/proc.c index 83045f00c63c..d979b3dc49a6 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -115,9 +115,9 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif, return rate; } -void can_stat_update(unsigned long data) +void can_stat_update(struct timer_list *t) { - struct net *net = (struct net *)data; + struct net *net = from_timer(net, t, can.can_stattimer); struct s_stats *can_stats = net->can.can_stats; unsigned long j = jiffies; /* snapshot */ @@ -221,7 +221,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v) seq_putc(m, '\n'); - if (net->can.can_stattimer.function == can_stat_update) { + if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) { seq_printf(m, " %8ld %% total match ratio (RXMR)\n", can_stats->total_rx_match_ratio); @@ -291,7 +291,7 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v) user_reset = 1; - if (net->can.can_stattimer.function == can_stat_update) { + if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) { seq_printf(m, "Scheduled statistic reset #%ld.\n", can_pstats->stats_reset + 1); } else { From 7974c0f3622b21825160a37d28b7b1f35ee4cbac Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:07 -0700 Subject: [PATCH 1099/2177] drivers/net/3com: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Steffen Klassert Cc: "David S. Miller" Cc: Jarod Wilson Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/ethernet/3com/3c574_cs.c | 12 +++++------- drivers/net/ethernet/3com/3c589_cs.c | 10 +++++----- drivers/net/ethernet/3com/3c59x.c | 20 ++++++++++---------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 47c844cc9d27..48bc7fa0258c 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -225,7 +225,7 @@ static unsigned short read_eeprom(unsigned int ioaddr, int index); static void tc574_wait_for_completion(struct net_device *dev, int cmd); static void tc574_reset(struct net_device *dev); -static void media_check(unsigned long arg); +static void media_check(struct timer_list *t); static int el3_open(struct net_device *dev); static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -377,7 +377,7 @@ static int tc574_config(struct pcmcia_device *link) lp->autoselect = config & Autoselect ? 1 : 0; } - init_timer(&lp->media); + timer_setup(&lp->media, media_check, 0); { int phy; @@ -681,8 +681,6 @@ static int el3_open(struct net_device *dev) netif_start_queue(dev); tc574_reset(dev); - lp->media.function = media_check; - lp->media.data = (unsigned long) dev; lp->media.expires = jiffies + HZ; add_timer(&lp->media); @@ -859,10 +857,10 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) (and as a last resort, poll the NIC for events), and to monitor the MII, reporting changes in cable status. */ -static void media_check(unsigned long arg) +static void media_check(struct timer_list *t) { - struct net_device *dev = (struct net_device *) arg; - struct el3_private *lp = netdev_priv(dev); + struct el3_private *lp = from_timer(lp, t, media); + struct net_device *dev = lp->p_dev->priv; unsigned int ioaddr = dev->base_addr; unsigned long flags; unsigned short /* cable, */ media, partner; diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index e28254a00599..2b2695311bda 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -163,7 +163,7 @@ static void tc589_release(struct pcmcia_device *link); static u16 read_eeprom(unsigned int ioaddr, int index); static void tc589_reset(struct net_device *dev); -static void media_check(unsigned long arg); +static void media_check(struct timer_list *t); static int el3_config(struct net_device *dev, struct ifmap *map); static int el3_open(struct net_device *dev); static netdev_tx_t el3_start_xmit(struct sk_buff *skb, @@ -517,7 +517,7 @@ static int el3_open(struct net_device *dev) netif_start_queue(dev); tc589_reset(dev); - setup_timer(&lp->media, media_check, (unsigned long)dev); + timer_setup(&lp->media, media_check, 0); mod_timer(&lp->media, jiffies + HZ); dev_dbg(&link->dev, "%s: opened, status %4.4x.\n", @@ -676,10 +676,10 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) return IRQ_RETVAL(handled); } -static void media_check(unsigned long arg) +static void media_check(struct timer_list *t) { - struct net_device *dev = (struct net_device *)(arg); - struct el3_private *lp = netdev_priv(dev); + struct el3_private *lp = from_timer(lp, t, media); + struct net_device *dev = lp->p_dev->priv; unsigned int ioaddr = dev->base_addr; u16 media, errs; unsigned long flags; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 402d9090ad29..f4e13a7014bd 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -759,8 +759,8 @@ static int 
vortex_open(struct net_device *dev); static void mdio_sync(struct vortex_private *vp, int bits); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *vp, int phy_id, int location, int value); -static void vortex_timer(unsigned long arg); -static void rx_oom_timer(unsigned long arg); +static void vortex_timer(struct timer_list *t); +static void rx_oom_timer(struct timer_list *t); static netdev_tx_t vortex_start_xmit(struct sk_buff *skb, struct net_device *dev); static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb, @@ -1599,9 +1599,9 @@ vortex_up(struct net_device *dev) dev->name, media_tbl[dev->if_port].name); } - setup_timer(&vp->timer, vortex_timer, (unsigned long)dev); + timer_setup(&vp->timer, vortex_timer, 0); mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait)); - setup_timer(&vp->rx_oom_timer, rx_oom_timer, (unsigned long)dev); + timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0); if (vortex_debug > 1) pr_debug("%s: Initial media type %s.\n", @@ -1784,10 +1784,10 @@ out: } static void -vortex_timer(unsigned long data) +vortex_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct vortex_private *vp = netdev_priv(dev); + struct vortex_private *vp = from_timer(vp, t, timer); + struct net_device *dev = vp->mii.dev; void __iomem *ioaddr = vp->ioaddr; int next_tick = 60*HZ; int ok = 0; @@ -2687,10 +2687,10 @@ boomerang_rx(struct net_device *dev) * for some memory. Otherwise there is no way to restart the rx process. */ static void -rx_oom_timer(unsigned long arg) +rx_oom_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; - struct vortex_private *vp = netdev_priv(dev); + struct vortex_private *vp = from_timer(vp, t, rx_oom_timer); + struct net_device *dev = vp->mii.dev; spin_lock_irq(&vp->lock); if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */ From cacd2b3fb9815ec29b778342a8aaa80edc0f98d3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:08 -0700 Subject: [PATCH 1100/2177] chelsio: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Johannes Berg Cc: Eric Dumazet Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb/sge.c | 29 +++++++++++-------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 75e439918700..30de26ef3da4 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1882,10 +1882,10 @@ send: /* * Callback for the Tx buffer reclaim timer. Runs with softirqs disabled. 
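The conversions in this series all follow the same basic shape: the callback prototype changes from taking an opaque unsigned long to taking the struct timer_list pointer itself, and the enclosing structure is recovered with from_timer() (a container_of() wrapper) instead of a cast. The sketch below illustrates that shape with invented names (struct my_adapter, poll_timer); it is not code from any driver in these patches.

#include <linux/timer.h>
#include <linux/jiffies.h>

struct my_adapter {
	struct timer_list poll_timer;	/* timer embedded in the private struct */
	unsigned long ticks;
};

/* New-style callback: gets the timer pointer instead of an unsigned long. */
static void my_poll(struct timer_list *t)
{
	/* from_timer() is container_of() keyed on the embedded timer field. */
	struct my_adapter *ap = from_timer(ap, t, poll_timer);

	ap->ticks++;
	mod_timer(&ap->poll_timer, jiffies + HZ);	/* re-arm in one second */
}

static void my_adapter_start(struct my_adapter *ap)
{
	/*
	 * Replaces setup_timer(&ap->poll_timer, my_poll, (unsigned long)ap).
	 * The third argument now carries flags; TIMER_DEFERRABLE or
	 * TIMER_PINNED replace setup_deferrable_timer()/setup_pinned_timer().
	 */
	timer_setup(&ap->poll_timer, my_poll, 0);
	mod_timer(&ap->poll_timer, jiffies + HZ);
}

With the callback tied to the embedded timer, the old .data field and the casts around it disappear entirely.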
*/ -static void sge_tx_reclaim_cb(unsigned long data) +static void sge_tx_reclaim_cb(struct timer_list *t) { int i; - struct sge *sge = (struct sge *)data; + struct sge *sge = from_timer(sge, t, tx_reclaim_timer); for (i = 0; i < SGE_CMDQ_N; ++i) { struct cmdQ *q = &sge->cmdQ[i]; @@ -1978,10 +1978,10 @@ void t1_sge_start(struct sge *sge) /* * Callback for the T2 ESPI 'stuck packet feature' workaorund */ -static void espibug_workaround_t204(unsigned long data) +static void espibug_workaround_t204(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; - struct sge *sge = adapter->sge; + struct sge *sge = from_timer(sge, t, espibug_timer); + struct adapter *adapter = sge->adapter; unsigned int nports = adapter->params.nports; u32 seop[MAX_NPORTS]; @@ -2021,10 +2021,10 @@ static void espibug_workaround_t204(unsigned long data) mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout); } -static void espibug_workaround(unsigned long data) +static void espibug_workaround(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; - struct sge *sge = adapter->sge; + struct sge *sge = from_timer(sge, t, espibug_timer); + struct adapter *adapter = sge->adapter; if (netif_running(adapter->port[0].dev)) { struct sk_buff *skb = sge->espibug_skb[0]; @@ -2075,18 +2075,15 @@ struct sge *t1_sge_create(struct adapter *adapter, struct sge_params *p) goto nomem_port; } - setup_timer(&sge->tx_reclaim_timer, sge_tx_reclaim_cb, - (unsigned long)sge); + timer_setup(&sge->tx_reclaim_timer, sge_tx_reclaim_cb, 0); if (is_T2(sge->adapter)) { - init_timer(&sge->espibug_timer); + timer_setup(&sge->espibug_timer, + adapter->params.nports > 1 ? espibug_workaround_t204 : espibug_workaround, + 0); - if (adapter->params.nports > 1) { + if (adapter->params.nports > 1) tx_sched_init(sge); - sge->espibug_timer.function = espibug_workaround_t204; - } else - sge->espibug_timer.function = espibug_workaround; - sge->espibug_timer.data = (unsigned long)sge->adapter; sge->espibug_timeout = 1; /* for T204, every 10ms */ From 495ad9864f17bb2f4e085f9398328d14bfdb4d88 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:09 -0700 Subject: [PATCH 1101/2177] net: amd8111e: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jarod Wilson Cc: Philippe Reynes Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/amd8111e.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 7f22af6e37e0..358f7ab77c70 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1669,9 +1669,9 @@ static int amd8111e_resume(struct pci_dev *pci_dev) return 0; } -static void amd8111e_config_ipg(struct net_device *dev) +static void amd8111e_config_ipg(struct timer_list *t) { - struct amd8111e_priv *lp = netdev_priv(dev); + struct amd8111e_priv *lp = from_timer(lp, t, ipg_data.ipg_timer); struct ipg_info *ipg_data = &lp->ipg_data; void __iomem *mmio = lp->mmio; unsigned int prev_col_cnt = ipg_data->col_cnt; @@ -1883,8 +1883,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Initialize software ipg timer */ if(lp->options & OPTION_DYN_IPG_ENABLE){ - setup_timer(&lp->ipg_data.ipg_timer, - (void *)&amd8111e_config_ipg, (unsigned long)dev); + timer_setup(&lp->ipg_data.ipg_timer, amd8111e_config_ipg, 0); lp->ipg_data.ipg_timer.expires = jiffies + IPG_CONVERGE_JIFFIES; lp->ipg_data.ipg = DEFAULT_IPG; From b09064b78f396ed0840895b11e8bd90e8380afd5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:10 -0700 Subject: [PATCH 1102/2177] bna: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Rasesh Mody Cc: Sudarsana Kalluru Cc: Dept-GELinuxNICDev@cavium.com Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 43 +++++++++++-------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 6e13c937d715..a843076597ec 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -1693,9 +1693,9 @@ err_return: /* Timer callbacks */ /* a) IOC timer */ static void -bnad_ioc_timeout(unsigned long data) +bnad_ioc_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.ioc_timer); unsigned long flags; spin_lock_irqsave(&bnad->bna_lock, flags); @@ -1704,9 +1704,9 @@ bnad_ioc_timeout(unsigned long data) } static void -bnad_ioc_hb_check(unsigned long data) +bnad_ioc_hb_check(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.hb_timer); unsigned long flags; spin_lock_irqsave(&bnad->bna_lock, flags); @@ -1715,9 +1715,9 @@ bnad_ioc_hb_check(unsigned long data) } static void -bnad_iocpf_timeout(unsigned long data) +bnad_iocpf_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.iocpf_timer); unsigned long flags; spin_lock_irqsave(&bnad->bna_lock, flags); @@ -1726,9 +1726,9 @@ bnad_iocpf_timeout(unsigned long data) } static void -bnad_iocpf_sem_timeout(unsigned long data) +bnad_iocpf_sem_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.sem_timer); unsigned long flags; spin_lock_irqsave(&bnad->bna_lock, flags); @@ -1748,9 +1748,9 @@ bnad_iocpf_sem_timeout(unsigned long data) /* b) Dynamic Interrupt Moderation Timer */ static void -bnad_dim_timeout(unsigned long 
data) +bnad_dim_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, dim_timer); struct bnad_rx_info *rx_info; struct bnad_rx_ctrl *rx_ctrl; int i, j; @@ -1781,9 +1781,9 @@ bnad_dim_timeout(unsigned long data) /* c) Statistics Timer */ static void -bnad_stats_timeout(unsigned long data) +bnad_stats_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, stats_timer); unsigned long flags; if (!netif_running(bnad->netdev) || @@ -1804,8 +1804,7 @@ bnad_dim_timer_start(struct bnad *bnad) { if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED && !test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags)) { - setup_timer(&bnad->dim_timer, bnad_dim_timeout, - (unsigned long)bnad); + timer_setup(&bnad->dim_timer, bnad_dim_timeout, 0); set_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags); mod_timer(&bnad->dim_timer, jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ)); @@ -1823,8 +1822,7 @@ bnad_stats_timer_start(struct bnad *bnad) spin_lock_irqsave(&bnad->bna_lock, flags); if (!test_and_set_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) { - setup_timer(&bnad->stats_timer, bnad_stats_timeout, - (unsigned long)bnad); + timer_setup(&bnad->stats_timer, bnad_stats_timeout, 0); mod_timer(&bnad->stats_timer, jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ)); } @@ -3692,14 +3690,11 @@ bnad_pci_probe(struct pci_dev *pdev, goto res_free; /* Set up timers */ - setup_timer(&bnad->bna.ioceth.ioc.ioc_timer, bnad_ioc_timeout, - (unsigned long)bnad); - setup_timer(&bnad->bna.ioceth.ioc.hb_timer, bnad_ioc_hb_check, - (unsigned long)bnad); - setup_timer(&bnad->bna.ioceth.ioc.iocpf_timer, bnad_iocpf_timeout, - (unsigned long)bnad); - setup_timer(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout, - (unsigned long)bnad); + timer_setup(&bnad->bna.ioceth.ioc.ioc_timer, bnad_ioc_timeout, 0); + timer_setup(&bnad->bna.ioceth.ioc.hb_timer, bnad_ioc_hb_check, 0); + timer_setup(&bnad->bna.ioceth.ioc.iocpf_timer, bnad_iocpf_timeout, 0); + timer_setup(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout, + 0); /* * Start the chip From 6fa35bd0e9e4e0d1ba3b6a6c7968b8c926317507 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:11 -0700 Subject: [PATCH 1103/2177] net: dl2k: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Jarod Wilson Cc: Tobias Klauser Cc: Philippe Reynes Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Tobias Klauser Signed-off-by: David S. 
Miller --- drivers/net/ethernet/dlink/dl2k.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 778f974e2928..a2f6758d38dd 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -68,7 +68,7 @@ static const int max_intrloop = 50; static const int multicast_filter_limit = 0x40; static int rio_open (struct net_device *dev); -static void rio_timer (unsigned long data); +static void rio_timer (struct timer_list *t); static void rio_tx_timeout (struct net_device *dev); static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); @@ -644,7 +644,7 @@ static int rio_open(struct net_device *dev) return i; } - setup_timer(&np->timer, rio_timer, (unsigned long)dev); + timer_setup(&np->timer, rio_timer, 0); np->timer.expires = jiffies + 1 * HZ; add_timer(&np->timer); @@ -655,10 +655,10 @@ static int rio_open(struct net_device *dev) } static void -rio_timer (unsigned long data) +rio_timer (struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = pci_get_drvdata(np->pdev); unsigned int entry; int next_tick = 1*HZ; unsigned long flags; From 11dd894e4afa7995d8e4bd6008cbd79840c3a8bd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:12 -0700 Subject: [PATCH 1104/2177] net: ksz884x: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Johannes Berg Cc: Jarod Wilson Cc: Masahiro Yamada Cc: Philippe Reynes Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ksz884x.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index e798fbe08600..52207508744c 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4338,11 +4338,11 @@ static void ksz_stop_timer(struct ksz_timer_info *info) } static void ksz_init_timer(struct ksz_timer_info *info, int period, - void (*function)(unsigned long), void *data) + void (*function)(struct timer_list *)) { info->max = 0; info->period = period; - setup_timer(&info->timer, function, (unsigned long)data); + timer_setup(&info->timer, function, 0); } static void ksz_update_timer(struct ksz_timer_info *info) @@ -6689,9 +6689,9 @@ static void mib_read_work(struct work_struct *work) } } -static void mib_monitor(unsigned long ptr) +static void mib_monitor(struct timer_list *t) { - struct dev_info *hw_priv = (struct dev_info *) ptr; + struct dev_info *hw_priv = from_timer(hw_priv, t, mib_timer_info.timer); mib_read_work(&hw_priv->mib_read); @@ -6716,10 +6716,10 @@ static void mib_monitor(unsigned long ptr) * * This routine is run in a kernel timer to monitor the network device. 
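Where the old .data argument used to be the struct net_device pointer itself (as in the dl2k, uli526x and winbond-840 hunks above), the converted callback first recovers the driver-private structure and then reaches the net_device through a pointer the driver already stores, typically via pci_get_drvdata(). A minimal sketch, with hypothetical my_priv/my_link_timer names:

#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

struct my_priv {
	struct timer_list timer;
	struct pci_dev *pdev;		/* saved at probe time */
};

static void my_link_timer(struct timer_list *t)
{
	struct my_priv *np = from_timer(np, t, timer);
	/* The net_device is no longer passed in; look it up from drvdata. */
	struct net_device *dev = pci_get_drvdata(np->pdev);

	if (netif_running(dev))
		netdev_dbg(dev, "link check\n");	/* placeholder work */
	mod_timer(&np->timer, jiffies + HZ);
}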
*/ -static void dev_monitor(unsigned long ptr) +static void dev_monitor(struct timer_list *t) { - struct net_device *dev = (struct net_device *) ptr; - struct dev_priv *priv = netdev_priv(dev); + struct dev_priv *priv = from_timer(priv, t, monitor_timer_info.timer); + struct net_device *dev = priv->mii_if.dev; struct dev_info *hw_priv = priv->adapter; struct ksz_hw *hw = &hw_priv->hw; struct ksz_port *port = &priv->port; @@ -6789,7 +6789,7 @@ static int __init netdev_init(struct net_device *dev) /* 500 ms timeout */ ksz_init_timer(&priv->monitor_timer_info, 500 * HZ / 1000, - dev_monitor, dev); + dev_monitor); /* 500 ms timeout */ dev->watchdog_timeo = HZ / 2; @@ -7065,7 +7065,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id) /* 500 ms timeout */ ksz_init_timer(&hw_priv->mib_timer_info, 500 * HZ / 1000, - mib_monitor, hw_priv); + mib_monitor); for (i = 0; i < hw->dev_count; i++) { dev = alloc_etherdev(sizeof(struct dev_priv)); From d99356797a8f3abaa57e13c5d1f50e4392eca037 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:13 -0700 Subject: [PATCH 1105/2177] forcedeth: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Zhu Yanjun Cc: Philippe Reynes Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 28 ++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index a235e8881af9..88128ce61471 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1884,10 +1884,9 @@ packet_dropped: } /* If rx bufs are exhausted called after 50ms to attempt to refresh */ -static void nv_do_rx_refill(unsigned long data) +static void nv_do_rx_refill(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct fe_priv *np = netdev_priv(dev); + struct fe_priv *np = from_timer(np, t, oom_kick); /* Just reschedule NAPI rx processing */ napi_schedule(&np->napi); @@ -4065,10 +4064,10 @@ static void nv_free_irq(struct net_device *dev) } } -static void nv_do_nic_poll(unsigned long data) +static void nv_do_nic_poll(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct fe_priv *np = netdev_priv(dev); + struct fe_priv *np = from_timer(np, t, nic_poll); + struct net_device *dev = np->dev; u8 __iomem *base = get_hwbase(dev); u32 mask = 0; unsigned long flags; @@ -4176,16 +4175,18 @@ static void nv_do_nic_poll(unsigned long data) #ifdef CONFIG_NET_POLL_CONTROLLER static void nv_poll_controller(struct net_device *dev) { - nv_do_nic_poll((unsigned long) dev); + struct fe_priv *np = netdev_priv(dev); + + nv_do_nic_poll(&np->nic_poll); } #endif -static void nv_do_stats_poll(unsigned long data) +static void nv_do_stats_poll(struct timer_list *t) __acquires(&netdev_priv(dev)->hwstats_lock) __releases(&netdev_priv(dev)->hwstats_lock) { - struct net_device *dev = (struct net_device *) data; - struct fe_priv *np = netdev_priv(dev); + struct fe_priv *np = from_timer(np, t, stats_poll); + struct net_device *dev = np->dev; /* If lock is currently taken, the stats are being refreshed * and hence fresh enough */ @@ -5631,10 +5632,9 @@ static int nv_probe(struct pci_dev *pci_dev, const struct 
pci_device_id *id) u64_stats_init(&np->swstats_rx_syncp); u64_stats_init(&np->swstats_tx_syncp); - setup_timer(&np->oom_kick, nv_do_rx_refill, (unsigned long)dev); - setup_timer(&np->nic_poll, nv_do_nic_poll, (unsigned long)dev); - setup_deferrable_timer(&np->stats_poll, nv_do_stats_poll, - (unsigned long)dev); + timer_setup(&np->oom_kick, nv_do_rx_refill, 0); + timer_setup(&np->nic_poll, nv_do_nic_poll, 0); + timer_setup(&np->stats_poll, nv_do_stats_poll, TIMER_DEFERRABLE); err = pci_enable_device(pci_dev); if (err) From e313ac12eb13a9738116192848695da54c38b1ae Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:14 -0700 Subject: [PATCH 1106/2177] mISDN: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Karsten Keil Cc: Geliang Tang Cc: "David S. Miller" Cc: Masahiro Yamada Cc: Andrew Morton Cc: Anton Vasilyev Cc: Ingo Molnar Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/mISDNipac.c | 7 +++---- drivers/isdn/hardware/mISDN/w6692.c | 7 +++---- drivers/isdn/mISDN/dsp.h | 2 +- drivers/isdn/mISDN/dsp_core.c | 6 ++---- drivers/isdn/mISDN/dsp_tones.c | 6 ++---- drivers/isdn/mISDN/fsm.c | 7 +++---- drivers/isdn/mISDN/l1oip_core.c | 15 +++++++-------- drivers/isdn/mISDN/timerdev.c | 6 +++--- 8 files changed, 24 insertions(+), 32 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c index e240010b93fa..4d78f870435e 100644 --- a/drivers/isdn/hardware/mISDN/mISDNipac.c +++ b/drivers/isdn/hardware/mISDN/mISDNipac.c @@ -172,7 +172,6 @@ isac_fill_fifo(struct isac_hw *isac) pr_debug("%s: %s dbusytimer running\n", isac->name, __func__); del_timer(&isac->dch.timer); } - init_timer(&isac->dch.timer); isac->dch.timer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000); add_timer(&isac->dch.timer); if (isac->dch.debug & DEBUG_HW_DFIFO) { @@ -727,8 +726,9 @@ isac_release(struct isac_hw *isac) } static void -dbusy_timer_handler(struct isac_hw *isac) +dbusy_timer_handler(struct timer_list *t) { + struct isac_hw *isac = from_timer(isac, t, dch.timer); int rbch, star; u_long flags; @@ -796,8 +796,7 @@ isac_init(struct isac_hw *isac) } isac->mon_tx = NULL; isac->mon_rx = NULL; - setup_timer(&isac->dch.timer, (void *)dbusy_timer_handler, - (long)isac); + timer_setup(&isac->dch.timer, dbusy_timer_handler, 0); isac->mocr = 0xaa; if (isac->type & IPAC_TYPE_ISACX) { /* Disable all IRQ */ diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c index d80072fef434..536d5137f49d 100644 --- a/drivers/isdn/hardware/mISDN/w6692.c +++ b/drivers/isdn/hardware/mISDN/w6692.c @@ -311,7 +311,6 @@ W6692_fill_Dfifo(struct w6692_hw *card) pr_debug("%s: fill_Dfifo dbusytimer running\n", card->name); del_timer(&dch->timer); } - init_timer(&dch->timer); dch->timer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ) / 1000); add_timer(&dch->timer); if (debug & DEBUG_HW_DFIFO) { @@ -819,8 +818,9 @@ w6692_irq(int intno, void *dev_id) } static void -dbusy_timer_handler(struct dchannel *dch) +dbusy_timer_handler(struct timer_list *t) { + struct dchannel *dch = from_timer(dch, t, timer); struct w6692_hw *card = dch->hw; int rbch, star; u_long flags; @@ -852,8 +852,7 @@ static void initW6692(struct w6692_hw *card) { u8 val; - setup_timer(&card->dch.timer, (void *)dbusy_timer_handler, - (u_long)&card->dch); 
+ timer_setup(&card->dch.timer, dbusy_timer_handler, 0); w6692_mode(&card->bc[0], ISDN_P_NONE); w6692_mode(&card->bc[1], ISDN_P_NONE); WriteW6692(card, W_D_CTL, 0x00); diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h index fc1733a08845..fa09d511a8ed 100644 --- a/drivers/isdn/mISDN/dsp.h +++ b/drivers/isdn/mISDN/dsp.h @@ -259,7 +259,7 @@ extern u8 *dsp_dtmf_goertzel_decode(struct dsp *dsp, u8 *data, int len, extern int dsp_tone(struct dsp *dsp, int tone); extern void dsp_tone_copy(struct dsp *dsp, u8 *data, int len); -extern void dsp_tone_timeout(void *arg); +extern void dsp_tone_timeout(struct timer_list *t); extern void dsp_bf_encrypt(struct dsp *dsp, u8 *data, int len); extern void dsp_bf_decrypt(struct dsp *dsp, u8 *data, int len); diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 880e9d367a39..cd036e87335a 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -1092,7 +1092,7 @@ dspcreate(struct channel_req *crq) ndsp->pcm_bank_tx = -1; ndsp->hfc_conf = -1; /* current conference number */ /* set tone timer */ - setup_timer(&ndsp->tone.tl, (void *)dsp_tone_timeout, (long)ndsp); + timer_setup(&ndsp->tone.tl, dsp_tone_timeout, 0); if (dtmfthreshold < 20 || dtmfthreshold > 500) dtmfthreshold = 200; @@ -1202,9 +1202,7 @@ static int __init dsp_init(void) } /* set sample timer */ - dsp_spl_tl.function = (void *)dsp_cmx_send; - dsp_spl_tl.data = 0; - init_timer(&dsp_spl_tl); + timer_setup(&dsp_spl_tl, (void *)dsp_cmx_send, 0); dsp_spl_tl.expires = jiffies + dsp_tics; dsp_spl_jiffies = dsp_spl_tl.expires; add_timer(&dsp_spl_tl); diff --git a/drivers/isdn/mISDN/dsp_tones.c b/drivers/isdn/mISDN/dsp_tones.c index 057e0d6a369b..8389e2105cdc 100644 --- a/drivers/isdn/mISDN/dsp_tones.c +++ b/drivers/isdn/mISDN/dsp_tones.c @@ -457,9 +457,9 @@ dsp_tone_hw_message(struct dsp *dsp, u8 *sample, int len) * timer expires * *****************/ void -dsp_tone_timeout(void *arg) +dsp_tone_timeout(struct timer_list *t) { - struct dsp *dsp = arg; + struct dsp *dsp = from_timer(dsp, t, tone.tl); struct dsp_tone *tone = &dsp->tone; struct pattern *pat = (struct pattern *)tone->pattern; int index = tone->index; @@ -478,7 +478,6 @@ dsp_tone_timeout(void *arg) else dsp_tone_hw_message(dsp, pat->data[index], *(pat->siz[index])); /* set timer */ - init_timer(&tone->tl); tone->tl.expires = jiffies + (pat->seq[index] * HZ) / 8000; add_timer(&tone->tl); } @@ -541,7 +540,6 @@ dsp_tone(struct dsp *dsp, int tone) /* set timer */ if (timer_pending(&tonet->tl)) del_timer(&tonet->tl); - init_timer(&tonet->tl); tonet->tl.expires = jiffies + (pat->seq[0] * HZ) / 8000; add_timer(&tonet->tl); } else { diff --git a/drivers/isdn/mISDN/fsm.c b/drivers/isdn/mISDN/fsm.c index 92e6570b1143..cabcb906e0b5 100644 --- a/drivers/isdn/mISDN/fsm.c +++ b/drivers/isdn/mISDN/fsm.c @@ -100,8 +100,9 @@ mISDN_FsmChangeState(struct FsmInst *fi, int newstate) EXPORT_SYMBOL(mISDN_FsmChangeState); static void -FsmExpireTimer(struct FsmTimer *ft) +FsmExpireTimer(struct timer_list *t) { + struct FsmTimer *ft = from_timer(ft, t, tl); #if FSM_TIMER_DEBUG if (ft->fi->debug) ft->fi->printdebug(ft->fi, "FsmExpireTimer %lx", (long) ft); @@ -117,7 +118,7 @@ mISDN_FsmInitTimer(struct FsmInst *fi, struct FsmTimer *ft) if (ft->fi->debug) ft->fi->printdebug(ft->fi, "mISDN_FsmInitTimer %lx", (long) ft); #endif - setup_timer(&ft->tl, (void *)FsmExpireTimer, (long)ft); + timer_setup(&ft->tl, FsmExpireTimer, 0); } EXPORT_SYMBOL(mISDN_FsmInitTimer); @@ -153,7 +154,6 @@ mISDN_FsmAddTimer(struct 
FsmTimer *ft, } return -1; } - init_timer(&ft->tl); ft->event = event; ft->arg = arg; ft->tl.expires = jiffies + (millisec * HZ) / 1000; @@ -175,7 +175,6 @@ mISDN_FsmRestartTimer(struct FsmTimer *ft, if (timer_pending(&ft->tl)) del_timer(&ft->tl); - init_timer(&ft->tl); ft->event = event; ft->arg = arg; ft->tl.expires = jiffies + (millisec * HZ) / 1000; diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 6be2041248d3..b5d590e378ac 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -842,17 +842,18 @@ l1oip_send_bh(struct work_struct *work) * timer stuff */ static void -l1oip_keepalive(void *data) +l1oip_keepalive(struct timer_list *t) { - struct l1oip *hc = (struct l1oip *)data; + struct l1oip *hc = from_timer(hc, t, keep_tl); schedule_work(&hc->workq); } static void -l1oip_timeout(void *data) +l1oip_timeout(struct timer_list *t) { - struct l1oip *hc = (struct l1oip *)data; + struct l1oip *hc = from_timer(hc, t, + timeout_tl); struct dchannel *dch = hc->chan[hc->d_idx].dch; if (debug & DEBUG_L1OIP_MSG) @@ -1437,13 +1438,11 @@ init_card(struct l1oip *hc, int pri, int bundle) if (ret) return ret; - hc->keep_tl.function = (void *)l1oip_keepalive; - hc->keep_tl.data = (ulong)hc; - init_timer(&hc->keep_tl); + timer_setup(&hc->keep_tl, l1oip_keepalive, 0); hc->keep_tl.expires = jiffies + 2 * HZ; /* two seconds first time */ add_timer(&hc->keep_tl); - setup_timer(&hc->timeout_tl, (void *)l1oip_timeout, (ulong)hc); + timer_setup(&hc->timeout_tl, l1oip_timeout, 0); hc->timeout_on = 0; /* state that we have timer off */ return 0; diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index b1e135fc1fb5..c50a34340f67 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -162,9 +162,9 @@ mISDN_poll(struct file *filep, poll_table *wait) } static void -dev_expire_timer(unsigned long data) +dev_expire_timer(struct timer_list *t) { - struct mISDNtimer *timer = (void *)data; + struct mISDNtimer *timer = from_timer(timer, t, tl); u_long flags; spin_lock_irqsave(&timer->dev->lock, flags); @@ -189,7 +189,7 @@ misdn_add_timer(struct mISDNtimerdev *dev, int timeout) if (!timer) return -ENOMEM; timer->dev = dev; - setup_timer(&timer->tl, dev_expire_timer, (long)timer); + timer_setup(&timer->tl, dev_expire_timer, 0); spin_lock_irq(&dev->lock); id = timer->id = dev->next_id++; if (dev->next_id < 0) From c788dd2c64322f97e4fdc04ea3948c4209fbc8a7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:15 -0700 Subject: [PATCH 1107/2177] isdn/gigaset: Use kzalloc instead of open-coded field zeroing This replaces a kmalloc followed by a bunch of per-field zeroing with a single kzalloc call, reducing the lines of code. Cc: Paul Bolle Cc: Karsten Keil Cc: "David S. Miller" Cc: Johan Hovold Cc: gigaset307x-common@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/isdn/gigaset/bas-gigaset.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index 2da3ff650e1d..33151f05e744 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -2200,7 +2200,7 @@ static int gigaset_initcshw(struct cardstate *cs) { struct bas_cardstate *ucs; - cs->hw.bas = ucs = kmalloc(sizeof *ucs, GFP_KERNEL); + cs->hw.bas = ucs = kzalloc(sizeof(*ucs), GFP_KERNEL); if (!ucs) { pr_err("out of memory\n"); return -ENOMEM; @@ -2212,15 +2212,7 @@ static int gigaset_initcshw(struct cardstate *cs) return -ENOMEM; } - ucs->urb_cmd_in = NULL; - ucs->urb_cmd_out = NULL; - ucs->rcvbuf = NULL; - ucs->rcvbuf_size = 0; - spin_lock_init(&ucs->lock); - ucs->pending = 0; - - ucs->basstate = 0; setup_timer(&ucs->timer_ctrl, req_timeout, (unsigned long) cs); setup_timer(&ucs->timer_atrdy, atrdy_timeout, (unsigned long) cs); setup_timer(&ucs->timer_cmd_in, cmd_in_timeout, (unsigned long) cs); From 4cfea08e6251d1468b3a694e9543131a12be3ac9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:16 -0700 Subject: [PATCH 1108/2177] isdn/gigaset: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Paul Bolle Cc: Karsten Keil Cc: "David S. Miller" Cc: Johan Hovold Cc: gigaset307x-common@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/isdn/gigaset/bas-gigaset.c | 36 +++++++++++++++++------------- drivers/isdn/gigaset/common.c | 7 +++--- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index 33151f05e744..c990c6bbffc2 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -433,10 +433,11 @@ static void check_pending(struct bas_cardstate *ucs) * argument: * controller state structure */ -static void cmd_in_timeout(unsigned long data) +static void cmd_in_timeout(struct timer_list *t) { - struct cardstate *cs = (struct cardstate *) data; - struct bas_cardstate *ucs = cs->hw.bas; + struct bas_cardstate *ucs = from_timer(ucs, t, timer_cmd_in); + struct urb *urb = ucs->urb_int_in; + struct cardstate *cs = urb->context; int rc; if (!ucs->rcvbuf_size) { @@ -639,10 +640,11 @@ static void int_in_work(struct work_struct *work) * argument: * controller state structure */ -static void int_in_resubmit(unsigned long data) +static void int_in_resubmit(struct timer_list *t) { - struct cardstate *cs = (struct cardstate *) data; - struct bas_cardstate *ucs = cs->hw.bas; + struct bas_cardstate *ucs = from_timer(ucs, t, timer_int_in); + struct urb *urb = ucs->urb_int_in; + struct cardstate *cs = urb->context; int rc; if (ucs->retry_int_in++ >= BAS_RETRY) { @@ -1441,10 +1443,11 @@ error: * argument: * controller state structure */ -static void req_timeout(unsigned long data) +static void req_timeout(struct timer_list *t) { - struct cardstate *cs = (struct cardstate *) data; - struct bas_cardstate *ucs = cs->hw.bas; + struct bas_cardstate *ucs = from_timer(ucs, t, timer_ctrl); + struct urb *urb = ucs->urb_int_in; + struct cardstate *cs = urb->context; int pending; unsigned long flags; @@ -1837,10 +1840,11 @@ static void write_command_callback(struct urb *urb) * argument: * controller state structure */ 
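The gigaset hunks show a second-order version of the same recovery: from_timer() yields the USB-specific state, and the generic struct cardstate is then reached through a pointer that state already holds (in the patch, the interrupt URB's ->context). The sketch below simplifies that to a direct back-pointer and uses invented names (my_usb_state, ctrl_timer, owner):

#include <linux/timer.h>

struct my_card;				/* generic, bus-independent state */

struct my_usb_state {
	struct timer_list ctrl_timer;
	struct my_card *owner;		/* set once when this state is created */
};

static void my_ctrl_timeout(struct timer_list *t)
{
	struct my_usb_state *us = from_timer(us, t, ctrl_timer);
	struct my_card *card = us->owner;	/* second hop, no casts needed */

	/* ... handle the timeout using both us and card ... */
	(void)card;
}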
-static void atrdy_timeout(unsigned long data) +static void atrdy_timeout(struct timer_list *t) { - struct cardstate *cs = (struct cardstate *) data; - struct bas_cardstate *ucs = cs->hw.bas; + struct bas_cardstate *ucs = from_timer(ucs, t, timer_atrdy); + struct urb *urb = ucs->urb_int_in; + struct cardstate *cs = urb->context; dev_warn(cs->dev, "timeout waiting for HD_READY_SEND_ATDATA\n"); @@ -2213,10 +2217,10 @@ static int gigaset_initcshw(struct cardstate *cs) } spin_lock_init(&ucs->lock); - setup_timer(&ucs->timer_ctrl, req_timeout, (unsigned long) cs); - setup_timer(&ucs->timer_atrdy, atrdy_timeout, (unsigned long) cs); - setup_timer(&ucs->timer_cmd_in, cmd_in_timeout, (unsigned long) cs); - setup_timer(&ucs->timer_int_in, int_in_resubmit, (unsigned long) cs); + timer_setup(&ucs->timer_ctrl, req_timeout, 0); + timer_setup(&ucs->timer_atrdy, atrdy_timeout, 0); + timer_setup(&ucs->timer_cmd_in, cmd_in_timeout, 0); + timer_setup(&ucs->timer_int_in, int_in_resubmit, 0); init_waitqueue_head(&ucs->waitqueue); INIT_WORK(&ucs->int_in_wq, int_in_work); diff --git a/drivers/isdn/gigaset/common.c b/drivers/isdn/gigaset/common.c index 7c7814497e3e..15482c5de33c 100644 --- a/drivers/isdn/gigaset/common.c +++ b/drivers/isdn/gigaset/common.c @@ -153,9 +153,9 @@ static int test_timeout(struct at_state_t *at_state) return 1; } -static void timer_tick(unsigned long data) +static void timer_tick(struct timer_list *t) { - struct cardstate *cs = (struct cardstate *) data; + struct cardstate *cs = from_timer(cs, t, timer); unsigned long flags; unsigned channel; struct at_state_t *at_state; @@ -687,7 +687,7 @@ struct cardstate *gigaset_initcs(struct gigaset_driver *drv, int channels, cs->ignoreframes = ignoreframes; INIT_LIST_HEAD(&cs->temp_at_states); cs->running = 0; - init_timer(&cs->timer); /* clear next & prev */ + timer_setup(&cs->timer, timer_tick, 0); spin_lock_init(&cs->ev_lock); cs->ev_tail = 0; cs->ev_head = 0; @@ -768,7 +768,6 @@ struct cardstate *gigaset_initcs(struct gigaset_driver *drv, int channels, spin_lock_irqsave(&cs->lock, flags); cs->running = 1; spin_unlock_irqrestore(&cs->lock, flags); - setup_timer(&cs->timer, timer_tick, (unsigned long) cs); cs->timer.expires = jiffies + msecs_to_jiffies(GIG_TICK); add_timer(&cs->timer); From cdeabbb881343c1d554b83687a71f45280c592e0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:17 -0700 Subject: [PATCH 1109/2177] net: sched: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Add pointer back to Qdisc. Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- net/sched/cls_flow.c | 7 +++---- net/sched/sch_generic.c | 6 +++--- net/sched/sch_pie.c | 10 ++++++---- net/sched/sch_red.c | 10 ++++++---- net/sched/sch_sfq.c | 10 +++++----- 5 files changed, 23 insertions(+), 20 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index f3be6667a253..6b29cef90bec 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -345,9 +345,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static void flow_perturbation(unsigned long arg) +static void flow_perturbation(struct timer_list *t) { - struct flow_filter *f = (struct flow_filter *)arg; + struct flow_filter *f = from_timer(f, t, perturb_timer); get_random_bytes(&f->hashrnd, 4); if (f->perturb_period) @@ -505,8 +505,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, get_random_bytes(&fnew->hashrnd, 4); } - setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, - (unsigned long)fnew); + timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE); netif_keep_dst(qdisc_dev(tp->q)); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a0a198768aad..6ced7c89c141 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -288,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev) } EXPORT_SYMBOL(dev_trans_start); -static void dev_watchdog(unsigned long arg) +static void dev_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; + struct net_device *dev = from_timer(dev, t, watchdog_timer); netif_tx_lock(dev); if (!qdisc_tx_is_noop(dev)) { @@ -954,7 +954,7 @@ void dev_init_scheduler(struct net_device *dev) if (dev_ingress_queue(dev)) dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); + timer_setup(&dev->watchdog_timer, dev_watchdog, 0); } static void shutdown_scheduler_queue(struct net_device *dev, diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 6c2791d6102d..776c694c77c7 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -74,6 +74,7 @@ struct pie_sched_data { struct pie_vars vars; struct pie_stats stats; struct timer_list adapt_timer; + struct Qdisc *sch; }; static void pie_params_init(struct pie_params *params) @@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch) pie_vars_init(&q->vars); } -static void pie_timer(unsigned long arg) +static void pie_timer(struct timer_list *t) { - struct Qdisc *sch = (struct Qdisc *)arg; - struct pie_sched_data *q = qdisc_priv(sch); + struct pie_sched_data *q = from_timer(q, t, adapt_timer); + struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) pie_vars_init(&q->vars); sch->limit = q->params.limit; - setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); + q->sch = sch; + timer_setup(&q->adapt_timer, pie_timer, 0); if (opt) { int err = pie_change(sch, opt); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 93b9d70a9b28..fdfdb56aaae2 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -40,6 +40,7 @@ struct red_sched_data { u32 limit; /* HARD maximal queue length */ unsigned char flags; struct timer_list adapt_timer; + struct Qdisc *sch; struct red_parms parms; struct red_vars vars; struct red_stats stats; @@ -221,10 +222,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static 
inline void red_adaptative_timer(unsigned long arg) +static inline void red_adaptative_timer(struct timer_list *t) { - struct Qdisc *sch = (struct Qdisc *)arg; - struct red_sched_data *q = qdisc_priv(sch); + struct red_sched_data *q = from_timer(q, t, adapt_timer); + struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -238,7 +239,8 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt) struct red_sched_data *q = qdisc_priv(sch); q->qdisc = &noop_qdisc; - setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch); + q->sch = sch; + timer_setup(&q->adapt_timer, red_adaptative_timer, 0); return red_change(sch, opt); } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 123a53af2506..5a9c95149c5c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -145,6 +145,7 @@ struct sfq_sched_data { int perturb_period; unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ struct timer_list perturb_timer; + struct Qdisc *sch; }; /* @@ -604,10 +605,10 @@ drop: qdisc_tree_reduce_backlog(sch, dropped, drop_len); } -static void sfq_perturbation(unsigned long arg) +static void sfq_perturbation(struct timer_list *t) { - struct Qdisc *sch = (struct Qdisc *)arg; - struct sfq_sched_data *q = qdisc_priv(sch); + struct sfq_sched_data *q = from_timer(q, t, perturb_timer); + struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -722,8 +723,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; int err; - setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, - (unsigned long)sch); + timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) From a92c5751b97cca55d8140ec0bf26a53c7e00bfa5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:18 -0700 Subject: [PATCH 1110/2177] netfilter: ipset: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. This introduces a pointer back to the struct ip_set, which is used instead of the struct timer_list .data field. Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: "David S. Miller" Cc: Stephen Hemminger Cc: simran singhal Cc: Muhammad Falak R Wani Cc: netfilter-devel@vger.kernel.org Cc: coreteam@netfilter.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- net/netfilter/ipset/ip_set_bitmap_gen.h | 10 +++++----- net/netfilter/ipset/ip_set_bitmap_ip.c | 2 ++ net/netfilter/ipset/ip_set_bitmap_ipmac.c | 2 ++ net/netfilter/ipset/ip_set_bitmap_port.c | 2 ++ net/netfilter/ipset/ip_set_hash_gen.h | 12 +++++++----- net/netfilter/ipset/ip_set_list_set.c | 12 +++++++----- 6 files changed, 25 insertions(+), 15 deletions(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 8ad2b52a0b32..5ca18f07683b 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -37,11 +37,11 @@ #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) static void -mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) { struct mtype *map = set->data; - setup_timer(&map->gc, gc, (unsigned long)set); + timer_setup(&map->gc, gc, 0); mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } @@ -272,10 +272,10 @@ out: } static void -mtype_gc(unsigned long ul_set) +mtype_gc(struct timer_list *t) { - struct ip_set *set = (struct ip_set *)ul_set; - struct mtype *map = set->data; + struct mtype *map = from_timer(map, t, gc); + struct ip_set *set = map->set; void *x; u32 id; diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 4783efff0bde..d8975a0b4282 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -48,6 +48,7 @@ struct bitmap_ip { size_t memsize; /* members size */ u8 netmask; /* subnet netmask */ struct timer_list gc; /* garbage collection */ + struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[0] /* data extensions */ __aligned(__alignof__(u64)); }; @@ -232,6 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->netmask = netmask; set->timeout = IPSET_NO_TIMEOUT; + map->set = set; set->data = map; set->family = NFPROTO_IPV4; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 9a065f672d3a..4c279fbd2d5d 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -52,6 +52,7 @@ struct bitmap_ipmac { u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collector */ + struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[0] /* MAC + data extensions */ __aligned(__alignof__(u64)); }; @@ -307,6 +308,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, map->elements = elements; set->timeout = IPSET_NO_TIMEOUT; + map->set = set; set->data = map; set->family = NFPROTO_IPV4; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 7f0c733358a4..7f9bbd7c98b5 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -40,6 +40,7 @@ struct bitmap_port { u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collection */ + struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[0] /* data extensions */ __aligned(__alignof__(u64)); }; @@ -214,6 +215,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->last_port = last_port; set->timeout = IPSET_NO_TIMEOUT; + map->set = set; set->data = map; set->family = NFPROTO_UNSPEC; diff --git 
a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 51063d9ed0f7..efffc8eabafe 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -280,6 +280,7 @@ htable_bits(u32 hashsize) struct htype { struct htable __rcu *table; /* the hash table */ struct timer_list gc; /* garbage collection when timeout enabled */ + struct ip_set *set; /* attached to this ip_set */ u32 maxelem; /* max elements in the hash */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK @@ -429,11 +430,11 @@ mtype_destroy(struct ip_set *set) } static void -mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) { struct htype *h = set->data; - setup_timer(&h->gc, gc, (unsigned long)set); + timer_setup(&h->gc, gc, 0); mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); pr_debug("gc initialized, run in every %u\n", IPSET_GC_PERIOD(set->timeout)); @@ -526,10 +527,10 @@ mtype_expire(struct ip_set *set, struct htype *h) } static void -mtype_gc(unsigned long ul_set) +mtype_gc(struct timer_list *t) { - struct ip_set *set = (struct ip_set *)ul_set; - struct htype *h = set->data; + struct htype *h = from_timer(h, t, gc); + struct ip_set *set = h->set; pr_debug("called\n"); spin_lock_bh(&set->lock); @@ -1314,6 +1315,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, t->htable_bits = hbits; RCU_INIT_POINTER(h->table, t); + h->set = set; set->data = h; #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) { diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 178d4eba013b..c9b4e05ad940 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -44,6 +44,7 @@ struct set_adt_elem { struct list_set { u32 size; /* size of set list array */ struct timer_list gc; /* garbage collection */ + struct ip_set *set; /* attached to this ip_set */ struct net *net; /* namespace */ struct list_head members; /* the set members */ }; @@ -571,10 +572,10 @@ static const struct ip_set_type_variant set_variant = { }; static void -list_set_gc(unsigned long ul_set) +list_set_gc(struct timer_list *t) { - struct ip_set *set = (struct ip_set *)ul_set; - struct list_set *map = set->data; + struct list_set *map = from_timer(map, t, gc); + struct ip_set *set = map->set; spin_lock_bh(&set->lock); set_cleanup_entries(set); @@ -585,11 +586,11 @@ list_set_gc(unsigned long ul_set) } static void -list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) { struct list_set *map = set->data; - setup_timer(&map->gc, gc, (unsigned long)set); + timer_setup(&map->gc, gc, 0); mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } @@ -606,6 +607,7 @@ init_list_set(struct net *net, struct ip_set *set, u32 size) map->size = size; map->net = net; + map->set = set; INIT_LIST_HEAD(&map->members); set->data = map; From 59f379f9046a9e0532ffd19b44e3c32fe79ec51b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:19 -0700 Subject: [PATCH 1111/2177] inet/connection_sock: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. 
Miller" Cc: Gerrit Renker Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: netdev@vger.kernel.org Cc: dccp@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 6 +++--- net/dccp/timer.c | 18 ++++++++++-------- net/ipv4/inet_connection_sock.c | 21 +++++++++------------ net/ipv4/tcp_timer.c | 18 +++++++++++------- 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 13e4c89a8231..0358745ea059 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -169,9 +169,9 @@ enum inet_csk_ack_state_t { }; void inet_csk_init_xmit_timers(struct sock *sk, - void (*retransmit_handler)(unsigned long), - void (*delack_handler)(unsigned long), - void (*keepalive_handler)(unsigned long)); + void (*retransmit_handler)(struct timer_list *), + void (*delack_handler)(struct timer_list *), + void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 3a2c34027758..1e35526bf436 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk) __sk_dst_reset(sk); } -static void dccp_write_timer(unsigned long data) +static void dccp_write_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; - struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_retransmit_timer); + struct sock *sk = &icsk->icsk_inet.sk; int event = 0; bh_lock_sock(sk); @@ -161,19 +162,20 @@ out: sock_put(sk); } -static void dccp_keepalive_timer(unsigned long data) +static void dccp_keepalive_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; + struct sock *sk = from_timer(sk, t, sk_timer); pr_err("dccp should not use a keepalive timer !\n"); sock_put(sk); } /* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ -static void dccp_delack_timer(unsigned long data) +static void dccp_delack_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; - struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_delack_timer); + struct sock *sk = &icsk->icsk_inet.sk; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8a91ebbf0c01..5c965ecc96a0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -494,17 +494,15 @@ EXPORT_SYMBOL(inet_csk_accept); * to optimize. 
*/ void inet_csk_init_xmit_timers(struct sock *sk, - void (*retransmit_handler)(unsigned long), - void (*delack_handler)(unsigned long), - void (*keepalive_handler)(unsigned long)) + void (*retransmit_handler)(struct timer_list *t), + void (*delack_handler)(struct timer_list *t), + void (*keepalive_handler)(struct timer_list *t)) { struct inet_connection_sock *icsk = inet_csk(sk); - setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, - (unsigned long)sk); - setup_timer(&icsk->icsk_delack_timer, delack_handler, - (unsigned long)sk); - setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); + timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0); + timer_setup(&icsk->icsk_delack_timer, delack_handler, 0); + timer_setup(&sk->sk_timer, keepalive_handler, 0); icsk->icsk_pending = icsk->icsk_ack.pending = 0; } EXPORT_SYMBOL(inet_csk_init_xmit_timers); @@ -676,9 +674,9 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req } EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); -static void reqsk_timer_handler(unsigned long data) +static void reqsk_timer_handler(struct timer_list *t) { - struct request_sock *req = (struct request_sock *)data; + struct request_sock *req = from_timer(req, t, rsk_timer); struct sock *sk_listener = req->rsk_listener; struct net *net = sock_net(sk_listener); struct inet_connection_sock *icsk = inet_csk(sk_listener); @@ -749,8 +747,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, req->num_timeout = 0; req->sk = NULL; - setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler, - (unsigned long)req); + timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); inet_ehash_insert(req_to_sk(req), NULL); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7014cc00c74c..804a8d34ce86 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -288,15 +288,17 @@ out: * * Returns: Nothing (void) */ -static void tcp_delack_timer(unsigned long data) +static void tcp_delack_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_delack_timer); + struct sock *sk = &icsk->icsk_inet.sk; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_delack_timer_handler(sk); } else { - inet_csk(sk)->icsk_ack.blocked = 1; + icsk->icsk_ack.blocked = 1; __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) @@ -576,9 +578,11 @@ out: sk_mem_reclaim(sk); } -static void tcp_write_timer(unsigned long data) +static void tcp_write_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_retransmit_timer); + struct sock *sk = &icsk->icsk_inet.sk; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { @@ -613,9 +617,9 @@ void tcp_set_keepalive(struct sock *sk, int val) EXPORT_SYMBOL_GPL(tcp_set_keepalive); -static void tcp_keepalive_timer (unsigned long data) +static void tcp_keepalive_timer (struct timer_list *t) { - struct sock *sk = (struct sock *) data; + struct sock *sk = from_timer(sk, t, sk_timer); struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 elapsed; From 78802011fbe34331bdef6f2dfb1634011f0e4c32 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:20 -0700 Subject: [PATCH 1112/2177] inet: frags: Convert timers to use timer_setup() In 
preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Alexander Aring Cc: Stefan Schmidt Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: linux-wpan@vger.kernel.org Cc: netdev@vger.kernel.org Cc: netfilter-devel@vger.kernel.org Cc: coreteam@netfilter.org Signed-off-by: Kees Cook Acked-by: Stefan Schmidt # for ieee802154 Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 +- net/ieee802154/6lowpan/reassembly.c | 5 +++-- net/ipv4/inet_fragment.c | 4 ++-- net/ipv4/ip_fragment.c | 5 +++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 5 +++-- net/ipv6/reassembly.c | 5 +++-- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index fc59e0775e00..c695807ca707 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -95,7 +95,7 @@ struct inet_frags { void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); - void (*frag_expire)(unsigned long data); + void (*frag_expire)(struct timer_list *t); struct kmem_cache *frags_cachep; const char *frags_cache_name; }; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index f85b08baff16..85bf86ad6b18 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) fq->daddr = *arg->dst; } -static void lowpan_frag_expire(unsigned long data) +static void lowpan_frag_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); spin_lock(&fq->q.lock); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index af74d0433453..7f3ef5c287a1 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) spin_unlock(&hb->chain_lock); hlist_for_each_entry_safe(fq, n, &expired, list_evictor) - f->frag_expire((unsigned long) fq); + f->frag_expire(&fq->timer); return evicted; } @@ -366,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, f->constructor(q, arg); add_frag_mem_limit(nf, f->qsize); - setup_timer(&q->timer, f->frag_expire, (unsigned long)q); + timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); refcount_set(&q->refcnt, 1); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 46408c220d9d..9215654a401f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -190,12 +190,13 @@ static bool frag_expire_skip_icmp(u32 user) /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. 
*/ -static void ip_expire(unsigned long arg) +static void ip_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct ipq *qp; struct net *net; - qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); + qp = container_of(frag, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); rcu_read_lock(); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index b263bf3a19f7..977d8900cfd1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q) return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } -static void nf_ct_frag6_expire(unsigned long data) +static void nf_ct_frag6_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, nf_frag.frags); ip6_expire_frag_queue(net, fq, &nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 846012eae526..afbc000ad4f2 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -170,12 +170,13 @@ out: } EXPORT_SYMBOL(ip6_expire_frag_queue); -static void ip6_frag_expire(unsigned long data) +static void ip6_frag_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, ipv6.frags); ip6_expire_frag_queue(net, fq, &ip6_frags); From 9f12a77e467b8ec0296cf1c3481447e1b1811f59 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:21 -0700 Subject: [PATCH 1113/2177] net/core: Collapse redundant sk_timer callback data assignments The core sk_timer initializer can provide the common .data assignment instead of it being set separately in users. Cc: "David S. Miller" Cc: Ralf Baechle Cc: Andrew Hendry Cc: Eric Dumazet Cc: Paolo Abeni Cc: David Howells Cc: Colin Ian King Cc: Ingo Molnar Cc: linzhang Cc: netdev@vger.kernel.org Cc: linux-hams@vger.kernel.org Cc: linux-x25@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- net/core/sock.c | 2 +- net/netrom/nr_timer.c | 1 - net/rose/rose_timer.c | 1 - net/x25/af_x25.c | 1 - net/x25/x25_timer.c | 1 - 5 files changed, 1 insertion(+), 5 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 23953b741a41..f577df17bcf2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2683,7 +2683,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk_init_common(sk); sk->sk_send_head = NULL; - init_timer(&sk->sk_timer); + setup_timer(&sk->sk_timer, NULL, (unsigned long)sk); sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 94d05806a9a2..f84ce71f1f5f 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -45,7 +45,6 @@ void nr_init_timers(struct sock *sk) setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk); /* initialized by sock_init_data */ - sk->sk_timer.data = (unsigned long)sk; sk->sk_timer.function = &nr_heartbeat_expiry; } diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index 3b89d66f15bb..e08201185214 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -36,7 +36,6 @@ void rose_start_heartbeat(struct sock *sk) { del_timer(&sk->sk_timer); - sk->sk_timer.data = (unsigned long)sk; sk->sk_timer.function = &rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ac095936552d..c590c0bd1393 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -414,7 +414,6 @@ static void __x25_destroy_socket(struct sock *sk) /* Defer: outstanding buffers */ sk->sk_timer.expires = jiffies + 10 * HZ; sk->sk_timer.function = x25_destroy_timer; - sk->sk_timer.data = (unsigned long)sk; add_timer(&sk->sk_timer); } else { /* drop last reference so sock_put will free */ diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index 5c5db1a36399..de5cec41d100 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -36,7 +36,6 @@ void x25_init_timers(struct sock *sk) setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk); /* initialized by sock_init_data */ - sk->sk_timer.data = (unsigned long)sk; sk->sk_timer.function = &x25_heartbeat_expiry; } From d26c089e78298843b8c5202ffb43146d17c15bde Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:22 -0700 Subject: [PATCH 1114/2177] hdlc: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. This adds a pointer back to the net_device, and drops needless open-coded resetting of the .function and .data fields. Cc: David S. Miller Cc: Krzysztof Halasa Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/wan/hdlc_cisco.c | 13 ++++++------- drivers/net/wan/hdlc_fr.c | 12 ++++++------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index c696d42f4502..320039d329c7 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -54,6 +54,7 @@ struct cisco_state { cisco_proto settings; struct timer_list timer; + struct net_device *dev; spinlock_t lock; unsigned long last_poll; int up; @@ -257,11 +258,10 @@ rx_error: -static void cisco_timer(unsigned long arg) +static void cisco_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; - hdlc_device *hdlc = dev_to_hdlc(dev); - struct cisco_state *st = state(hdlc); + struct cisco_state *st = from_timer(st, t, timer); + struct net_device *dev = st->dev; spin_lock(&st->lock); if (st->up && @@ -276,8 +276,6 @@ static void cisco_timer(unsigned long arg) spin_unlock(&st->lock); st->timer.expires = jiffies + st->settings.interval * HZ; - st->timer.function = cisco_timer; - st->timer.data = arg; add_timer(&st->timer); } @@ -293,7 +291,8 @@ static void cisco_start(struct net_device *dev) st->up = st->txseq = st->rxseq = 0; spin_unlock_irqrestore(&st->lock, flags); - setup_timer(&st->timer, cisco_timer, (unsigned long)dev); + st->dev = dev; + timer_setup(&st->timer, cisco_timer, 0); st->timer.expires = jiffies + HZ; /* First poll after 1 s */ add_timer(&st->timer); } diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 425a47ffed25..038236a9c60e 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -140,6 +140,7 @@ struct frad_state { int dce_pvc_count; struct timer_list timer; + struct net_device *dev; unsigned long last_poll; int reliable; int dce_changed; @@ -597,9 +598,10 @@ static void fr_set_link_state(int reliable, struct net_device *dev) } -static void fr_timer(unsigned long arg) +static void fr_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; + struct frad_state *st = from_timer(st, t, timer); + struct net_device *dev = st->dev; hdlc_device *hdlc = dev_to_hdlc(dev); int i, cnt = 0, reliable; u32 list; @@ -644,8 +646,6 @@ static void fr_timer(unsigned long arg) state(hdlc)->settings.t391 * HZ; } - state(hdlc)->timer.function = fr_timer; - state(hdlc)->timer.data = arg; add_timer(&state(hdlc)->timer); } @@ -1003,8 +1003,8 @@ static void fr_start(struct net_device *dev) state(hdlc)->n391cnt = 0; state(hdlc)->txseq = state(hdlc)->rxseq = 0; - setup_timer(&state(hdlc)->timer, fr_timer, - (unsigned long)dev); + state(hdlc)->dev = dev; + timer_setup(&state(hdlc)->timer, fr_timer, 0); /* First poll after 1 s */ state(hdlc)->timer.expires = jiffies + HZ; add_timer(&state(hdlc)->timer); From 9ba650a4ebdde5f5f59a62ed51563d7c4586e492 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:23 -0700 Subject: [PATCH 1115/2177] appletalk: Remove unneeded synchronization The use of del_timer_sync() will make sure a timer is not rescheduled. As such, there is no need to add external signals to kill timers. In preparation for switching the timer callback argument to the timer pointer, this drops the .data argument since it doesn't serve a meaningful purpose here. Cc: David Howells Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/appletalk/ltpc.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index e4aa374caa4d..cc3dc9337eae 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -880,14 +880,10 @@ static void ltpc_poll(unsigned long l) } ltpc_poll_counter--; } - - if (!dev) - return; /* we've been downed */ /* poll 20 times per second */ idle(dev); ltpc_timer.expires = jiffies + HZ/20; - add_timer(&ltpc_timer); } @@ -1252,8 +1248,6 @@ static void __exit ltpc_cleanup(void) if(debug & DEBUG_VERBOSE) printk("unregister_netdev\n"); unregister_netdev(dev_ltpc); - ltpc_timer.data = 0; /* signal the poll routine that we're done */ - del_timer_sync(&ltpc_timer); if(debug & DEBUG_VERBOSE) printk("freeing irq\n"); From 5f2585d4e570f2dffd148f6e57308fb431079175 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:24 -0700 Subject: [PATCH 1116/2177] drivers/net/appletalk: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Adds a static variable to hold the polled net_device. Cc: David Howells Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/appletalk/ltpc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index cc3dc9337eae..75a5a9b87c5a 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -694,6 +694,7 @@ static int do_read(struct net_device *dev, void *cbuf, int cbuflen, /* end of idle handlers -- what should be seen is do_read, do_write */ static struct timer_list ltpc_timer; +static struct net_device *ltpc_timer_dev; static netdev_tx_t ltpc_xmit(struct sk_buff *skb, struct net_device *dev); @@ -867,10 +868,8 @@ static void set_multicast_list(struct net_device *dev) static int ltpc_poll_counter; -static void ltpc_poll(unsigned long l) +static void ltpc_poll(struct timer_list *unused) { - struct net_device *dev = (struct net_device *) l; - del_timer(&ltpc_timer); if(debug & DEBUG_VERBOSE) { @@ -882,7 +881,7 @@ static void ltpc_poll(unsigned long l) } /* poll 20 times per second */ - idle(dev); + idle(ltpc_timer_dev); ltpc_timer.expires = jiffies + HZ/20; add_timer(&ltpc_timer); } @@ -1161,7 +1160,8 @@ struct net_device * __init ltpc_probe(void) dev->irq = 0; /* polled mode -- 20 times per second */ /* this is really, really slow... should it poll more often? */ - setup_timer(&ltpc_timer, ltpc_poll, (unsigned long)dev); + ltpc_timer_dev = dev; + timer_setup(&ltpc_timer, ltpc_poll, 0); ltpc_timer.expires = jiffies + HZ/20; add_timer(&ltpc_timer); From 11286125326bb9d5e295c7c85fb1dc91ca2af46b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:26 -0700 Subject: [PATCH 1117/2177] isdnloop: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Added missing initialization for rb_timer. Cc: Karsten Keil Cc: "David S. Miller" Cc: Al Viro Cc: Stephen Hemminger Cc: Arnd Bergmann Cc: Johannes Berg Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/isdn/isdnloop/isdnloop.c | 33 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c index e97232646ba1..a4597e96c916 100644 --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@ -90,9 +90,9 @@ isdnloop_bchan_send(isdnloop_card *card, int ch) * data = pointer to card struct, set by kernel timer.data */ static void -isdnloop_pollbchan(unsigned long data) +isdnloop_pollbchan(struct timer_list *t) { - isdnloop_card *card = (isdnloop_card *) data; + isdnloop_card *card = from_timer(card, t, rb_timer); unsigned long flags; if (card->flags & ISDNLOOP_FLAGS_B1ACTIVE) @@ -305,9 +305,9 @@ isdnloop_putmsg(isdnloop_card *card, unsigned char c) * data = pointer to card struct */ static void -isdnloop_polldchan(unsigned long data) +isdnloop_polldchan(struct timer_list *t) { - isdnloop_card *card = (isdnloop_card *) data; + isdnloop_card *card = from_timer(card, t, st_timer); struct sk_buff *skb; int avail; int left; @@ -373,8 +373,6 @@ isdnloop_polldchan(unsigned long data) card->flags |= ISDNLOOP_FLAGS_RBTIMER; spin_lock_irqsave(&card->isdnloop_lock, flags); del_timer(&card->rb_timer); - card->rb_timer.function = isdnloop_pollbchan; - card->rb_timer.data = (unsigned long) card; card->rb_timer.expires = jiffies + ISDNLOOP_TIMER_BCREAD; add_timer(&card->rb_timer); spin_unlock_irqrestore(&card->isdnloop_lock, flags); @@ -588,9 +586,10 @@ isdnloop_atimeout(isdnloop_card *card, int ch) * Wrapper for isdnloop_atimeout(). */ static void -isdnloop_atimeout0(unsigned long data) +isdnloop_atimeout0(struct timer_list *t) { - isdnloop_card *card = (isdnloop_card *) data; + isdnloop_card *card = from_timer(card, t, c_timer[0]); + isdnloop_atimeout(card, 0); } @@ -598,9 +597,10 @@ isdnloop_atimeout0(unsigned long data) * Wrapper for isdnloop_atimeout(). */ static void -isdnloop_atimeout1(unsigned long data) +isdnloop_atimeout1(struct timer_list *t) { - isdnloop_card *card = (isdnloop_card *) data; + isdnloop_card *card = from_timer(card, t, c_timer[1]); + isdnloop_atimeout(card, 1); } @@ -617,13 +617,9 @@ isdnloop_start_ctimer(isdnloop_card *card, int ch) unsigned long flags; spin_lock_irqsave(&card->isdnloop_lock, flags); - init_timer(&card->c_timer[ch]); + timer_setup(&card->c_timer[ch], ch ? 
isdnloop_atimeout1 + : isdnloop_atimeout0, 0); card->c_timer[ch].expires = jiffies + ISDNLOOP_TIMER_ALERTWAIT; - if (ch) - card->c_timer[ch].function = isdnloop_atimeout1; - else - card->c_timer[ch].function = isdnloop_atimeout0; - card->c_timer[ch].data = (unsigned long) card; add_timer(&card->c_timer[ch]); spin_unlock_irqrestore(&card->isdnloop_lock, flags); } @@ -1113,10 +1109,9 @@ isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp) sdef.ptype); return -EINVAL; } - init_timer(&card->st_timer); + timer_setup(&card->rb_timer, isdnloop_pollbchan, 0); + timer_setup(&card->st_timer, isdnloop_polldchan, 0); card->st_timer.expires = jiffies + ISDNLOOP_TIMER_DCREAD; - card->st_timer.function = isdnloop_polldchan; - card->st_timer.data = (unsigned long) card; add_timer(&card->st_timer); card->flags |= ISDNLOOP_FLAGS_RUNNING; spin_unlock_irqrestore(&card->isdnloop_lock, flags); From de892f8f2cc8176368a490e0778fd2de15e9bfda Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:27 -0700 Subject: [PATCH 1118/2177] net: ethernet: apple: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Johannes Berg Cc: Jarod Wilson Cc: Rob Herring Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/apple/bmac.c | 12 +++++------- drivers/net/ethernet/apple/mace.c | 12 +++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index eac740c476ce..5a655d289dd5 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -157,7 +157,7 @@ static irqreturn_t bmac_misc_intr(int irq, void *dev_id); static irqreturn_t bmac_txdma_intr(int irq, void *dev_id); static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id); static void bmac_set_timeout(struct net_device *dev); -static void bmac_tx_timeout(unsigned long data); +static void bmac_tx_timeout(struct timer_list *t); static int bmac_output(struct sk_buff *skb, struct net_device *dev); static void bmac_start(struct net_device *dev); @@ -555,8 +555,6 @@ static inline void bmac_set_timeout(struct net_device *dev) if (bp->timeout_active) del_timer(&bp->tx_timeout); bp->tx_timeout.expires = jiffies + TX_TIMEOUT; - bp->tx_timeout.function = bmac_tx_timeout; - bp->tx_timeout.data = (unsigned long) dev; add_timer(&bp->tx_timeout); bp->timeout_active = 1; spin_unlock_irqrestore(&bp->lock, flags); @@ -1321,7 +1319,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match) bp->queue = (struct sk_buff_head *)(bp->rx_cmds + N_RX_RING + 1); skb_queue_head_init(bp->queue); - init_timer(&bp->tx_timeout); + timer_setup(&bp->tx_timeout, bmac_tx_timeout, 0); ret = request_irq(dev->irq, bmac_misc_intr, 0, "BMAC-misc", dev); if (ret) { @@ -1471,10 +1469,10 @@ bmac_output(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void bmac_tx_timeout(unsigned long data) +static void bmac_tx_timeout(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct bmac_data *bp = netdev_priv(dev); + struct bmac_data *bp = from_timer(bp, t, tx_timeout); + struct net_device *dev = macio_get_drvdata(bp->mdev); volatile struct dbdma_regs __iomem *td = bp->tx_dma; volatile struct dbdma_regs __iomem *rd = bp->rx_dma; volatile struct dbdma_cmd *cp; 
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index e58b157b7d7c..0b5429d76bcf 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -86,7 +86,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id); static irqreturn_t mace_txdma_intr(int irq, void *dev_id); static irqreturn_t mace_rxdma_intr(int irq, void *dev_id); static void mace_set_timeout(struct net_device *dev); -static void mace_tx_timeout(unsigned long data); +static void mace_tx_timeout(struct timer_list *t); static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma); static inline void mace_clean_rings(struct mace_data *mp); static void __mace_set_address(struct net_device *dev, void *addr); @@ -196,7 +196,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match) memset((char *) mp->tx_cmds, 0, (NCMDS_TX*N_TX_RING + N_RX_RING + 2) * sizeof(struct dbdma_cmd)); - init_timer(&mp->tx_timeout); + timer_setup(&mp->tx_timeout, mace_tx_timeout, 0); spin_lock_init(&mp->lock); mp->timeout_active = 0; @@ -521,8 +521,6 @@ static inline void mace_set_timeout(struct net_device *dev) if (mp->timeout_active) del_timer(&mp->tx_timeout); mp->tx_timeout.expires = jiffies + TX_TIMEOUT; - mp->tx_timeout.function = mace_tx_timeout; - mp->tx_timeout.data = (unsigned long) dev; add_timer(&mp->tx_timeout); mp->timeout_active = 1; } @@ -801,10 +799,10 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void mace_tx_timeout(unsigned long data) +static void mace_tx_timeout(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct mace_data *mp = netdev_priv(dev); + struct mace_data *mp = from_timer(mp, t, tx_timeout); + struct net_device *dev = macio_get_drvdata(mp->mdev); volatile struct mace __iomem *mb = mp->mace; volatile struct dbdma_regs __iomem *td = mp->tx_dma; volatile struct dbdma_regs __iomem *rd = mp->rx_dma; From 0822c5d94e10d9790e82bdfea20a10f0884bca54 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:28 -0700 Subject: [PATCH 1119/2177] net: ethernet: sun: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Philippe Reynes Cc: Jarod Wilson Cc: Shannon Nelson Cc: Rob Herring Cc: chris hyser Cc: Tushar Dave Cc: Tobias Klauser Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Shannon Nelson Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sun/cassini.c | 7 ++++--- drivers/net/ethernet/sun/ldmvsw.c | 3 +-- drivers/net/ethernet/sun/niu.c | 10 ++++------ drivers/net/ethernet/sun/sunbmac.c | 10 ++++------ drivers/net/ethernet/sun/sungem.c | 6 +++--- drivers/net/ethernet/sun/sunhme.c | 10 ++++------ drivers/net/ethernet/sun/sunvnet.c | 3 +-- drivers/net/ethernet/sun/sunvnet_common.c | 4 ++-- drivers/net/ethernet/sun/sunvnet_common.h | 2 +- 9 files changed, 24 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index a74d78f64af9..113bd57e2ea0 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -4079,9 +4079,9 @@ done: #endif } -static void cas_link_timer(unsigned long data) +static void cas_link_timer(struct timer_list *t) { - struct cas *cp = (struct cas *) data; + struct cas *cp = from_timer(cp, t, link_timer); int mask, pending = 0, reset = 0; unsigned long flags; @@ -5039,7 +5039,8 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&cp->stat_lock[N_TX_RINGS]); mutex_init(&cp->pm_mutex); - setup_timer(&cp->link_timer, cas_link_timer, (unsigned long)cp); + timer_setup(&cp->link_timer, cas_link_timer, 0); + #if 1 /* Just in case the implementation of atomic operations * change so that an explicit initialization is necessary. diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 5feeaa9f0a9e..5ea037672e6f 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -363,8 +363,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) list_add_rcu(&port->list, &vp->port_list); spin_unlock_irqrestore(&vp->lock, flags); - setup_timer(&port->clean_timer, sunvnet_clean_timer_expire_common, - (unsigned long)port); + timer_setup(&port->clean_timer, sunvnet_clean_timer_expire_common, 0); err = register_netdev(dev); if (err) { diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index bde19b307d0d..ab502ee35fb2 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -2221,9 +2221,9 @@ static int niu_link_status(struct niu *np, int *link_up_p) return err; } -static void niu_timer(unsigned long __opaque) +static void niu_timer(struct timer_list *t) { - struct niu *np = (struct niu *) __opaque; + struct niu *np = from_timer(np, t, timer); unsigned long off; int err, link_up; @@ -6123,7 +6123,7 @@ static int niu_open(struct net_device *dev) err = niu_init_hw(np); if (!err) { - setup_timer(&np->timer, niu_timer, (unsigned long)np); + timer_setup(&np->timer, niu_timer, 0); np->timer.expires = jiffies + HZ; err = niu_enable_interrupts(np, 1); @@ -6773,10 +6773,8 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu) err = niu_init_hw(np); if (!err) { - init_timer(&np->timer); + timer_setup(&np->timer, niu_timer, 0); np->timer.expires = jiffies + HZ; - np->timer.data = (unsigned long) np; - np->timer.function = niu_timer; err = niu_enable_interrupts(np, 1); if (err) diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 3189722110c2..0b1f41f6bceb 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -523,9 +523,9 @@ static int try_next_permutation(struct bigmac *bp, void __iomem *tregs) return -1; } -static void bigmac_timer(unsigned long data) +static void bigmac_timer(struct timer_list *t) { - struct bigmac *bp = (struct bigmac *) data; + 
struct bigmac *bp = from_timer(bp, t, bigmac_timer); void __iomem *tregs = bp->tregs; int restart_timer = 0; @@ -613,8 +613,6 @@ static void bigmac_begin_auto_negotiation(struct bigmac *bp) bp->timer_state = ltrywait; bp->timer_ticks = 0; bp->bigmac_timer.expires = jiffies + (12 * HZ) / 10; - bp->bigmac_timer.data = (unsigned long) bp; - bp->bigmac_timer.function = bigmac_timer; add_timer(&bp->bigmac_timer); } @@ -921,7 +919,7 @@ static int bigmac_open(struct net_device *dev) printk(KERN_ERR "BIGMAC: Can't order irq %d to go.\n", dev->irq); return ret; } - init_timer(&bp->bigmac_timer); + timer_setup(&bp->bigmac_timer, bigmac_timer, 0); ret = bigmac_init_hw(bp, 0); if (ret) free_irq(dev->irq, bp); @@ -1172,7 +1170,7 @@ static int bigmac_ether_init(struct platform_device *op, "board-version", 1); /* Init auto-negotiation timer state. */ - init_timer(&bp->bigmac_timer); + timer_setup(&bp->bigmac_timer, bigmac_timer, 0); bp->timer_state = asleep; bp->timer_ticks = 0; diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index b75ab8f44968..a7afcee3c5ae 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1496,9 +1496,9 @@ static int gem_mdio_link_not_up(struct gem *gp) } } -static void gem_link_timer(unsigned long data) +static void gem_link_timer(struct timer_list *t) { - struct gem *gp = (struct gem *) data; + struct gem *gp = from_timer(gp, t, link_timer); struct net_device *dev = gp->dev; int restart_aneg = 0; @@ -2910,7 +2910,7 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) gp->msg_enable = DEFAULT_MSG; - setup_timer(&gp->link_timer, gem_link_timer, (unsigned long)gp); + timer_setup(&gp->link_timer, gem_link_timer, 0); INIT_WORK(&gp->reset_task, gem_reset_task); diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 9e983e1d8249..0431f1e5f511 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -685,9 +685,9 @@ static int is_lucent_phy(struct happy_meal *hp) return ret; } -static void happy_meal_timer(unsigned long data) +static void happy_meal_timer(struct timer_list *t) { - struct happy_meal *hp = (struct happy_meal *) data; + struct happy_meal *hp = from_timer(hp, t, happy_timer); void __iomem *tregs = hp->tcvregs; int restart_timer = 0; @@ -1413,8 +1413,6 @@ force_link: hp->timer_ticks = 0; hp->happy_timer.expires = jiffies + (12 * HZ)/10; /* 1.2 sec. */ - hp->happy_timer.data = (unsigned long) hp; - hp->happy_timer.function = happy_meal_timer; add_timer(&hp->happy_timer); } @@ -2819,7 +2817,7 @@ static int happy_meal_sbus_probe_one(struct platform_device *op, int is_qfe) hp->timer_state = asleep; hp->timer_ticks = 0; - init_timer(&hp->happy_timer); + timer_setup(&hp->happy_timer, happy_meal_timer, 0); hp->dev = dev; dev->netdev_ops = &hme_netdev_ops; @@ -3133,7 +3131,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev, hp->timer_state = asleep; hp->timer_ticks = 0; - init_timer(&hp->happy_timer); + timer_setup(&hp->happy_timer, happy_meal_timer, 0); hp->irq = pdev->irq; hp->dev = dev; diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 0b95105f7060..27fb22638885 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -492,8 +492,7 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) pr_info("%s: PORT ( remote-mac %pM%s )\n", vp->dev->name, port->raddr, switch_port ? 
" switch-port" : ""); - setup_timer(&port->clean_timer, sunvnet_clean_timer_expire_common, - (unsigned long)port); + timer_setup(&port->clean_timer, sunvnet_clean_timer_expire_common, 0); napi_enable(&port->napi); vio_port_up(&port->vio); diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index ecf456c7b6d1..8aa3ce46bb81 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1040,9 +1040,9 @@ static inline void vnet_free_skbs(struct sk_buff *skb) } } -void sunvnet_clean_timer_expire_common(unsigned long port0) +void sunvnet_clean_timer_expire_common(struct timer_list *t) { - struct vnet_port *port = (struct vnet_port *)port0; + struct vnet_port *port = from_timer(port, t, clean_timer); struct sk_buff *freeskbs; unsigned pending; diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index b20d6fa7ef25..656673c31066 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -129,7 +129,7 @@ struct vnet { ((__port)->vsw ? (__port)->dev : (__port)->vp->dev) /* Common funcs */ -void sunvnet_clean_timer_expire_common(unsigned long port0); +void sunvnet_clean_timer_expire_common(struct timer_list *t); int sunvnet_open_common(struct net_device *dev); int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); From 6fd9c53f71862a4797b7ed8a5de80e2c64829f56 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:29 -0700 Subject: [PATCH 1120/2177] net: seeq: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/seeq/ether3.c | 11 ++++++----- drivers/net/ethernet/seeq/ether3.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 244c1e171017..da4807723a06 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -170,9 +170,11 @@ ether3_setbuffer(struct net_device *dev, buffer_rw_t read, int start) /* * Switch LED off... */ -static void ether3_ledoff(unsigned long data) +static void ether3_ledoff(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; + struct dev_priv *private = from_timer(priv, t, timer); + struct net_device *dev = private->dev; + ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2); } @@ -183,8 +185,6 @@ static inline void ether3_ledon(struct net_device *dev) { del_timer(&priv(dev)->timer); priv(dev)->timer.expires = jiffies + HZ / 50; /* leave on for 1/50th second */ - priv(dev)->timer.data = (unsigned long)dev; - priv(dev)->timer.function = ether3_ledoff; add_timer(&priv(dev)->timer); if (priv(dev)->regs.config2 & CFG2_CTRLO) ether3_outw(priv(dev)->regs.config2 &= ~CFG2_CTRLO, REG_CONFIG2); @@ -783,7 +783,8 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id) ether3_addr(dev->dev_addr, ec); - init_timer(&priv(dev)->timer); + priv(dev)->dev = dev; + timer_setup(&priv(dev)->timer, ether3_ledoff, 0); /* Reset card... 
*/ diff --git a/drivers/net/ethernet/seeq/ether3.h b/drivers/net/ethernet/seeq/ether3.h index 2db63b08bdf3..ea2ba286e665 100644 --- a/drivers/net/ethernet/seeq/ether3.h +++ b/drivers/net/ethernet/seeq/ether3.h @@ -165,6 +165,7 @@ struct dev_priv { unsigned char tx_tail; /* buffer nr of transmitting packet */ unsigned int rx_head; /* address to fetch next packet from */ struct timer_list timer; + net_device *dev; int broken; /* 0 = ok, 1 = something went wrong */ }; From 41e9475c325a3643572639283318b0a07bbb8e0b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:30 -0700 Subject: [PATCH 1121/2177] hamradio/scc: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Joerg Reuter Cc: linux-hams@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/hamradio/scc.c | 69 +++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index 295f267b73ea..c9f7215c5dc2 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -185,14 +185,15 @@ static const char banner[] __initconst = KERN_INFO \ "AX.25: Z8530 SCC driver version "VERSION".dl1bke\n"; -static void t_dwait(unsigned long); -static void t_txdelay(unsigned long); -static void t_tail(unsigned long); -static void t_busy(unsigned long); -static void t_maxkeyup(unsigned long); -static void t_idle(unsigned long); +static void t_dwait(struct timer_list *t); +static void t_txdelay(struct timer_list *t); +static void t_tail(struct timer_list *t); +static void t_busy(struct timer_list *); +static void t_maxkeyup(struct timer_list *); +static void t_idle(struct timer_list *t); static void scc_tx_done(struct scc_channel *); -static void scc_start_tx_timer(struct scc_channel *, void (*)(unsigned long), unsigned long); +static void scc_start_tx_timer(struct scc_channel *, + void (*)(struct timer_list *), unsigned long); static void scc_start_maxkeyup(struct scc_channel *); static void scc_start_defer(struct scc_channel *); @@ -992,24 +993,27 @@ static void scc_key_trx(struct scc_channel *scc, char tx) /* ----> SCC timer interrupt handler and friends. 
<---- */ -static void __scc_start_tx_timer(struct scc_channel *scc, void (*handler)(unsigned long), unsigned long when) +static void __scc_start_tx_timer(struct scc_channel *scc, + void (*handler)(struct timer_list *t), + unsigned long when) { del_timer(&scc->tx_t); if (when == 0) { - handler((unsigned long) scc); + handler(&scc->tx_t); } else if (when != TIMER_OFF) { - scc->tx_t.data = (unsigned long) scc; - scc->tx_t.function = handler; + scc->tx_t.function = (TIMER_FUNC_TYPE)handler; scc->tx_t.expires = jiffies + (when*HZ)/100; add_timer(&scc->tx_t); } } -static void scc_start_tx_timer(struct scc_channel *scc, void (*handler)(unsigned long), unsigned long when) +static void scc_start_tx_timer(struct scc_channel *scc, + void (*handler)(struct timer_list *t), + unsigned long when) { unsigned long flags; @@ -1027,8 +1031,7 @@ static void scc_start_defer(struct scc_channel *scc) if (scc->kiss.maxdefer != 0 && scc->kiss.maxdefer != TIMER_OFF) { - scc->tx_wdog.data = (unsigned long) scc; - scc->tx_wdog.function = t_busy; + scc->tx_wdog.function = (TIMER_FUNC_TYPE)t_busy; scc->tx_wdog.expires = jiffies + HZ*scc->kiss.maxdefer; add_timer(&scc->tx_wdog); } @@ -1044,8 +1047,7 @@ static void scc_start_maxkeyup(struct scc_channel *scc) if (scc->kiss.maxkeyup != 0 && scc->kiss.maxkeyup != TIMER_OFF) { - scc->tx_wdog.data = (unsigned long) scc; - scc->tx_wdog.function = t_maxkeyup; + scc->tx_wdog.function = (TIMER_FUNC_TYPE)t_maxkeyup; scc->tx_wdog.expires = jiffies + HZ*scc->kiss.maxkeyup; add_timer(&scc->tx_wdog); } @@ -1121,9 +1123,9 @@ static inline int is_grouped(struct scc_channel *scc) * fulldup == 2: mintime expired, reset status or key trx and start txdelay */ -static void t_dwait(unsigned long channel) +static void t_dwait(struct timer_list *t) { - struct scc_channel *scc = (struct scc_channel *) channel; + struct scc_channel *scc = from_timer(scc, t, tx_t); if (scc->stat.tx_state == TXS_WAIT) /* maxkeyup or idle timeout */ { @@ -1163,9 +1165,9 @@ static void t_dwait(unsigned long channel) * kick transmission by a fake scc_txint(scc), start 'maxkeyup' watchdog. */ -static void t_txdelay(unsigned long channel) +static void t_txdelay(struct timer_list *t) { - struct scc_channel *scc = (struct scc_channel *) channel; + struct scc_channel *scc = from_timer(scc, t, tx_t); scc_start_maxkeyup(scc); @@ -1184,9 +1186,9 @@ static void t_txdelay(unsigned long channel) * transmission after 'mintime' seconds */ -static void t_tail(unsigned long channel) +static void t_tail(struct timer_list *t) { - struct scc_channel *scc = (struct scc_channel *) channel; + struct scc_channel *scc = from_timer(scc, t, tx_t); unsigned long flags; spin_lock_irqsave(&scc->lock, flags); @@ -1211,9 +1213,9 @@ static void t_tail(unsigned long channel) * throw away send buffers if DCD remains active too long. */ -static void t_busy(unsigned long channel) +static void t_busy(struct timer_list *t) { - struct scc_channel *scc = (struct scc_channel *) channel; + struct scc_channel *scc = from_timer(scc, t, tx_wdog); del_timer(&scc->tx_t); netif_stop_queue(scc->dev); /* don't pile on the wabbit! */ @@ -1230,9 +1232,9 @@ static void t_busy(unsigned long channel) * this is our watchdog. */ -static void t_maxkeyup(unsigned long channel) +static void t_maxkeyup(struct timer_list *t) { - struct scc_channel *scc = (struct scc_channel *) channel; + struct scc_channel *scc = from_timer(scc, t, tx_wdog); unsigned long flags; spin_lock_irqsave(&scc->lock, flags); @@ -1264,9 +1266,9 @@ static void t_maxkeyup(unsigned long channel) * expires. 
*/ -static void t_idle(unsigned long channel) +static void t_idle(struct timer_list *t) { - struct scc_channel *scc = (struct scc_channel *) channel; + struct scc_channel *scc = from_timer(scc, t, tx_t); del_timer(&scc->tx_wdog); @@ -1397,9 +1399,9 @@ static unsigned long scc_get_param(struct scc_channel *scc, unsigned int cmd) /* * Send calibration pattern * */ /* ******************************************************************* */ -static void scc_stop_calibrate(unsigned long channel) +static void scc_stop_calibrate(struct timer_list *t) { - struct scc_channel *scc = (struct scc_channel *) channel; + struct scc_channel *scc = from_timer(scc, t, tx_wdog); unsigned long flags; spin_lock_irqsave(&scc->lock, flags); @@ -1426,8 +1428,7 @@ scc_start_calibrate(struct scc_channel *scc, int duration, unsigned char pattern del_timer(&scc->tx_wdog); - scc->tx_wdog.data = (unsigned long) scc; - scc->tx_wdog.function = scc_stop_calibrate; + scc->tx_wdog.function = (TIMER_FUNC_TYPE)scc_stop_calibrate; scc->tx_wdog.expires = jiffies + HZ*duration; add_timer(&scc->tx_wdog); @@ -1522,8 +1523,8 @@ static int scc_net_alloc(const char *name, struct scc_channel *scc) dev->ml_priv = scc; scc->dev = dev; spin_lock_init(&scc->lock); - init_timer(&scc->tx_t); - init_timer(&scc->tx_wdog); + timer_setup(&scc->tx_t, NULL, 0); + timer_setup(&scc->tx_wdog, NULL, 0); err = register_netdevice(dev); if (err) { From dfc57004945b34cf83f600c697a54afca1fd15c5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:31 -0700 Subject: [PATCH 1122/2177] net/ethernet/sgi: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/sgi/ioc3-eth.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 9c0488e0f08e..18d533fdf14c 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -764,9 +764,9 @@ static inline void ioc3_setup_duplex(struct ioc3_private *ip) ioc3_w_emcr(ip->emcr); } -static void ioc3_timer(unsigned long data) +static void ioc3_timer(struct timer_list *t) { - struct ioc3_private *ip = (struct ioc3_private *) data; + struct ioc3_private *ip = from_timer(ip, t, ioc3_timer); /* Print the link status if it has changed */ mii_check_media(&ip->mii, 1, 0); @@ -818,8 +818,6 @@ out: static void ioc3_mii_start(struct ioc3_private *ip) { ip->ioc3_timer.expires = jiffies + (12 * HZ)/10; /* 1.2 sec. 
*/ - ip->ioc3_timer.data = (unsigned long) ip; - ip->ioc3_timer.function = ioc3_timer; add_timer(&ip->ioc3_timer); } @@ -1291,7 +1289,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #endif spin_lock_init(&ip->ioc3_lock); - init_timer(&ip->ioc3_timer); + timer_setup(&ip->ioc3_timer, ioc3_timer, 0); ioc3_stop(ip); ioc3_init(dev); From d28bb967aa928d5cfd50a9a182f3b4218088c79d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:32 -0700 Subject: [PATCH 1123/2177] net: usb: Convert timers to use timer_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Woojung Huh Cc: Microchip Linux Driver Support Cc: "David S. Miller" Cc: Ben Hutchings Cc: Philippe Reynes Cc: Jarod Wilson Cc: Arvind Yadav Cc: "Bjørn Mork" Cc: "Stefan Brüns" Cc: Alexey Dobriyan Cc: Greg Ungerer Cc: linux-usb@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/usb/catc.c | 6 +++--- drivers/net/usb/lan78xx.c | 10 +++------- drivers/net/usb/sierra_net.c | 12 ++++-------- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index aeb62e17d19d..18d36dff97ea 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -611,9 +611,9 @@ static void catc_stats_done(struct catc *catc, struct ctrl_queue *q) catc->stats_vals[index >> 1] = data; } -static void catc_stats_timer(unsigned long data) +static void catc_stats_timer(struct timer_list *t) { - struct catc *catc = (void *) data; + struct catc *catc = from_timer(catc, t, timer); int i; for (i = 0; i < 8; i++) @@ -805,7 +805,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id spin_lock_init(&catc->tx_lock); spin_lock_init(&catc->ctrl_lock); - setup_timer(&catc->timer, catc_stats_timer, (long)catc); + timer_setup(&catc->timer, catc_stats_timer, 0); catc->ctrl_urb = usb_alloc_urb(0, GFP_KERNEL); catc->tx_urb = usb_alloc_urb(0, GFP_KERNEL); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 0161f77641fa..94c7804903c4 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3516,11 +3516,9 @@ static const struct net_device_ops lan78xx_netdev_ops = { .ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid, }; -static void lan78xx_stat_monitor(unsigned long param) +static void lan78xx_stat_monitor(struct timer_list *t) { - struct lan78xx_net *dev; - - dev = (struct lan78xx_net *)param; + struct lan78xx_net *dev = from_timer(dev, t, stat_monitor); lan78xx_defer_kevent(dev, EVENT_STAT_UPDATE); } @@ -3571,10 +3569,8 @@ static int lan78xx_probe(struct usb_interface *intf, netdev->watchdog_timeo = TX_TIMEOUT_JIFFIES; netdev->ethtool_ops = &lan78xx_ethtool_ops; - dev->stat_monitor.function = lan78xx_stat_monitor; - dev->stat_monitor.data = (unsigned long)dev; dev->delta = 1; - init_timer(&dev->stat_monitor); + timer_setup(&dev->stat_monitor, lan78xx_stat_monitor, 0); mutex_init(&dev->stats.access_lock); diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 2110ab3513f0..c43087e06696 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -189,9 +189,6 @@ struct lsi_umts_dual { #define SIERRA_NET_LSI_UMTS_DS_STATUS_LEN \ (SIERRA_NET_LSI_UMTS_DS_LEN - 
SIERRA_NET_LSI_COMMON_LEN) -/* Forward definitions */ -static void sierra_sync_timer(unsigned long syncdata); - /* Our own net device operations structure */ static const struct net_device_ops sierra_net_device_ops = { .ndo_open = usbnet_open, @@ -475,8 +472,6 @@ static void sierra_net_dosync(struct usbnet *dev) "Send SYNC failed, status %d\n", status); /* Now, start a timer and make sure we get the Restart Indication */ - priv->sync_timer.function = sierra_sync_timer; - priv->sync_timer.data = (unsigned long) dev; priv->sync_timer.expires = jiffies + SIERRA_NET_SYNCDELAY; add_timer(&priv->sync_timer); } @@ -593,9 +588,10 @@ static void sierra_net_defer_kevent(struct usbnet *dev, int work) /* * Sync Retransmit Timer Handler. On expiry, kick the work queue */ -static void sierra_sync_timer(unsigned long syncdata) +static void sierra_sync_timer(struct timer_list *t) { - struct usbnet *dev = (struct usbnet *)syncdata; + struct sierra_net_data *priv = from_timer(priv, t, sync_timer); + struct usbnet *dev = priv->usbnet; dev_dbg(&dev->udev->dev, "%s", __func__); /* Kick the tasklet */ @@ -752,7 +748,7 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf) INIT_WORK(&priv->sierra_net_kevent, sierra_net_kevent); /* Only need to do this once */ - init_timer(&priv->sync_timer); + timer_setup(&priv->sync_timer, sierra_sync_timer, 0); /* verify fw attributes */ status = sierra_net_get_fw_attr(dev, &fwattr); From e84a2ac9ffa9b1ba211c8982f07cd92f60239c3e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:33 -0700 Subject: [PATCH 1124/2177] net: neterion: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Jon Mason Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/ethernet/neterion/s2io.c | 13 ++++--------- drivers/net/ethernet/neterion/s2io.h | 2 +- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 462eda926b1c..b8983e73265a 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -337,12 +337,6 @@ static const char ethtool_driver_stats_keys[][ETH_GSTRING_LEN] = { #define S2IO_TEST_LEN ARRAY_SIZE(s2io_gstrings) #define S2IO_STRINGS_LEN (S2IO_TEST_LEN * ETH_GSTRING_LEN) -#define S2IO_TIMER_CONF(timer, handle, arg, exp) \ - init_timer(&timer); \ - timer.function = handle; \ - timer.data = (unsigned long)arg; \ - mod_timer(&timer, (jiffies + exp)) \ - /* copy mac addr to def_mac_addr array */ static void do_s2io_copy_mac_addr(struct s2io_nic *sp, int offset, u64 mac_addr) { @@ -4193,9 +4187,9 @@ pci_map_failed: } static void -s2io_alarm_handle(unsigned long data) +s2io_alarm_handle(struct timer_list *t) { - struct s2io_nic *sp = (struct s2io_nic *)data; + struct s2io_nic *sp = from_timer(sp, t, alarm_timer); struct net_device *dev = sp->dev; s2io_handle_errors(dev); @@ -7186,7 +7180,8 @@ static int s2io_card_up(struct s2io_nic *sp) return -ENODEV; } - S2IO_TIMER_CONF(sp->alarm_timer, s2io_alarm_handle, sp, (HZ/2)); + timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0); + mod_timer(&sp->alarm_timer, jiffies + HZ / 2); set_bit(__S2IO_STATE_CARD_UP, &sp->state); diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index 6c5997dc8afc..1a24a7218794 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1094,7 +1094,7 @@ static int s2io_poll_msix(struct napi_struct *napi, int budget); static int s2io_poll_inta(struct napi_struct *napi, int budget); static void s2io_init_pci(struct s2io_nic * sp); static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr); -static void s2io_alarm_handle(unsigned long data); +static void s2io_alarm_handle(struct timer_list *t); static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id); static irqreturn_t From d039ef68e94eae81c13b0b39a18f0c3455491e4c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:34 -0700 Subject: [PATCH 1125/2177] net: hns: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Switches test of .data field to .function, since .data will be going away. Cc: Yisen Zhuang Cc: Salil Mehta Cc: "David S. Miller" Cc: lipeng Cc: Lin Yun Sheng Cc: Kejian Yan Cc: Arnd Bergmann Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 7 +++---- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 13 ++++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 36520634c96a..91565c8fee08 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2159,9 +2159,9 @@ static void hns_nic_task_schedule(struct hns_nic_priv *priv) (void)schedule_work(&priv->service_task); } -static void hns_nic_service_timer(unsigned long data) +static void hns_nic_service_timer(struct timer_list *t) { - struct hns_nic_priv *priv = (struct hns_nic_priv *)data; + struct hns_nic_priv *priv = from_timer(priv, t, service_timer); (void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ); @@ -2451,8 +2451,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) /* carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(ndev); - setup_timer(&priv->service_timer, hns_nic_service_timer, - (unsigned long)priv); + timer_setup(&priv->service_timer, hns_nic_service_timer, 0); INIT_WORK(&priv->service_task, hns_nic_service_task); set_bit(NIC_STATE_SERVICE_INITED, &priv->state); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c322b4534148..6e93943c489a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2350,11 +2350,11 @@ static int hclge_get_status(struct hnae3_handle *handle) return hdev->hw.mac.link; } -static void hclge_service_timer(unsigned long data) +static void hclge_service_timer(struct timer_list *t) { - struct hclge_dev *hdev = (struct hclge_dev *)data; - (void)mod_timer(&hdev->service_timer, jiffies + HZ); + struct hclge_dev *hdev = from_timer(hdev, t, service_timer); + mod_timer(&hdev->service_timer, jiffies + HZ); hclge_task_schedule(hdev); } @@ -3204,7 +3204,7 @@ static int hclge_ae_start(struct hnae3_handle *handle) /* mac enable */ hclge_cfg_mac_mode(hdev, true); clear_bit(HCLGE_STATE_DOWN, &hdev->state); - (void)mod_timer(&hdev->service_timer, jiffies + HZ); + mod_timer(&hdev->service_timer, jiffies + HZ); ret = hclge_mac_start_phy(hdev); if (ret) @@ -4436,8 +4436,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_dcb_ops_set(hdev); - setup_timer(&hdev->service_timer, hclge_service_timer, - (unsigned long)hdev); + timer_setup(&hdev->service_timer, hclge_service_timer, 0); INIT_WORK(&hdev->service_task, hclge_service_task); set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state); @@ -4464,7 +4463,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) if (IS_ENABLED(CONFIG_PCI_IOV)) hclge_disable_sriov(hdev); - if (hdev->service_timer.data) + if (hdev->service_timer.function) del_timer_sync(&hdev->service_timer); if (hdev->service_task.func) cancel_work_sync(&hdev->service_task); From 26566eae80512d8a6b52e9d6f880f960893c96b4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:35 -0700 Subject: [PATCH 1126/2177] ethernet/intel: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Switches test of .data field to .function, since .data will be going away. 
Cc: Jeff Kirsher Cc: intel-wired-lan@lists.osuosl.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e100.c | 6 +++--- drivers/net/ethernet/intel/e1000e/netdev.c | 14 ++++++-------- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 8 ++++---- drivers/net/ethernet/intel/i40e/i40e_main.c | 8 ++++---- .../net/ethernet/intel/i40evf/i40evf_main.c | 8 ++++---- drivers/net/ethernet/intel/igb/igb_main.c | 18 ++++++++---------- drivers/net/ethernet/intel/igbvf/netdev.c | 7 +++---- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 9 ++++----- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 +++---- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 8 ++++---- 10 files changed, 43 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 184f11242f56..44b3937f7e81 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1710,9 +1710,9 @@ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex) } } -static void e100_watchdog(unsigned long data) +static void e100_watchdog(struct timer_list *t) { - struct nic *nic = (struct nic *)data; + struct nic *nic = from_timer(nic, t, watchdog); struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; u32 speed; @@ -2920,7 +2920,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - setup_timer(&nic->watchdog, e100_watchdog, (unsigned long)nic); + timer_setup(&nic->watchdog, e100_watchdog, 0); INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index bf8f38f76953..f2f49239b015 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4823,9 +4823,9 @@ static void e1000e_update_phy_task(struct work_struct *work) * Need to wait a few seconds after link up to get diagnostic information from * the phy **/ -static void e1000_update_phy_info(unsigned long data) +static void e1000_update_phy_info(struct timer_list *t) { - struct e1000_adapter *adapter = (struct e1000_adapter *)data; + struct e1000_adapter *adapter = from_timer(adapter, t, phy_info_timer); if (test_bit(__E1000_DOWN, &adapter->state)) return; @@ -5159,9 +5159,9 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) * e1000_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void e1000_watchdog(unsigned long data) +static void e1000_watchdog(struct timer_list *t) { - struct e1000_adapter *adapter = (struct e1000_adapter *)data; + struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer); /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); @@ -7267,10 +7267,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - setup_timer(&adapter->watchdog_timer, e1000_watchdog, - (unsigned long)adapter); - setup_timer(&adapter->phy_info_timer, e1000_update_phy_info, - (unsigned long)adapter); + timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0); + timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0); INIT_WORK(&adapter->reset_task, e1000_reset_task); INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 1e9ae3197b17..7f605221a686 100644 --- 
a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -213,9 +213,10 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface) * fm10k_service_timer - Timer Call-back * @data: pointer to interface cast into an unsigned long **/ -static void fm10k_service_timer(unsigned long data) +static void fm10k_service_timer(struct timer_list *t) { - struct fm10k_intfc *interface = (struct fm10k_intfc *)data; + struct fm10k_intfc *interface = from_timer(interface, t, + service_timer); /* Reset the timer */ mod_timer(&interface->service_timer, (HZ * 2) + jiffies); @@ -2315,8 +2316,7 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Initialize service timer and service task late in order to avoid * cleanup issues. */ - setup_timer(&interface->service_timer, &fm10k_service_timer, - (unsigned long)interface); + timer_setup(&interface->service_timer, fm10k_service_timer, 0); INIT_WORK(&interface->service_task, fm10k_service_task); /* Setup the MAC/VLAN queue */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index bb31d53c4923..39989147b30b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8800,9 +8800,9 @@ static void i40e_service_task(struct work_struct *work) * i40e_service_timer - timer callback * @data: pointer to PF struct **/ -static void i40e_service_timer(unsigned long data) +static void i40e_service_timer(struct timer_list *t) { - struct i40e_pf *pf = (struct i40e_pf *)data; + struct i40e_pf *pf = from_timer(pf, t, service_timer); mod_timer(&pf->service_timer, round_jiffies(jiffies + pf->service_timer_period)); @@ -12648,7 +12648,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #endif /* CONFIG_I40E_DCB */ /* set up periodic task facility */ - setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf); + timer_setup(&pf->service_timer, i40e_service_timer, 0); pf->service_timer_period = HZ; INIT_WORK(&pf->service_task, i40e_service_task); @@ -12972,7 +12972,7 @@ static void i40e_remove(struct pci_dev *pdev) /* no more scheduling of any task */ set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); - if (pf->service_timer.data) + if (pf->service_timer.function) del_timer_sync(&pf->service_timer); if (pf->service_task.func) cancel_work_sync(&pf->service_task); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 5bcbd46e2f6c..ca2ebdbd24d7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1594,9 +1594,10 @@ err: * i40evf_watchdog_timer - Periodic call-back timer * @data: pointer to adapter disguised as unsigned long **/ -static void i40evf_watchdog_timer(unsigned long data) +static void i40evf_watchdog_timer(struct timer_list *t) { - struct i40evf_adapter *adapter = (struct i40evf_adapter *)data; + struct i40evf_adapter *adapter = from_timer(adapter, t, + watchdog_timer); schedule_work(&adapter->watchdog_task); /* timer will be rescheduled in watchdog task */ @@ -2748,8 +2749,7 @@ static void i40evf_init_task(struct work_struct *work) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } - setup_timer(&adapter->watchdog_timer, &i40evf_watchdog_timer, - (unsigned long)adapter); + timer_setup(&adapter->watchdog_timer, i40evf_watchdog_timer, 0); mod_timer(&adapter->watchdog_timer, jiffies + 1); 
adapter->tx_desc_count = I40EVF_DEFAULT_TXD; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 837d9b46a390..58d01a211367 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -133,8 +133,8 @@ static void igb_clean_all_rx_rings(struct igb_adapter *); static void igb_clean_tx_ring(struct igb_ring *); static void igb_clean_rx_ring(struct igb_ring *); static void igb_set_rx_mode(struct net_device *); -static void igb_update_phy_info(unsigned long); -static void igb_watchdog(unsigned long); +static void igb_update_phy_info(struct timer_list *); +static void igb_watchdog(struct timer_list *); static void igb_watchdog_task(struct work_struct *); static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); static void igb_get_stats64(struct net_device *dev, @@ -2538,10 +2538,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); } - setup_timer(&adapter->watchdog_timer, igb_watchdog, - (unsigned long) adapter); - setup_timer(&adapter->phy_info_timer, igb_update_phy_info, - (unsigned long) adapter); + timer_setup(&adapter->watchdog_timer, igb_watchdog, 0); + timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0); INIT_WORK(&adapter->reset_task, igb_reset_task); INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); @@ -4425,9 +4423,9 @@ static void igb_spoof_check(struct igb_adapter *adapter) /* Need to wait a few seconds after link up to get diagnostic information from * the phy */ -static void igb_update_phy_info(unsigned long data) +static void igb_update_phy_info(struct timer_list *t) { - struct igb_adapter *adapter = (struct igb_adapter *) data; + struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer); igb_get_phy_info(&adapter->hw); } @@ -4514,9 +4512,9 @@ static void igb_check_lvmmc(struct igb_adapter *adapter) * igb_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void igb_watchdog(unsigned long data) +static void igb_watchdog(struct timer_list *t) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer); /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); } diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 1ed556911b14..713e8df23744 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1915,9 +1915,9 @@ static bool igbvf_has_link(struct igbvf_adapter *adapter) * igbvf_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void igbvf_watchdog(unsigned long data) +static void igbvf_watchdog(struct timer_list *t) { - struct igbvf_adapter *adapter = (struct igbvf_adapter *)data; + struct igbvf_adapter *adapter = from_timer(adapter, t, watchdog_timer); /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); @@ -2878,8 +2878,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->addr_len); } - setup_timer(&adapter->watchdog_timer, &igbvf_watchdog, - (unsigned long)adapter); + timer_setup(&adapter->watchdog_timer, igbvf_watchdog, 0); INIT_WORK(&adapter->reset_task, igbvf_reset_task); INIT_WORK(&adapter->watchdog_task, igbvf_watchdog_task); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c 
b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 1e6ec2277d54..2353c383f0a7 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -83,7 +83,7 @@ static void ixgb_setup_rctl(struct ixgb_adapter *adapter); static void ixgb_clean_tx_ring(struct ixgb_adapter *adapter); static void ixgb_clean_rx_ring(struct ixgb_adapter *adapter); static void ixgb_set_multi(struct net_device *netdev); -static void ixgb_watchdog(unsigned long data); +static void ixgb_watchdog(struct timer_list *t); static netdev_tx_t ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev); static int ixgb_change_mtu(struct net_device *netdev, int new_mtu); @@ -508,8 +508,7 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw); - setup_timer(&adapter->watchdog_timer, ixgb_watchdog, - (unsigned long)adapter); + timer_setup(&adapter->watchdog_timer, ixgb_watchdog, 0); INIT_WORK(&adapter->tx_timeout_task, ixgb_tx_timeout_task); @@ -1151,9 +1150,9 @@ alloc_failed: **/ static void -ixgb_watchdog(unsigned long data) +ixgb_watchdog(struct timer_list *t) { - struct ixgb_adapter *adapter = (struct ixgb_adapter *)data; + struct ixgb_adapter *adapter = from_timer(adapter, t, watchdog_timer); struct net_device *netdev = adapter->netdev; struct ixgb_desc_ring *txdr = &adapter->tx_ring; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7683c14024aa..3e83edd10e23 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7690,9 +7690,9 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter) * ixgbe_service_timer - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void ixgbe_service_timer(unsigned long data) +static void ixgbe_service_timer(struct timer_list *t) { - struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data; + struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer); unsigned long next_event_offset; /* poll faster when waiting for link */ @@ -10508,8 +10508,7 @@ skip_sriov: ether_addr_copy(hw->mac.addr, hw->mac.perm_addr); ixgbe_mac_set_default_filter(adapter); - setup_timer(&adapter->service_timer, &ixgbe_service_timer, - (unsigned long) adapter); + timer_setup(&adapter->service_timer, ixgbe_service_timer, 0); if (ixgbe_removed(hw->hw_addr)) { err = -EIO; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 032f8ac06357..12d3601b1d57 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2747,9 +2747,10 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter) * ixgbevf_service_timer - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void ixgbevf_service_timer(unsigned long data) +static void ixgbevf_service_timer(struct timer_list *t) { - struct ixgbevf_adapter *adapter = (struct ixgbevf_adapter *)data; + struct ixgbevf_adapter *adapter = from_timer(adapter, t, + service_timer); /* Reset the timer */ mod_timer(&adapter->service_timer, (HZ * 2) + jiffies); @@ -4120,8 +4121,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_sw_init; } - setup_timer(&adapter->service_timer, &ixgbevf_service_timer, - (unsigned long)adapter); + timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0); 
INIT_WORK(&adapter->service_task, ixgbevf_service_task); set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state); From 99767f278ccf74a1857069bb3eec991e572f94cd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:36 -0700 Subject: [PATCH 1127/2177] net/core: Convert sk_timer users to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly for all users of sk_timer. Cc: "David S. Miller" Cc: Ralf Baechle Cc: Andrew Hendry Cc: Eric Dumazet Cc: Paolo Abeni Cc: David Howells Cc: Julia Lawall Cc: linzhang Cc: Ingo Molnar Cc: netdev@vger.kernel.org Cc: linux-hams@vger.kernel.org Cc: linux-x25@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- net/netrom/af_netrom.c | 6 +++--- net/netrom/nr_timer.c | 47 +++++++++++++++++++++--------------------- net/rose/rose_timer.c | 8 +++---- net/x25/af_x25.c | 8 ++++--- net/x25/x25_timer.c | 17 ++++++++------- 6 files changed, 46 insertions(+), 42 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index f577df17bcf2..35656a9e4e44 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2683,7 +2683,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk_init_common(sk); sk->sk_send_head = NULL; - setup_timer(&sk->sk_timer, NULL, (unsigned long)sk); + timer_setup(&sk->sk_timer, NULL, 0); sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ebf16f7f9089..2dec3583c97d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -241,9 +241,9 @@ void nr_destroy_socket(struct sock *); /* * Handler for deferred kills. 
*/ -static void nr_destroy_timer(unsigned long data) +static void nr_destroy_timer(struct timer_list *t) { - struct sock *sk=(struct sock *)data; + struct sock *sk = from_timer(sk, t, sk_timer); bh_lock_sock(sk); sock_hold(sk); nr_destroy_socket(sk); @@ -284,7 +284,7 @@ void nr_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ - sk->sk_timer.function = nr_destroy_timer; + sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_destroy_timer; sk->sk_timer.expires = jiffies + 2 * HZ; add_timer(&sk->sk_timer); } else diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index f84ce71f1f5f..43569aea0f5e 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -29,23 +29,23 @@ #include #include -static void nr_heartbeat_expiry(unsigned long); -static void nr_t1timer_expiry(unsigned long); -static void nr_t2timer_expiry(unsigned long); -static void nr_t4timer_expiry(unsigned long); -static void nr_idletimer_expiry(unsigned long); +static void nr_heartbeat_expiry(struct timer_list *); +static void nr_t1timer_expiry(struct timer_list *); +static void nr_t2timer_expiry(struct timer_list *); +static void nr_t4timer_expiry(struct timer_list *); +static void nr_idletimer_expiry(struct timer_list *); void nr_init_timers(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk); - setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk); - setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk); - setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk); + timer_setup(&nr->t1timer, nr_t1timer_expiry, 0); + timer_setup(&nr->t2timer, nr_t2timer_expiry, 0); + timer_setup(&nr->t4timer, nr_t4timer_expiry, 0); + timer_setup(&nr->idletimer, nr_idletimer_expiry, 0); /* initialized by sock_init_data */ - sk->sk_timer.function = &nr_heartbeat_expiry; + sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_heartbeat_expiry; } void nr_start_t1timer(struct sock *sk) @@ -112,9 +112,9 @@ int nr_t1timer_running(struct sock *sk) return timer_pending(&nr_sk(sk)->t1timer); } -static void nr_heartbeat_expiry(unsigned long param) +static void nr_heartbeat_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct sock *sk = from_timer(sk, t, sk_timer); struct nr_sock *nr = nr_sk(sk); bh_lock_sock(sk); @@ -151,10 +151,10 @@ static void nr_heartbeat_expiry(unsigned long param) bh_unlock_sock(sk); } -static void nr_t2timer_expiry(unsigned long param) +static void nr_t2timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct nr_sock *nr = nr_sk(sk); + struct nr_sock *nr = from_timer(nr, t, t2timer); + struct sock *sk = &nr->sock; bh_lock_sock(sk); if (nr->condition & NR_COND_ACK_PENDING) { @@ -164,19 +164,20 @@ static void nr_t2timer_expiry(unsigned long param) bh_unlock_sock(sk); } -static void nr_t4timer_expiry(unsigned long param) +static void nr_t4timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct nr_sock *nr = from_timer(nr, t, t4timer); + struct sock *sk = &nr->sock; bh_lock_sock(sk); nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; bh_unlock_sock(sk); } -static void nr_idletimer_expiry(unsigned long param) +static void nr_idletimer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct nr_sock *nr = nr_sk(sk); + struct nr_sock *nr = from_timer(nr, t, idletimer); + struct sock *sk = &nr->sock; bh_lock_sock(sk); @@ -201,10 +202,10 @@ static void nr_idletimer_expiry(unsigned long param) 
bh_unlock_sock(sk); } -static void nr_t1timer_expiry(unsigned long param) +static void nr_t1timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct nr_sock *nr = nr_sk(sk); + struct nr_sock *nr = from_timer(nr, t, t1timer); + struct sock *sk = &nr->sock; bh_lock_sock(sk); switch (nr->state) { diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index e08201185214..ea613b2a9735 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -28,7 +28,7 @@ #include #include -static void rose_heartbeat_expiry(unsigned long); +static void rose_heartbeat_expiry(struct timer_list *t); static void rose_timer_expiry(struct timer_list *); static void rose_idletimer_expiry(struct timer_list *); @@ -36,7 +36,7 @@ void rose_start_heartbeat(struct sock *sk) { del_timer(&sk->sk_timer); - sk->sk_timer.function = &rose_heartbeat_expiry; + sk->sk_timer.function = (TIMER_FUNC_TYPE)rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; add_timer(&sk->sk_timer); @@ -119,9 +119,9 @@ void rose_stop_idletimer(struct sock *sk) del_timer(&rose_sk(sk)->idletimer); } -static void rose_heartbeat_expiry(unsigned long param) +static void rose_heartbeat_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct sock *sk = from_timer(sk, t, sk_timer); struct rose_sock *rose = rose_sk(sk); bh_lock_sock(sk); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index c590c0bd1393..ea87143314f3 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -374,9 +374,11 @@ static void __x25_destroy_socket(struct sock *); /* * handler for deferred kills. */ -static void x25_destroy_timer(unsigned long data) +static void x25_destroy_timer(struct timer_list *t) { - x25_destroy_socket_from_timer((struct sock *)data); + struct sock *sk = from_timer(sk, t, sk_timer); + + x25_destroy_socket_from_timer(sk); } /* @@ -413,7 +415,7 @@ static void __x25_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.expires = jiffies + 10 * HZ; - sk->sk_timer.function = x25_destroy_timer; + sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_destroy_timer; add_timer(&sk->sk_timer); } else { /* drop last reference so sock_put will free */ diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index de5cec41d100..1dfba3c23459 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -26,17 +26,17 @@ #include #include -static void x25_heartbeat_expiry(unsigned long); -static void x25_timer_expiry(unsigned long); +static void x25_heartbeat_expiry(struct timer_list *t); +static void x25_timer_expiry(struct timer_list *t); void x25_init_timers(struct sock *sk) { struct x25_sock *x25 = x25_sk(sk); - setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk); + timer_setup(&x25->timer, x25_timer_expiry, 0); /* initialized by sock_init_data */ - sk->sk_timer.function = &x25_heartbeat_expiry; + sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_heartbeat_expiry; } void x25_start_heartbeat(struct sock *sk) @@ -92,9 +92,9 @@ unsigned long x25_display_timer(struct sock *sk) return x25->timer.expires - jiffies; } -static void x25_heartbeat_expiry(unsigned long param) +static void x25_heartbeat_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct sock *sk = from_timer(sk, t, sk_timer); bh_lock_sock(sk); if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */ @@ -159,9 +159,10 @@ static inline void x25_do_timer_expiry(struct sock * sk) } } -static void x25_timer_expiry(unsigned long param) +static void 
x25_timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct x25_sock *x25 = from_timer(x25, t, timer); + struct sock *sk = &x25->sk; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */ From ba421793505f91026cccbf9398ff866dd308036d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:37 -0700 Subject: [PATCH 1128/2177] net: atm: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Also drops a redundant initialization that is already set up by DEFINE_TIMER. Cc: "David S. Miller" Cc: Hans Liljestrand Cc: "Reshetova, Elena" Cc: Bhumika Goyal Cc: Johannes Berg Cc: Roopa Prabhu Cc: Augusto Mecking Caringi Cc: Jarod Wilson Cc: Kalle Valo Cc: Thomas Gleixner Cc: Alexey Dobriyan Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/atm/clip.c | 4 ++-- net/atm/lec.c | 19 +++++++++---------- net/atm/mpc.c | 1 - 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/net/atm/clip.c b/net/atm/clip.c index 65f706e4344c..d4f6029d5109 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -153,7 +153,7 @@ static int neigh_check_cb(struct neighbour *n) return 1; } -static void idle_timer_check(unsigned long dummy) +static void idle_timer_check(struct timer_list *unused) { write_lock(&arp_tbl.lock); __neigh_for_each_release(&arp_tbl, neigh_check_cb); @@ -887,7 +887,7 @@ static int __init atm_clip_init(void) register_netdevice_notifier(&clip_dev_notifier); register_inetaddr_notifier(&clip_inet_notifier); - setup_timer(&idle_timer, idle_timer_check, 0); + timer_setup(&idle_timer, idle_timer_check, 0); #ifdef CONFIG_PROC_FS { diff --git a/net/atm/lec.c b/net/atm/lec.c index a3d93a1bb133..c976196da3ea 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1232,7 +1232,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, #define LEC_ARP_REFRESH_INTERVAL (3*HZ) static void lec_arp_check_expire(struct work_struct *work); -static void lec_arp_expire_arp(unsigned long data); +static void lec_arp_expire_arp(struct timer_list *t); /* * Arp table funcs @@ -1559,8 +1559,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv, } ether_addr_copy(to_return->mac_addr, mac_addr); INIT_HLIST_NODE(&to_return->next); - setup_timer(&to_return->timer, lec_arp_expire_arp, - (unsigned long)to_return); + timer_setup(&to_return->timer, lec_arp_expire_arp, 0); to_return->last_used = jiffies; to_return->priv = priv; skb_queue_head_init(&to_return->tx_wait); @@ -1569,11 +1568,11 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv, } /* Arp sent timer expired */ -static void lec_arp_expire_arp(unsigned long data) +static void lec_arp_expire_arp(struct timer_list *t) { struct lec_arp_table *entry; - entry = (struct lec_arp_table *)data; + entry = from_timer(entry, t, timer); pr_debug("\n"); if (entry->status == ESI_ARP_PENDING) { @@ -1591,10 +1590,10 @@ static void lec_arp_expire_arp(unsigned long data) } /* Unknown/unused vcc expire, remove associated entry */ -static void lec_arp_expire_vcc(unsigned long data) +static void lec_arp_expire_vcc(struct timer_list *t) { unsigned long flags; - struct lec_arp_table *to_remove = (struct lec_arp_table *)data; + struct lec_arp_table *to_remove = from_timer(to_remove, t, timer); struct lec_priv *priv = to_remove->priv; del_timer(&to_remove->timer); @@ 
-1799,7 +1798,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, else send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL); entry->timer.expires = jiffies + (1 * HZ); - entry->timer.function = lec_arp_expire_arp; + entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_arp; add_timer(&entry->timer); found = priv->mcast_vcc; } @@ -1999,7 +1998,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->old_recv_push = old_push; entry->status = ESI_UNKNOWN; entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; + entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc; hlist_add_head(&entry->next, &priv->lec_no_forward); add_timer(&entry->timer); dump_arp_table(priv); @@ -2083,7 +2082,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->status = ESI_UNKNOWN; hlist_add_head(&entry->next, &priv->lec_arp_empty_ones); entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; + entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc; add_timer(&entry->timer); pr_debug("After vcc was added\n"); dump_arp_table(priv); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 5677147209e8..b43d99430eb6 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -799,7 +799,6 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg) int err; if (mpcs == NULL) { - init_timer(&mpc_timer); mpc_timer_refresh(); /* This lets us now how our LECs are doing */ From cac6a8f9017b532b39ecb9a9b040eadf26895035 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:38 -0700 Subject: [PATCH 1129/2177] net/xen-netback: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Wei Liu Cc: Paul Durrant Cc: xen-devel@lists.xenproject.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/common.h | 2 +- drivers/net/xen-netback/interface.c | 2 +- drivers/net/xen-netback/netback.c | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 5b1d2e8402d9..a46a1e94505d 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -307,7 +307,7 @@ static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif) return to_xenbus_device(vif->dev->dev.parent); } -void xenvif_tx_credit_callback(unsigned long data); +void xenvif_tx_credit_callback(struct timer_list *t); struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index dcfcb153918c..5cbe0ae55a07 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -520,7 +520,7 @@ int xenvif_init_queue(struct xenvif_queue *queue) queue->credit_bytes = queue->remaining_credit = ~0UL; queue->credit_usec = 0UL; - setup_timer(&queue->credit_timeout, xenvif_tx_credit_callback, 0UL); + timer_setup(&queue->credit_timeout, xenvif_tx_credit_callback, 0); queue->credit_window_start = get_jiffies_64(); queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 5042ff8d449a..a27daa23c9dc 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -183,9 +183,9 @@ static void tx_add_credit(struct xenvif_queue *queue) queue->rate_limited = false; } -void xenvif_tx_credit_callback(unsigned long data) +void xenvif_tx_credit_callback(struct timer_list *t) { - struct xenvif_queue *queue = (struct xenvif_queue *)data; + struct xenvif_queue *queue = from_timer(queue, t, credit_timeout); tx_add_credit(queue); xenvif_napi_schedule_or_enable_events(queue); } @@ -700,8 +700,6 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) /* Still too big to send right now? Set a callback. */ if (size > queue->remaining_credit) { - queue->credit_timeout.data = - (unsigned long)queue; mod_timer(&queue->credit_timeout, next_credit); queue->credit_window_start = next_credit; From 7d85b2c8d1ca09040e16a3cd7f019d6a9ddd22d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:39 -0700 Subject: [PATCH 1130/2177] net: fs_enet: Remove unused timer Removes unused timer and its old initialization call. Cc: Pantelis Antoniou Cc: Vitaly Bordug Cc: linuxppc-dev@lists.ozlabs.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c | 2 -- drivers/net/ethernet/freescale/fs_enet/fs_enet.h | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 753259091b22..7892f2f0c6b5 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1023,8 +1023,6 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->ethtool_ops = &fs_ethtool_ops; - init_timer(&fep->phy_timer_list); - netif_carrier_off(ndev); ndev->features |= NETIF_F_SG; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index 5ce516c8a62a..dd306deb7cf1 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -137,7 +137,6 @@ struct fs_enet_private { cbd_t __iomem *cur_rx; cbd_t __iomem *cur_tx; int tx_free; - struct timer_list phy_timer_list; const struct phy_info *phy; u32 msg_enable; struct mii_if_info mii_if; From 02ab4fcaf0b50434529cb263082ff95b5158c8ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:40 -0700 Subject: [PATCH 1131/2177] um: net: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. (Note that this timer is actually disabled.) Cc: Jeff Dike Cc: Richard Weinberger Cc: "David S. Miller" Cc: Jarod Wilson Cc: user-mode-linux-devel@lists.sourceforge.net Cc: user-mode-linux-user@lists.sourceforge.net Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- arch/um/drivers/net_kern.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 1669240c7a25..b305f8247909 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -168,7 +168,6 @@ static int uml_net_open(struct net_device *dev) goto out_close; } - lp->tl.data = (unsigned long) &lp->user; netif_start_queue(dev); /* clear buffer - it can happen that the host side of the interface @@ -278,10 +277,11 @@ static const struct ethtool_ops uml_net_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; -static void uml_net_user_timer_expire(unsigned long _conn) +static void uml_net_user_timer_expire(struct timer_list *t) { #ifdef undef - struct connection *conn = (struct connection *)_conn; + struct uml_net_private *lp = from_timer(lp, t, tl); + struct connection *conn = &lp->user; dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); do_connect(conn); @@ -458,9 +458,8 @@ static void eth_configure(int n, void *init, char *mac, .add_address = transport->user->add_address, .delete_address = transport->user->delete_address }); - init_timer(&lp->tl); + timer_setup(&lp->tl, uml_net_user_timer_expire, 0); spin_lock_init(&lp->lock); - lp->tl.function = uml_net_user_timer_expire; memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac)); if ((transport->user->init != NULL) && From 1ab791dc27faef5aee80fe76d73980d2de0bebc8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:41 -0700 Subject: [PATCH 1132/2177] ipv4: timewait: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer 
explicitly. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/ipv4/inet_timewait_sock.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 5b039159e67a..a4bab81f1462 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -142,9 +142,9 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); -static void tw_timer_handler(unsigned long data) +static void tw_timer_handler(struct timer_list *t) { - struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; + struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer); if (tw->tw_kill) __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); @@ -188,8 +188,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, sock_net(sk)); - setup_pinned_timer(&tw->tw_timer, tw_timer_handler, - (unsigned long)tw); + timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this From ff861c4d64f2df1c7eaabaf2ba8f2f8ebc4b28e3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 17:29:42 -0700 Subject: [PATCH 1133/2177] sunrpc: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Trond Myklebust Cc: Anna Schumaker Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: "David S. Miller" Cc: linux-nfs@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/sunrpc/sched.c | 8 ++++---- net/sunrpc/svc.c | 2 +- net/sunrpc/svc_xprt.c | 9 ++++----- net/sunrpc/xprt.c | 9 ++++----- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 0cc83839c13c..5dea47eb31bb 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -44,7 +44,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); -static void __rpc_queue_timer_fn(unsigned long ptr); +static void __rpc_queue_timer_fn(struct timer_list *t); /* * RPC tasks sit here while waiting for conditions to improve. 
@@ -228,7 +228,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); queue->qlen = 0; - setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); + timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0); INIT_LIST_HEAD(&queue->timer_list.list); rpc_assign_waitqueue_name(queue, qname); } @@ -635,9 +635,9 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) } EXPORT_SYMBOL_GPL(rpc_wake_up_status); -static void __rpc_queue_timer_fn(unsigned long ptr) +static void __rpc_queue_timer_fn(struct timer_list *t) { - struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; + struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer); struct rpc_task *task, *n; unsigned long expires, now, timeo; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index aa04666f929d..33f4ae68426d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -455,7 +455,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, serv->sv_xdrsize = xdrsize; INIT_LIST_HEAD(&serv->sv_tempsocks); INIT_LIST_HEAD(&serv->sv_permsocks); - init_timer(&serv->sv_temptimer); + timer_setup(&serv->sv_temptimer, NULL, 0); spin_lock_init(&serv->sv_lock); __svc_init_bc(serv); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d16a8b423c20..71de77bd4423 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -28,7 +28,7 @@ module_param(svc_rpc_per_connection_limit, uint, 0644); static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); -static void svc_age_temp_xprts(unsigned long closure); +static void svc_age_temp_xprts(struct timer_list *t); static void svc_delete_xprt(struct svc_xprt *xprt); /* apparently the "standard" is that clients close @@ -785,8 +785,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt serv->sv_tmpcnt++; if (serv->sv_temptimer.function == NULL) { /* setup timer to age temp transports */ - setup_timer(&serv->sv_temptimer, svc_age_temp_xprts, - (unsigned long)serv); + serv->sv_temptimer.function = (TIMER_FUNC_TYPE)svc_age_temp_xprts; mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } @@ -960,9 +959,9 @@ out: * Timer function to close old temporary transports, using * a mark-and-sweep algorithm. 
*/ -static void svc_age_temp_xprts(unsigned long closure) +static void svc_age_temp_xprts(struct timer_list *t) { - struct svc_serv *serv = (struct svc_serv *)closure; + struct svc_serv *serv = from_timer(serv, t, sv_temptimer); struct svc_xprt *xprt; struct list_head *le, *next; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e741ec2b4d8e..4b00302e1867 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -696,9 +696,9 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt) } static void -xprt_init_autodisconnect(unsigned long data) +xprt_init_autodisconnect(struct timer_list *t) { - struct rpc_xprt *xprt = (struct rpc_xprt *)data; + struct rpc_xprt *xprt = from_timer(xprt, t, timer); spin_lock(&xprt->transport_lock); if (!list_empty(&xprt->recv)) @@ -1422,10 +1422,9 @@ found: xprt->idle_timeout = 0; INIT_WORK(&xprt->task_cleanup, xprt_autoclose); if (xprt_has_timer(xprt)) - setup_timer(&xprt->timer, xprt_init_autodisconnect, - (unsigned long)xprt); + timer_setup(&xprt->timer, xprt_init_autodisconnect, 0); else - init_timer(&xprt->timer); + timer_setup(&xprt->timer, NULL, 0); if (strlen(args->servername) > RPC_MAXNETNAMELEN) { xprt_destroy(xprt); From 2dc7c1fef9565c73c5054fd3c134afada09476c1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 10:12:54 -0700 Subject: [PATCH 1134/2177] tools: bpftool: use more common tag format Program tag is usually displayed as a string of bytes without any separators (e.g. as "aa5520b1090cfeb6" vs MAC addr-like format bpftool uses currently: "aa:55:20:b1:09:0c:fe:b6"). Make bpftool use the more common format both for displaying the tag and selecting the program by tag. This was pointed out in review but I misunderstood the comment. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 +- tools/bpf/bpftool/main.h | 3 +-- tools/bpf/bpftool/prog.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 04d12f768f06..3968f0bd37db 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -55,7 +55,7 @@ EXAMPLES **# bpftool prog show** :: - 10: xdp name some_prog tag 00:5a:3d:21:23:62:0c:8b + 10: xdp name some_prog tag 005a3d2123620c8b loaded_at Sep 29/20:11 uid 0 xlated 528B jited 370B memlock 4096B map_ids 10 diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 8e809b2bb311..844e4ef6db56 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -53,8 +53,7 @@ #define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) #define BAD_ARG() ({ err("what is '%s'?\n", *argv); -1; }) -#define BPF_TAG_FMT "%02hhx:%02hhx:%02hhx:%02hhx:" \ - "%02hhx:%02hhx:%02hhx:%02hhx" +#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9e2681c83717..d60f5307b6e2 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -224,7 +224,7 @@ static int show_prog(int fd) printf("name %s ", info.name); printf("tag "); - print_hex(info.tag, BPF_TAG_SIZE, ":"); + print_hex(info.tag, BPF_TAG_SIZE, ""); printf("\n"); if (info.load_time) { From 48167c9ce0b91c068430345bf039c7be23fa2f3f Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Oct 2017 21:36:05 +0300 Subject: [PATCH 1135/2177] fsl/fman: remove of_node The FMan MAC driver allocates a platform device for the Ethernet driver to probe on. Setting pdev->dev.of_node with the MAC node triggers the MAC driver probing of the new platform device. While this fails quickly and does not affect the functionality of the drivers, it is incorrect and must be removed. This was added to address a report that DSA code using of_find_net_device_by_node() is unable to use the DPAA interfaces. Error message seen before this fix: fsl_mac dpaa-ethernet.0: __devm_request_mem_region(mac) failed fsl_mac: probe of dpaa-ethernet.0 failed with error -16 Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/mac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 387eb4a88b72..9a265f862065 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -623,7 +623,6 @@ static struct platform_device *dpaa_eth_add_device(int fman_id, goto no_mem; } - pdev->dev.of_node = node; pdev->dev.parent = priv->dev; set_dma_ops(&pdev->dev, get_dma_ops(priv->dev)); From 3c38ec67867c0a3fe9eeda68a3d2a945d5d09a24 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Oct 2017 21:36:06 +0300 Subject: [PATCH 1136/2177] dpaa_eth: move of_phy_connect() to the eth driver Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller --- .../net/ethernet/freescale/dpaa/dpaa_eth.c | 48 ++++++++- drivers/net/ethernet/freescale/fman/mac.c | 97 ++++--------------- drivers/net/ethernet/freescale/fman/mac.h | 5 +- 3 files changed, 66 insertions(+), 84 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 42258060f142..7cf61d62ad5e 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2435,6 +2435,48 @@ static void dpaa_eth_napi_disable(struct dpaa_priv *priv) } } +static void dpaa_adjust_link(struct net_device *net_dev) +{ + struct mac_device *mac_dev; + struct dpaa_priv *priv; + + priv = netdev_priv(net_dev); + mac_dev = priv->mac_dev; + mac_dev->adjust_link(mac_dev); +} + +static int dpaa_phy_init(struct net_device *net_dev) +{ + struct mac_device *mac_dev; + struct phy_device *phy_dev; + struct dpaa_priv *priv; + + priv = netdev_priv(net_dev); + mac_dev = priv->mac_dev; + + phy_dev = of_phy_connect(net_dev, mac_dev->phy_node, + &dpaa_adjust_link, 0, + mac_dev->phy_if); + if (!phy_dev) { + netif_err(priv, ifup, net_dev, "init_phy() failed\n"); + return -ENODEV; + } + + /* Remove any features not supported by the controller */ + phy_dev->supported &= mac_dev->if_support; + + /* Enable the symmetric and asymmetric PAUSE frame advertisements, + * as most of the PHY drivers do not enable them by default. + */ + phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause); + phy_dev->advertising = phy_dev->supported; + + mac_dev->phy_dev = phy_dev; + net_dev->phydev = phy_dev; + + return 0; +} + static int dpaa_open(struct net_device *net_dev) { struct mac_device *mac_dev; @@ -2445,12 +2487,8 @@ static int dpaa_open(struct net_device *net_dev) mac_dev = priv->mac_dev; dpaa_eth_napi_enable(priv); - net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev); - if (!net_dev->phydev) { - netif_err(priv, ifup, net_dev, "init_phy() failed\n"); - err = -ENODEV; + if (dpaa_phy_init(net_dev)) goto phy_init_failed; - } for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { err = fman_port_enable(mac_dev->port[i]); diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 9a265f862065..a0a3107c1f45 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -57,9 +57,7 @@ struct mac_priv_s { struct device *dev; void __iomem *vaddr; u8 cell_index; - phy_interface_t phy_if; struct fman *fman; - struct device_node *phy_node; struct device_node *internal_phy_node; /* List of multicast addresses */ struct list_head mc_addr_list; @@ -106,7 +104,7 @@ static void set_fman_mac_params(struct mac_device *mac_dev, resource_size(mac_dev->res)); memcpy(¶ms->addr, mac_dev->addr, sizeof(mac_dev->addr)); params->max_speed = priv->max_speed; - params->phy_if = priv->phy_if; + params->phy_if = mac_dev->phy_if; params->basex_if = false; params->mac_id = priv->cell_index; params->fm = (void *)priv->fman; @@ -419,15 +417,12 @@ void fman_get_pause_cfg(struct mac_device *mac_dev, bool *rx_pause, } EXPORT_SYMBOL(fman_get_pause_cfg); -static void adjust_link_void(struct net_device *net_dev) +static void adjust_link_void(struct mac_device *mac_dev) { } -static void adjust_link_dtsec(struct net_device *net_dev) +static void adjust_link_dtsec(struct mac_device *mac_dev) { - struct device *dev = net_dev->dev.parent; - struct dpaa_eth_data *eth_data = dev->platform_data; - struct mac_device *mac_dev = eth_data->mac_dev; struct phy_device 
*phy_dev = mac_dev->phy_dev; struct fman_mac *fman_mac; bool rx_pause, tx_pause; @@ -444,14 +439,12 @@ static void adjust_link_dtsec(struct net_device *net_dev) fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause); err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause); if (err < 0) - netdev_err(net_dev, "fman_set_mac_active_pause() = %d\n", err); + dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n", + err); } -static void adjust_link_memac(struct net_device *net_dev) +static void adjust_link_memac(struct mac_device *mac_dev) { - struct device *dev = net_dev->dev.parent; - struct dpaa_eth_data *eth_data = dev->platform_data; - struct mac_device *mac_dev = eth_data->mac_dev; struct phy_device *phy_dev = mac_dev->phy_dev; struct fman_mac *fman_mac; bool rx_pause, tx_pause; @@ -463,60 +456,12 @@ static void adjust_link_memac(struct net_device *net_dev) fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause); err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause); if (err < 0) - netdev_err(net_dev, "fman_set_mac_active_pause() = %d\n", err); -} - -/* Initializes driver's PHY state, and attaches to the PHY. - * Returns 0 on success. - */ -static struct phy_device *init_phy(struct net_device *net_dev, - struct mac_device *mac_dev, - void (*adj_lnk)(struct net_device *)) -{ - struct phy_device *phy_dev; - struct mac_priv_s *priv = mac_dev->priv; - - phy_dev = of_phy_connect(net_dev, priv->phy_node, adj_lnk, 0, - priv->phy_if); - if (!phy_dev) { - netdev_err(net_dev, "Could not connect to PHY\n"); - return NULL; - } - - /* Remove any features not supported by the controller */ - phy_dev->supported &= mac_dev->if_support; - /* Enable the symmetric and asymmetric PAUSE frame advertisements, - * as most of the PHY drivers do not enable them by default. 
- */ - phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause); - phy_dev->advertising = phy_dev->supported; - - mac_dev->phy_dev = phy_dev; - - return phy_dev; -} - -static struct phy_device *dtsec_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) -{ - return init_phy(net_dev, mac_dev, &adjust_link_dtsec); -} - -static struct phy_device *tgec_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) -{ - return init_phy(net_dev, mac_dev, adjust_link_void); -} - -static struct phy_device *memac_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) -{ - return init_phy(net_dev, mac_dev, &adjust_link_memac); + dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n", + err); } static void setup_dtsec(struct mac_device *mac_dev) { - mac_dev->init_phy = dtsec_init_phy; mac_dev->init = dtsec_initialization; mac_dev->set_promisc = dtsec_set_promiscuous; mac_dev->change_addr = dtsec_modify_mac_address; @@ -528,14 +473,13 @@ static void setup_dtsec(struct mac_device *mac_dev) mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; - + mac_dev->adjust_link = adjust_link_dtsec; mac_dev->priv->enable = dtsec_enable; mac_dev->priv->disable = dtsec_disable; } static void setup_tgec(struct mac_device *mac_dev) { - mac_dev->init_phy = tgec_init_phy; mac_dev->init = tgec_initialization; mac_dev->set_promisc = tgec_set_promiscuous; mac_dev->change_addr = tgec_modify_mac_address; @@ -547,14 +491,13 @@ static void setup_tgec(struct mac_device *mac_dev) mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; - + mac_dev->adjust_link = adjust_link_void; mac_dev->priv->enable = tgec_enable; mac_dev->priv->disable = tgec_disable; } static void setup_memac(struct mac_device *mac_dev) { - mac_dev->init_phy = memac_init_phy; mac_dev->init = memac_initialization; mac_dev->set_promisc = memac_set_promiscuous; mac_dev->change_addr = memac_modify_mac_address; @@ -566,7 +509,7 @@ static void setup_memac(struct mac_device *mac_dev) mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; - + mac_dev->adjust_link = adjust_link_memac; mac_dev->priv->enable = memac_enable; mac_dev->priv->disable = memac_disable; } @@ -850,13 +793,13 @@ static int mac_probe(struct platform_device *_of_dev) mac_node); phy_if = PHY_INTERFACE_MODE_SGMII; } - priv->phy_if = phy_if; + mac_dev->phy_if = phy_if; - priv->speed = phy2speed[priv->phy_if]; + priv->speed = phy2speed[mac_dev->phy_if]; priv->max_speed = priv->speed; mac_dev->if_support = DTSEC_SUPPORTED; /* We don't support half-duplex in SGMII mode */ - if (priv->phy_if == PHY_INTERFACE_MODE_SGMII) + if (mac_dev->phy_if == PHY_INTERFACE_MODE_SGMII) mac_dev->if_support &= ~(SUPPORTED_10baseT_Half | SUPPORTED_100baseT_Half); @@ -865,12 +808,12 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev->if_support |= SUPPORTED_1000baseT_Full; /* The 10G interface only supports one mode */ - if (priv->phy_if == PHY_INTERFACE_MODE_XGMII) + if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) mac_dev->if_support = SUPPORTED_10000baseT_Full; /* Get the rest of the PHY information */ - priv->phy_node = of_parse_phandle(mac_node, "phy-handle", 0); - if (!priv->phy_node && of_phy_is_fixed_link(mac_node)) { + mac_dev->phy_node = of_parse_phandle(mac_node, "phy-handle", 0); + if (!mac_dev->phy_node && of_phy_is_fixed_link(mac_node)) { struct phy_device *phy; err = of_phy_register_fixed_link(mac_node); @@ -884,8 +827,8 @@ static int mac_probe(struct platform_device *_of_dev) goto 
_return_dev_set_drvdata; } - priv->phy_node = of_node_get(mac_node); - phy = of_phy_find_device(priv->phy_node); + mac_dev->phy_node = of_node_get(mac_node); + phy = of_phy_find_device(mac_dev->phy_node); if (!phy) { err = -EINVAL; goto _return_dev_set_drvdata; @@ -903,7 +846,7 @@ static int mac_probe(struct platform_device *_of_dev) err = mac_dev->init(mac_dev); if (err < 0) { dev_err(dev, "mac_dev->init() = %d\n", err); - of_node_put(priv->phy_node); + of_node_put(mac_dev->phy_node); goto _return_dev_set_drvdata; } diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h index d7313f0c5135..1ca85a18ab38 100644 --- a/drivers/net/ethernet/freescale/fman/mac.h +++ b/drivers/net/ethernet/freescale/fman/mac.h @@ -50,6 +50,8 @@ struct mac_device { struct fman_port *port[2]; u32 if_support; struct phy_device *phy_dev; + phy_interface_t phy_if; + struct device_node *phy_node; bool autoneg_pause; bool rx_pause_req; @@ -58,11 +60,10 @@ struct mac_device { bool tx_pause_active; bool promisc; - struct phy_device *(*init_phy)(struct net_device *net_dev, - struct mac_device *mac_dev); int (*init)(struct mac_device *mac_dev); int (*start)(struct mac_device *mac_dev); int (*stop)(struct mac_device *mac_dev); + void (*adjust_link)(struct mac_device *mac_dev); int (*set_promisc)(struct fman_mac *mac_dev, bool enable); int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr); int (*set_multi)(struct net_device *net_dev, From c6e26ea8c893687a83c9feda7ab4f89205e19726 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Oct 2017 21:36:07 +0300 Subject: [PATCH 1137/2177] dpaa_eth: change device used Change device used for DMA mapping to the MAC device that is an of_device, with proper DMA ops. Using this device for the netdevice should also address the issue with DSA scenarios that need the netdevice to be backed by an of_device. Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller --- .../net/ethernet/freescale/dpaa/dpaa_eth.c | 42 ++++++------------- drivers/net/ethernet/freescale/fman/mac.c | 37 +++++++--------- drivers/net/ethernet/freescale/fman/mac.h | 1 - 3 files changed, 27 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 7cf61d62ad5e..823aa6597e56 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -385,34 +385,19 @@ out: static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev) { - struct platform_device *of_dev; struct dpaa_eth_data *eth_data; - struct device *dpaa_dev, *dev; - struct device_node *mac_node; + struct device *dpaa_dev; struct mac_device *mac_dev; dpaa_dev = &pdev->dev; eth_data = dpaa_dev->platform_data; - if (!eth_data) + if (!eth_data) { + dev_err(dpaa_dev, "eth_data missing\n"); return ERR_PTR(-ENODEV); - - mac_node = eth_data->mac_node; - - of_dev = of_find_device_by_node(mac_node); - if (!of_dev) { - dev_err(dpaa_dev, "of_find_device_by_node(%pOF) failed\n", - mac_node); - of_node_put(mac_node); - return ERR_PTR(-EINVAL); } - of_node_put(mac_node); - - dev = &of_dev->dev; - - mac_dev = dev_get_drvdata(dev); + mac_dev = eth_data->mac_dev; if (!mac_dev) { - dev_err(dpaa_dev, "dev_get_drvdata(%s) failed\n", - dev_name(dev)); + dev_err(dpaa_dev, "mac_dev missing\n"); return ERR_PTR(-EINVAL); } @@ -2696,7 +2681,13 @@ static int dpaa_eth_probe(struct platform_device *pdev) int err = 0, i, channel; struct device *dev; - dev = &pdev->dev; + /* device used for DMA mapping */ + dev = pdev->dev.parent; + err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (err) { + dev_err(dev, "dma_coerce_mask_and_coherent() failed\n"); + return err; + } /* Allocate this early, so we can store relevant information in * the private area @@ -2738,14 +2729,6 @@ static int dpaa_eth_probe(struct platform_device *pdev) priv->buf_layout[RX].priv_data_size = DPAA_RX_PRIV_DATA_SIZE; /* Rx */ priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */ - /* device used for DMA mapping */ - set_dma_ops(dev, get_dma_ops(&pdev->dev)); - err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40)); - if (err) { - dev_err(dev, "dma_coerce_mask_and_coherent() failed\n"); - goto dev_mask_failed; - } - /* bp init */ for (i = 0; i < DPAA_BPS_NUM; i++) { int err; @@ -2879,7 +2862,6 @@ get_channel_failed: dpaa_bps_free(priv); bp_create_failed: fq_probe_failed: -dev_mask_failed: mac_probe_failed: dev_set_drvdata(dev, NULL); free_netdev(net_dev); diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index a0a3107c1f45..1d6da1ea7bfb 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -542,8 +542,7 @@ static const u16 phy2speed[] = { }; static struct platform_device *dpaa_eth_add_device(int fman_id, - struct mac_device *mac_dev, - struct device_node *node) + struct mac_device *mac_dev) { struct platform_device *pdev; struct dpaa_eth_data data; @@ -556,10 +555,8 @@ static struct platform_device *dpaa_eth_add_device(int fman_id, data.mac_dev = mac_dev; data.mac_hw_id = priv->cell_index; data.fman_hw_id = fman_id; - data.mac_node = node; mutex_lock(ð_lock); - pdev = platform_device_alloc("dpaa-ethernet", dpaa_eth_dev_cnt); if (!pdev) { ret = -ENOMEM; @@ -648,9 +645,6 @@ static int mac_probe(struct platform_device *_of_dev) goto _return; } - /* Register mac_dev */ - dev_set_drvdata(dev, mac_dev); 
- INIT_LIST_HEAD(&priv->mc_addr_list); /* Get the FM node */ @@ -659,7 +653,7 @@ static int mac_probe(struct platform_device *_of_dev) dev_err(dev, "of_get_parent(%pOF) failed\n", mac_node); err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } of_dev = of_find_device_by_node(dev_node); @@ -693,7 +687,7 @@ static int mac_probe(struct platform_device *_of_dev) if (err < 0) { dev_err(dev, "of_address_to_resource(%pOF) = %d\n", mac_node, err); - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } mac_dev->res = __devm_request_region(dev, @@ -703,7 +697,7 @@ static int mac_probe(struct platform_device *_of_dev) if (!mac_dev->res) { dev_err(dev, "__devm_request_mem_region(mac) failed\n"); err = -EBUSY; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } priv->vaddr = devm_ioremap(dev, mac_dev->res->start, @@ -711,7 +705,7 @@ static int mac_probe(struct platform_device *_of_dev) if (!priv->vaddr) { dev_err(dev, "devm_ioremap() failed\n"); err = -EIO; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } if (!of_device_is_available(mac_node)) { @@ -728,7 +722,7 @@ static int mac_probe(struct platform_device *_of_dev) if (err) { dev_err(dev, "failed to read cell-index for %pOF\n", mac_node); err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } priv->cell_index = (u8)val; @@ -737,7 +731,7 @@ static int mac_probe(struct platform_device *_of_dev) if (!mac_addr) { dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node); err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } memcpy(mac_dev->addr, mac_addr, sizeof(mac_dev->addr)); @@ -747,14 +741,14 @@ static int mac_probe(struct platform_device *_of_dev) dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n", mac_node); err = nph; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } if (nph != ARRAY_SIZE(mac_dev->port)) { dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n", mac_node); err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { @@ -818,20 +812,20 @@ static int mac_probe(struct platform_device *_of_dev) err = of_phy_register_fixed_link(mac_node); if (err) - goto _return_dev_set_drvdata; + goto _return_of_get_parent; priv->fixed_link = kzalloc(sizeof(*priv->fixed_link), GFP_KERNEL); if (!priv->fixed_link) { err = -ENOMEM; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } mac_dev->phy_node = of_node_get(mac_node); phy = of_phy_find_device(mac_dev->phy_node); if (!phy) { err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } priv->fixed_link->link = phy->link; @@ -847,7 +841,7 @@ static int mac_probe(struct platform_device *_of_dev) if (err < 0) { dev_err(dev, "mac_dev->init() = %d\n", err); of_node_put(mac_dev->phy_node); - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } /* pause frame autonegotiation enabled */ @@ -868,7 +862,7 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev->addr[0], mac_dev->addr[1], mac_dev->addr[2], mac_dev->addr[3], mac_dev->addr[4], mac_dev->addr[5]); - priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev, mac_node); + priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev); if (IS_ERR(priv->eth_dev)) { dev_err(dev, "failed to add Ethernet platform device for MAC %d\n", priv->cell_index); @@ -879,9 +873,8 @@ static int mac_probe(struct platform_device *_of_dev) _return_of_node_put: of_node_put(dev_node); 
-_return_dev_set_drvdata: +_return_of_get_parent: kfree(priv->fixed_link); - dev_set_drvdata(dev, NULL); _return: return err; } diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h index 1ca85a18ab38..eefb3357e304 100644 --- a/drivers/net/ethernet/freescale/fman/mac.h +++ b/drivers/net/ethernet/freescale/fman/mac.h @@ -83,7 +83,6 @@ struct mac_device { }; struct dpaa_eth_data { - struct device_node *mac_node; struct mac_device *mac_dev; int mac_hw_id; int fman_hw_id; From 8b9b5a2c27e1a7292f1e97e0eb19b0ae603dfa68 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Oct 2017 21:36:08 +0300 Subject: [PATCH 1138/2177] dpaa_eth: cleanup dpaa_eth_probe() error paths Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- .../net/ethernet/freescale/dpaa/dpaa_eth.c | 53 +++++++------------ 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 823aa6597e56..c6b97a1b6e43 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2695,7 +2695,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM); if (!net_dev) { dev_err(dev, "alloc_etherdev_mq() failed\n"); - goto alloc_etherdev_mq_failed; + return -ENOMEM; } /* Do this here, so we can be verbose early */ @@ -2711,7 +2711,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) if (IS_ERR(mac_dev)) { dev_err(dev, "dpaa_mac_dev_get() failed\n"); err = PTR_ERR(mac_dev); - goto mac_probe_failed; + goto free_netdev; } /* If fsl_fm_max_frm is set to a higher value than the all-common 1500, @@ -2735,7 +2735,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) dpaa_bps[i] = dpaa_bp_alloc(dev); if (IS_ERR(dpaa_bps[i])) - return PTR_ERR(dpaa_bps[i]); + goto free_dpaa_bps; /* the raw size of the buffers used for reception */ dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM); /* avoid runtime computations by keeping the usable size here */ @@ -2743,11 +2743,8 @@ static int dpaa_eth_probe(struct platform_device *pdev) dpaa_bps[i]->dev = dev; err = dpaa_bp_alloc_pool(dpaa_bps[i]); - if (err < 0) { - dpaa_bps_free(priv); - priv->dpaa_bps[i] = NULL; - goto bp_create_failed; - } + if (err < 0) + goto free_dpaa_bps; priv->dpaa_bps[i] = dpaa_bps[i]; } @@ -2758,7 +2755,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) err = dpaa_alloc_all_fqs(dev, &priv->dpaa_fq_list, &port_fqs); if (err < 0) { dev_err(dev, "dpaa_alloc_all_fqs() failed\n"); - goto fq_probe_failed; + goto free_dpaa_bps; } priv->mac_dev = mac_dev; @@ -2767,7 +2764,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) if (channel < 0) { dev_err(dev, "dpaa_get_channel() failed\n"); err = channel; - goto get_channel_failed; + goto free_dpaa_bps; } priv->channel = (u16)channel; @@ -2787,20 +2784,20 @@ static int dpaa_eth_probe(struct platform_device *pdev) err = dpaa_eth_cgr_init(priv); if (err < 0) { dev_err(dev, "Error initializing CGR\n"); - goto tx_cgr_init_failed; + goto free_dpaa_bps; } err = dpaa_ingress_cgr_init(priv); if (err < 0) { dev_err(dev, "Error initializing ingress CGR\n"); - goto rx_cgr_init_failed; + goto delete_egress_cgr; } /* Add the FQs to the interface, and make them active */ list_for_each_entry_safe(dpaa_fq, tmp, &priv->dpaa_fq_list, list) { err = dpaa_fq_init(dpaa_fq, false); if (err < 0) - goto fq_alloc_failed; + goto free_dpaa_fqs; } 
priv->tx_headroom = dpaa_get_headroom(&priv->buf_layout[TX]); @@ -2810,7 +2807,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) err = dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs, &priv->buf_layout[0], dev); if (err) - goto init_ports_failed; + goto free_dpaa_fqs; /* Rx traffic distribution based on keygen hashing defaults to on */ priv->keygen_in_use = true; @@ -2819,7 +2816,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) if (!priv->percpu_priv) { dev_err(dev, "devm_alloc_percpu() failed\n"); err = -ENOMEM; - goto alloc_percpu_failed; + goto free_dpaa_fqs; } for_each_possible_cpu(i) { percpu_priv = per_cpu_ptr(priv->percpu_priv, i); @@ -2832,11 +2829,11 @@ static int dpaa_eth_probe(struct platform_device *pdev) /* Initialize NAPI */ err = dpaa_napi_add(net_dev); if (err < 0) - goto napi_add_failed; + goto delete_dpaa_napi; err = dpaa_netdev_init(net_dev, &dpaa_ops, tx_timeout); if (err < 0) - goto netdev_init_failed; + goto delete_dpaa_napi; dpaa_eth_sysfs_init(&net_dev->dev); @@ -2845,31 +2842,21 @@ static int dpaa_eth_probe(struct platform_device *pdev) return 0; -netdev_init_failed: -napi_add_failed: +delete_dpaa_napi: dpaa_napi_del(net_dev); -alloc_percpu_failed: -init_ports_failed: +free_dpaa_fqs: dpaa_fq_free(dev, &priv->dpaa_fq_list); -fq_alloc_failed: qman_delete_cgr_safe(&priv->ingress_cgr); qman_release_cgrid(priv->ingress_cgr.cgrid); -rx_cgr_init_failed: +delete_egress_cgr: qman_delete_cgr_safe(&priv->cgr_data.cgr); qman_release_cgrid(priv->cgr_data.cgr.cgrid); -tx_cgr_init_failed: -get_channel_failed: +free_dpaa_bps: dpaa_bps_free(priv); -bp_create_failed: -fq_probe_failed: -mac_probe_failed: +free_netdev: dev_set_drvdata(dev, NULL); free_netdev(net_dev); -alloc_etherdev_mq_failed: - for (i = 0; i < DPAA_BPS_NUM && dpaa_bps[i]; i++) { - if (atomic_read(&dpaa_bps[i]->refs) == 0) - devm_kfree(dev, dpaa_bps[i]); - } + return err; } From c69fde72bf03d7aa2c8d8ab158cc55835a2c0026 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Oct 2017 21:36:09 +0300 Subject: [PATCH 1139/2177] fsl/fman: add dpaa in module names This change just renames the FMan driver modules, using a common prefix for the DPAA FMan and DPAA Ethernet drivers. Besides making the names more aligned, this allows writing udev rules that match on either driver name, if needed, using the fsl_dpaa_* prefix. The change of netdev dev required for the DSA probing makes the previous rules written using this prefix fail, this change makes them work again, ensuring backwards compatibility for their users. Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fman/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile index 2c38119b172c..4ae524a352a2 100644 --- a/drivers/net/ethernet/freescale/fman/Makefile +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -1,9 +1,9 @@ subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman -obj-$(CONFIG_FSL_FMAN) += fsl_fman.o -obj-$(CONFIG_FSL_FMAN) += fsl_fman_port.o -obj-$(CONFIG_FSL_FMAN) += fsl_mac.o +obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_fman.o +obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_fman_port.o +obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_mac.o -fsl_fman-objs := fman_muram.o fman.o fman_sp.o fman_keygen.o -fsl_fman_port-objs := fman_port.o -fsl_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o +fsl_dpaa_fman-objs := fman_muram.o fman.o fman_sp.o fman_keygen.o +fsl_dpaa_fman_port-objs := fman_port.o +fsl_dpaa_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o From f1851a69b1f4008a7d29c6e446b3da13ed13b7da Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Oct 2017 21:36:10 +0300 Subject: [PATCH 1140/2177] dpaa_eth: remove obsolete comment Comment is no longer valid for a long time now. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index c6b97a1b6e43..a8d0be824149 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2449,10 +2449,6 @@ static int dpaa_phy_init(struct net_device *net_dev) /* Remove any features not supported by the controller */ phy_dev->supported &= mac_dev->if_support; - - /* Enable the symmetric and asymmetric PAUSE frame advertisements, - * as most of the PHY drivers do not enable them by default. - */ phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause); phy_dev->advertising = phy_dev->supported; From 86e58cce939cefd806105922e4a0840e9a04a88f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 16 Oct 2017 15:11:22 -0500 Subject: [PATCH 1141/2177] decnet: af_decnet: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d4c9a8bbad3e..518cea17b811 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -630,10 +630,12 @@ static void dn_destroy_sock(struct sock *sk) goto disc_reject; case DN_RUN: scp->state = DN_DI; + /* fall through */ case DN_DI: case DN_DR: disc_reject: dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); + /* fall through */ case DN_NC: case DN_NR: case DN_RJ: @@ -647,6 +649,7 @@ disc_reject: break; default: printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); + /* fall through */ case DN_O: dn_stop_slow_timer(sk); From fcfd6dfab97006d44c7db5d6c908eac383af6649 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 16 Oct 2017 15:48:55 -0500 Subject: [PATCH 1142/2177] ipv4: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. 
Notice that in some cases I placed the "fall through" comment on its own line, which is what GCC is expecting to find. Addresses-Coverity-ID: 115108 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 3 ++- net/ipv4/arp.c | 1 + net/ipv4/devinet.c | 1 + net/ipv4/ipmr.c | 1 + net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 3 ++- net/ipv4/tcp_input.c | 2 ++ net/ipv4/tcp_ipv4.c | 3 ++- 7 files changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 43a1bbed7a42..ce4aa827be05 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -827,6 +827,7 @@ int inet_shutdown(struct socket *sock, int how) err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for POLLHUP, even on eg. unconnected UDP sockets -- RR */ + /* fall through */ default: sk->sk_shutdown |= how; if (sk->sk_prot->shutdown) @@ -840,7 +841,7 @@ int inet_shutdown(struct socket *sock, int how) case TCP_LISTEN: if (!(how & RCV_SHUTDOWN)) break; - /* Fall through */ + /* fall through */ case TCP_SYN_SENT: err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7c45b8896709..a8d7c5a9fb05 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1180,6 +1180,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSARP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; + /* fall through */ case SIOCGARP: err = copy_from_user(&r, arg, sizeof(struct arpreq)); if (err) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6d9b072d903b..e1e2ec0525e6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1522,6 +1522,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (inetdev_valid_mtu(dev->mtu)) break; /* disable IP when MTU is not enough */ + /* fall through */ case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b3ee01b0551b..40a43ad294cb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1528,6 +1528,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; + /* fall through */ case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) { diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index a0f37b208268..0443ca4120b0 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -276,7 +276,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, else return NF_ACCEPT; } - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + /* Only ICMPs can be IP_CT_IS_REPLY: */ + /* fall through */ case IP_CT_NEW: /* Seen it before? This can happen for loopback, retrans, * or local packets. 
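/* Illustration only -- not part of this patch: a self-contained example of
 * the annotation style GCC's -Wimplicit-fallthrough accepts. The comment has
 * to sit on its own line, directly above the next case label, with nothing
 * but whitespace after it; otherwise the warning still triggers. */
#include <stdio.h>

static int flags_for(char c)
{
	int flags = 0;

	switch (c) {
	case 'a':
		flags |= 1;
		/* fall through */
	case 'b':
		flags |= 2;
		break;
	default:
		flags = -1;
	}
	return flags;
}

int main(void)
{
	/* prints "3 2 -1": 'a' falls through into the 'b' case */
	printf("%d %d %d\n", flags_for('a'), flags_for('b'), flags_for('x'));
	return 0;
}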
diff --git a/net/ipv4/tcp_input.c index d0682ce2a5d6..b2390bfdc68f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2885,6 +2885,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, (*ack_flag & FLAG_LOST_RETRANS))) return; /* Change state if cwnd is undone or retransmits are lost */ + /* fall through */ default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -6044,6 +6045,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_LAST_ACK: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; + /* fall through */ case TCP_FIN_WAIT1: case TCP_FIN_WAIT2: /* RFC 793 says to queue data in these states, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5418ecf03b78..ecee4ddb24c5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1779,8 +1779,9 @@ do_time_wait: refcounted = false; goto process; } - /* Fall through to ACK */ } + /* to ACK */ + /* fall through */ case TCP_TW_ACK: tcp_v4_timewait_ack(sk, skb); break; From fb6ff75e18937a20dbec1eb47b5f893f38eabae4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 16 Oct 2017 14:24:02 -0700 Subject: [PATCH 1143/2177] tcp: Use pI6c in tcp tracepoint The compact form for IPv6 addresses is more user friendly than the full version. For example: compact: 2001:db8:1::1 full: 2001:0db8:0001:0000:0000:0000:0000:0001 Update the tcp tracepoint to show the compact form. Signed-off-by: David Ahern Acked-by: Cong Wang Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 3d1cbd072b7e..1ffab6d96e94 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -57,7 +57,7 @@ TRACE_EVENT(tcp_retransmit_skb, } ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6 daddrv6=%pI6", + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6) ); From 275757e6bae15a8621130907a78096afd1e15d2c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 16 Oct 2017 16:36:52 -0500 Subject: [PATCH 1144/2177] ipv6: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in some cases I placed the "fall through" comment on its own line, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S.
Miller --- net/ipv6/ah6.c | 1 + net/ipv6/exthdrs.c | 1 + net/ipv6/icmp.c | 6 ++---- net/ipv6/ip6_fib.c | 4 ++++ net/ipv6/ip6_tunnel.c | 1 + net/ipv6/ip6mr.c | 1 + net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 3 ++- net/ipv6/raw.c | 4 ++++ net/ipv6/tcp_ipv6.c | 3 ++- net/ipv6/xfrm6_policy.c | 1 + 10 files changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7802b72196f3..37bb33fbc742 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -271,6 +271,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) case NEXTHDR_DEST: if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); + /* fall through */ case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { net_dbg_ratelimited("overrun %sopts\n", diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 95516138e861..7835dea930b4 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -89,6 +89,7 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) */ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) break; + /* fall through */ case 2: /* send ICMP PARM PROB regardless and drop packet */ icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); return false; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4e52d52a6752..6ae5dd3f4d0d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -864,10 +864,8 @@ static int icmpv6_rcv(struct sk_buff *skb) goto discard_it; hdr = icmp6_hdr(skb); - /* - * Drop through to notify - */ - + /* to notify */ + /* fall through */ case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 548af48212fc..1ada9672d198 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1780,6 +1780,7 @@ static int fib6_walk_continue(struct fib6_walker *w) } w->state = FWS_L; #endif + /* fall through */ case FWS_L: left = rcu_dereference_protected(fn->left, 1); if (left) { @@ -1788,6 +1789,7 @@ static int fib6_walk_continue(struct fib6_walker *w) continue; } w->state = FWS_R; + /* fall through */ case FWS_R: right = rcu_dereference_protected(fn->right, 1); if (right) { @@ -1797,6 +1799,7 @@ static int fib6_walk_continue(struct fib6_walker *w) } w->state = FWS_C; w->leaf = rcu_dereference_protected(fn->leaf, 1); + /* fall through */ case FWS_C: if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; @@ -1815,6 +1818,7 @@ static int fib6_walk_continue(struct fib6_walker *w) } skip: w->state = FWS_U; + /* fall through */ case FWS_U: if (fn == w->root) return 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 825548680b1e..4212879ff35e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -593,6 +593,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case NDISC_REDIRECT: rel_type = ICMP_REDIRECT; rel_code = ICMP_REDIR_HOST; + /* fall through */ default: return 0; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f5500f5444e9..59fad81e5f7a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1722,6 +1722,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_ADD_MFC: case MRT6_DEL_MFC: parent = -1; + /* fall through */ case MRT6_ADD_MFC_PROXY: case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 46d6dba50698..1d2fb9267d6f 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -290,7 +290,8 @@ nf_nat_ipv6_fn(void *priv, struct 
sk_buff *skb, else return NF_ACCEPT; } - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + /* Only ICMPs can be IP_CT_IS_REPLY: */ + /* fall through */ case IP_CT_NEW: /* Seen it before? This can happen for loopback, retrans, * or local packets. diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e4462b0ff801..761a473a07c5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1055,6 +1055,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; + /* fall through */ default: return ipv6_setsockopt(sk, level, optname, optval, optlen); } @@ -1077,6 +1078,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; + /* fall through */ default: return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -1138,6 +1140,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; + /* fall through */ default: return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -1160,6 +1163,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; + /* fall through */ default: return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 64d94afa427f..ae83615b7f6d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1577,8 +1577,9 @@ do_time_wait: refcounted = false; goto process; } - /* Fall through to ACK */ } + /* to ACK */ + /* fall through */ case TCP_TW_ACK: tcp_v6_timewait_ack(sk, skb); break; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 11d1314ab6c5..4ed9f8cc3b6a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -152,6 +152,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) switch (nexthdr) { case NEXTHDR_FRAGMENT: onlyproto = 1; + /* fall through */ case NEXTHDR_ROUTING: case NEXTHDR_HOP: case NEXTHDR_DEST: From d4f4da3e13ae7a405fa1da11ff4070588e700445 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 16 Oct 2017 16:53:16 -0500 Subject: [PATCH 1145/2177] net: ipx: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/ipx/af_ipx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index ac598ec90589..d21a9d128d3e 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1867,6 +1867,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; + /* fall through */ case SIOCGIFADDR: rc = ipxitf_ioctl(cmd, argp); break; From 386fd5da401dc6c4b0ab6a54d333609876b699fe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 16 Oct 2017 15:32:07 -0700 Subject: [PATCH 1146/2177] tcp: Check daddr_cache before use in tracepoint Running perf in one window to capture tcp_retransmit_skb tracepoint: $ perf record -e tcp:tcp_retransmit_skb -a And causing a retransmission on an active TCP session (e.g., dropping packets in the receiver, changing MTU on the interface to 500 and back to 1500) triggers a panic: [ 58.543144] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 58.545300] IP: perf_trace_tcp_retransmit_skb+0xd0/0x145 [ 58.546770] PGD 0 P4D 0 [ 58.547472] Oops: 0000 [#1] SMP [ 58.548328] Modules linked in: vrf [ 58.549262] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.14.0-rc4+ #26 [ 58.551004] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 58.554560] task: ffffffff81a0e540 task.stack: ffffffff81a00000 [ 58.555817] RIP: 0010:perf_trace_tcp_retransmit_skb+0xd0/0x145 [ 58.557137] RSP: 0018:ffff88003fc03d68 EFLAGS: 00010282 [ 58.558292] RAX: 0000000000000000 RBX: ffffe8ffffc0ec80 RCX: ffff880038543098 [ 58.559850] RDX: 0400000000000000 RSI: ffff88003fc03d70 RDI: ffff88003fc14b68 [ 58.561099] RBP: ffff88003fc03da8 R08: 0000000000000000 R09: ffffea0000d3224a [ 58.562005] R10: ffff88003fc03db8 R11: 0000000000000010 R12: ffff8800385428c0 [ 58.562930] R13: ffffe8ffffc0e478 R14: ffffffff81a93a40 R15: ffff88003d4f0c00 [ 58.563845] FS: 0000000000000000(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 [ 58.564873] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 58.565613] CR2: 0000000000000008 CR3: 000000003d68f004 CR4: 00000000000606f0 [ 58.566538] Call Trace: [ 58.566865] [ 58.567140] __tcp_retransmit_skb+0x4ab/0x4c6 [ 58.567704] ? tcp_set_ca_state+0x22/0x3f [ 58.568231] tcp_retransmit_skb+0x14/0xa3 [ 58.568754] tcp_retransmit_timer+0x472/0x5e3 [ 58.569324] ? tcp_write_timer_handler+0x1e9/0x1e9 [ 58.569946] tcp_write_timer_handler+0x95/0x1e9 [ 58.570548] tcp_write_timer+0x2a/0x58 Check that daddr_cache is non-NULL before de-referencing. Fixes: e086101b150a ("tcp: add a tracepoint for tcp retransmission") Signed-off-by: David Ahern Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1ffab6d96e94..f51c130f1e0f 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -27,7 +27,6 @@ TRACE_EVENT(tcp_retransmit_skb, ), TP_fast_assign( - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct in6_addr *pin6; __be32 *p32; @@ -44,11 +43,12 @@ TRACE_EVENT(tcp_retransmit_skb, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; - if (np) { + /* IPv6 socket ? 
*/ + if (inet6_sk(sk)) { pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = np->saddr; + *pin6 = sk->sk_v6_rcv_saddr; pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = *(np->daddr_cache); + *pin6 = sk->sk_v6_daddr; } else { pin6 = (struct in6_addr *)__entry->saddr_v6; ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); From 7de16e3a35578f4f5accc6f5f23970310483d0a2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:53 -0700 Subject: [PATCH 1147/2177] bpf: split verifier and program ops struct bpf_verifier_ops contains both verifier ops and operations used later during program's lifetime (test_run). Split the runtime ops into a different structure. BPF_PROG_TYPE() will now append ## _prog_ops or ## _verifier_ops to the names. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 15 ++++++++----- include/linux/bpf_types.h | 28 ++++++++++++------------ kernel/bpf/syscall.c | 16 +++++++++++--- kernel/bpf/verifier.c | 12 +++++------ kernel/trace/bpf_trace.c | 15 ++++++++++--- net/core/filter.c | 45 +++++++++++++++++++++++++++++++-------- 6 files changed, 91 insertions(+), 40 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6d4dd844828a..e1fba5504ca5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -157,6 +157,11 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +struct bpf_prog_ops { + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +}; + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -172,8 +177,6 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); - int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); }; struct bpf_prog_aux { @@ -184,7 +187,8 @@ struct bpf_prog_aux { u32 id; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; - const struct bpf_verifier_ops *ops; + const struct bpf_prog_ops *ops; + const struct bpf_verifier_ops *vops; struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; @@ -279,8 +283,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -#define BPF_PROG_TYPE(_id, _ops) \ - extern const struct bpf_verifier_ops _ops; +#define BPF_PROG_TYPE(_id, _name) \ + extern const struct bpf_prog_ops _name ## _prog_ops; \ + extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ extern const struct bpf_map_ops _ops; #include diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 814c1081a4a9..36418ad43245 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -1,22 +1,22 @@ /* internal file - do not include directly */ #ifdef CONFIG_NET -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) 
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) #endif #ifdef CONFIG_BPF_EVENTS -BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 54fba06942f5..444902b5a30d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -739,9 +739,18 @@ err_put: return err; } -static const struct bpf_verifier_ops * const bpf_prog_types[] = { -#define BPF_PROG_TYPE(_id, _ops) \ - [_id] = &_ops, +static const struct bpf_prog_ops * const bpf_prog_types[] = { +#define BPF_PROG_TYPE(_id, _name) \ + [_id] = & _name ## _prog_ops, +#define BPF_MAP_TYPE(_id, _ops) +#include +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE +}; + +static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { +#define BPF_PROG_TYPE(_id, _name) \ + [_id] = & _name ## _verifier_ops, #define BPF_MAP_TYPE(_id, _ops) #include #undef BPF_PROG_TYPE @@ -754,6 +763,7 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) return -EINVAL; prog->aux->ops = bpf_prog_types[type]; + prog->aux->vops = bpf_verifier_ops[type]; prog->type = type; return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e4d5136725a2..38e24d69fc95 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -856,8 +856,8 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, *reg_type = info.reg_type; return 0; } - } else if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t, &info)) { + } else if (env->prog->aux->vops->is_valid_access && + env->prog->aux->vops->is_valid_access(off, size, t, &info)) { /* A non zero info.ctx_field_size indicates that this field is a * candidate for later verifier transformation to load the whole * field and then apply a mask when accessed with a narrower @@ -1565,8 +1565,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return -EINVAL; } - if (env->prog->aux->ops->get_func_proto) - fn = env->prog->aux->ops->get_func_proto(func_id); + if (env->prog->aux->vops->get_func_proto) + fn = env->prog->aux->vops->get_func_proto(func_id); if (!fn) { verbose(env, "unknown func %s#%d\n", func_id_name(func_id), @@ -4035,7 +4035,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of */ static int convert_ctx_accesses(struct bpf_verifier_env *env) { - const struct bpf_verifier_ops *ops = env->prog->aux->ops; + const struct bpf_verifier_ops *ops = env->prog->aux->vops; int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = 
env->prog->len; struct bpf_insn insn_buf[16], *insn; @@ -4236,7 +4236,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn = new_prog->insnsi + i + delta; } patch_call_imm: - fn = prog->aux->ops->get_func_proto(insn->imm); + fn = prog->aux->vops->get_func_proto(insn->imm); /* all functions that have prototype and verifier allowed * programs to call them, must be real in-kernel functions */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 04ea5314f2bc..3126da2f468a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -561,11 +561,14 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type return true; } -const struct bpf_verifier_ops kprobe_prog_ops = { +const struct bpf_verifier_ops kprobe_verifier_ops = { .get_func_proto = kprobe_prog_func_proto, .is_valid_access = kprobe_prog_is_valid_access, }; +const struct bpf_prog_ops kprobe_prog_ops = { +}; + BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, u64, flags, void *, data, u64, size) { @@ -667,11 +670,14 @@ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type return true; } -const struct bpf_verifier_ops tracepoint_prog_ops = { +const struct bpf_verifier_ops tracepoint_verifier_ops = { .get_func_proto = tp_prog_func_proto, .is_valid_access = tp_prog_is_valid_access, }; +const struct bpf_prog_ops tracepoint_prog_ops = { +}; + static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { @@ -727,8 +733,11 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -const struct bpf_verifier_ops perf_event_prog_ops = { +const struct bpf_verifier_ops perf_event_verifier_ops = { .get_func_proto = tp_prog_func_proto, .is_valid_access = pe_prog_is_valid_access, .convert_ctx_access = pe_prog_convert_ctx_access, }; + +const struct bpf_prog_ops perf_event_prog_ops = { +}; diff --git a/net/core/filter.c b/net/core/filter.c index 4d88e0665c41..1dd3034f846f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4395,68 +4395,95 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -const struct bpf_verifier_ops sk_filter_prog_ops = { +const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; -const struct bpf_verifier_ops tc_cls_act_prog_ops = { +const struct bpf_prog_ops sk_filter_prog_ops = { +}; + +const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops xdp_prog_ops = { +const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, +}; + +const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; -const struct bpf_verifier_ops cg_skb_prog_ops = { +const struct bpf_verifier_ops cg_skb_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; 
-const struct bpf_verifier_ops lwt_inout_prog_ops = { +const struct bpf_verifier_ops lwt_inout_verifier_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops lwt_inout_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_xmit_prog_ops = { +const struct bpf_verifier_ops lwt_xmit_verifier_ops = { .get_func_proto = lwt_xmit_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops lwt_xmit_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops cg_sock_prog_ops = { +const struct bpf_verifier_ops cg_sock_verifier_ops = { .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; -const struct bpf_verifier_ops sock_ops_prog_ops = { +const struct bpf_prog_ops cg_sock_prog_ops = { +}; + +const struct bpf_verifier_ops sock_ops_verifier_ops = { .get_func_proto = sock_ops_func_proto, .is_valid_access = sock_ops_is_valid_access, .convert_ctx_access = sock_ops_convert_ctx_access, }; -const struct bpf_verifier_ops sk_skb_prog_ops = { +const struct bpf_prog_ops sock_ops_prog_ops = { +}; + +const struct bpf_verifier_ops sk_skb_verifier_ops = { .get_func_proto = sk_skb_func_proto, .is_valid_access = sk_skb_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = sk_skb_prologue, }; +const struct bpf_prog_ops sk_skb_prog_ops = { +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; From 00176a34d9e27ab1e77db75fe13abc005cffe0ca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:54 -0700 Subject: [PATCH 1148/2177] bpf: remove the verifier ops from program structure Since the verifier ops don't have to be associated with the program for its entire lifetime we can move it to verifier's struct bpf_verifier_env. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 1 - include/linux/bpf_verifier.h | 1 + kernel/bpf/syscall.c | 10 ---------- kernel/bpf/verifier.c | 23 +++++++++++++++++------ 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1fba5504ca5..cf91977e8719 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -188,7 +188,6 @@ struct bpf_prog_aux { struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; const struct bpf_prog_ops *ops; - const struct bpf_verifier_ops *vops; struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f00ef751c1c5..feeaea93d959 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -141,6 +141,7 @@ struct bpf_ext_analyzer_ops { */ struct bpf_verifier_env { struct bpf_prog *prog; /* eBPF program being verified */ + const struct bpf_verifier_ops *ops; struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 444902b5a30d..0e893cac6795 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -748,22 +748,12 @@ static const struct bpf_prog_ops * const bpf_prog_types[] = { #undef BPF_MAP_TYPE }; -static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { -#define BPF_PROG_TYPE(_id, _name) \ - [_id] = & _name ## _verifier_ops, -#define BPF_MAP_TYPE(_id, _ops) -#include -#undef BPF_PROG_TYPE -#undef BPF_MAP_TYPE -}; - static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) { if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type]) return -EINVAL; prog->aux->ops = bpf_prog_types[type]; - prog->aux->vops = bpf_verifier_ops[type]; prog->type = type; return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 38e24d69fc95..3b6e2c550e96 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23,6 +23,15 @@ #include "disasm.h" +static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { +#define BPF_PROG_TYPE(_id, _name) \ + [_id] = & _name ## _verifier_ops, +#define BPF_MAP_TYPE(_id, _ops) +#include +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE +}; + /* bpf_check() is a static code analyzer that walks eBPF program * instruction by instruction and updates register/stack state. * All paths of conditional branches are analyzed until 'bpf_exit' insn. 
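/* Illustrative sketch only -- not from this patch: how the BPF_PROG_TYPE()
 * X-macro above pastes the short type name into a verifier-ops identifier.
 * The names below are stand-ins, not real kernel symbols. */
struct demo_ops { int dummy; };

static const struct demo_ops xdp_demo_ops = { 0 };

#define DEMO_PROG_TYPE(_id, _name) [_id] = & _name ## _demo_ops,

static const struct demo_ops *const demo_table[] = {
	DEMO_PROG_TYPE(1, xdp)	/* expands to: [1] = &xdp_demo_ops, */
};
#undef DEMO_PROG_TYPE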
@@ -856,8 +865,8 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, *reg_type = info.reg_type; return 0; } - } else if (env->prog->aux->vops->is_valid_access && - env->prog->aux->vops->is_valid_access(off, size, t, &info)) { + } else if (env->ops->is_valid_access && + env->ops->is_valid_access(off, size, t, &info)) { /* A non zero info.ctx_field_size indicates that this field is a * candidate for later verifier transformation to load the whole * field and then apply a mask when accessed with a narrower @@ -1565,8 +1574,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return -EINVAL; } - if (env->prog->aux->vops->get_func_proto) - fn = env->prog->aux->vops->get_func_proto(func_id); + if (env->ops->get_func_proto) + fn = env->ops->get_func_proto(func_id); if (!fn) { verbose(env, "unknown func %s#%d\n", func_id_name(func_id), @@ -4035,7 +4044,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of */ static int convert_ctx_accesses(struct bpf_verifier_env *env) { - const struct bpf_verifier_ops *ops = env->prog->aux->vops; + const struct bpf_verifier_ops *ops = env->ops; int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; @@ -4236,7 +4245,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn = new_prog->insnsi + i + delta; } patch_call_imm: - fn = prog->aux->vops->get_func_proto(insn->imm); + fn = env->ops->get_func_proto(insn->imm); /* all functions that have prototype and verifier allowed * programs to call them, must be real in-kernel functions */ @@ -4294,6 +4303,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!env->insn_aux_data) goto err_free_env; env->prog = *prog; + env->ops = bpf_verifier_ops[env->prog->type]; /* grab the mutex to protect few globals used by verifier */ mutex_lock(&bpf_verifier_lock); @@ -4406,6 +4416,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, if (!env->insn_aux_data) goto err_free_env; env->prog = prog; + env->ops = bpf_verifier_ops[env->prog->type]; env->analyzer_ops = ops; env->analyzer_priv = priv; From 4f9218aaf8a463f76cac40aa08d859d065f8cc9e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:55 -0700 Subject: [PATCH 1149/2177] bpf: move knowledge about post-translation offsets out of verifier Use the fact that verifier ops are now separate from program ops to define a separate set of callbacks for verification of already translated programs. Since we expect the analyzer ops to be defined only for a small subset of all program types initialize their array by hand (don't use linux/bpf_types.h). Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 3 +++ kernel/bpf/verifier.c | 55 +++++++++++++------------------------------ net/core/filter.c | 40 +++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cf91977e8719..d67ccdc0099f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -291,6 +291,9 @@ DECLARE_PER_CPU(int, bpf_prog_active); #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; +extern const struct bpf_verifier_ops xdp_analyzer_ops; + struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3b6e2c550e96..545b8c45a578 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -822,36 +822,6 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, return err; } -static bool analyzer_is_valid_access(struct bpf_verifier_env *env, int off, - struct bpf_insn_access_aux *info) -{ - switch (env->prog->type) { - case BPF_PROG_TYPE_XDP: - switch (off) { - case offsetof(struct xdp_buff, data): - info->reg_type = PTR_TO_PACKET; - return true; - case offsetof(struct xdp_buff, data_end): - info->reg_type = PTR_TO_PACKET_END; - return true; - } - return false; - case BPF_PROG_TYPE_SCHED_CLS: - switch (off) { - case offsetof(struct sk_buff, data): - info->reg_type = PTR_TO_PACKET; - return true; - case offsetof(struct sk_buff, cb) + - offsetof(struct bpf_skb_data_end, data_end): - info->reg_type = PTR_TO_PACKET_END; - return true; - } - return false; - default: - return false; - } -} - /* check access to 'struct bpf_context' fields. Supports fixed offsets only */ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) @@ -860,13 +830,8 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, .reg_type = *reg_type, }; - if (env->analyzer_ops) { - if (analyzer_is_valid_access(env, off, &info)) { - *reg_type = info.reg_type; - return 0; - } - } else if (env->ops->is_valid_access && - env->ops->is_valid_access(off, size, t, &info)) { + if (env->ops->is_valid_access && + env->ops->is_valid_access(off, size, t, &info)) { /* A non zero info.ctx_field_size indicates that this field is a * candidate for later verifier transformation to load the whole * field and then apply a mask when accessed with a narrower @@ -874,9 +839,12 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, * will only allow for whole field access and rejects any other * type of narrower access. 
*/ - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; *reg_type = info.reg_type; + if (env->analyzer_ops) + return 0; + + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) env->prog->aux->max_ctx_offset = off + size; @@ -4400,12 +4368,21 @@ err_free_env: return ret; } +static const struct bpf_verifier_ops * const bpf_analyzer_ops[] = { + [BPF_PROG_TYPE_XDP] = &xdp_analyzer_ops, + [BPF_PROG_TYPE_SCHED_CLS] = &tc_cls_act_analyzer_ops, +}; + int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, void *priv) { struct bpf_verifier_env *env; int ret; + if (prog->type >= ARRAY_SIZE(bpf_analyzer_ops) || + !bpf_analyzer_ops[prog->type]) + return -EOPNOTSUPP; + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; @@ -4416,7 +4393,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, if (!env->insn_aux_data) goto err_free_env; env->prog = prog; - env->ops = bpf_verifier_ops[env->prog->type]; + env->ops = bpf_analyzer_ops[env->prog->type]; env->analyzer_ops = ops; env->analyzer_priv = priv; diff --git a/net/core/filter.c b/net/core/filter.c index 1dd3034f846f..7373a08fbef7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3732,6 +3732,23 @@ static bool tc_cls_act_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, info); } +static bool +tc_cls_act_is_valid_access_analyzer(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case offsetof(struct sk_buff, data): + info->reg_type = PTR_TO_PACKET; + return true; + case offsetof(struct sk_buff, cb) + + offsetof(struct bpf_skb_data_end, data_end): + info->reg_type = PTR_TO_PACKET_END; + return true; + } + return false; +} + static bool __is_valid_xdp_access(int off, int size) { if (off < 0 || off >= sizeof(struct xdp_md)) @@ -3766,6 +3783,21 @@ static bool xdp_is_valid_access(int off, int size, return __is_valid_xdp_access(off, size); } +static bool xdp_is_valid_access_analyzer(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case offsetof(struct xdp_buff, data): + info->reg_type = PTR_TO_PACKET; + return true; + case offsetof(struct xdp_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + return true; + } + return false; +} + void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; @@ -4411,6 +4443,10 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .gen_prologue = tc_cls_act_prologue, }; +const struct bpf_verifier_ops tc_cls_act_analyzer_ops = { + .is_valid_access = tc_cls_act_is_valid_access_analyzer, +}; + const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; @@ -4421,6 +4457,10 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .convert_ctx_access = xdp_convert_ctx_access, }; +const struct bpf_verifier_ops xdp_analyzer_ops = { + .is_valid_access = xdp_is_valid_access_analyzer, +}; + const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; From 29d1b33a2e0a3288374102b004a15cb278a2303e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:56 -0700 Subject: [PATCH 1150/2177] bpf: allow access to skb->len from offloads Since we are now doing strict checking of what offloads may access, make sure skb->len is on that list. 
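For context, here is a minimal sketch (not taken from this patch) of the kind of cls_bpf program a hardware offload could be asked to verify, where skb->len is the only context field read; the SEC() convention and the direct-action TC return codes are the usual ones of that era and are assumed here rather than quoted from the kernel tree:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>

#ifndef SEC
#define SEC(name) __attribute__((section(name), used))
#endif

SEC("classifier")
int drop_jumbo(struct __sk_buff *skb)
{
	/* Only skb->len is accessed, which the analyzer callbacks permit. */
	return skb->len > 1500 ? TC_ACT_SHOT : TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";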
Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 7373a08fbef7..09e011f20291 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3738,6 +3738,8 @@ tc_cls_act_is_valid_access_analyzer(int off, int size, struct bpf_insn_access_aux *info) { switch (off) { + case offsetof(struct sk_buff, len): + return true; case offsetof(struct sk_buff, data): info->reg_type = PTR_TO_PACKET; return true; From b9f1f1ce866c28e3d9b86202441b220244754a69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Oct 2017 19:38:35 -0700 Subject: [PATCH 1151/2177] tcp: fix tcp_xmit_retransmit_queue() after rbtree introduction I tried hard to avoid a call to rb_first() (via tcp_rtx_queue_head) in tcp_xmit_retransmit_queue(). But this was probably too bold. Quoting Yuchung: We might miss re-arming the RTO if tp->retransmit_skb_hint is not NULL. This can happen when RACK marks the first packet lost again and resets tp->retransmit_skb_hint, for example (tcp_rack_mark_skb_lost()). Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") Signed-off-by: Eric Dumazet Reported-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6c74f2a39778..53dc1267c85e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2921,7 +2921,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) void tcp_xmit_retransmit_queue(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb, *rtx_head = NULL, *hole = NULL; + struct sk_buff *skb, *rtx_head, *hole = NULL; struct tcp_sock *tp = tcp_sk(sk); u32 max_segs; int mib_idx; @@ -2929,11 +2929,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (!tp->packets_out) return; - skb = tp->retransmit_skb_hint; - if (!skb) { - rtx_head = tcp_rtx_queue_head(sk); - skb = rtx_head; - } + rtx_head = tcp_rtx_queue_head(sk); + skb = tp->retransmit_skb_hint ?: rtx_head; max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); skb_rbtree_walk_from(skb) { __u8 sacked; From 56fd2b2ca467f22f940282bf2c1ca4e1cb8f4c67 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 16 Oct 2017 22:44:44 -0400 Subject: [PATCH 1152/2177] macvlan/macvtap: Add support for L2 forwarding offloads with macvtap This patch reverts earlier commit b13ba1b83f52 ("macvlan: forbid L2 fowarding offload for macvtap"). The reason for reverting is that the original patch no longer fixes what it previously did, as the underlying structure has changed for macvtap. Specifically, macvtap originally pulled packets directly off of the lowerdev. However, in commit 6acf54f1cf0a ("macvtap: Add support of packet capture on macvtap device.") that code was changed and instead macvtap would listen directly on the macvtap device itself instead of the lower device. As such, the L2 forwarding offload should now be able to provide a performance advantage of skipping the checks on the lower dev while not introducing any sort of regression. Signed-off-by: Alexander Duyck Signed-off-by: David S.
Miller --- drivers/net/macvlan.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 1e1df54c5d31..a178c5efd33e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -598,8 +598,6 @@ static const struct header_ops macvlan_hard_header_ops = { .cache_update = eth_header_cache_update, }; -static struct rtnl_link_ops macvlan_link_ops; - static int macvlan_open(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); @@ -615,8 +613,7 @@ static int macvlan_open(struct net_device *dev) goto hash_add; } - if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD && - dev->rtnl_link_ops == &macvlan_link_ops) { + if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) { vlan->fwd_priv = lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev); From 30d240dfa2e88f7941f72fac9a256358f7d55ad8 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 17 Oct 2017 14:51:30 +0800 Subject: [PATCH 1153/2177] net: hns3: Add mqprio hardware offload support in hns3 driver When using tc qdisc, dcb_ops->setup_tc is used to tell hclge_dcb module to do the tm related setup. Only TC_MQPRIO_MODE_CHANNEL offload mode is supported. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + .../hisilicon/hns3/hns3pf/hclge_dcb.c | 47 +++++++++++++++- .../hisilicon/hns3/hns3pf/hclge_main.h | 1 + .../hisilicon/hns3/hns3pf/hns3_enet.c | 56 +++++++++++++------ 4 files changed, 86 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 575f50df340c..3acd8db0a794 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -381,6 +381,7 @@ struct hnae3_dcb_ops { u8 (*setdcbx)(struct hnae3_handle *, u8); int (*map_update)(struct hnae3_handle *); + int (*setup_tc)(struct hnae3_handle *, u8, u8 *); }; struct hnae3_ae_algo { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 1b30a6f966d8..5018d6633133 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -178,7 +178,8 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) u8 num_tc = 0; int ret; - if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || + hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) return -EINVAL; ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed); @@ -228,7 +229,8 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) struct hclge_dev *hdev = vport->back; u8 i, j, pfc_map, *prio_tc; - if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || + hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) return -EINVAL; prio_tc = hdev->tm_info.prio_tc; @@ -257,6 +259,9 @@ static u8 hclge_getdcbx(struct hnae3_handle *h) struct hclge_vport *vport = hclge_get_vport(h); struct hclge_dev *hdev = vport->back; + if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + return 0; + return hdev->dcbx_cap; } @@ -276,6 +281,43 @@ static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode) return 0; } +/* Set up TC for hardware offloaded mqprio in channel mode */ +static int hclge_setup_tc(struct hnae3_handle *h, u8 tc, u8 *prio_tc) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + int ret; + + if (hdev->flag & 
HCLGE_FLAG_DCB_ENABLE) + return -EINVAL; + + if (tc > hdev->tc_max) { + dev_err(&hdev->pdev->dev, + "setup tc failed, tc(%u) > tc_max(%u)\n", + tc, hdev->tc_max); + return -EINVAL; + } + + hclge_tm_schd_info_update(hdev, tc); + + ret = hclge_tm_prio_tc_info_update(hdev, prio_tc); + if (ret) + return ret; + + ret = hclge_tm_init_hw(hdev); + if (ret) + return ret; + + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + + if (tc > 1) + hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE; + else + hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE; + + return 0; +} + static const struct hnae3_dcb_ops hns3_dcb_ops = { .ieee_getets = hclge_ieee_getets, .ieee_setets = hclge_ieee_setets, @@ -284,6 +326,7 @@ static const struct hnae3_dcb_ops hns3_dcb_ops = { .getdcbx = hclge_getdcbx, .setdcbx = hclge_setdcbx, .map_update = hclge_map_update, + .setup_tc = hclge_setup_tc, }; void hclge_dcb_ops_set(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index a7c018c7b0ec..bca4430bb7e7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -470,6 +470,7 @@ struct hclge_dev { #define HCLGE_FLAG_MAIN 0x00000004 #define HCLGE_FLAG_DCB_CAPABLE 0x00000008 #define HCLGE_FLAG_DCB_ENABLE 0x00000010 +#define HCLGE_FLAG_MQPRIO_ENABLE 0x00000020 u32 flag; u32 pkt_buf_size; /* Total pf buf size for tx/rx */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index ba550c1b5b01..8fa4e658b273 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "hnae3.h" @@ -1186,53 +1187,74 @@ static void hns3_nic_udp_tunnel_del(struct net_device *netdev, } } -static int hns3_setup_tc(struct net_device *netdev, u8 tc) +static int hns3_setup_tc(struct net_device *netdev, void *type_data) { + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; struct hnae3_handle *h = hns3_get_handle(netdev); struct hnae3_knic_private_info *kinfo = &h->kinfo; + u8 *prio_tc = mqprio_qopt->qopt.prio_tc_map; + u8 tc = mqprio_qopt->qopt.num_tc; + u16 mode = mqprio_qopt->mode; + u8 hw = mqprio_qopt->qopt.hw; + bool if_running; unsigned int i; int ret; + if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS && + mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0))) + return -EOPNOTSUPP; + if (tc > HNAE3_MAX_TC) return -EINVAL; - if (kinfo->num_tc == tc) - return 0; - if (!netdev) return -EINVAL; - if (!tc) { - netdev_reset_tc(netdev); - return 0; + if_running = netif_running(netdev); + if (if_running) { + hns3_nic_net_stop(netdev); + msleep(100); } - /* Set num_tc for netdev */ - ret = netdev_set_num_tc(netdev, tc); + ret = (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ? 
+ kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP; if (ret) - return ret; + goto out; + + if (tc <= 1) { + netdev_reset_tc(netdev); + } else { + ret = netdev_set_num_tc(netdev, tc); + if (ret) + goto out; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + if (!kinfo->tc_info[i].enable) + continue; - /* Set per TC queues for the VSI */ - for (i = 0; i < HNAE3_MAX_TC; i++) { - if (kinfo->tc_info[i].enable) netdev_set_tc_queue(netdev, kinfo->tc_info[i].tc, kinfo->tc_info[i].tqp_count, kinfo->tc_info[i].tqp_offset); + } } - return 0; + ret = hns3_nic_set_real_num_queue(netdev); + +out: + if (if_running) + hns3_nic_net_open(netdev); + + return ret; } static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { - struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) return -EOPNOTSUPP; - return hns3_setup_tc(dev, mqprio->num_tc); + return hns3_setup_tc(dev, type_data); } static int hns3_vlan_rx_add_vid(struct net_device *netdev, From f436baf326ae62aecffbee8572f8bc75394dbaa3 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 17 Oct 2017 10:23:25 +0300 Subject: [PATCH 1154/2177] qed: Fix iWARP out of order flow Out of order flow is not working for iWARP. This patch got cut out from initial series that added out of order support for iWARP. Make out of order code common for iWARP and iSCSI. Add new configuration option CONFIG_QED_OOO. Set by qedr and qedi Kconfigs. Fixes: d1abfd0b4ee2 ("qed: Add iWARP out of order support") Signed-off-by: Michal Kalderon Signed-off-by: Manish Rangankar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/infiniband/hw/qedr/Kconfig | 1 + drivers/net/ethernet/qlogic/Kconfig | 3 +++ drivers/net/ethernet/qlogic/qed/Makefile | 3 ++- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 7 ++++++- drivers/net/ethernet/qlogic/qed/qed_ooo.c | 16 +++++++++++++--- drivers/net/ethernet/qlogic/qed/qed_ooo.h | 2 +- drivers/scsi/qedi/Kconfig | 1 + 7 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig index 6c9f3923e838..60e867d80b88 100644 --- a/drivers/infiniband/hw/qedr/Kconfig +++ b/drivers/infiniband/hw/qedr/Kconfig @@ -2,6 +2,7 @@ config INFINIBAND_QEDR tristate "QLogic RoCE driver" depends on 64BIT && QEDE select QED_LL2 + select QED_OOO select QED_RDMA ---help--- This driver provides low-level InfiniBand over Ethernet diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index c2e24afbaeb2..26ddf092e3ec 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -117,4 +117,7 @@ config QED_ISCSI config QED_FCOE bool +config QED_OOO + bool + endif # NET_VENDOR_QLOGIC diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index 82dd47068e18..c3c599950574 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -6,5 +6,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o qed-$(CONFIG_QED_LL2) += qed_ll2.o qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o -qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o +qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed-$(CONFIG_QED_FCOE) += qed_fcoe.o +qed-$(CONFIG_QED_OOO) += qed_ooo.o diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index b2b1f87864ef..409041eab189 100644 --- 
a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1410,13 +1410,18 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn) INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_free_list); spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock); - return qed_iwarp_prealloc_ep(p_hwfn, true); + rc = qed_iwarp_prealloc_ep(p_hwfn, true); + if (rc) + return rc; + + return qed_ooo_alloc(p_hwfn); } void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn) { struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + qed_ooo_free(p_hwfn); qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1); kfree(iwarp_info->mpa_bufs); kfree(iwarp_info->partial_fpdus); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c index 000636530111..6172354b451c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c @@ -103,18 +103,28 @@ int qed_ooo_alloc(struct qed_hwfn *p_hwfn) { u16 max_num_archipelagos = 0, cid_base; struct qed_ooo_info *p_ooo_info; + enum protocol_type proto; u16 max_num_isles = 0; u32 i; - if (p_hwfn->hw_info.personality != QED_PCI_ISCSI) { + switch (p_hwfn->hw_info.personality) { + case QED_PCI_ISCSI: + proto = PROTOCOLID_ISCSI; + break; + case QED_PCI_ETH_RDMA: + case QED_PCI_ETH_IWARP: + proto = PROTOCOLID_IWARP; + break; + default: DP_NOTICE(p_hwfn, "Failed to allocate qed_ooo_info: unknown personality\n"); return -EINVAL; } - max_num_archipelagos = p_hwfn->pf_params.iscsi_pf_params.num_cons; + max_num_archipelagos = (u16)qed_cxt_get_proto_cid_count(p_hwfn, proto, + NULL); max_num_isles = QED_MAX_NUM_ISLES + max_num_archipelagos; - cid_base = (u16)qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ISCSI); + cid_base = (u16)qed_cxt_get_proto_cid_start(p_hwfn, proto); if (!max_num_archipelagos) { DP_NOTICE(p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.h b/drivers/net/ethernet/qlogic/qed/qed_ooo.h index e8ed40b848f5..49c4e75b15b1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ooo.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.h @@ -83,7 +83,7 @@ struct qed_ooo_info { u16 cid_base; }; -#if IS_ENABLED(CONFIG_QED_ISCSI) +#if IS_ENABLED(CONFIG_QED_OOO) void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info, struct ooo_opaque *p_cqe); diff --git a/drivers/scsi/qedi/Kconfig b/drivers/scsi/qedi/Kconfig index 2ff753ce6e27..d1db92d24889 100644 --- a/drivers/scsi/qedi/Kconfig +++ b/drivers/scsi/qedi/Kconfig @@ -4,6 +4,7 @@ config QEDI depends on QED select SCSI_ISCSI_ATTRS select QED_LL2 + select QED_OOO select QED_ISCSI select ISCSI_BOOT_SYSFS ---help--- From c057c68303b13387d9849a33557057ef428c2d3c Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:30:34 +0000 Subject: [PATCH 1155/2177] MAINTAINERS: change ENA driver maintainers email domain ENA driver was developed by developers from Annapurna Labs. Annapurna Labs was acquired by Amazon and the company's domain (@annapurnalabs.com) will become deprecated soon. Update the email addresses of the maintainers to the alternative amazon emails (@amazon.com) Signed-off-by: Netanel Belgazal Signed-off-by: David S. 
Miller --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3944f1626911..f218fe1e43fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -700,9 +700,9 @@ F: include/linux/altera_uart.h F: include/linux/altera_jtaguart.h AMAZON ETHERNET DRIVERS -M: Netanel Belgazal -R: Saeed Bishara -R: Zorik Machulsky +M: Netanel Belgazal +R: Saeed Bishara +R: Zorik Machulsky L: netdev@vger.kernel.org S: Supported F: Documentation/networking/ena.txt From 88aef2f51c9e3640268aca04a256b8f26cf6bdff Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:56 +0000 Subject: [PATCH 1156/2177] net: ena: improve ENA driver boot time. The ena admin commands timeout is in resolutions of 100ms. Therefore, When the driver works in polling mode, it sleeps for 100ms each time. The overall boot time of the ENA driver is ~1.5 sec. To reduce the boot time, This change modifies the granularity of the sleeps to 5ms. This change improves the boot time to 220ms. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index ded29af648c9..bf2de5298005 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -63,6 +63,8 @@ #define ENA_REGS_ADMIN_INTR_MASK 1 +#define ENA_POLL_MS 5 + /*****************************************************************************/ /*****************************************************************************/ /*****************************************************************************/ @@ -533,7 +535,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c goto err; } - msleep(100); + msleep(ENA_POLL_MS); } if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { @@ -746,6 +748,9 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, { u32 val, i; + /* Convert timeout from resolution of 100ms to ENA_POLL_MS */ + timeout = (timeout * 100) / ENA_POLL_MS; + for (i = 0; i < timeout; i++) { val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); @@ -758,8 +763,7 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, exp_state) return 0; - /* The resolution of the timeout is 100ms */ - msleep(100); + msleep(ENA_POLL_MS); } return -ETIME; @@ -1253,7 +1257,7 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) spin_lock_irqsave(&admin_queue->q_lock, flags); while (atomic_read(&admin_queue->outstanding_cmds) != 0) { spin_unlock_irqrestore(&admin_queue->q_lock, flags); - msleep(20); + msleep(ENA_POLL_MS); spin_lock_irqsave(&admin_queue->q_lock, flags); } spin_unlock_irqrestore(&admin_queue->q_lock, flags); From dbeaf1e3c24f0e87c8047fc8dcbd8163ab82c8e7 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:57 +0000 Subject: [PATCH 1157/2177] net: ena: remove legacy suspend suspend/resume support Remove ena_device_io_suspend/resume() methods Those methods were intend to be used by the device to trigger suspend/resume but eventually it was dropped. Signed-off-by: Netanel Belgazal Signed-off-by: David S. 
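A quick illustration of the timeout rescaling in the boot-time patch above: wait_for_reset_state() keeps its timeout argument in units of 100 ms, so the patch converts it into 5 ms (ENA_POLL_MS) polling iterations before the loop; the total wait is unchanged, only the sleep granularity gets finer. A minimal userspace sketch (the helper name below is made up for illustration):

#include <stdio.h>

#define ENA_POLL_MS 5

/* rescale a timeout given in 100 ms units into 5 ms polling iterations */
static unsigned int to_poll_iterations(unsigned int timeout_100ms)
{
        return (timeout_100ms * 100) / ENA_POLL_MS;
}

int main(void)
{
        /* e.g. a timeout of 10 (one second) becomes 200 sleeps of 5 ms */
        printf("%u\n", to_poll_iterations(10));
        return 0;
}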
Miller --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 2 - drivers/net/ethernet/amazon/ena/ena_netdev.c | 50 ------------------- drivers/net/ethernet/amazon/ena/ena_netdev.h | 4 -- 3 files changed, 56 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index b1212debc2e1..27b8f4618103 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -60,8 +60,6 @@ struct ena_stats { static const struct ena_stats ena_stats_global_strings[] = { ENA_STAT_GLOBAL_ENTRY(tx_timeout), - ENA_STAT_GLOBAL_ENTRY(io_suspend), - ENA_STAT_GLOBAL_ENTRY(io_resume), ENA_STAT_GLOBAL_ENTRY(wd_expired), ENA_STAT_GLOBAL_ENTRY(interface_up), ENA_STAT_GLOBAL_ENTRY(interface_down), diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index f7dc22f65d9f..6d8e1f1325e9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2361,38 +2361,6 @@ static const struct net_device_ops ena_netdev_ops = { #endif /* CONFIG_NET_POLL_CONTROLLER */ }; -static void ena_device_io_suspend(struct work_struct *work) -{ - struct ena_adapter *adapter = - container_of(work, struct ena_adapter, suspend_io_task); - struct net_device *netdev = adapter->netdev; - - /* ena_napi_disable_all disables only the IO handling. - * We are still subject to AENQ keep alive watchdog. - */ - u64_stats_update_begin(&adapter->syncp); - adapter->dev_stats.io_suspend++; - u64_stats_update_begin(&adapter->syncp); - ena_napi_disable_all(adapter); - netif_tx_lock(netdev); - netif_device_detach(netdev); - netif_tx_unlock(netdev); -} - -static void ena_device_io_resume(struct work_struct *work) -{ - struct ena_adapter *adapter = - container_of(work, struct ena_adapter, resume_io_task); - struct net_device *netdev = adapter->netdev; - - u64_stats_update_begin(&adapter->syncp); - adapter->dev_stats.io_resume++; - u64_stats_update_end(&adapter->syncp); - - netif_device_attach(netdev); - ena_napi_enable_all(adapter); -} - static int ena_device_validate_params(struct ena_adapter *adapter, struct ena_com_dev_get_features_ctx *get_feat_ctx) { @@ -3275,8 +3243,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_rss; } - INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend); - INIT_WORK(&adapter->resume_io_task, ena_device_io_resume); INIT_WORK(&adapter->reset_task, ena_fw_reset_device); adapter->last_keep_alive_jiffies = jiffies; @@ -3310,8 +3276,6 @@ err_free_msix: err_worker_destroy: ena_com_destroy_interrupt_moderation(ena_dev); del_timer(&adapter->timer_service); - cancel_work_sync(&adapter->suspend_io_task); - cancel_work_sync(&adapter->resume_io_task); err_netdev_destroy: free_netdev(netdev); err_device_destroy: @@ -3381,10 +3345,6 @@ static void ena_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->reset_task); - cancel_work_sync(&adapter->suspend_io_task); - - cancel_work_sync(&adapter->resume_io_task); - /* Reset the device only if the device is running. */ if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) ena_com_dev_reset(ena_dev, adapter->reset_reason); @@ -3503,16 +3463,6 @@ static void ena_notification(void *adapter_data, ENA_ADMIN_NOTIFICATION); switch (aenq_e->aenq_common_desc.syndrom) { - case ENA_ADMIN_SUSPEND: - /* Suspend just the IO queues. - * We deliberately don't suspend admin so the timer and - * the keep_alive events should remain. 
- */ - queue_work(ena_wq, &adapter->suspend_io_task); - break; - case ENA_ADMIN_RESUME: - queue_work(ena_wq, &adapter->resume_io_task); - break; case ENA_ADMIN_UPDATE_HINTS: hints = (struct ena_admin_ena_hw_hints *) (&aenq_e->inline_data_w4); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 29bb5704260b..fb0c98bb9290 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -257,8 +257,6 @@ struct ena_ring { struct ena_stats_dev { u64 tx_timeout; - u64 io_suspend; - u64 io_resume; u64 wd_expired; u64 interface_up; u64 interface_down; @@ -326,8 +324,6 @@ struct ena_adapter { /* timer service */ struct work_struct reset_task; - struct work_struct suspend_io_task; - struct work_struct resume_io_task; struct timer_list timer_service; bool wd_state; From 8c5c7abdeb2dfe4b4b28a48702c2cfa83fac15c9 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:58 +0000 Subject: [PATCH 1158/2177] net: ena: add power management ops to the ENA driver Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 2 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 119 +++++++++++++----- drivers/net/ethernet/amazon/ena/ena_netdev.h | 3 + 3 files changed, 96 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 27b8f4618103..897e638a014a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -60,6 +60,8 @@ struct ena_stats { static const struct ena_stats ena_stats_global_strings[] = { ENA_STAT_GLOBAL_ENTRY(tx_timeout), + ENA_STAT_GLOBAL_ENTRY(suspend), + ENA_STAT_GLOBAL_ENTRY(resume), ENA_STAT_GLOBAL_ENTRY(wd_expired), ENA_STAT_GLOBAL_ENTRY(interface_up), ENA_STAT_GLOBAL_ENTRY(interface_down), diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6d8e1f1325e9..adc3957df3ab 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2529,38 +2529,31 @@ err_disable_msix: return rc; } -static void ena_fw_reset_device(struct work_struct *work) +static void ena_destroy_device(struct ena_adapter *adapter) { - struct ena_com_dev_get_features_ctx get_feat_ctx; - struct ena_adapter *adapter = - container_of(work, struct ena_adapter, reset_task); struct net_device *netdev = adapter->netdev; struct ena_com_dev *ena_dev = adapter->ena_dev; - struct pci_dev *pdev = adapter->pdev; - bool dev_up, wd_state; - int rc; - - if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { - dev_err(&pdev->dev, - "device reset schedule while reset bit is off\n"); - return; - } + bool dev_up; netif_carrier_off(netdev); del_timer_sync(&adapter->timer_service); - rtnl_lock(); - dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + adapter->dev_up_before_reset = dev_up; + ena_com_set_admin_running_state(ena_dev, false); - /* After calling ena_close the tx queues and the napi - * are disabled so no one can interfere or touch the - * data structures - */ ena_close(netdev); + /* Before releasing the ENA resources, a device reset is required. + * (to prevent the device from accessing them). + * In case the reset flag is set and the device is up, ena_close + * already perform the reset, so it can be skipped. 
+ */ + if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) + ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); + ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); @@ -2574,9 +2567,17 @@ static void ena_fw_reset_device(struct work_struct *work) ena_com_mmio_reg_read_request_destroy(ena_dev); adapter->reset_reason = ENA_REGS_RESET_NORMAL; - clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); - /* Finish with the destroy part. Start the init part */ + clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); +} + +static int ena_restore_device(struct ena_adapter *adapter) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct pci_dev *pdev = adapter->pdev; + bool wd_state; + int rc; rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); if (rc) { @@ -2598,7 +2599,7 @@ static void ena_fw_reset_device(struct work_struct *work) goto err_device_destroy; } /* If the interface was up before the reset bring it up */ - if (dev_up) { + if (adapter->dev_up_before_reset) { rc = ena_up(adapter); if (rc) { dev_err(&pdev->dev, "Failed to create I/O queues\n"); @@ -2607,24 +2608,38 @@ static void ena_fw_reset_device(struct work_struct *work) } mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); - - rtnl_unlock(); - dev_err(&pdev->dev, "Device reset completed successfully\n"); - return; + return rc; err_disable_msix: ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); err_device_destroy: ena_com_admin_destroy(ena_dev); err: - rtnl_unlock(); - clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); dev_err(&pdev->dev, "Reset attempt failed. Can not reset the device\n"); + + return rc; +} + +static void ena_fw_reset_device(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, reset_task); + struct pci_dev *pdev = adapter->pdev; + + if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + dev_err(&pdev->dev, + "device reset schedule while reset bit is off\n"); + return; + } + rtnl_lock(); + ena_destroy_device(adapter); + ena_restore_device(adapter); + rtnl_unlock(); } static int check_missing_comp_in_queue(struct ena_adapter *adapter, @@ -3378,11 +3393,59 @@ static void ena_remove(struct pci_dev *pdev) vfree(ena_dev); } +#ifdef CONFIG_PM +/* ena_suspend - PM suspend callback + * @pdev: PCI device information struct + * @state:power state + */ +static int ena_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct ena_adapter *adapter = pci_get_drvdata(pdev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.suspend++; + u64_stats_update_end(&adapter->syncp); + + rtnl_lock(); + if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + dev_err(&pdev->dev, + "ignoring device reset request as the device is being suspended\n"); + clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + ena_destroy_device(adapter); + rtnl_unlock(); + return 0; +} + +/* ena_resume - PM resume callback + * @pdev: PCI device information struct + * + */ +static int ena_resume(struct pci_dev *pdev) +{ + struct ena_adapter *adapter = pci_get_drvdata(pdev); + int rc; + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.resume++; + u64_stats_update_end(&adapter->syncp); + + rtnl_lock(); + rc = ena_restore_device(adapter); + rtnl_unlock(); + return rc; +} +#endif + static struct pci_driver ena_pci_driver = { .name = DRV_MODULE_NAME, .id_table = ena_pci_tbl, .probe = ena_probe, .remove = ena_remove, +#ifdef CONFIG_PM + 
.suspend = ena_suspend, + .resume = ena_resume, +#endif .sriov_configure = ena_sriov_configure, }; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index fb0c98bb9290..7b07bfbf0fe4 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -257,6 +257,8 @@ struct ena_ring { struct ena_stats_dev { u64 tx_timeout; + u64 suspend; + u64 resume; u64 wd_expired; u64 interface_up; u64 interface_down; @@ -327,6 +329,7 @@ struct ena_adapter { struct timer_list timer_service; bool wd_state; + bool dev_up_before_reset; unsigned long last_keep_alive_jiffies; struct u64_stats_sync syncp; From 11095fdb712b1aaa7ffd6ccd86d0c45d29732eec Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:59 +0000 Subject: [PATCH 1159/2177] net: ena: add statistics for missed tx packets Add a new statistic to ethtool stats that show the number of packets without transmit acknowledgement from ENA device. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 30 +++++++++++-------- drivers/net/ethernet/amazon/ena/ena_netdev.h | 1 + 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 897e638a014a..0d97311a1b26 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -81,6 +81,7 @@ static const struct ena_stats ena_stats_tx_strings[] = { ENA_STAT_TX_ENTRY(doorbells), ENA_STAT_TX_ENTRY(prepare_ctx_err), ENA_STAT_TX_ENTRY(bad_req_id), + ENA_STAT_TX_ENTRY(missed_tx), }; static const struct ena_stats ena_stats_rx_strings[] = { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index adc3957df3ab..47bdbf9bdefb 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2648,7 +2648,7 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter, struct ena_tx_buffer *tx_buf; unsigned long last_jiffies; u32 missed_tx = 0; - int i; + int i, rc = 0; for (i = 0; i < tx_ring->ring_size; i++) { tx_buf = &tx_ring->tx_buffer_info[i]; @@ -2662,21 +2662,25 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter, tx_buf->print_once = 1; missed_tx++; - - if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) { - netif_err(adapter, tx_err, adapter->netdev, - "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n", - missed_tx, - adapter->missing_tx_completion_threshold); - adapter->reset_reason = - ENA_REGS_RESET_MISS_TX_CMPL; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); - return -EIO; - } } } - return 0; + if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completions is above the threshold (%d > %d). 
Reset the device\n", + missed_tx, + adapter->missing_tx_completion_threshold); + adapter->reset_reason = + ENA_REGS_RESET_MISS_TX_CMPL; + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + rc = -EIO; + } + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.missed_tx = missed_tx; + u64_stats_update_end(&tx_ring->syncp); + + return rc; } static void check_for_missing_tx_completions(struct ena_adapter *adapter) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 7b07bfbf0fe4..eafc5774dd49 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -185,6 +185,7 @@ struct ena_stats_tx { u64 tx_poll; u64 doorbells; u64 bad_req_id; + u64 missed_tx; }; struct ena_stats_rx { From 58894d5219c5d3fdd72d4166f007df5004817e84 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:34:00 +0000 Subject: [PATCH 1160/2177] net: ena: add new admin define for future support of IPv6 RSS Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 305dc1996b4e..4532e574ebcd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -627,6 +627,12 @@ enum ena_admin_flow_hash_proto { ENA_ADMIN_RSS_NOT_IP = 7, + /* TCPv6 with extension header */ + ENA_ADMIN_RSS_TCP6_EX = 8, + + /* IPv6 with extension header */ + ENA_ADMIN_RSS_IP6_EX = 9, + ENA_ADMIN_RSS_PROTO_NUM = 16, }; From 046b30718928d616f7decc79c272fdd4f42cc61d Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:34:01 +0000 Subject: [PATCH 1161/2177] net: ena: increase ena driver version to 1.3.0 Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index eafc5774dd49..ed8bd0a579c4 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -44,7 +44,7 @@ #include "ena_eth_com.h" #define DRV_MODULE_VER_MAJOR 1 -#define DRV_MODULE_VER_MINOR 2 +#define DRV_MODULE_VER_MINOR 3 #define DRV_MODULE_VER_SUBMINOR 0 #define DRV_MODULE_NAME "ena" @@ -52,7 +52,7 @@ #define DRV_MODULE_VERSION \ __stringify(DRV_MODULE_VER_MAJOR) "." \ __stringify(DRV_MODULE_VER_MINOR) "." \ - __stringify(DRV_MODULE_VER_SUBMINOR) "k" + __stringify(DRV_MODULE_VER_SUBMINOR) "K" #endif #define DEVICE_NAME "Elastic Network Adapter (ENA)" From 9a03c3d398c17eadfc5bc470c1084beb71c088f1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 17 Oct 2017 15:32:17 +0300 Subject: [PATCH 1162/2177] thunderbolt: Fix a couple right shifting to zero bugs The problematic code looks like this: res_seq = res_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK; res_seq >>= TB_XDOMAIN_SN_SHIFT; TB_XDOMAIN_SN_SHIFT is 27, and right shifting a u8 27 bits is always going to result in zero. The fix is to declare these variables as u32. Fixes: d1ff70241a27 ("thunderbolt: Add support for XDomain discovery protocol") Signed-off-by: Dan Carpenter Signed-off-by: David S. 
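To make the right-shift bug described just above (and in the tbnet patch further below) concrete: the masked header field lives entirely above bit 7, so storing it in a u8 truncates it to zero before the 27-bit shift ever runs, and the sequence number always reads back as 0. Widening the local to u32 keeps the field intact. A small standalone demonstration; the mask value here is a stand-in, not the real TB_XDOMAIN_SN_MASK:

#include <stdio.h>
#include <stdint.h>

#define SN_MASK  0x78000000u    /* illustrative mask: bits 27..30 */
#define SN_SHIFT 27

int main(void)
{
        uint32_t hdr = 0x28000005u;     /* sequence field value is 5 */

        uint8_t  bad  = hdr & SN_MASK;  /* truncated to the low 8 bits -> 0 */
        uint32_t good = hdr & SN_MASK;

        /* prints "0 5": the u8 copy has already lost the field */
        printf("%u %u\n", (unsigned int)(bad >> SN_SHIFT), good >> SN_SHIFT);
        return 0;
}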
Miller --- drivers/thunderbolt/xdomain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index 138027537d29..ff8d91189e99 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -56,7 +56,7 @@ static bool tb_xdomain_match(const struct tb_cfg_request *req, case TB_CFG_PKG_XDOMAIN_RESP: { const struct tb_xdp_header *res_hdr = pkg->buffer; const struct tb_xdp_header *req_hdr = req->request; - u8 req_seq, res_seq; + u32 req_seq, res_seq; if (pkg->frame.size < req->response_size / 4) return false; @@ -476,7 +476,7 @@ static void tb_xdp_handle_request(struct work_struct *work) struct tb_ctl *ctl = tb->ctl; const uuid_t *uuid; int ret = 0; - u8 sequence; + u32 sequence; u64 route; route = ((u64)xhdr->route_hi << 32 | xhdr->route_lo) & ~BIT_ULL(63); From fa31f0c98d64212b2b2b4a1e6d887208b6acb2d9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 17 Oct 2017 15:33:01 +0300 Subject: [PATCH 1163/2177] thunderbolt: Right shifting to zero bug in tbnet_handle_packet() There is a problem when we do: sequence = pkg->hdr.length_sn & TBIP_HDR_SN_MASK; sequence >>= TBIP_HDR_SN_SHIFT; TBIP_HDR_SN_SHIFT is 27, and right shifting a u8 27 bits is always going to result in zero. The fix is to declare these variables as u32. Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Signed-off-by: Dan Carpenter Acked-by: Yehezkel Bernat Signed-off-by: David S. Miller --- drivers/net/thunderbolt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 1a7bc0bf4598..435854688a7a 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -394,7 +394,7 @@ static int tbnet_handle_packet(const void *buf, size_t size, void *data) struct tbnet *net = data; u32 command_id; int ret = 0; - u8 sequence; + u32 sequence; u64 route; /* Make sure the packet is for us */ From 22ce97fe49b5522e0f97b7c2282ed71a1abd7410 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 17 Oct 2017 16:01:30 +0100 Subject: [PATCH 1164/2177] mqprio: fix potential null pointer dereference on opt The pointer opt has a null check however before for this check opt is dereferenced when len is initialized, hence we potentially have a null pointer deference on opt. Avoid this by checking for a null opt before dereferencing it. Detected by CoverityScan, CID#1458234 ("Dereference before null check") Fixes: 4e8b86c06269 ("mqprio: Introduce new hardware offload mode and shaper in mqprio") Signed-off-by: Colin Ian King Signed-off-by: David S. 
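The ordering problem fixed above, in miniature: nla_len() reads through its argument, so computing the length in the initializer dereferences opt before the !opt check is reached; the fix is simply to compute it after the check. A generic sketch of the pattern, not the actual qdisc code:

#include <stddef.h>

struct attr { unsigned short len; };

static int parse(const struct attr *opt)
{
        /* buggy shape:  int len = opt->len - 4;  here would dereference
         * opt before the NULL check below */
        if (!opt)
                return -1;

        /* fixed shape: compute the length only after the check */
        int len = opt->len - 4;

        return len > 0 ? len : 0;
}

int main(void)
{
        struct attr a = { .len = 12 };
        return parse(&a) == 8 ? 0 : 1;
}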
Miller --- net/sched/sch_mqprio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index cae91b4b08a6..51c2b289c69b 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -142,7 +142,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) struct nlattr *tb[TCA_MQPRIO_MAX + 1]; struct nlattr *attr; int rem; - int len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); + int len; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); @@ -164,6 +164,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (mqprio_parse_opt(dev, qopt)) return -EINVAL; + len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); if (len > 0) { err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, sizeof(*qopt)); From 010f245b9dd734adda6386c494a4ace953ea8dc4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Oct 2017 10:07:44 -0700 Subject: [PATCH 1165/2177] tun: relax check on eth_get_headlen() return value syzkaller hit the WARN() in tun_get_user(), providing skb with payload in fragments only, and nothing in skb->head GRO layer is fine with this, so relax the check. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 57e4c31fa84a..c64ec19af9b7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1737,7 +1737,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, /* Exercise flow dissector code path. */ u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); - if (headlen > skb_headlen(skb) || headlen < ETH_HLEN) { + if (unlikely(headlen > skb_headlen(skb))) { this_cpu_inc(tun->pcpu_stats->rx_dropped); napi_free_frags(&tfile->napi); mutex_unlock(&tfile->napi_mutex); From 154820563dd4621c78e03e98e70216e832422f8e Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 17 Oct 2017 12:36:54 -0500 Subject: [PATCH 1166/2177] ibmvnic: Enable scatter-gather support This patch enables scatter gather support. Since there is no HW/FW scatter-gather support at this time, the driver needs to loop through each fragment and copy it to a contiguous, pre-mapped buffer entry. Signed-off-by: Thomas Falcon Signed-off-by: David S. 
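The copy loop this patch adds (diff follows below), restated as a plain userspace gather for readability: the driver copies the skb head first and then each fragment, back to back, into one contiguous pre-mapped buffer. struct chunk and the helper name are illustrative stand-ins for the skb head/frags:

#include <string.h>
#include <stddef.h>

struct chunk {
        const void *data;
        size_t      len;
};

/* copy scattered pieces into one contiguous buffer, in order;
 * returns bytes copied, or 0 if the destination would overflow */
static size_t gather(void *dst, size_t dst_len,
                     const struct chunk *chunks, size_t n)
{
        size_t cur = 0;

        for (size_t i = 0; i < n; i++) {
                if (cur + chunks[i].len > dst_len)
                        return 0;
                memcpy((char *)dst + cur, chunks[i].data, chunks[i].len);
                cur += chunks[i].len;
        }
        return cur;
}

int main(void)
{
        char head[] = "hdr", frag[] = "payload", out[32];
        const struct chunk pieces[] = {
                { head, sizeof(head) - 1 },
                { frag, sizeof(frag) - 1 },
        };

        return gather(out, sizeof(out), pieces, 2) == 10 ? 0 : 1;
}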
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4bc14a901571..b508877397e1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1204,9 +1204,28 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) offset = index * adapter->req_mtu; dst = tx_pool->long_term_buff.buff + offset; memset(dst, 0, adapter->req_mtu); - skb_copy_from_linear_data(skb, dst, skb->len); data_dma_addr = tx_pool->long_term_buff.addr + offset; + if (skb_shinfo(skb)->nr_frags) { + int cur, i; + + /* Copy the head */ + skb_copy_from_linear_data(skb, dst, skb_headlen(skb)); + cur = skb_headlen(skb); + + /* Copy the frags */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + memcpy(dst + cur, + page_address(skb_frag_page(frag)) + + frag->page_offset, skb_frag_size(frag)); + cur += skb_frag_size(frag); + } + } else { + skb_copy_from_linear_data(skb, dst, skb->len); + } + tx_pool->consumer_index = (tx_pool->consumer_index + 1) % adapter->req_tx_entries_per_subcrq; @@ -2948,7 +2967,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; - adapter->netdev->features = NETIF_F_GSO; + adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO; if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) adapter->netdev->features |= NETIF_F_IP_CSUM; From fdb061056f57e849a05cac072a4998c7f33d797e Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 17 Oct 2017 12:36:55 -0500 Subject: [PATCH 1167/2177] ibmvnic: Enable TSO support This patch enables TSO support. It includes additional buffers reserved exclusively for large packets. Throughput is greatly increased with TSO enabled, from about 1 Gb/s to 9 Gb/s on our test systems. Signed-off-by: Thomas Falcon Signed-off-by: David S. 
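For the TSO pool this patch introduces: one long-term buffer is carved into IBMVNIC_TSO_BUFS (64) fixed slots of IBMVNIC_TSO_BUF_SZ (64 KiB), and tso_index simply walks the slots and wraps. A condensed sketch of the slot selection in ibmvnic_xmit(); the helper name is made up, the constants mirror the patch:

#include <stdio.h>
#include <stddef.h>

#define IBMVNIC_TSO_BUF_SZ 65536
#define IBMVNIC_TSO_BUFS   64

static unsigned int tso_index;

/* return the byte offset of the next TSO slot and advance the index,
 * wrapping to slot 0 after the last one */
static size_t next_tso_offset(void)
{
        size_t offset = (size_t)tso_index * IBMVNIC_TSO_BUF_SZ;

        if (++tso_index == IBMVNIC_TSO_BUFS)
                tso_index = 0;
        return offset;
}

int main(void)
{
        for (int i = 0; i < 3; i++)
                printf("%zu\n", next_tso_offset());     /* 0, 65536, 131072 */
        return 0;
}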
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 56 +++++++++++++++++++++++++----- drivers/net/ethernet/ibm/ibmvnic.h | 5 +++ 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b508877397e1..aedb81c230a6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -553,6 +553,10 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) if (rc) return rc; + rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb); + if (rc) + return rc; + memset(tx_pool->tx_buff, 0, adapter->req_tx_entries_per_subcrq * sizeof(struct ibmvnic_tx_buff)); @@ -562,6 +566,7 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) tx_pool->consumer_index = 0; tx_pool->producer_index = 0; + tx_pool->tso_index = 0; } return 0; @@ -581,6 +586,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter) tx_pool = &adapter->tx_pool[i]; kfree(tx_pool->tx_buff); free_long_term_buff(adapter, &tx_pool->long_term_buff); + free_long_term_buff(adapter, &tx_pool->tso_ltb); kfree(tx_pool->free_map); } @@ -625,6 +631,16 @@ static int init_tx_pools(struct net_device *netdev) return -1; } + /* alloc TSO ltb */ + if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb, + IBMVNIC_TSO_BUFS * + IBMVNIC_TSO_BUF_SZ)) { + release_tx_pools(adapter); + return -1; + } + + tx_pool->tso_index = 0; + tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(int), GFP_KERNEL); if (!tx_pool->free_map) { @@ -1201,10 +1217,21 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); index = tx_pool->free_map[tx_pool->consumer_index]; - offset = index * adapter->req_mtu; - dst = tx_pool->long_term_buff.buff + offset; - memset(dst, 0, adapter->req_mtu); - data_dma_addr = tx_pool->long_term_buff.addr + offset; + + if (skb_is_gso(skb)) { + offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ; + dst = tx_pool->tso_ltb.buff + offset; + memset(dst, 0, IBMVNIC_TSO_BUF_SZ); + data_dma_addr = tx_pool->tso_ltb.addr + offset; + tx_pool->tso_index++; + if (tx_pool->tso_index == IBMVNIC_TSO_BUFS) + tx_pool->tso_index = 0; + } else { + offset = index * adapter->req_mtu; + dst = tx_pool->long_term_buff.buff + offset; + memset(dst, 0, adapter->req_mtu); + data_dma_addr = tx_pool->long_term_buff.addr + offset; + } if (skb_shinfo(skb)->nr_frags) { int cur, i; @@ -1245,7 +1272,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.n_sge = 1; tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED; tx_crq.v1.correlator = cpu_to_be32(index); - tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); + if (skb_is_gso(skb)) + tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id); + else + tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); @@ -1270,6 +1300,11 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD; hdrs += 2; } + if (skb_is_gso(skb)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_LSO; + tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + hdrs += 2; + } /* determine if l2/3/4 headers are sent to firmware */ if ((*hdrs >> 7) & 1 && (skb->protocol == htons(ETH_P_IP) || @@ -2960,10 +2995,10 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum; 
adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum; adapter->ip_offload_ctrl.udp_ipv6_chksum = buf->udp_ipv6_chksum; + adapter->ip_offload_ctrl.large_tx_ipv4 = buf->large_tx_ipv4; + adapter->ip_offload_ctrl.large_tx_ipv6 = buf->large_tx_ipv6; - /* large_tx/rx disabled for now, additional features needed */ - adapter->ip_offload_ctrl.large_tx_ipv4 = 0; - adapter->ip_offload_ctrl.large_tx_ipv6 = 0; + /* large_rx disabled for now, additional features needed */ adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; @@ -2979,6 +3014,11 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) adapter->netdev->features |= NETIF_F_RXCSUM; + if (buf->large_tx_ipv4) + adapter->netdev->features |= NETIF_F_TSO; + if (buf->large_tx_ipv6) + adapter->netdev->features |= NETIF_F_TSO6; + memset(&crq, 0, sizeof(crq)); crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index d02257ccc377..7aa347a21e78 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -39,6 +39,9 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_TX_QUEUES 5 +#define IBMVNIC_TSO_BUF_SZ 65536 +#define IBMVNIC_TSO_BUFS 64 + struct ibmvnic_login_buffer { __be32 len; __be32 version; @@ -896,6 +899,8 @@ struct ibmvnic_tx_pool { wait_queue_head_t ibmvnic_tx_comp_q; struct task_struct *work_thread; struct ibmvnic_long_term_buff long_term_buff; + struct ibmvnic_long_term_buff tso_ltb; + int tso_index; }; struct ibmvnic_rx_buff { From aa0bf8510dac901badc6889b208fc0e7d9225924 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 17 Oct 2017 12:36:56 -0500 Subject: [PATCH 1168/2177] ibmvnic: Let users change net device features Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index aedb81c230a6..b991703319f9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3019,6 +3019,8 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) if (buf->large_tx_ipv6) adapter->netdev->features |= NETIF_F_TSO6; + adapter->netdev->hw_features |= adapter->netdev->features; + memset(&crq, 0, sizeof(crq)); crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; From b14bec89042ee6f9a43b437f8133cfcbea140f20 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Oct 2017 13:59:20 -0500 Subject: [PATCH 1169/2177] liquidio: remove unnecessary NULL check before kfree in delete_glists NULL check before freeing functions like kfree is not needed. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Felix Manlunas Signed-off-by: David S. 
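On the liquidio cleanup above: kfree(NULL) is defined to be a no-op, so the surrounding if (g) guard adds nothing and can be dropped. The same holds for free() in userspace, which is all the illustration needs (generic names):

#include <stdlib.h>

struct gather { int unused; };

static void destroy(struct gather *g)
{
        /* free(NULL), like kfree(NULL), is a no-op - no guard needed */
        free(g);
}

int main(void)
{
        destroy(NULL);  /* safe */
        return 0;
}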
Miller --- drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 2e993ce43b66..e4a112cf4f8e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -435,8 +435,7 @@ static void delete_glists(struct lio *lio) do { g = (struct octnic_gather *) list_delete_head(&lio->glist[i]); - if (g) - kfree(g); + kfree(g); } while (g); if (lio->glists_virt_base && lio->glists_virt_base[i] && From 48acc9e847ef335f7d3b62926825397c6bf4eab2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Oct 2017 14:01:45 -0500 Subject: [PATCH 1170/2177] liquidio: mark expected switch fall-through in octeon_destroy_resources In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Acked-by: Felix Manlunas Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index e4a112cf4f8e..4c3b5688529b 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -747,7 +747,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) if (lio_wait_for_oq_pkts(oct)) dev_err(&oct->pci_dev->dev, "OQ had pending packets\n"); - + /* fall through */ case OCT_DEV_INTR_SET_DONE: /* Disable interrupts */ oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR); From be070c77ca805a4f06f135599415195078f0ed68 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Oct 2017 17:42:53 -0500 Subject: [PATCH 1171/2177] net: l2tp: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case I replaced the "NOBREAK" comment with a "fall through" comment, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 7135f4645d3a..f5179424eaf1 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -406,7 +406,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) goto nla_put_failure; - /* NOBREAK */ + /* fall through */ case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (np) { From 7a0947e755084b918e33242fd558e55cb443408e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 17 Oct 2017 17:16:52 -0700 Subject: [PATCH 1172/2177] dql: make dql_init return void dql_init always returned 0, and the only place that uses it in network core code didn't care about the return value anyway. Signed-off-by: Stephen Hemminger Acked-by: Hiroaki SHIMODA Signed-off-by: David S. 
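Both fall-through patches above annotate an intentional case fall-through so that -Wimplicit-fallthrough stays quiet; per the l2tp commit message, GCC looks for a comment such as /* fall through */ immediately before the next label. A minimal generic illustration, not the driver code:

static int undone;

enum stage { STAGE_SETUP, STAGE_RUN };

static void teardown(enum stage reached)
{
        switch (reached) {
        case STAGE_RUN:
                undone++;       /* undo the later stage first ... */
                /* fall through */
        case STAGE_SETUP:
                undone++;       /* ... then always undo the earlier one */
                break;
        }
}

int main(void)
{
        teardown(STAGE_RUN);
        return undone == 2 ? 0 : 1;
}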
Miller --- include/linux/dynamic_queue_limits.h | 2 +- lib/dynamic_queue_limits.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index a4be70398ce1..f69f98541953 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -98,7 +98,7 @@ void dql_completed(struct dql *dql, unsigned int count); void dql_reset(struct dql *dql); /* Initialize dql state */ -int dql_init(struct dql *dql, unsigned hold_time); +void dql_init(struct dql *dql, unsigned int hold_time); #endif /* _KERNEL_ */ diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index f346715e2255..dbe61c4c2a97 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -127,12 +127,11 @@ void dql_reset(struct dql *dql) } EXPORT_SYMBOL(dql_reset); -int dql_init(struct dql *dql, unsigned hold_time) +void dql_init(struct dql *dql, unsigned int hold_time) { dql->max_limit = DQL_MAX_LIMIT; dql->min_limit = 0; dql->slack_hold_time = hold_time; dql_reset(dql); - return 0; } EXPORT_SYMBOL(dql_init); From 87d9fa647020fb65985ec08826f6c77b9b4084af Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 18 Oct 2017 09:21:26 +0200 Subject: [PATCH 1173/2177] dt-bindings: net: sh_eth: add R-Car Gen[12] fallback compatibility strings Add fallback compatibility strings for R-Car Gen 1 and 2. In the case of Renesas R-Car hardware we know that there are generations of SoCs, f.e. Gen 1 and 2. But beyond that its not clear what the relationship between IP blocks might be. For example, I believe that r8a7790 is older than r8a7791 but that doesn't imply that the latter is a descendant of the former or vice versa. We can, however, by examining the documentation and behaviour of the hardware at run-time observe that the current driver implementation appears to be compatible with the IP blocks on SoCs within a given generation. For the above reasons and convenience when enabling new SoCs a per-generation fallback compatibility string scheme is being adopted for drivers for Renesas SoCs. Note that R-Car Gen2 and RZ/G1 have many compatible IP blocks. The approach that has been consistently taken for other IP blocks is to name common code, compatibility strings and so on after R-Car Gen2. Signed-off-by: Simon Horman Reviewed-by: Geert Uytterhoeven Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/sh_eth.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index 0115c85a2425..5172799a7f1a 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -4,7 +4,8 @@ This file provides information on what the device node for the SH EtherMAC interface contains. Required properties: -- compatible: "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC. +- compatible: Must contain one or more of the following: + "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC. "renesas,ether-r8a7743" if the device is a part of R8A7743 SoC. "renesas,ether-r8a7745" if the device is a part of R8A7745 SoC. "renesas,ether-r8a7778" if the device is a part of R8A7778 SoC. @@ -14,6 +15,14 @@ Required properties: "renesas,ether-r8a7793" if the device is a part of R8A7793 SoC. "renesas,ether-r8a7794" if the device is a part of R8A7794 SoC. 
"renesas,ether-r7s72100" if the device is a part of R7S72100 SoC. + "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device. + "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1 + device. + + When compatible with the generic version, nodes must list + the SoC-specific version corresponding to the platform + first followed by the generic version. + - reg: offset and length of (1) the E-DMAC/feLic register block (required), (2) the TSU register block (optional). - interrupts: interrupt specifier for the sole interrupt. @@ -36,7 +45,8 @@ Optional properties: Example (Lager board): ethernet@ee700000 { - compatible = "renesas,ether-r8a7790"; + compatible = "renesas,ether-r8a7790", + "renesas,rcar-gen2-ether"; reg = <0 0xee700000 0 0x400>; interrupt-parent = <&gic>; interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>; From 6c4b2f7e675cf11587182f51adcf0e129005e2f9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 18 Oct 2017 09:21:27 +0200 Subject: [PATCH 1174/2177] net: sh_eth: rename name structures as rcar_gen[12]_* Rename structures describing R-Car SoCs as rcar_gen[12]_* rather than r8a77[79]x_*. This seems a little easier on the eyes. And will make things slightly cleaner in a follow-up patch that adds fallback-compatibility strings for these SoCs. Note that R-Car Gen2 and RZ/G1 have many compatible IP blocks. The approach that has been consistently taken for other IP blocks is to name common code, compatibility strings and so on after R-Car Gen2. Also rename sh_eth_set_rate_r8a777x as sh_eth_set_rate_rcar as it it is used by the R-Car generations supported by the driver. This patch should have no run-time effect and is compile-tested only. Signed-off-by: Simon Horman Reviewed-by: Geert Uytterhoeven Acked-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index d2e88a30f57b..c9f92fc8555e 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -594,7 +594,7 @@ static struct sh_eth_cpu_data r8a7740_data = { }; /* There is CPU dependent code */ -static void sh_eth_set_rate_r8a777x(struct net_device *ndev) +static void sh_eth_set_rate_rcar(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); @@ -608,10 +608,10 @@ static void sh_eth_set_rate_r8a777x(struct net_device *ndev) } } -/* R8A7778/9 */ -static struct sh_eth_cpu_data r8a777x_data = { +/* R-Car Gen1 */ +static struct sh_eth_cpu_data rcar_gen1_data = { .set_duplex = sh_eth_set_duplex, - .set_rate = sh_eth_set_rate_r8a777x, + .set_rate = sh_eth_set_rate_rcar, .register_type = SH_ETH_REG_FAST_RCAR, @@ -635,10 +635,10 @@ static struct sh_eth_cpu_data r8a777x_data = { .hw_swap = 1, }; -/* R8A7790/1 */ -static struct sh_eth_cpu_data r8a779x_data = { +/* R-Car Gen2 and RZ/G1 */ +static struct sh_eth_cpu_data rcar_gen2_data = { .set_duplex = sh_eth_set_duplex, - .set_rate = sh_eth_set_rate_r8a777x, + .set_rate = sh_eth_set_rate_rcar, .register_type = SH_ETH_REG_FAST_RCAR, @@ -3086,14 +3086,14 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data }, - { .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data }, - { .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data }, - { .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7791", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7793", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7794", .data = &r8a779x_data }, + { .compatible = "renesas,ether-r8a7743", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7745", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7778", .data = &rcar_gen1_data }, + { .compatible = "renesas,ether-r8a7779", .data = &rcar_gen1_data }, + { .compatible = "renesas,ether-r8a7790", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7791", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7793", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data }, { .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data }, { } }; From b4804e0c71c144b673b6c53ca4acfcac6eb98704 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 18 Oct 2017 09:21:28 +0200 Subject: [PATCH 1175/2177] net: sh_eth: implement R-Car Gen[12] fallback compatibility strings Implement fallback compatibility strings for R-Car Gen 1 and 2. In the case of Renesas R-Car hardware we know that there are generations of SoCs, f.e. Gen 1 and 2. But beyond that its not clear what the relationship between IP blocks might be. For example, I believe that r8a7790 is older than r8a7791 but that doesn't imply that the latter is a descendant of the former or vice versa. 
We can, however, by examining the documentation and behaviour of the hardware at run-time observe that the current driver implementation appears to be compatible with the IP blocks on SoCs within a given generation. For the above reasons and convenience when enabling new SoCs a per-generation fallback compatibility string scheme is being adopted for drivers for Renesas SoCs. Note that R-Car Gen2 and RZ/G1 have many compatible IP blocks. The approach that has been consistently taken for other IP blocks is to name common code, compatibility strings and so on after R-Car Gen2. Signed-off-by: Simon Horman Reviewed-by: Geert Uytterhoeven Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index c9f92fc8555e..7e060aa9fbed 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3095,6 +3095,8 @@ static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,ether-r8a7793", .data = &rcar_gen2_data }, { .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data }, { .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data }, + { .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data }, + { .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data }, { } }; MODULE_DEVICE_TABLE(of, sh_eth_match_table); From c75e427d9349ec3e0059752cc784e1c301474c2d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 18 Oct 2017 10:48:25 +0300 Subject: [PATCH 1176/2177] tipc: checking for NULL instead of IS_ERR() The tipc_alloc_conn() function never returns NULL, it returns error pointers, so I have fixed the check. Fixes: 14c04493cb77 ("tipc: add ability to order and receive topology events in driver") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/tipc/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 713077536d0c..acaef80fb88c 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -504,7 +504,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, *(u32 *)&sub.usr_handle = port; con = tipc_alloc_conn(tipc_topsrv(net)); - if (!con) + if (IS_ERR(con)) return false; *conid = con->conid; From d18b4b35e310c5e30a3726309a93db8893cd3251 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Oct 2017 10:33:37 +0200 Subject: [PATCH 1177/2177] net: sched: cls_u32: use hash_ptr() for tc_u_hash After the change to the tp hash, we now get a build warning on 32-bit architectures: net/sched/cls_u32.c: In function 'tc_u_hash': net/sched/cls_u32.c:338:17: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] return hash_64((u64) tp->chain->block, U32_HASH_SHIFT); Using hash_ptr() instead of hash_64() lets us drop the cast and fixes the warning while still resulting in the same hash value. Fixes: 7fa9d974f3c2 ("net: sched: cls_u32: use block instead of q in tc_u_common") Signed-off-by: Arnd Bergmann Acked-by: Jiri Pirko Signed-off-by: David S. 
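About the warning fixed above: on a 32-bit build, (u64)tp->chain->block is a cast from a 32-bit pointer to a 64-bit integer, which is exactly what the quoted -Wpointer-to-int-cast error reports, while hash_ptr() takes the pointer as-is and hashes it at the native word width. A small illustration of the width mismatch (function names are illustrative):

#include <stdint.h>

static unsigned long as_native(const void *p)
{
        /* uintptr_t / unsigned long always match the pointer width,
         * so this compiles cleanly on 32-bit and 64-bit alike */
        return (uintptr_t)p;
}

static uint64_t as_u64(const void *p)
{
        /* on a 32-bit target this is the pointer-to-wider-integer cast
         * that triggered the build error quoted in the commit message */
        return (uint64_t)p;
}

int main(void)
{
        int x;

        return (unsigned long)as_u64(&x) == as_native(&x) ? 0 : 1;
}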
Miller --- net/sched/cls_u32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b6d46065f661..49d96b45a8ce 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -335,7 +335,7 @@ static struct hlist_head *tc_u_common_hash; static unsigned int tc_u_hash(const struct tcf_proto *tp) { - return hash_64((u64) tp->chain->block, U32_HASH_SHIFT); + return hash_ptr(tp->chain->block, U32_HASH_SHIFT); } static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) From 17c918840fb07e5819f8df03345d7f4a5e5b791c Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 18 Oct 2017 10:24:28 -0400 Subject: [PATCH 1178/2177] doc: Update VRF documentation metric Two things: 1) Update examples to show usage of metric 2) Discuss reasoning for using such a high metric. Signed-off-by: Donald Sharp Acked-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/vrf.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt index 3918dae964d4..8ff7b4c8f91b 100644 --- a/Documentation/networking/vrf.txt +++ b/Documentation/networking/vrf.txt @@ -71,7 +71,12 @@ Setup ip ru add iif vrf-blue table 10 3. Set the default route for the table (and hence default route for the VRF). - ip route add table 10 unreachable default + ip route add table 10 unreachable default metric 4278198272 + + This high metric value ensures that the default unreachable route can + be overridden by a routing protocol suite. FRRouting interprets + kernel metrics as a combined admin distance (upper byte) and priority + (lower 3 bytes). Thus the above metric translates to [255/8192]. 4. Enslave L3 interfaces to a VRF device. ip link set dev eth1 master vrf-blue @@ -256,7 +261,7 @@ older form without it. For example: $ ip route show vrf red - prohibit default + unreachable default metric 4278198272 broadcast 10.2.1.0 dev eth1 proto kernel scope link src 10.2.1.2 10.2.1.0/24 dev eth1 proto kernel scope link src 10.2.1.2 local 10.2.1.2 dev eth1 proto kernel scope host src 10.2.1.2 @@ -282,7 +287,7 @@ older form without it. ff00::/8 dev red metric 256 pref medium ff00::/8 dev eth1 metric 256 pref medium ff00::/8 dev eth2 metric 256 pref medium - + unreachable default dev lo metric 4278198272 error -101 pref medium 8. Route Lookup for a VRF @@ -331,7 +336,7 @@ function vrf_create ip link add ${VRF} type vrf table ${TBID} if [ "${VRF}" != "mgmt" ]; then - ip route add table ${TBID} unreachable default + ip route add table ${TBID} unreachable default metric 4278198272 fi ip link set dev ${VRF} up } From 890056783c60ad9d0789774af2bc10fe4f27dd9d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 08:17:29 -0700 Subject: [PATCH 1179/2177] tcp: Remove use of inet6_sk and add IPv6 checks to tracepoint 386fd5da401d ("tcp: Check daddr_cache before use in tracepoint") was the second version of the tracepoint fixup patch. This patch is the delta between v2 and v3. Specifically, remove the use of inet6_sk and check sk_family as requested by Eric and add IS_ENABLED(CONFIG_IPV6) around the use of sk_v6_rcv_saddr and sk_v6_daddr as done in sock_common (noted by Cong). Signed-off-by: David Ahern Reviewed-by: Eric Dumazet Tested-by: Song Liu Signed-off-by: David S. 
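The metric used in the VRF documentation patch above decodes exactly as that patch describes: 4278198272 is 0xff002000, so the upper byte is an admin distance of 255 and the lower three bytes a priority of 8192, i.e. FRRouting's [255/8192]. A tiny decoder showing the split:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t metric   = 4278198272u;        /* 0xff002000 */
        unsigned distance = metric >> 24;       /* upper byte: admin distance */
        uint32_t priority = metric & 0xffffff;  /* lower 3 bytes: priority */

        printf("[%u/%u]\n", distance, priority);        /* prints [255/8192] */
        return 0;
}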
Miller --- include/trace/events/tcp.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index f51c130f1e0f..c3220d914475 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -43,13 +43,15 @@ TRACE_EVENT(tcp_retransmit_skb, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; - /* IPv6 socket ? */ - if (inet6_sk(sk)) { +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { pin6 = (struct in6_addr *)__entry->saddr_v6; *pin6 = sk->sk_v6_rcv_saddr; pin6 = (struct in6_addr *)__entry->daddr_v6; *pin6 = sk->sk_v6_daddr; - } else { + } else +#endif + { pin6 = (struct in6_addr *)__entry->saddr_v6; ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); pin6 = (struct in6_addr *)__entry->daddr_v6; From bda1e229153fbdd0efd22a14c1c76a28c05d1b27 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Oct 2017 20:49:07 +0530 Subject: [PATCH 1180/2177] cxgb4: add tc flower match support for TOS Add support for matching on IP TOS. Also check on ethtype value to be either IPv4 or IPv6. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 92a311767381..647c86ae343d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -147,6 +147,19 @@ static void cxgb4_process_flow_match(struct net_device *dev, fs->mask.fport = cpu_to_be16(mask->src); } + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *key, *mask; + + key = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IP, + cls->key); + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IP, + cls->mask); + fs->val.tos = key->tos; + fs->mask.tos = mask->tos; + } + /* Match only packets coming from the ingress port where this * filter will be created. 
*/ @@ -157,16 +170,52 @@ static void cxgb4_process_flow_match(struct net_device *dev, static int cxgb4_validate_flow_match(struct net_device *dev, struct tc_cls_flower_offload *cls) { + u16 ethtype_mask = 0; + u16 ethtype_key = 0; + if (cls->dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_PORTS))) { + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_IP))) { netdev_warn(dev, "Unsupported key used: 0x%x\n", cls->dissector->used_keys); return -EOPNOTSUPP; } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->mask); + ethtype_key = ntohs(key->n_proto); + ethtype_mask = ntohs(mask->n_proto); + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) { + u16 eth_ip_type = ethtype_key & ethtype_mask; + struct flow_dissector_key_ip *mask; + + if (eth_ip_type != ETH_P_IP && eth_ip_type != ETH_P_IPV6) { + netdev_err(dev, "IP Key supported only with IPv4/v6"); + return -EINVAL; + } + + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IP, + cls->mask); + if (mask->ttl) { + netdev_warn(dev, "ttl match unsupported for offload"); + return -EOPNOTSUPP; + } + } + return 0; } From ad9af3e09cb6b201db56190d92eb3ffe469a0bc4 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Oct 2017 20:49:08 +0530 Subject: [PATCH 1181/2177] cxgb4: add tc flower match support for vlan Add support for matching on vlan tci. Construct vlan tci match param based on vlan-id and vlan-pcp values supplied by tc. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 647c86ae343d..f7554b768e9d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -160,6 +160,40 @@ static void cxgb4_process_flow_match(struct net_device *dev, fs->mask.tos = mask->tos; } + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key, *mask; + u16 vlan_tci, vlan_tci_mask; + + key = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_VLAN, + cls->key); + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_VLAN, + cls->mask); + vlan_tci = key->vlan_id | (key->vlan_priority << + VLAN_PRIO_SHIFT); + vlan_tci_mask = mask->vlan_id | (mask->vlan_priority << + VLAN_PRIO_SHIFT); + fs->val.ivlan = cpu_to_be16(vlan_tci); + fs->mask.ivlan = cpu_to_be16(vlan_tci_mask); + + /* Chelsio adapters use ivlan_vld bit to match vlan packets + * as 802.1Q. Also, when vlan tag is present in packets, + * ethtype match is used then to match on ethtype of inner + * header ie. the header following the vlan header. + * So, set the ivlan_vld based on ethtype info supplied by + * TC for vlan packets if its 802.1Q. And then reset the + * ethtype value else, hw will try to match the supplied + * ethtype value with ethtype of inner header. 
+ */ + if (fs->val.ethtype == ETH_P_8021Q) { + fs->val.ivlan_vld = 1; + fs->mask.ivlan_vld = 1; + fs->val.ethtype = 0; + fs->mask.ethtype = 0; + } + } + /* Match only packets coming from the ingress port where this * filter will be created. */ @@ -179,6 +213,7 @@ static int cxgb4_validate_flow_match(struct net_device *dev, BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IP))) { netdev_warn(dev, "Unsupported key used: 0x%x\n", cls->dissector->used_keys); From c39bff47d735e39fdbf59ad56df5581b0cf88c7c Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Oct 2017 20:49:09 +0530 Subject: [PATCH 1182/2177] cxgb4: add tc flower support for action PASS Add support for tc flower action PASS. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index f7554b768e9d..4d4f3af20496 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -263,7 +263,9 @@ static void cxgb4_process_flow_actions(struct net_device *in, tcf_exts_to_list(cls->exts, &actions); list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) { + if (is_tcf_gact_ok(a)) { + fs->action = FILTER_PASS; + } else if (is_tcf_gact_shot(a)) { fs->action = FILTER_DROP; } else if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); @@ -306,7 +308,9 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, tcf_exts_to_list(cls->exts, &actions); list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) { + if (is_tcf_gact_ok(a)) { + /* Do nothing */ + } else if (is_tcf_gact_shot(a)) { /* Do nothing */ } else if (is_tcf_mirred_egress_redirect(a)) { struct adapter *adap = netdev2adap(dev); From 27ece1f357b71c63e6e35c645b9c344835d4a129 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Oct 2017 20:49:10 +0530 Subject: [PATCH 1183/2177] cxgb4: add tc flower support for ETH-DMAC rewrite Add support for ETH-DMAC Rewrite via TC-PEDIT action. Also, add check to assert that vlan/eth-dmac rewrite actions are valid only in combination with action egress redirect. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 108 +++++++++++++++++- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 19 +++ 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 4d4f3af20496..7c8b0c65934c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -32,8 +32,9 @@ * SOFTWARE. 
*/ -#include #include +#include +#include #include #include "cxgb4.h" @@ -41,6 +42,11 @@ #define STATS_CHECK_PERIOD (HZ / 2) +struct ch_tc_pedit_fields pedits[] = { + PEDIT_FIELDS(ETH_, DMAC_31_0, 4, dmac, 0), + PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4), +}; + static struct ch_tc_flower_entry *allocate_flower_entry(void) { struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); @@ -254,6 +260,41 @@ static int cxgb4_validate_flow_match(struct net_device *dev, return 0; } +static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask, + u8 field) +{ + u32 set_val = val & ~mask; + u32 offset; + u8 size; + int i; + + for (i = 0; i < ARRAY_SIZE(pedits); i++) { + if (pedits[i].field == field) { + offset = pedits[i].offset; + size = pedits[i].size; + break; + } + } + memcpy((u8 *)fs + offset, &set_val, size); +} + +static void process_pedit_field(struct ch_filter_specification *fs, u32 val, + u32 mask, u32 offset, u8 htype) +{ + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + switch (offset) { + case PEDIT_ETH_DMAC_31_0: + fs->newdmac = 1; + offload_pedit(fs, val, mask, ETH_DMAC_31_0); + break; + case PEDIT_ETH_DMAC_47_32_SMAC_15_0: + if (~mask & PEDIT_ETH_DMAC_MASK) + offload_pedit(fs, val, mask, ETH_DMAC_47_32); + } + } +} + static void cxgb4_process_flow_actions(struct net_device *in, struct tc_cls_flower_offload *cls, struct ch_filter_specification *fs) @@ -296,6 +337,21 @@ static void cxgb4_process_flow_actions(struct net_device *in, default: break; } + } else if (is_tcf_pedit(a)) { + u32 mask, val, offset; + int nkeys, i; + u8 htype; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + mask = tcf_pedit_mask(a, i); + val = tcf_pedit_val(a, i); + offset = tcf_pedit_offset(a, i); + + process_pedit_field(fs, val, mask, offset, + htype); + } } } } @@ -304,6 +360,9 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, struct tc_cls_flower_offload *cls) { const struct tc_action *a; + bool act_redir = false; + bool act_pedit = false; + bool act_vlan = false; LIST_HEAD(actions); tcf_exts_to_list(cls->exts, &actions); @@ -335,6 +394,7 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, __func__); return -EINVAL; } + act_redir = true; } else if (is_tcf_vlan(a)) { u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); u32 vlan_action = tcf_vlan_action(a); @@ -355,11 +415,57 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, __func__); return -EOPNOTSUPP; } + act_vlan = true; + } else if (is_tcf_pedit(a)) { + u32 mask, val, offset; + u8 cmd, htype; + int nkeys, i; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + cmd = tcf_pedit_cmd(a, i); + mask = tcf_pedit_mask(a, i); + val = tcf_pedit_val(a, i); + offset = tcf_pedit_offset(a, i); + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) { + netdev_err(dev, "%s: Unsupported pedit cmd\n", + __func__); + return -EOPNOTSUPP; + } + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + switch (offset) { + case PEDIT_ETH_DMAC_31_0: + case PEDIT_ETH_DMAC_47_32_SMAC_15_0: + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return -EOPNOTSUPP; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit type\n", + __func__); + return -EOPNOTSUPP; + } + } + act_pedit = true; } else { netdev_err(dev, "%s: Unsupported action\n", __func__); return -EOPNOTSUPP; } } + + if ((act_pedit || act_vlan) && !act_redir) { + netdev_err(dev, "%s: pedit/vlan rewrite invalid without 
egress redirect\n", + __func__); + return -EINVAL; + } + return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index 604feffc752e..7bf6cfa892aa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -54,6 +54,25 @@ struct ch_tc_flower_entry { u32 filter_id; }; +enum { + ETH_DMAC_31_0, /* dmac bits 0.. 31 */ + ETH_DMAC_47_32, /* dmac bits 32..47 */ +}; + +struct ch_tc_pedit_fields { + u8 field; + u8 size; + u32 offset; +}; + +#define PEDIT_FIELDS(type, field, size, fs_field, offset) \ + { type## field, size, \ + offsetof(struct ch_filter_specification, fs_field) + (offset) } + +#define PEDIT_ETH_DMAC_MASK 0xffff +#define PEDIT_ETH_DMAC_31_0 0x0 +#define PEDIT_ETH_DMAC_47_32_SMAC_15_0 0x4 + int cxgb4_tc_flower_replace(struct net_device *dev, struct tc_cls_flower_offload *cls); int cxgb4_tc_flower_destroy(struct net_device *dev, From 3bdb376e6944134d0f4d6d65497054a54ef273c9 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Oct 2017 20:49:11 +0530 Subject: [PATCH 1184/2177] cxgb4: introduce SMT ops to prepare for SMAC rewrite support Introduce SMT operations for allocating/removing entries from SMAC table. Make TCAM filters use the SMT ops whenever SMAC rewrite is required. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 +- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 107 ++++++-- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 12 + drivers/net/ethernet/chelsio/cxgb4/smt.c | 247 ++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/smt.h | 76 ++++++ drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 44 +++- drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h | 47 ++++ 8 files changed, 519 insertions(+), 19 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/smt.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/smt.h create mode 100644 drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 70d454379996..43c86b74dfb8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ +cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \ cudbg_common.o cudbg_lib.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 4eaca05ebd3a..f7aa3516bf12 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -858,6 +858,7 @@ struct adapter { unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; + struct smt_data *smt; struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; @@ -1098,9 +1099,9 @@ struct filter_entry { u32 locked:1; /* filter is administratively locked */ u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ struct filter_ctx *ctx; /* Caller's completion hook */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + struct smt_entry *smt; /* Source Mac 
Table entry for smac */ struct net_device *dev; /* Associated net device */ u32 tid; /* This will store the actual tid */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 15361ca2857c..a8084be5e13b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -34,7 +34,9 @@ #include "cxgb4.h" #include "t4_regs.h" +#include "t4_tcb.h" #include "l2t.h" +#include "smt.h" #include "t4fw_api.h" #include "cxgb4_filter.h" @@ -332,6 +334,21 @@ int set_filter_wr(struct adapter *adapter, int fidx) } } + /* If the new filter requires loopback Source MAC rewriting then + * we need to allocate a SMT entry for the filter. + */ + if (f->fs.newsmac) { + f->smt = cxgb4_smt_alloc_switching(f->dev, f->fs.smac); + if (!f->smt) { + if (f->l2t) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + } + kfree_skb(skb); + return -ENOMEM; + } + } + fwr = __skb_put_zero(skb, sizeof(*fwr)); /* It would be nice to put most of the following in t4_hw.c but most @@ -357,7 +374,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || @@ -404,8 +420,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) fwr->lpm = htons(f->fs.mask.lport); fwr->fp = htons(f->fs.val.fport); fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. @@ -463,6 +477,9 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) if (f->l2t) cxgb4_l2t_release(f->l2t); + if (f->smt) + cxgb4_smt_release(f->smt); + /* The zeroing of the filter rule below clears the filter valid, * pending, locked flags, l2t pointer, etc. so it's all we need for * this operation. @@ -757,6 +774,62 @@ out: return ret; } +static int set_tcb_field(struct adapter *adap, struct filter_entry *f, + unsigned int ftid, u16 word, u64 mask, u64 val, + int no_reply) +{ + struct cpl_set_tcb_field *req; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid); + req->reply_ctrl = htons(REPLY_CHAN_V(0) | + QUEUENO_V(adap->sge.fw_evtq.abs_id) | + NO_REPLY_V(no_reply)); + req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(ftid)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adap, skb); + return 0; +} + +/* Set one of the t_flags bits in the TCB. + */ +static int set_tcb_tflag(struct adapter *adap, struct filter_entry *f, + unsigned int ftid, unsigned int bit_pos, + unsigned int val, int no_reply) +{ + return set_tcb_field(adap, f, ftid, TCB_T_FLAGS_W, 1ULL << bit_pos, + (unsigned long long)val << bit_pos, no_reply); +} + +static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) +{ + int err; + + /* do a set-tcb for smac-sel and CWR bit.. 
*/ + err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); + if (err) + goto smac_err; + + err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, + TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), + TCB_SMAC_SEL_V(f->smt->idx), 1); + if (!err) + return 0; + +smac_err: + dev_err(adap->pdev_dev, "filter %u smac config failed with error %u\n", + f->tid, err); + return err; +} + /* Handle a filter write/deletion reply. */ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) { @@ -795,19 +868,23 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) clear_filter(adap, f); if (ctx) ctx->result = 0; - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - if (ctx) - ctx->result = -ENOMEM; } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - if (ctx) { - ctx->result = 0; - ctx->tid = idx; + int err = 0; + + if (f->fs.newsmac) + err = configure_filter_smac(adap, f); + + if (!err) { + f->pending = 0; /* async setup completed */ + f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; + } + } else { + clear_filter(adap, f); + if (ctx) + ctx->result = err; } } else { /* Something went wrong. Issue a warning about the diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 8d97ae6039aa..796b37de464f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -77,6 +77,7 @@ #include "cxgb4_debugfs.h" #include "clip_tbl.h" #include "l2t.h" +#include "smt.h" #include "sched.h" #include "cxgb4_tc_u32.h" #include "cxgb4_tc_flower.h" @@ -563,6 +564,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_l2t_write_rpl *p = (void *)rsp; do_l2t_write_rpl(q->adap, p); + } else if (opcode == CPL_SMT_WRITE_RPL) { + const struct cpl_smt_write_rpl *p = (void *)rsp; + + do_smt_write_rpl(q->adap, p); } else if (opcode == CPL_SET_TCB_RPL) { const struct cpl_set_tcb_rpl *p = (void *)rsp; @@ -4641,6 +4646,7 @@ static void free_some_resources(struct adapter *adapter) { unsigned int i; + kvfree(adapter->smt); kvfree(adapter->l2t); t4_cleanup_sched(adapter); kvfree(adapter->tids.tid_tab); @@ -5067,6 +5073,12 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) */ cfg_queues(adapter); + adapter->smt = t4_init_smt(); + if (!adapter->smt) { + /* We tolerate a lack of SMT, giving up some functionality */ + dev_warn(&pdev->dev, "could not allocate SMT, continuing\n"); + } + adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end); if (!adapter->l2t) { /* We tolerate a lack of L2T, giving up some functionality */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c new file mode 100644 index 000000000000..7b2207a2a130 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c @@ -0,0 +1,247 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "smt.h" +#include "t4_msg.h" +#include "t4fw_api.h" +#include "t4_regs.h" +#include "t4_values.h" + +struct smt_data *t4_init_smt(void) +{ + unsigned int smt_size; + struct smt_data *s; + int i; + + smt_size = SMT_SIZE; + + s = kvzalloc(sizeof(*s) + smt_size * sizeof(struct smt_entry), + GFP_KERNEL); + if (!s) + return NULL; + s->smt_size = smt_size; + rwlock_init(&s->lock); + for (i = 0; i < s->smt_size; ++i) { + s->smtab[i].idx = i; + s->smtab[i].state = SMT_STATE_UNUSED; + memset(&s->smtab[i].src_mac, 0, ETH_ALEN); + spin_lock_init(&s->smtab[i].lock); + atomic_set(&s->smtab[i].refcnt, 0); + } + return s; +} + +static struct smt_entry *find_or_alloc_smte(struct smt_data *s, u8 *smac) +{ + struct smt_entry *first_free = NULL; + struct smt_entry *e, *end; + + for (e = &s->smtab[0], end = &s->smtab[s->smt_size]; e != end; ++e) { + if (atomic_read(&e->refcnt) == 0) { + if (!first_free) + first_free = e; + } else { + if (e->state == SMT_STATE_SWITCHING) { + /* This entry is actually in use. See if we can + * re-use it ? 
+ */ + if (memcmp(e->src_mac, smac, ETH_ALEN) == 0) + goto found_reuse; + } + } + } + + if (first_free) { + e = first_free; + goto found; + } + return NULL; + +found: + e->state = SMT_STATE_UNUSED; + +found_reuse: + return e; +} + +static void t4_smte_free(struct smt_entry *e) +{ + spin_lock_bh(&e->lock); + if (atomic_read(&e->refcnt) == 0) { /* hasn't been recycled */ + e->state = SMT_STATE_UNUSED; + } + spin_unlock_bh(&e->lock); +} + +/** + * @e: smt entry to release + * + * Releases ref count and frees up an smt entry from SMT table + */ +void cxgb4_smt_release(struct smt_entry *e) +{ + if (atomic_dec_and_test(&e->refcnt)) + t4_smte_free(e); +} +EXPORT_SYMBOL(cxgb4_smt_release); + +void do_smt_write_rpl(struct adapter *adap, const struct cpl_smt_write_rpl *rpl) +{ + unsigned int smtidx = TID_TID_G(GET_TID(rpl)); + struct smt_data *s = adap->smt; + + if (unlikely(rpl->status != CPL_ERR_NONE)) { + struct smt_entry *e = &s->smtab[smtidx]; + + dev_err(adap->pdev_dev, + "Unexpected SMT_WRITE_RPL status %u for entry %u\n", + rpl->status, smtidx); + spin_lock(&e->lock); + e->state = SMT_STATE_ERROR; + spin_unlock(&e->lock); + return; + } +} + +static int write_smt_entry(struct adapter *adapter, struct smt_entry *e) +{ + struct cpl_t6_smt_write_req *t6req; + struct smt_data *s = adapter->smt; + struct cpl_smt_write_req *req; + struct sk_buff *skb; + int size; + u8 row; + + if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) { + size = sizeof(*req); + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + /* Source MAC Table (SMT) contains 256 SMAC entries + * organized in 128 rows of 2 entries each. + */ + req = (struct cpl_smt_write_req *)__skb_put(skb, size); + INIT_TP_WR(req, 0); + + /* Each row contains an SMAC pair. + * LSB selects the SMAC entry within a row + */ + row = (e->idx >> 1); + if (e->idx & 1) { + req->pfvf1 = 0x0; + memcpy(req->src_mac1, e->src_mac, ETH_ALEN); + + /* fill pfvf0/src_mac0 with entry + * at prev index from smt-tab. 
+ */ + req->pfvf0 = 0x0; + memcpy(req->src_mac0, s->smtab[e->idx - 1].src_mac, + ETH_ALEN); + } else { + req->pfvf0 = 0x0; + memcpy(req->src_mac0, e->src_mac, ETH_ALEN); + + /* fill pfvf1/src_mac1 with entry + * at next index from smt-tab + */ + req->pfvf1 = 0x0; + memcpy(req->src_mac1, s->smtab[e->idx + 1].src_mac, + ETH_ALEN); + } + } else { + size = sizeof(*t6req); + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + /* Source MAC Table (SMT) contains 256 SMAC entries */ + t6req = (struct cpl_t6_smt_write_req *)__skb_put(skb, size); + INIT_TP_WR(t6req, 0); + req = (struct cpl_smt_write_req *)t6req; + + /* fill pfvf0/src_mac0 from smt-tab */ + req->pfvf0 = 0x0; + memcpy(req->src_mac0, s->smtab[e->idx].src_mac, ETH_ALEN); + row = e->idx; + } + + OPCODE_TID(req) = + htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, e->idx | + TID_QID_V(adapter->sge.fw_evtq.abs_id))); + req->params = htonl(SMTW_NORPL_V(0) | + SMTW_IDX_V(row) | + SMTW_OVLAN_IDX_V(0)); + t4_mgmt_tx(adapter, skb); + return 0; +} + +static struct smt_entry *t4_smt_alloc_switching(struct adapter *adap, u16 pfvf, + u8 *smac) +{ + struct smt_data *s = adap->smt; + struct smt_entry *e; + + write_lock_bh(&s->lock); + e = find_or_alloc_smte(s, smac); + if (e) { + spin_lock(&e->lock); + if (!atomic_read(&e->refcnt)) { + atomic_set(&e->refcnt, 1); + e->state = SMT_STATE_SWITCHING; + e->pfvf = pfvf; + memcpy(e->src_mac, smac, ETH_ALEN); + write_smt_entry(adap, e); + } else { + atomic_inc(&e->refcnt); + } + spin_unlock(&e->lock); + } + write_unlock_bh(&s->lock); + return e; +} + +/** + * @dev: net_device pointer + * @smac: MAC address to add to SMT + * Returns pointer to the SMT entry created + * + * Allocates an SMT entry to be used by switching rule of a filter. + */ +struct smt_entry *cxgb4_smt_alloc_switching(struct net_device *dev, u8 *smac) +{ + struct adapter *adap = netdev2adap(dev); + + return t4_smt_alloc_switching(adap, 0x0, smac); +} +EXPORT_SYMBOL(cxgb4_smt_alloc_switching); diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.h b/drivers/net/ethernet/chelsio/cxgb4/smt.h new file mode 100644 index 000000000000..d6c2cc271398 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/smt.h @@ -0,0 +1,76 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_SMT_H +#define __CXGB4_SMT_H + +#include +#include +#include + +struct adapter; +struct cpl_smt_write_rpl; + +/* SMT related handling. Heavily adapted based on l2t ops in l2t.h/l2t.c + */ +enum { + SMT_STATE_SWITCHING, + SMT_STATE_UNUSED, + SMT_STATE_ERROR +}; + +enum { + SMT_SIZE = 256 +}; + +struct smt_entry { + u16 state; + u16 idx; + u16 pfvf; + u8 src_mac[ETH_ALEN]; + atomic_t refcnt; + spinlock_t lock; /* protect smt entry add,removal */ +}; + +struct smt_data { + unsigned int smt_size; + rwlock_t lock; + struct smt_entry smtab[0]; +}; + +struct smt_data *t4_init_smt(void); +struct smt_entry *cxgb4_smt_alloc_switching(struct net_device *dev, u8 *smac); +void cxgb4_smt_release(struct smt_entry *e); +void do_smt_write_rpl(struct adapter *p, const struct cpl_smt_write_rpl *rpl); +#endif /* __CXGB4_SMT_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index b0ff78da8aa2..ce4838d907da 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -50,6 +50,7 @@ enum { CPL_RX_DATA_ACK = 0xD, CPL_TX_PKT = 0xE, CPL_L2T_WRITE_REQ = 0x12, + CPL_SMT_WRITE_REQ = 0x14, CPL_TID_RELEASE = 0x1A, CPL_TX_DATA_ISO = 0x1F, @@ -60,6 +61,7 @@ enum { CPL_PEER_CLOSE = 0x26, CPL_ABORT_REQ_RSS = 0x2B, CPL_ABORT_RPL_RSS = 0x2D, + CPL_SMT_WRITE_RPL = 0x2E, CPL_RX_PHYS_ADDR = 0x30, CPL_CLOSE_CON_RPL = 0x32, @@ -681,8 +683,8 @@ struct cpl_set_tcb_field { }; /* cpl_set_tcb_field.word_cookie fields */ -#define TCB_WORD_S 0 -#define TCB_WORD(x) ((x) << TCB_WORD_S) +#define TCB_WORD_S 0 +#define TCB_WORD_V(x) ((x) << TCB_WORD_S) #define TCB_COOKIE_S 5 #define TCB_COOKIE_M 0x7 @@ -1266,6 +1268,44 @@ struct cpl_l2t_write_rpl { u8 rsvd[3]; }; +struct cpl_smt_write_req { + WR_HDR; + union opcode_tid ot; + __be32 params; + __be16 pfvf1; + u8 src_mac1[6]; + __be16 pfvf0; + u8 src_mac0[6]; +}; + +struct cpl_t6_smt_write_req { + WR_HDR; + union opcode_tid ot; + __be32 params; + __be64 tag; + __be16 pfvf0; + u8 src_mac0[6]; + __be32 local_ip; + __be32 rsvd; +}; + +struct cpl_smt_write_rpl { + union opcode_tid ot; + u8 status; + u8 rsvd[3]; +}; + +/* cpl_smt_{read,write}_req.params fields */ +#define SMTW_OVLAN_IDX_S 16 +#define SMTW_OVLAN_IDX_V(x) ((x) << SMTW_OVLAN_IDX_S) + +#define SMTW_IDX_S 20 +#define SMTW_IDX_V(x) ((x) << SMTW_IDX_S) + +#define SMTW_NORPL_S 31 +#define SMTW_NORPL_V(x) ((x) << SMTW_NORPL_S) +#define SMTW_NORPL_F SMTW_NORPL_V(1U) + struct cpl_rdma_terminate { union opcode_tid ot; __be16 rsvd; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h new file mode 100644 index 000000000000..c1c76663034d --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -0,0 +1,47 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __T4_TCB_H +#define __T4_TCB_H + +#define TCB_SMAC_SEL_W 0 +#define TCB_SMAC_SEL_S 24 +#define TCB_SMAC_SEL_M 0xffULL +#define TCB_SMAC_SEL_V(x) ((x) << TCB_SMAC_SEL_S) + +#define TCB_T_FLAGS_W 1 + +#define TF_CCTRL_CWR_S 61 + +#endif /* __T4_TCB_H */ From 202187c34c7e3efd9662a25977cddef6e7dec572 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Oct 2017 20:49:12 +0530 Subject: [PATCH 1185/2177] cxgb4: add tc flower support for ETH-SMAC rewrite Adds support for ETH-SMAC rewrite via TC-PEDIT action. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
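Tying the SMT patch and this SMAC-rewrite patch together (a summary sketch drawn from the diffs, not new code): a filter that asks for source-MAC rewrite has fs->newsmac set and the new address in fs->smac, set_filter_wr() then allocates an SMT entry via cxgb4_smt_alloc_switching(), and once the firmware reports FW_FILTER_WR_FLT_ADDED, configure_filter_smac() points the filter's TCB at that entry using the t4_tcb.h fields introduced above. With TCB_SMAC_SEL_S = 24 and TCB_SMAC_SEL_M = 0xff, programming, say, SMT index 5 (an invented example value) works out to:

    /* arguments handed to set_tcb_field() for word TCB_SMAC_SEL_W (0) */
    mask = TCB_SMAC_SEL_V(TCB_SMAC_SEL_M);  /* 0xff << 24 = 0xff000000 */
    val  = TCB_SMAC_SEL_V(f->smt->idx);     /* 0x05 << 24 = 0x05000000 */

so the SMT index occupies bits 31:24 of TCB word 0, alongside the separate CWR flag set through set_tcb_tflag().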
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 10 ++++++++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 3 +++ 2 files changed, 13 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 7c8b0c65934c..34d67a2a86f4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -45,6 +45,8 @@ struct ch_tc_pedit_fields pedits[] = { PEDIT_FIELDS(ETH_, DMAC_31_0, 4, dmac, 0), PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4), + PEDIT_FIELDS(ETH_, SMAC_15_0, 2, smac, 0), + PEDIT_FIELDS(ETH_, SMAC_47_16, 4, smac, 2), }; static struct ch_tc_flower_entry *allocate_flower_entry(void) @@ -291,6 +293,13 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, case PEDIT_ETH_DMAC_47_32_SMAC_15_0: if (~mask & PEDIT_ETH_DMAC_MASK) offload_pedit(fs, val, mask, ETH_DMAC_47_32); + else + offload_pedit(fs, val >> 16, mask >> 16, + ETH_SMAC_15_0); + break; + case PEDIT_ETH_SMAC_47_16: + fs->newsmac = 1; + offload_pedit(fs, val, mask, ETH_SMAC_47_16); } } } @@ -440,6 +449,7 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, switch (offset) { case PEDIT_ETH_DMAC_31_0: case PEDIT_ETH_DMAC_47_32_SMAC_15_0: + case PEDIT_ETH_SMAC_47_16: break; default: netdev_err(dev, "%s: Unsupported pedit field\n", diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index 7bf6cfa892aa..a2acb782918f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -57,6 +57,8 @@ struct ch_tc_flower_entry { enum { ETH_DMAC_31_0, /* dmac bits 0.. 31 */ ETH_DMAC_47_32, /* dmac bits 32..47 */ + ETH_SMAC_15_0, /* smac bits 0.. 15 */ + ETH_SMAC_47_16, /* smac bits 16..47 */ }; struct ch_tc_pedit_fields { @@ -72,6 +74,7 @@ struct ch_tc_pedit_fields { #define PEDIT_ETH_DMAC_MASK 0xffff #define PEDIT_ETH_DMAC_31_0 0x0 #define PEDIT_ETH_DMAC_47_32_SMAC_15_0 0x4 +#define PEDIT_ETH_SMAC_47_16 0x8 int cxgb4_tc_flower_replace(struct net_device *dev, struct tc_cls_flower_offload *cls); From 0ff909946155ed1af2ec8feed3c1bac485201683 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Oct 2017 20:49:13 +0530 Subject: [PATCH 1186/2177] cxgb4: introduce fw_filter2_wr to prepare for L3/L4 rewrite support Update driver to use new fw_filter2_wr in order to support rewrite of L3/L4 header fields via filters. Query FW_PARAMS_PARAM_DEV_FILTER2_WR to check whether FW supports this new wr. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
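For a sense of what the flower series is building towards end to end, a rule of roughly the following shape should exercise these hooks (an illustration only: the interface names and addresses are invented, and the exact pedit syntax depends on the iproute2 version in use):

    tc qdisc add dev p1 ingress
    tc filter add dev p1 ingress protocol ip flower \
            ip_proto tcp dst_ip 10.0.0.2 dst_port 80 \
        action pedit ex munge eth dst set 02:00:00:00:00:02 \
                        munge ip dst set 192.168.0.2 \
                        munge tcp dport set 8080 pipe \
        action mirred egress redirect dev p2

The trailing mirred redirect matters because cxgb4_validate_flow_actions() rejects pedit or vlan rewrites that are not paired with an egress redirect, and any IP or port munge is expressed as a NAT operation, which is what the fw_filter2_wr introduced in this patch exists to carry to the firmware.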
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 10 +++ .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 19 ++++- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 10 +++ drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 75 ++++++++++++++++++- 4 files changed, 111 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index f7aa3516bf12..6a1c0b1fe8d0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -367,6 +367,7 @@ struct adapter_params { unsigned int max_ird_adapter; /* Max read depth per adapter */ bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */ u8 fw_caps_support; /* 32-bit Port Capabilities */ + bool filter2_wr_support; /* FW support for FILTER2_WR */ /* MPS Buffer Group Map[per Port]. Bit i is set if buffer group i is * used by the Port @@ -1064,10 +1065,19 @@ struct ch_filter_specification { uint32_t newdmac:1; /* rewrite destination MAC address */ uint32_t newsmac:1; /* rewrite source MAC address */ uint32_t newvlan:2; /* rewrite VLAN Tag */ + uint32_t nat_mode:3; /* specify NAT operation mode */ uint8_t dmac[ETH_ALEN]; /* new destination MAC address */ uint8_t smac[ETH_ALEN]; /* new source MAC address */ uint16_t vlan; /* VLAN Tag to insert */ + u8 nat_lip[16]; /* local IP to use after NAT'ing */ + u8 nat_fip[16]; /* foreign IP to use after NAT'ing */ + u16 nat_lport; /* local port to use after NAT'ing */ + u16 nat_fport; /* foreign port to use after NAT'ing */ + + /* reservation for future additions */ + u8 rsvd[24]; + /* Filter rule value/mask pairs. */ struct ch_filter_tuple val; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index a8084be5e13b..89272f29f807 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -313,7 +313,7 @@ static int del_filter_wr(struct adapter *adapter, int fidx) int set_filter_wr(struct adapter *adapter, int fidx) { struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct fw_filter_wr *fwr; + struct fw_filter2_wr *fwr; struct sk_buff *skb; skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); @@ -359,7 +359,10 @@ int set_filter_wr(struct adapter *adapter, int fidx) * filter specification structure but for now it's easiest to simply * put this fairly direct code in line ... */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); + if (adapter->params.filter2_wr_support) + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER2_WR)); + else + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); fwr->tid_to_iq = htonl(FW_FILTER_WR_TID_V(f->tid) | @@ -421,6 +424,18 @@ int set_filter_wr(struct adapter *adapter, int fidx) fwr->fp = htons(f->fs.val.fport); fwr->fpm = htons(f->fs.mask.fport); + if (adapter->params.filter2_wr_support) { + fwr->natmode_to_ulp_type = + FW_FILTER2_WR_ULP_TYPE_V(f->fs.nat_mode ? + ULP_MODE_TCPDDP : + ULP_MODE_NONE) | + FW_FILTER2_WR_NATMODE_V(f->fs.nat_mode); + memcpy(fwr->newlip, f->fs.nat_lip, sizeof(fwr->newlip)); + memcpy(fwr->newfip, f->fs.nat_fip, sizeof(fwr->newfip)); + fwr->newlport = htons(f->fs.nat_lport); + fwr->newfport = htons(f->fs.nat_fport); + } + /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. 
*/ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 796b37de464f..c478291db93f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3910,6 +3910,16 @@ static int adap_init0(struct adapter *adap) 1, params, val); adap->params.fr_nsmr_tpte_wr_support = (ret == 0 && val[0] != 0); + /* See if FW supports FW_FILTER2 work request */ + if (is_t4(adap->params.chip)) { + adap->params.filter2_wr_support = 0; + } else { + params[0] = FW_PARAM_DEV(FILTER2_WR); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, + 1, params, val); + adap->params.filter2_wr_support = (ret == 0 && val[0] != 0); + } + /* * Get device capabilities so we can determine what resources we need * to manage. diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index ca2756dcefc5..875d4a72b3ef 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -105,7 +105,8 @@ enum fw_wr_opcodes { FW_ISCSI_TX_DATA_WR = 0x45, FW_PTP_TX_PKT_WR = 0x46, FW_CRYPTO_LOOKASIDE_WR = 0X6d, - FW_LASTC2E_WR = 0x70 + FW_LASTC2E_WR = 0x70, + FW_FILTER2_WR = 0x77 }; struct fw_wr_hdr { @@ -201,6 +202,51 @@ struct fw_filter_wr { __u8 sma[6]; }; +struct fw_filter2_wr { + __be32 op_pkd; + __be32 len16_pkd; + __be64 r3; + __be32 tid_to_iq; + __be32 del_filter_to_l2tix; + __be16 ethtype; + __be16 ethtypem; + __u8 frag_to_ovlan_vldm; + __u8 smac_sel; + __be16 rx_chan_rx_rpl_iq; + __be32 maci_to_matchtypem; + __u8 ptcl; + __u8 ptclm; + __u8 ttyp; + __u8 ttypm; + __be16 ivlan; + __be16 ivlanm; + __be16 ovlan; + __be16 ovlanm; + __u8 lip[16]; + __u8 lipm[16]; + __u8 fip[16]; + __u8 fipm[16]; + __be16 lp; + __be16 lpm; + __be16 fp; + __be16 fpm; + __be16 r7; + __u8 sma[6]; + __be16 r8; + __u8 filter_type_swapmac; + __u8 natmode_to_ulp_type; + __be16 newlport; + __be16 newfport; + __u8 newlip[16]; + __u8 newfip[16]; + __be32 natseqcheck; + __be32 r9; + __be64 r10; + __be64 r11; + __be64 r12; + __be64 r13; +}; + #define FW_FILTER_WR_TID_S 12 #define FW_FILTER_WR_TID_M 0xfffff #define FW_FILTER_WR_TID_V(x) ((x) << FW_FILTER_WR_TID_S) @@ -385,6 +431,32 @@ struct fw_filter_wr { #define FW_FILTER_WR_RX_RPL_IQ_G(x) \ (((x) >> FW_FILTER_WR_RX_RPL_IQ_S) & FW_FILTER_WR_RX_RPL_IQ_M) +#define FW_FILTER2_WR_FILTER_TYPE_S 1 +#define FW_FILTER2_WR_FILTER_TYPE_M 0x1 +#define FW_FILTER2_WR_FILTER_TYPE_V(x) ((x) << FW_FILTER2_WR_FILTER_TYPE_S) +#define FW_FILTER2_WR_FILTER_TYPE_G(x) \ + (((x) >> FW_FILTER2_WR_FILTER_TYPE_S) & FW_FILTER2_WR_FILTER_TYPE_M) +#define FW_FILTER2_WR_FILTER_TYPE_F FW_FILTER2_WR_FILTER_TYPE_V(1U) + +#define FW_FILTER2_WR_NATMODE_S 5 +#define FW_FILTER2_WR_NATMODE_M 0x7 +#define FW_FILTER2_WR_NATMODE_V(x) ((x) << FW_FILTER2_WR_NATMODE_S) +#define FW_FILTER2_WR_NATMODE_G(x) \ + (((x) >> FW_FILTER2_WR_NATMODE_S) & FW_FILTER2_WR_NATMODE_M) + +#define FW_FILTER2_WR_NATFLAGCHECK_S 4 +#define FW_FILTER2_WR_NATFLAGCHECK_M 0x1 +#define FW_FILTER2_WR_NATFLAGCHECK_V(x) ((x) << FW_FILTER2_WR_NATFLAGCHECK_S) +#define FW_FILTER2_WR_NATFLAGCHECK_G(x) \ + (((x) >> FW_FILTER2_WR_NATFLAGCHECK_S) & FW_FILTER2_WR_NATFLAGCHECK_M) +#define FW_FILTER2_WR_NATFLAGCHECK_F FW_FILTER2_WR_NATFLAGCHECK_V(1U) + +#define FW_FILTER2_WR_ULP_TYPE_S 0 +#define FW_FILTER2_WR_ULP_TYPE_M 0xf +#define FW_FILTER2_WR_ULP_TYPE_V(x) ((x) << FW_FILTER2_WR_ULP_TYPE_S) +#define FW_FILTER2_WR_ULP_TYPE_G(x) \ + (((x) >> FW_FILTER2_WR_ULP_TYPE_S) & 
FW_FILTER2_WR_ULP_TYPE_M) + #define FW_FILTER_WR_MACI_S 23 #define FW_FILTER_WR_MACI_M 0x1ff #define FW_FILTER_WR_MACI_V(x) ((x) << FW_FILTER_WR_MACI_S) @@ -1127,6 +1199,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A, FW_PARAMS_PARAM_DEV_VPDREV = 0x1B, FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C, + FW_PARAMS_PARAM_DEV_FILTER2_WR = 0x1D, FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E, }; From 557ccbf9dfa8de133b9247af42f0c5760bb103f0 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Oct 2017 20:49:14 +0530 Subject: [PATCH 1187/2177] cxgb4: add tc flower support for L3/L4 rewrite Adds support to rewrite L3/L4 fields via TC-PEDIT action. Supported fields for rewrite are: IPv4 src/dst address, IPv6 src/dst address, TCP/UDP sport/dport. Also, process match fields first and then process the action items. Refactor pedit action validation to separate function to avoid excessive code indentation. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 4 + .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 245 +++++++++++++++--- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 32 +++ 3 files changed, 244 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 6a1c0b1fe8d0..92a0b022687e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1097,6 +1097,10 @@ enum { VLAN_REWRITE }; +enum { + NAT_MODE_ALL = 7, /* NAT on entire 4-tuple */ +}; + /* Host shadow copy of ingress filter entry. This is in host native format * and doesn't match the ordering or bit order, etc. of the hardware of the * firmware command. 
The use of bit-field structure elements is purely to diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 34d67a2a86f4..892dfce1fa63 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -47,6 +47,20 @@ struct ch_tc_pedit_fields pedits[] = { PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4), PEDIT_FIELDS(ETH_, SMAC_15_0, 2, smac, 0), PEDIT_FIELDS(ETH_, SMAC_47_16, 4, smac, 2), + PEDIT_FIELDS(IP4_, SRC, 4, nat_fip, 0), + PEDIT_FIELDS(IP4_, DST, 4, nat_lip, 0), + PEDIT_FIELDS(IP6_, SRC_31_0, 4, nat_fip, 0), + PEDIT_FIELDS(IP6_, SRC_63_32, 4, nat_fip, 4), + PEDIT_FIELDS(IP6_, SRC_95_64, 4, nat_fip, 8), + PEDIT_FIELDS(IP6_, SRC_127_96, 4, nat_fip, 12), + PEDIT_FIELDS(IP6_, DST_31_0, 4, nat_lip, 0), + PEDIT_FIELDS(IP6_, DST_63_32, 4, nat_lip, 4), + PEDIT_FIELDS(IP6_, DST_95_64, 4, nat_lip, 8), + PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12), + PEDIT_FIELDS(TCP_, SPORT, 2, nat_fport, 0), + PEDIT_FIELDS(TCP_, DPORT, 2, nat_lport, 0), + PEDIT_FIELDS(UDP_, SPORT, 2, nat_fport, 0), + PEDIT_FIELDS(UDP_, DPORT, 2, nat_lport, 0), }; static struct ch_tc_flower_entry *allocate_flower_entry(void) @@ -121,6 +135,11 @@ static void cxgb4_process_flow_match(struct net_device *dev, memcpy(&fs->val.fip[0], &key->src, sizeof(key->src)); memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst)); memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src)); + + /* also initialize nat_lip/fip to same values */ + memcpy(&fs->nat_lip[0], &key->dst, sizeof(key->dst)); + memcpy(&fs->nat_fip[0], &key->src, sizeof(key->src)); + } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { @@ -138,6 +157,10 @@ static void cxgb4_process_flow_match(struct net_device *dev, memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src)); memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst)); memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src)); + + /* also initialize nat_lip/fip to same values */ + memcpy(&fs->nat_lip[0], key->dst.s6_addr, sizeof(key->dst)); + memcpy(&fs->nat_fip[0], key->src.s6_addr, sizeof(key->src)); } if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) { @@ -153,6 +176,10 @@ static void cxgb4_process_flow_match(struct net_device *dev, fs->mask.lport = cpu_to_be16(mask->dst); fs->val.fport = cpu_to_be16(key->src); fs->mask.fport = cpu_to_be16(mask->src); + + /* also initialize nat_lport/fport to same values */ + fs->nat_lport = cpu_to_be16(key->dst); + fs->nat_fport = cpu_to_be16(key->src); } if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) { @@ -301,6 +328,70 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, fs->newsmac = 1; offload_pedit(fs, val, mask, ETH_SMAC_47_16); } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + switch (offset) { + case PEDIT_IP4_SRC: + offload_pedit(fs, val, mask, IP4_SRC); + break; + case PEDIT_IP4_DST: + offload_pedit(fs, val, mask, IP4_DST); + } + fs->nat_mode = NAT_MODE_ALL; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + switch (offset) { + case PEDIT_IP6_SRC_31_0: + offload_pedit(fs, val, mask, IP6_SRC_31_0); + break; + case PEDIT_IP6_SRC_63_32: + offload_pedit(fs, val, mask, IP6_SRC_63_32); + break; + case PEDIT_IP6_SRC_95_64: + offload_pedit(fs, val, mask, IP6_SRC_95_64); + break; + case PEDIT_IP6_SRC_127_96: + offload_pedit(fs, val, mask, IP6_SRC_127_96); + break; + case PEDIT_IP6_DST_31_0: + offload_pedit(fs, val, mask, IP6_DST_31_0); + break; + case PEDIT_IP6_DST_63_32: + 
offload_pedit(fs, val, mask, IP6_DST_63_32); + break; + case PEDIT_IP6_DST_95_64: + offload_pedit(fs, val, mask, IP6_DST_95_64); + break; + case PEDIT_IP6_DST_127_96: + offload_pedit(fs, val, mask, IP6_DST_127_96); + } + fs->nat_mode = NAT_MODE_ALL; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + switch (offset) { + case PEDIT_TCP_SPORT_DPORT: + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + offload_pedit(fs, cpu_to_be32(val) >> 16, + cpu_to_be32(mask) >> 16, + TCP_SPORT); + else + offload_pedit(fs, cpu_to_be32(val), + cpu_to_be32(mask), TCP_DPORT); + } + fs->nat_mode = NAT_MODE_ALL; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + switch (offset) { + case PEDIT_UDP_SPORT_DPORT: + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + offload_pedit(fs, cpu_to_be32(val) >> 16, + cpu_to_be32(mask) >> 16, + UDP_SPORT); + else + offload_pedit(fs, cpu_to_be32(val), + cpu_to_be32(mask), UDP_DPORT); + } + fs->nat_mode = NAT_MODE_ALL; } } @@ -365,6 +456,119 @@ static void cxgb4_process_flow_actions(struct net_device *in, } } +static bool valid_l4_mask(u32 mask) +{ + u16 hi, lo; + + /* Either the upper 16-bits (SPORT) OR the lower + * 16-bits (DPORT) can be set, but NOT BOTH. + */ + hi = (mask >> 16) & 0xFFFF; + lo = mask & 0xFFFF; + + return hi && lo ? false : true; +} + +static bool valid_pedit_action(struct net_device *dev, + const struct tc_action *a) +{ + u32 mask, offset; + u8 cmd, htype; + int nkeys, i; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + cmd = tcf_pedit_cmd(a, i); + mask = tcf_pedit_mask(a, i); + offset = tcf_pedit_offset(a, i); + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) { + netdev_err(dev, "%s: Unsupported pedit cmd\n", + __func__); + return false; + } + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + switch (offset) { + case PEDIT_ETH_DMAC_31_0: + case PEDIT_ETH_DMAC_47_32_SMAC_15_0: + case PEDIT_ETH_SMAC_47_16: + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + switch (offset) { + case PEDIT_IP4_SRC: + case PEDIT_IP4_DST: + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + switch (offset) { + case PEDIT_IP6_SRC_31_0: + case PEDIT_IP6_SRC_63_32: + case PEDIT_IP6_SRC_95_64: + case PEDIT_IP6_SRC_127_96: + case PEDIT_IP6_DST_31_0: + case PEDIT_IP6_DST_63_32: + case PEDIT_IP6_DST_95_64: + case PEDIT_IP6_DST_127_96: + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + switch (offset) { + case PEDIT_TCP_SPORT_DPORT: + if (!valid_l4_mask(~mask)) { + netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n", + __func__); + return false; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + switch (offset) { + case PEDIT_UDP_SPORT_DPORT: + if (!valid_l4_mask(~mask)) { + netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n", + __func__); + return false; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit type\n", + __func__); + return false; + } + } + return true; +} + static int cxgb4_validate_flow_actions(struct net_device *dev, struct tc_cls_flower_offload *cls) { @@ -426,43 +630,10 @@ static int 
cxgb4_validate_flow_actions(struct net_device *dev, } act_vlan = true; } else if (is_tcf_pedit(a)) { - u32 mask, val, offset; - u8 cmd, htype; - int nkeys, i; + bool pedit_valid = valid_pedit_action(dev, a); - nkeys = tcf_pedit_nkeys(a); - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); - cmd = tcf_pedit_cmd(a, i); - mask = tcf_pedit_mask(a, i); - val = tcf_pedit_val(a, i); - offset = tcf_pedit_offset(a, i); - - if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) { - netdev_err(dev, "%s: Unsupported pedit cmd\n", - __func__); - return -EOPNOTSUPP; - } - - switch (htype) { - case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - switch (offset) { - case PEDIT_ETH_DMAC_31_0: - case PEDIT_ETH_DMAC_47_32_SMAC_15_0: - case PEDIT_ETH_SMAC_47_16: - break; - default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); - return -EOPNOTSUPP; - } - break; - default: - netdev_err(dev, "%s: Unsupported pedit type\n", - __func__); - return -EOPNOTSUPP; - } - } + if (!pedit_valid) + return -EOPNOTSUPP; act_pedit = true; } else { netdev_err(dev, "%s: Unsupported action\n", __func__); @@ -503,8 +674,8 @@ int cxgb4_tc_flower_replace(struct net_device *dev, fs = &ch_flower->fs; fs->hitcnts = 1; - cxgb4_process_flow_actions(dev, cls, fs); cxgb4_process_flow_match(dev, cls, fs); + cxgb4_process_flow_actions(dev, cls, fs); fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET); if (fidx < 0) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index a2acb782918f..202d5c9ec303 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -59,6 +59,25 @@ enum { ETH_DMAC_47_32, /* dmac bits 32..47 */ ETH_SMAC_15_0, /* smac bits 0.. 15 */ ETH_SMAC_47_16, /* smac bits 16..47 */ + + IP4_SRC, /* 32-bit IPv4 src */ + IP4_DST, /* 32-bit IPv4 dst */ + + IP6_SRC_31_0, /* src bits 0.. 31 */ + IP6_SRC_63_32, /* src bits 63.. 32 */ + IP6_SRC_95_64, /* src bits 95.. 64 */ + IP6_SRC_127_96, /* src bits 127..96 */ + + IP6_DST_31_0, /* dst bits 0.. 31 */ + IP6_DST_63_32, /* dst bits 63.. 32 */ + IP6_DST_95_64, /* dst bits 95.. 64 */ + IP6_DST_127_96, /* dst bits 127..96 */ + + TCP_SPORT, /* 16-bit TCP sport */ + TCP_DPORT, /* 16-bit TCP dport */ + + UDP_SPORT, /* 16-bit UDP sport */ + UDP_DPORT, /* 16-bit UDP dport */ }; struct ch_tc_pedit_fields { @@ -72,9 +91,22 @@ struct ch_tc_pedit_fields { offsetof(struct ch_filter_specification, fs_field) + (offset) } #define PEDIT_ETH_DMAC_MASK 0xffff +#define PEDIT_TCP_UDP_SPORT_MASK 0xffff #define PEDIT_ETH_DMAC_31_0 0x0 #define PEDIT_ETH_DMAC_47_32_SMAC_15_0 0x4 #define PEDIT_ETH_SMAC_47_16 0x8 +#define PEDIT_IP4_SRC 0xC +#define PEDIT_IP4_DST 0x10 +#define PEDIT_IP6_SRC_31_0 0x8 +#define PEDIT_IP6_SRC_63_32 0xC +#define PEDIT_IP6_SRC_95_64 0x10 +#define PEDIT_IP6_SRC_127_96 0x14 +#define PEDIT_IP6_DST_31_0 0x18 +#define PEDIT_IP6_DST_63_32 0x1C +#define PEDIT_IP6_DST_95_64 0x20 +#define PEDIT_IP6_DST_127_96 0x24 +#define PEDIT_TCP_SPORT_DPORT 0x0 +#define PEDIT_UDP_SPORT_DPORT 0x0 int cxgb4_tc_flower_replace(struct net_device *dev, struct tc_cls_flower_offload *cls); From 0843c092ee75bb375fbbb6b97a7c55e0069ae099 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 18 Oct 2017 18:38:08 +0300 Subject: [PATCH 1188/2177] net/sched: Set the net-device for egress device instance Currently the netdevice field is not set and the egdev instance is not functional, fix that. 
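An aside on the cxgb4 pedit validation above, before moving on: the only TCP/UDP pedit offset the driver accepts is 0, a 32-bit word that spans both the source and the destination port, and valid_l4_mask(~mask) only accepts keys that leave one of the two 16-bit halves untouched. For example, an effective rewrite mask of 0xffff0000 or 0x0000ffff passes, while 0xffffffff (rewriting both halves with a single key) is rejected with the "Unsupported mask for TCP/UDP L4 ports" warning; rewriting both ports therefore takes two pedit keys.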
Fixes: 3f55bdda8df ('net: sched: introduce per-egress action device callbacks') Signed-off-by: Or Gerlitz Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ac97db92ab68..c67b820a8307 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1300,6 +1300,7 @@ tcf_action_egdev_get(const struct net_device *dev) if (!egdev) return NULL; INIT_LIST_HEAD(&egdev->cb_list); + egdev->dev = dev; tan = net_generic(dev_net(dev), tcf_action_net_id); rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node, tcf_action_egdev_ht_params); From ce34435641456a680309d84c85b48e004ef45f41 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:13 +0200 Subject: [PATCH 1189/2177] s390/qeth: rely on kernel for feature recovery When recovering a device, qeth needs to re-run the IPA commands that enable all previously active HW features. Instead of duplicating qeth_set_features(), let netdev_update_features() recover the missing HW features from dev->wanted_features. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 2 +- drivers/s390/net/qeth_core_main.c | 40 ++++++++++++++----------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 91fcadbede80..3b6aa0c48221 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -998,7 +998,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, __u16, __u16, enum qeth_prot_versions); int qeth_set_features(struct net_device *, netdev_features_t); -int qeth_recover_features(struct net_device *); +void qeth_recover_features(struct net_device *dev); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); int qeth_vm_request_mac(struct qeth_card *card); int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 68e118f1202e..e10e5b8d476c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6403,32 +6403,29 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on) return rc; } -/* try to restore device features on a device after recovery */ -int qeth_recover_features(struct net_device *dev) +#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO) + +/** + * qeth_recover_features() - Restore device features after recovery + * @dev: the recovering net_device + * + * Caller must hold rtnl lock. + */ +void qeth_recover_features(struct net_device *dev) { + netdev_features_t features = dev->features; struct qeth_card *card = dev->ml_priv; - netdev_features_t recover = dev->features; - if (recover & NETIF_F_IP_CSUM) { - if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM)) - recover ^= NETIF_F_IP_CSUM; - } - if (recover & NETIF_F_RXCSUM) { - if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM)) - recover ^= NETIF_F_RXCSUM; - } - if (recover & NETIF_F_TSO) { - if (qeth_set_ipa_tso(card, 1)) - recover ^= NETIF_F_TSO; - } - - if (recover == dev->features) - return 0; + /* force-off any feature that needs an IPA sequence. + * netdev_update_features() will restart them. 
+ */ + dev->features &= ~QETH_HW_FEATURES; + netdev_update_features(dev); + if (features == dev->features) + return; dev_warn(&card->gdev->dev, "Device recovery failed to restore all offload features\n"); - dev->features = recover; - return -EIO; } EXPORT_SYMBOL_GPL(qeth_recover_features); @@ -6485,8 +6482,7 @@ netdev_features_t qeth_fix_features(struct net_device *dev, /* if the card isn't up, remove features that require hw changes */ if (card->state == CARD_STATE_DOWN || card->state == CARD_STATE_RECOVER) - features = features & ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | - NETIF_F_TSO); + features &= ~QETH_HW_FEATURES; QETH_DBF_HEX(SETUP, 2, &features, sizeof(features)); return features; } From dbb27af91d2afd820c7cb5fab9b7608225e22fea Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Wed, 18 Oct 2017 17:40:14 +0200 Subject: [PATCH 1190/2177] s390/drivers: use setup_timer Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/fsm.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/s390/net/fsm.c b/drivers/s390/net/fsm.c index e5dea67f902e..8c14c6c3ad3d 100644 --- a/drivers/s390/net/fsm.c +++ b/drivers/s390/net/fsm.c @@ -170,9 +170,7 @@ fsm_addtimer(fsm_timer *this, int millisec, int event, void *arg) this->fi->name, this, millisec); #endif - init_timer(&this->tl); - this->tl.function = (void *)fsm_expire_timer; - this->tl.data = (long)this; + setup_timer(&this->tl, (void *)fsm_expire_timer, (long)this); this->expire_event = event; this->event_arg = arg; this->tl.expires = jiffies + (millisec * HZ) / 1000; @@ -191,9 +189,7 @@ fsm_modtimer(fsm_timer *this, int millisec, int event, void *arg) #endif del_timer(&this->tl); - init_timer(&this->tl); - this->tl.function = (void *)fsm_expire_timer; - this->tl.data = (long)this; + setup_timer(&this->tl, (void *)fsm_expire_timer, (long)this); this->expire_event = event; this->event_arg = arg; this->tl.expires = jiffies + (millisec * HZ) / 1000; From ed2e93efc3b8d34ba36b0f7b581f3828916a1da7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:15 +0200 Subject: [PATCH 1191/2177] s390/qeth: remove duplicated device matching With commit "s390/ccwgroup: tie a ccwgroup driver to its ccw driver", the ccwgroup core now ensures that a qeth group device only consists of ccw devices which are supported by qeth. Therefore remove qeth's internal device matching, and use .driver_info to determine the card type. Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core.h | 14 ----------- drivers/s390/net/qeth_core_main.c | 40 +++++++------------------------ drivers/s390/net/qeth_core_mpc.h | 1 - drivers/s390/net/qeth_core_sys.c | 4 +--- 4 files changed, 9 insertions(+), 50 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 3b6aa0c48221..dbdf09a1299a 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -231,20 +231,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa, #define QETH_IDX_FUNC_LEVEL_OSD 0x0101 #define QETH_IDX_FUNC_LEVEL_IQD 0x4108 -#define QETH_MODELLIST_ARRAY \ - {{0x1731, 0x01, 0x1732, QETH_CARD_TYPE_OSD, QETH_MAX_QUEUES, 0}, \ - {0x1731, 0x05, 0x1732, QETH_CARD_TYPE_IQD, QETH_MAX_QUEUES, 0x103}, \ - {0x1731, 0x06, 0x1732, QETH_CARD_TYPE_OSN, QETH_MAX_QUEUES, 0}, \ - {0x1731, 0x02, 0x1732, QETH_CARD_TYPE_OSM, QETH_MAX_QUEUES, 0}, \ - {0x1731, 0x02, 0x1732, QETH_CARD_TYPE_OSX, QETH_MAX_QUEUES, 0}, \ - {0, 0, 0, 0, 0, 0} } -#define QETH_CU_TYPE_IND 0 -#define QETH_CU_MODEL_IND 1 -#define QETH_DEV_TYPE_IND 2 -#define QETH_DEV_MODEL_IND 3 -#define QETH_QUEUE_NO_IND 4 -#define QETH_MULTICAST_IND 5 - #define QETH_REAL_CARD 1 #define QETH_VLAN_CARD 2 #define QETH_BUFSIZE 4096 diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index e10e5b8d476c..735b03cae727 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -52,7 +52,6 @@ EXPORT_SYMBOL_GPL(qeth_core_header_cache); static struct kmem_cache *qeth_qdio_outbuf_cache; static struct device *qeth_core_root_dev; -static unsigned int known_devices[][6] = QETH_MODELLIST_ARRAY; static struct lock_class_key qdio_out_skb_queue_key; static struct mutex qeth_mod_mutex; @@ -1386,6 +1385,7 @@ static void qeth_init_qdio_info(struct qeth_card *card) QETH_DBF_TEXT(SETUP, 4, "intqdinf"); atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED); /* inbound */ + card->qdio.no_in_queues = 1; card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT; if (card->info.type == QETH_CARD_TYPE_IQD) card->qdio.init_pool.buf_count = QETH_IN_BUF_COUNT_HSDEFAULT; @@ -1519,34 +1519,17 @@ out: return NULL; } -static int qeth_determine_card_type(struct qeth_card *card) +static void qeth_determine_card_type(struct qeth_card *card) { - int i = 0; - QETH_DBF_TEXT(SETUP, 2, "detcdtyp"); card->qdio.do_prio_queueing = QETH_PRIOQ_DEFAULT; card->qdio.default_out_queue = QETH_DEFAULT_QUEUE; - while (known_devices[i][QETH_DEV_MODEL_IND]) { - if ((CARD_RDEV(card)->id.dev_type == - known_devices[i][QETH_DEV_TYPE_IND]) && - (CARD_RDEV(card)->id.dev_model == - known_devices[i][QETH_DEV_MODEL_IND])) { - card->info.type = known_devices[i][QETH_DEV_MODEL_IND]; - card->qdio.no_out_queues = - known_devices[i][QETH_QUEUE_NO_IND]; - card->qdio.no_in_queues = 1; - card->info.is_multicast_different = - known_devices[i][QETH_MULTICAST_IND]; - qeth_update_from_chp_desc(card); - return 0; - } - i++; - } - card->info.type = QETH_CARD_TYPE_UNKNOWN; - dev_err(&card->gdev->dev, "The adapter hardware is of an " - "unknown type\n"); - return -ENOENT; + card->info.type = CARD_RDEV(card)->id.driver_info; + card->qdio.no_out_queues = QETH_MAX_QUEUES; + if (card->info.type == QETH_CARD_TYPE_IQD) + card->info.is_multicast_different = 0x0103; + qeth_update_from_chp_desc(card); } static int qeth_clear_channel(struct qeth_channel *channel) @@ -2233,8 +2216,6 @@ static int qeth_cm_setup(struct qeth_card *card) static int qeth_get_initial_mtu_for_card(struct qeth_card *card) { switch 
(card->info.type) { - case QETH_CARD_TYPE_UNKNOWN: - return 1500; case QETH_CARD_TYPE_IQD: return card->info.max_mtu; case QETH_CARD_TYPE_OSD: @@ -2279,7 +2260,6 @@ static int qeth_mtu_is_valid(struct qeth_card *card, int mtu) return ((mtu >= 576) && (mtu <= card->info.max_mtu)); case QETH_CARD_TYPE_OSN: - case QETH_CARD_TYPE_UNKNOWN: default: return 1; } @@ -5724,11 +5704,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) gdev->cdev[1]->handler = qeth_irq; gdev->cdev[2]->handler = qeth_irq; - rc = qeth_determine_card_type(card); - if (rc) { - QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc); - goto err_card; - } + qeth_determine_card_type(card); rc = qeth_setup_card(card); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 6a7654df6e78..413f0f268b17 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -44,7 +44,6 @@ extern unsigned char IPA_PDU_HEADER[]; #define IPA_CMD_PRIM_VERSION_NO 0x01 enum qeth_card_types { - QETH_CARD_TYPE_UNKNOWN = 0, QETH_CARD_TYPE_OSD = 1, QETH_CARD_TYPE_IQD = 5, QETH_CARD_TYPE_OSN = 6, diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index d1ee9e30c68b..b22ed2a57acd 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -475,10 +475,8 @@ static ssize_t qeth_dev_isolation_store(struct device *dev, return -EINVAL; mutex_lock(&card->conf_mutex); - /* check for unknown, too, in case we do not yet know who we are */ if (card->info.type != QETH_CARD_TYPE_OSD && - card->info.type != QETH_CARD_TYPE_OSX && - card->info.type != QETH_CARD_TYPE_UNKNOWN) { + card->info.type != QETH_CARD_TYPE_OSX) { rc = -EOPNOTSUPP; dev_err(&card->gdev->dev, "Adapter does not " "support QDIO data connection isolation\n"); From 025d0dfcc1dd90e2489ab9c500c8f5efbe3ce82f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 18 Oct 2017 17:40:16 +0200 Subject: [PATCH 1192/2177] s390/qeth: use kstrtobool() in qeth_bridgeport_hostnotification_store() The sysfs enabled value is a boolean, so kstrtobool() is a better fit for parsing the input string since it does the range checking for us. Signed-off-by: Andy Shevchenko Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_sys.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 4608daedb204..470a4e5f3c62 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -146,18 +146,15 @@ static ssize_t qeth_bridgeport_hostnotification_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); - int rc = 0; - int enable; + bool enable; + int rc; if (!card) return -EINVAL; - if (sysfs_streq(buf, "0")) - enable = 0; - else if (sysfs_streq(buf, "1")) - enable = 1; - else - return -EINVAL; + rc = kstrtobool(buf, &enable); + if (rc) + return rc; mutex_lock(&card->conf_mutex); From 83cf79a2fec3cf499eb6cb9eb608656fc2a82776 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:17 +0200 Subject: [PATCH 1193/2177] s390/qeth: fix early exit from error path When the allocation of the addr buffer fails, we need to free our refcount on the inetdevice before returning. Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_l3_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 270ac9515f6b..aadd384316a3 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1553,7 +1553,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4); if (!addr) - return; + goto out; spin_lock_bh(&card->ip_lock); @@ -1567,6 +1567,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, spin_unlock_bh(&card->ip_lock); kfree(addr); +out: in_dev_put(in_dev); } @@ -1591,7 +1592,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6); if (!addr) - return; + goto out; spin_lock_bh(&card->ip_lock); @@ -1606,6 +1607,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, spin_unlock_bh(&card->ip_lock); kfree(addr); +out: in6_dev_put(in6_dev); #endif /* CONFIG_QETH_IPV6 */ } From 6e6f472d92bd4ccd88a9a12e55b484ab417c168c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:18 +0200 Subject: [PATCH 1194/2177] s390/qeth: clean up initial MTU determination 1. Drop the support for Token Ring, 2. use the ETH_DATA_LEN macro for the default L2 MTU, 3. handle OSM via the default case (as OSM is L2-only), and 4. document why the L3 MTU is reduced. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 735b03cae727..acf16fa75c63 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2219,18 +2219,12 @@ static int qeth_get_initial_mtu_for_card(struct qeth_card *card) case QETH_CARD_TYPE_IQD: return card->info.max_mtu; case QETH_CARD_TYPE_OSD: - switch (card->info.link_type) { - case QETH_LINK_TYPE_HSTR: - case QETH_LINK_TYPE_LANE_TR: - return 2000; - default: - return card->options.layer2 ? 1500 : 1492; - } - case QETH_CARD_TYPE_OSM: case QETH_CARD_TYPE_OSX: - return card->options.layer2 ? 1500 : 1492; + if (!card->options.layer2) + return ETH_DATA_LEN - 8; /* L3: allow for LLC + SNAP */ + /* fall through */ default: - return 1500; + return ETH_DATA_LEN; } } From 857d8ee2608831197d9838c8331b87848d05da33 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:19 +0200 Subject: [PATCH 1195/2177] s390/qeth: don't verify device when setting MAC address There's no reason why l2_set_mac_address() should ever be called for a netdevice that's not owned by qeth. It's certainly not required for VLAN devices, which have their own netdev_ops. Also: 1) we don't do such validation for any of the other netdev_ops routines. 2) the code in question clearly has never been actually exercised; it's broken. After determining that the device is not owned by qeth, it would still use dev->ml_priv to write a qeth trace entry. Remove the check, and its helper that walked the global card list. Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_l2_main.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7b61c2ef4c74..87aa8c2ab5b0 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -38,24 +38,6 @@ static void qeth_l2_vnicc_init(struct qeth_card *card); static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc, u32 *timeout); -static int qeth_l2_verify_dev(struct net_device *dev) -{ - struct qeth_card *card; - unsigned long flags; - int rc = 0; - - read_lock_irqsave(&qeth_core_card_list.rwlock, flags); - list_for_each_entry(card, &qeth_core_card_list.list, list) { - if (card->dev == dev) { - rc = QETH_REAL_CARD; - break; - } - } - read_unlock_irqrestore(&qeth_core_card_list.rwlock, flags); - - return rc; -} - static struct net_device *qeth_l2_netdev_by_devno(unsigned char *read_dev_no) { struct qeth_card *card; @@ -568,11 +550,6 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) QETH_CARD_TEXT(card, 3, "setmac"); - if (qeth_l2_verify_dev(dev) != QETH_REAL_CARD) { - QETH_CARD_TEXT(card, 3, "setmcINV"); - return -EOPNOTSUPP; - } - if (card->info.type == QETH_CARD_TYPE_OSN || card->info.type == QETH_CARD_TYPE_OSM || card->info.type == QETH_CARD_TYPE_OSX) { From 9400c53f06b462959ee07594cb96429292031dc6 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:20 +0200 Subject: [PATCH 1196/2177] s390/qeth: no VLAN support on OSM Instead of silently discarding VLAN registration requests on OSM, just indicate that this card type doesn't support VLAN. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 87aa8c2ab5b0..d2537c09126d 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -348,10 +348,6 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, QETH_CARD_TEXT_(card, 4, "aid:%d", vid); if (!vid) return 0; - if (card->info.type == QETH_CARD_TYPE_OSM) { - QETH_CARD_TEXT(card, 3, "aidOSM"); - return 0; - } if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { QETH_CARD_TEXT(card, 3, "aidREC"); return 0; @@ -381,10 +377,6 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, int rc = 0; QETH_CARD_TEXT_(card, 4, "kid:%d", vid); - if (card->info.type == QETH_CARD_TYPE_OSM) { - QETH_CARD_TEXT(card, 3, "kidOSM"); - return 0; - } if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { QETH_CARD_TEXT(card, 3, "kidREC"); return 0; @@ -1010,7 +1002,11 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) } else { card->dev->ethtool_ops = &qeth_l2_ethtool_ops; } - card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + if (card->info.type == QETH_CARD_TYPE_OSM) + card->dev->features |= NETIF_F_VLAN_CHALLENGED; + else + card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->hw_features = NETIF_F_SG; card->dev->vlan_features = NETIF_F_SG; @@ -1120,8 +1116,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) goto out_remove; } - if (card->info.type != QETH_CARD_TYPE_OSN && - card->info.type != QETH_CARD_TYPE_OSM) + if (card->info.type != QETH_CARD_TYPE_OSN) qeth_l2_process_vlans(card); netif_tx_disable(card->dev); From b6f72f9698ab8bd79cd0b72a8d9f25dacd786563 Mon Sep 17 
00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:21 +0200 Subject: [PATCH 1197/2177] s390/qeth: clean up page frag creation Replace the open-coded skb_add_rx_frag(), and use a fall-through to remove some duplicated code. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index acf16fa75c63..1cf4e066955f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5172,10 +5172,11 @@ EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card); static int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer, struct qdio_buffer_element *element, - struct sk_buff **pskb, int offset, int *pfrag, - int data_len) + struct sk_buff **pskb, int offset, int data_len) { struct page *page = virt_to_page(element->addr); + unsigned int next_frag; + if (*pskb == NULL) { if (qethbuffer->rx_skb) { /* only if qeth_card.options.cq == QETH_CQ_ENABLED */ @@ -5190,28 +5191,19 @@ static int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer, skb_reserve(*pskb, ETH_HLEN); if (data_len <= QETH_RX_PULL_LEN) { skb_put_data(*pskb, element->addr + offset, data_len); + return 0; } else { - get_page(page); skb_put_data(*pskb, element->addr + offset, QETH_RX_PULL_LEN); - skb_fill_page_desc(*pskb, *pfrag, page, - offset + QETH_RX_PULL_LEN, - data_len - QETH_RX_PULL_LEN); - (*pskb)->data_len += data_len - QETH_RX_PULL_LEN; - (*pskb)->len += data_len - QETH_RX_PULL_LEN; - (*pskb)->truesize += data_len - QETH_RX_PULL_LEN; - (*pfrag)++; + data_len -= QETH_RX_PULL_LEN; + offset += QETH_RX_PULL_LEN; + /* fall through to add page frag for remaining data */ } - } else { - get_page(page); - skb_fill_page_desc(*pskb, *pfrag, page, offset, data_len); - (*pskb)->data_len += data_len; - (*pskb)->len += data_len; - (*pskb)->truesize += data_len; - (*pfrag)++; } - + next_frag = skb_shinfo(*pskb)->nr_frags; + get_page(page); + skb_add_rx_frag(*pskb, next_frag, page, offset, data_len, data_len); return 0; } @@ -5234,7 +5226,6 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, int data_len; int headroom = 0; int use_rx_sg = 0; - int frag = 0; /* qeth_hdr must not cross element boundaries */ if (element->length < offset + sizeof(struct qeth_hdr)) { @@ -5286,7 +5277,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, if (data_len) { if (use_rx_sg) { if (qeth_create_skb_frag(qethbuffer, element, - &skb, offset, &frag, data_len)) + &skb, offset, data_len)) goto no_mem; } else { skb_put_data(skb, data_ptr, data_len); From 8d68af6af6b4f76eac4f9abf584b4ea356f638c0 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:22 +0200 Subject: [PATCH 1198/2177] s390/qeth: consolidate skb allocation Move the allocation of SG skbs into the main path. This allows for a little code sharing, and handling ENOMEM from within one place. As side effect, L2 SG skbs now get the proper amount of additional headroom (read: zero) instead of the hard-coded ETH_HLEN. Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core_main.c | 71 ++++++++++++++----------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 1cf4e066955f..23b439fb5f2c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5170,41 +5170,27 @@ out: } EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card); -static int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer, - struct qdio_buffer_element *element, - struct sk_buff **pskb, int offset, int data_len) +static void qeth_create_skb_frag(struct qdio_buffer_element *element, + struct sk_buff *skb, int offset, int data_len) { struct page *page = virt_to_page(element->addr); unsigned int next_frag; - if (*pskb == NULL) { - if (qethbuffer->rx_skb) { - /* only if qeth_card.options.cq == QETH_CQ_ENABLED */ - *pskb = qethbuffer->rx_skb; - qethbuffer->rx_skb = NULL; - } else { - *pskb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN); - if (!(*pskb)) - return -ENOMEM; - } + /* first fill the linear space */ + if (!skb->len) { + unsigned int linear = min(data_len, skb_tailroom(skb)); - skb_reserve(*pskb, ETH_HLEN); - if (data_len <= QETH_RX_PULL_LEN) { - skb_put_data(*pskb, element->addr + offset, data_len); - return 0; - } else { - skb_put_data(*pskb, element->addr + offset, - QETH_RX_PULL_LEN); - data_len -= QETH_RX_PULL_LEN; - offset += QETH_RX_PULL_LEN; - /* fall through to add page frag for remaining data */ - } + skb_put_data(skb, element->addr + offset, linear); + data_len -= linear; + if (!data_len) + return; + offset += linear; + /* fall through to add page frag for remaining data */ } - next_frag = skb_shinfo(*pskb)->nr_frags; + next_frag = skb_shinfo(skb)->nr_frags; get_page(page); - skb_add_rx_frag(*pskb, next_frag, page, offset, data_len, data_len); - return 0; + skb_add_rx_frag(skb, next_frag, page, offset, data_len, data_len); } static inline int qeth_is_last_sbale(struct qdio_buffer_element *sbale) @@ -5220,7 +5206,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, struct qdio_buffer_element *element = *__element; struct qdio_buffer *buffer = qethbuffer->buffer; int offset = *__offset; - struct sk_buff *skb = NULL; + struct sk_buff *skb; int skb_len = 0; void *data_ptr; int data_len; @@ -5261,27 +5247,32 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, if (((skb_len >= card->options.rx_sg_cb) && (!(card->info.type == QETH_CARD_TYPE_OSN)) && (!atomic_read(&card->force_alloc_skb))) || - (card->options.cq == QETH_CQ_ENABLED)) { + (card->options.cq == QETH_CQ_ENABLED)) use_rx_sg = 1; + + if (use_rx_sg && qethbuffer->rx_skb) { + /* QETH_CQ_ENABLED only: */ + skb = qethbuffer->rx_skb; + qethbuffer->rx_skb = NULL; } else { - skb = dev_alloc_skb(skb_len + headroom); - if (!skb) - goto no_mem; - if (headroom) - skb_reserve(skb, headroom); + unsigned int linear = (use_rx_sg) ? 
QETH_RX_PULL_LEN : skb_len; + + skb = dev_alloc_skb(linear + headroom); } + if (!skb) + goto no_mem; + if (headroom) + skb_reserve(skb, headroom); data_ptr = element->addr + offset; while (skb_len) { data_len = min(skb_len, (int)(element->length - offset)); if (data_len) { - if (use_rx_sg) { - if (qeth_create_skb_frag(qethbuffer, element, - &skb, offset, data_len)) - goto no_mem; - } else { + if (use_rx_sg) + qeth_create_skb_frag(element, skb, offset, + data_len); + else skb_put_data(skb, data_ptr, data_len); - } } skb_len -= data_len; if (skb_len) { From 864c17c3d83be6a1f6a2888566e93e114986abca Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:23 +0200 Subject: [PATCH 1199/2177] s390/qeth: try harder to get packets from RX buffer Current code bails out when two subsequent buffer elements hold insufficient data to contain a qeth_hdr packet descriptor. This seems reasonable, but it would be legal for quirky hardware to leave a few elements empty and then present packets in a subsequent element. These packets would currently be dropped. So make sure to check all buffer elements, until we hit the LAST_ENTRY indication. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 23b439fb5f2c..8855bc8534d2 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5214,13 +5214,11 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, int use_rx_sg = 0; /* qeth_hdr must not cross element boundaries */ - if (element->length < offset + sizeof(struct qeth_hdr)) { + while (element->length < offset + sizeof(struct qeth_hdr)) { if (qeth_is_last_sbale(element)) return NULL; element++; offset = 0; - if (element->length < sizeof(struct qeth_hdr)) - return NULL; } *hdr = element->addr + offset; From 978759e8266d6f778fd7f8ae0d4a5e2ddb0558f7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:24 +0200 Subject: [PATCH 1200/2177] s390/qeth: support GRO flush timer Switch to napi_complete_done(), and thus enable delayed GRO flushing. The timeout is configured via /sys/class/net//gro_flush_timeout. Default timeout is 0, so no change in behaviour. Signed-off-by: Julian Wiedmann Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 8855bc8534d2..98b7e1ce0dd1 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5370,7 +5370,7 @@ int qeth_poll(struct napi_struct *napi, int budget) } } - napi_complete(napi); + napi_complete_done(napi, work_done); if (qdio_start_irq(card->data.ccwdev, 0)) napi_schedule(&card->napi); out: From 52c44d2975433fab645d29f1b5e6c0c7e77dd342 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 18 Oct 2017 17:40:25 +0200 Subject: [PATCH 1201/2177] s390/qeth: don't dump control cmd twice A few lines down, qeth_prepare_control_data() makes further changes to the control cmd buffer, and then also writes a trace entry for it. So the first entry just pollutes the trace file with intermediate data, drop it. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 98b7e1ce0dd1..457a4b4e8212 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2073,7 +2073,6 @@ int qeth_send_control_data(struct qeth_card *card, int len, spin_lock_irqsave(&card->lock, flags); list_add_tail(&reply->list, &card->cmd_waiter_list); spin_unlock_irqrestore(&card->lock, flags); - QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN); while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ; qeth_prepare_control_data(card, len, iob); From f3d9832e56c48e4ca50bab0457e21bcaade4536d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 09:56:52 -0700 Subject: [PATCH 1202/2177] ipv6: addrconf: cleanup locking in ipv6_add_addr ipv6_add_addr is called in process context with rtnl lock held (e.g., manual config of an address) or during softirq processing (e.g., autoconf and address from a router advertisement). Currently, ipv6_add_addr calls rcu_read_lock_bh shortly after entry and does not call unlock until exit, minus the call around the address validator notifier. Similarly, addrconf_hash_lock is taken after the validator notifier and held until exit. This forces the allocation of inet6_ifaddr to always be atomic. Refactor ipv6_add_addr as follows: 1. add an input boolean to discriminate the call path (process context or softirq). This new flag controls whether the alloc can be done with GFP_KERNEL or GFP_ATOMIC. 2. Move the rcu_read_lock_bh and unlock calls only around functions that do rcu updates. 3. Remove the in6_dev_hold and put added by 3ad7d2468f79f ("Ipvlan should return an error when an address is already in use."). This was done presumably because rcu_read_unlock_bh needs to be called before calling the validator. Since rcu_read_lock is not needed before the validator runs revert the hold and put added by 3ad7d2468f79f and only do the hold when setting ifp->idev. 4. move duplicate address check and insertion of new address in the global address hash into a helper. The helper is called after an ifa is allocated and filled in. This allows the ifa for manually configured addresses to be done with GFP_KERNEL and reduces the overall amount of time with rcu_read_lock held and hash table spinlock held. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 104 +++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 44 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4603aa488f4f..a8d202b1b919 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -957,18 +957,43 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); } +static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) +{ + unsigned int hash; + int err = 0; + + spin_lock(&addrconf_hash_lock); + + /* Ignore adding duplicate addresses on an interface */ + if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev)) { + ADBG("ipv6_add_addr: already assigned\n"); + err = -EEXIST; + goto out; + } + + /* Add to big hash table */ + hash = inet6_addr_hash(&ifa->addr); + hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); + +out: + spin_unlock(&addrconf_hash_lock); + + return err; +} + /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, - int scope, u32 flags, u32 valid_lft, u32 prefered_lft) + int scope, u32 flags, u32 valid_lft, u32 prefered_lft, + bool can_block) { + gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC; struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; - struct rt6_info *rt; + struct rt6_info *rt = NULL; struct in6_validator_info i6vi; - unsigned int hash; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -978,42 +1003,24 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, addr_type & IPV6_ADDR_LOOPBACK)) return ERR_PTR(-EADDRNOTAVAIL); - rcu_read_lock_bh(); - - in6_dev_hold(idev); - if (idev->dead) { err = -ENODEV; /*XXX*/ - goto out2; + goto out; } if (idev->cnf.disable_ipv6) { err = -EACCES; - goto out2; + goto out; } i6vi.i6vi_addr = *addr; i6vi.i6vi_dev = idev; - rcu_read_unlock_bh(); - err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); - - rcu_read_lock_bh(); err = notifier_to_errno(err); - if (err) - goto out2; - - spin_lock(&addrconf_hash_lock); - - /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { - ADBG("ipv6_add_addr: already assigned\n"); - err = -EEXIST; + if (err < 0) goto out; - } - - ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); + ifa = kzalloc(sizeof(*ifa), gfp_flags); if (!ifa) { ADBG("ipv6_add_addr: malloc failed\n"); err = -ENOBUFS; @@ -1023,6 +1030,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, rt = addrconf_dst_alloc(idev, addr, false); if (IS_ERR(rt)) { err = PTR_ERR(rt); + rt = NULL; goto out; } @@ -1053,16 +1061,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->rt = rt; ifa->idev = idev; + in6_dev_hold(idev); + /* For caller */ refcount_set(&ifa->refcnt, 1); - /* Add to big hash table */ - hash = inet6_addr_hash(addr); + rcu_read_lock_bh(); - hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); - spin_unlock(&addrconf_hash_lock); + err = ipv6_add_addr_hash(idev->dev, ifa); + if (err < 0) { + rcu_read_unlock_bh(); + goto out; + } write_lock(&idev->lock); + /* Add to inet6_dev unicast addr list. 
*/ ipv6_link_dev_addr(idev, ifa); @@ -1073,21 +1086,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, in6_ifa_hold(ifa); write_unlock(&idev->lock); -out2: + rcu_read_unlock_bh(); - if (likely(err == 0)) - inet6addr_notifier_call_chain(NETDEV_UP, ifa); - else { - kfree(ifa); - in6_dev_put(idev); + inet6addr_notifier_call_chain(NETDEV_UP, ifa); +out: + if (unlikely(err < 0)) { + if (rt) + ip6_rt_put(rt); + if (ifa) { + if (ifa->idev) + in6_dev_put(ifa->idev); + kfree(ifa); + } ifa = ERR_PTR(err); } return ifa; -out: - spin_unlock(&addrconf_hash_lock); - goto out2; } enum cleanup_prefix_rt_t { @@ -1334,7 +1349,7 @@ retry: ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen, ipv6_addr_scope(&addr), addr_flags, - tmp_valid_lft, tmp_prefered_lft); + tmp_valid_lft, tmp_prefered_lft, true); if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -2018,7 +2033,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen, scope, flags, valid_lft, - preferred_lft); + preferred_lft, false); if (IS_ERR(ifp2)) goto lock_errdad; @@ -2476,7 +2491,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags, valid_lft, - prefered_lft); + prefered_lft, false); if (IS_ERR_OR_NULL(ifp)) return -1; @@ -2845,7 +2860,7 @@ static int inet6_addr_add(struct net *net, int ifindex, } ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, - valid_lft, prefered_lft); + valid_lft, prefered_lft, true); if (!IS_ERR(ifp)) { if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { @@ -2960,7 +2975,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, + true); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -3060,7 +3076,7 @@ void addrconf_add_linklocal(struct inet6_dev *idev, #endif ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); From ff7883ea60e7b021bcd6539b8211879554c8db9a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 09:56:53 -0700 Subject: [PATCH 1203/2177] net: ipv6: Make inet6addr_validator a blocking notifier inet6addr_validator chain was added by commit 3ad7d2468f79f ("Ipvlan should return an error when an address is already in use") to allow address validation before changes are committed and to be able to fail the address change with an error back to the user. The address validation is not done for addresses received from router advertisements. Handling RAs in softirq context is the only reason for the notifier chain to be atomic versus blocking. Since the only current user, ipvlan, of the validator chain ignores softirq context, the notifier can be made blocking and simply not invoked for softirq path. The blocking option is needed by spectrum for example to validate resources for an adding an address to an interface. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ipvlan/ipvlan_main.c | 4 ---- net/ipv6/addrconf.c | 21 ++++++++++++++------- net/ipv6/addrconf_core.c | 9 +++++---- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 3cf67db513e2..6842739b6679 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -808,10 +808,6 @@ static int ipvlan_addr6_event(struct notifier_block *unused, struct net_device *dev = (struct net_device *)if6->idev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); - /* FIXME IPv6 autoconf calls us from bh without RTNL */ - if (in_softirq()) - return NOTIFY_DONE; - if (!netif_is_ipvlan(dev)) return NOTIFY_DONE; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a8d202b1b919..dd9c0c435f71 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -993,7 +993,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt = NULL; - struct in6_validator_info i6vi; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -1013,12 +1012,20 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } - i6vi.i6vi_addr = *addr; - i6vi.i6vi_dev = idev; - err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); - err = notifier_to_errno(err); - if (err < 0) - goto out; + /* validator notifier needs to be blocking; + * do not call in atomic context + */ + if (can_block) { + struct in6_validator_info i6vi = { + .i6vi_addr = *addr, + .i6vi_dev = idev, + }; + + err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); + err = notifier_to_errno(err); + if (err < 0) + goto out; + } ifa = kzalloc(sizeof(*ifa), gfp_flags); if (!ifa) { diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 9e3488d50b15..32b564dfd02a 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -88,7 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr) EXPORT_SYMBOL(__ipv6_addr_type); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain); +static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain); int register_inet6addr_notifier(struct notifier_block *nb) { @@ -110,19 +110,20 @@ EXPORT_SYMBOL(inet6addr_notifier_call_chain); int register_inet6addr_validator_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_register(&inet6addr_validator_chain, nb); + return blocking_notifier_chain_register(&inet6addr_validator_chain, nb); } EXPORT_SYMBOL(register_inet6addr_validator_notifier); int unregister_inet6addr_validator_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb); + return blocking_notifier_chain_unregister(&inet6addr_validator_chain, + nb); } EXPORT_SYMBOL(unregister_inet6addr_validator_notifier); int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) { - return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v); + return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v); } EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); From de95e04791a03de5cb681980a3880db6919e3b4a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 09:56:54 -0700 Subject: [PATCH 1204/2177] net: Add extack to validator_info structs used for address notifier Add extack to in_validator_info and in6_validator_info. Update the one user of each, ipvlan, to return an error message for failures. 
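As a shape reference for the new extack member, here is a hypothetical validator callback that rejects an address and carries a message back to the caller; demo_addr_in_use() is a placeholder, while the NL_SET_ERR_MSG()/notifier_from_errno() pattern mirrors the ipvlan hunks further below.

	#include <linux/inetdevice.h>
	#include <linux/netdevice.h>
	#include <linux/netlink.h>
	#include <linux/notifier.h>

	/* hypothetical policy check, not a real kernel helper */
	static bool demo_addr_in_use(struct in_device *in_dev, __be32 addr);

	static int demo_inetaddr_validator(struct notifier_block *nb,
					   unsigned long event, void *ptr)
	{
		struct in_validator_info *ivi = ptr;

		if (event != NETDEV_UP)
			return NOTIFY_DONE;

		if (demo_addr_in_use(ivi->ivi_dev, ivi->ivi_addr)) {
			/* message reaches userspace via netlink extack */
			NL_SET_ERR_MSG(ivi->extack, "Address already in use");
			return notifier_from_errno(-EADDRINUSE);
		}
		return NOTIFY_DONE;
	}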
Only manual configuration of an address is plumbed in the IPv6 code path. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 10 ++++++++-- include/linux/inetdevice.h | 1 + include/net/addrconf.h | 1 + net/ipv4/devinet.c | 8 +++++--- net/ipv6/addrconf.c | 22 ++++++++++++---------- 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 6842739b6679..f0ab55df57f1 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -847,8 +847,11 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused, switch (event) { case NETDEV_UP: - if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) + if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) { + NL_SET_ERR_MSG(i6vi->extack, + "Address already assigned to an ipvlan device"); return notifier_from_errno(-EADDRINUSE); + } break; } @@ -917,8 +920,11 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused, switch (event) { case NETDEV_UP: - if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) + if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) { + NL_SET_ERR_MSG(ivi->extack, + "Address already assigned to an ipvlan device"); return notifier_from_errno(-EADDRINUSE); + } break; } diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 751d051f0bc7..681dff30940b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -154,6 +154,7 @@ struct in_ifaddr { struct in_validator_info { __be32 ivi_addr; struct in_device *ivi_dev; + struct netlink_ext_ack *extack; }; int register_inetaddr_notifier(struct notifier_block *nb); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 87981cd63180..b8b16437c6d5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -55,6 +55,7 @@ struct prefix_info { struct in6_validator_info { struct in6_addr i6vi_addr; struct inet6_dev *i6vi_dev; + struct netlink_ext_ack *extack; }; #define IN6_ADDR_HSIZE_SHIFT 4 diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e1e2ec0525e6..a4573bccd6da 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -444,7 +444,7 @@ static void check_lifetime(struct work_struct *work); static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 portid) + u32 portid, struct netlink_ext_ack *extack) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -489,6 +489,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, */ ivi.ivi_addr = ifa->ifa_address; ivi.ivi_dev = ifa->ifa_dev; + ivi.extack = extack; ret = blocking_notifier_call_chain(&inetaddr_validator_chain, NETDEV_UP, &ivi); ret = notifier_to_errno(ret); @@ -521,7 +522,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, static int inet_insert_ifa(struct in_ifaddr *ifa) { - return __inet_insert_ifa(ifa, NULL, 0); + return __inet_insert_ifa(ifa, NULL, 0, NULL); } static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) @@ -902,7 +903,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, return ret; } } - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, + extack); } else { inet_free_ifa(ifa); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 
dd9c0c435f71..93f9c0a61911 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -987,7 +987,7 @@ static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, int scope, u32 flags, u32 valid_lft, u32 prefered_lft, - bool can_block) + bool can_block, struct netlink_ext_ack *extack) { gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC; struct net *net = dev_net(idev->dev); @@ -1019,6 +1019,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, struct in6_validator_info i6vi = { .i6vi_addr = *addr, .i6vi_dev = idev, + .extack = extack, }; err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); @@ -1356,7 +1357,7 @@ retry: ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen, ipv6_addr_scope(&addr), addr_flags, - tmp_valid_lft, tmp_prefered_lft, true); + tmp_valid_lft, tmp_prefered_lft, true, NULL); if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -2040,7 +2041,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen, scope, flags, valid_lft, - preferred_lft, false); + preferred_lft, false, NULL); if (IS_ERR(ifp2)) goto lock_errdad; @@ -2498,7 +2499,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags, valid_lft, - prefered_lft, false); + prefered_lft, false, NULL); if (IS_ERR_OR_NULL(ifp)) return -1; @@ -2808,7 +2809,8 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, const struct in6_addr *peer_pfx, unsigned int plen, __u32 ifa_flags, - __u32 prefered_lft, __u32 valid_lft) + __u32 prefered_lft, __u32 valid_lft, + struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -2867,7 +2869,7 @@ static int inet6_addr_add(struct net *net, int ifindex, } ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, - valid_lft, prefered_lft, true); + valid_lft, prefered_lft, true, extack); if (!IS_ERR(ifp)) { if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { @@ -2952,7 +2954,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_lock(); err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL, ireq.ifr6_prefixlen, IFA_F_PERMANENT, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL); rtnl_unlock(); return err; } @@ -2983,7 +2985,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, - true); + true, NULL); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -3083,7 +3085,7 @@ void addrconf_add_linklocal(struct inet6_dev *idev, #endif ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); @@ -4586,7 +4588,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, */ return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, ifm->ifa_prefixlen, ifa_flags, - preferred_lft, valid_lft); + preferred_lft, valid_lft, extack); } if (nlh->nlmsg_flags & NLM_F_EXCL || From 89d5dd2efdb26c78ab83be59390386cc21f8dd71 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 09:56:55 -0700 Subject: [PATCH 1205/2177] mlxsw: spectrum: router: Add 
support for address validator notifier Add support for inetaddr_validator and inet6addr_validator. The notifiers provide a means for validating ipv4 and ipv6 addresses before the addresses are installed and on failure the error is propagated back to the user. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 15 +++++- .../net/ethernet/mellanox/mlxsw/spectrum.h | 4 ++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 52 +++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e1e11c726c16..e6519f2906a4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4521,9 +4521,16 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, return notifier_from_errno(err); } +static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = { + .notifier_call = mlxsw_sp_inetaddr_valid_event, +}; + static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .notifier_call = mlxsw_sp_inetaddr_event, - .priority = 10, /* Must be called before FIB notifier block */ +}; + +static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_valid_event, }; static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { @@ -4548,7 +4555,9 @@ static int __init mlxsw_sp_module_init(void) { int err; + register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); @@ -4567,7 +4576,9 @@ err_pci_driver_register: err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); + unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); return err; } @@ -4577,7 +4588,9 @@ static void __exit mlxsw_sp_module_exit(void) mlxsw_core_driver_unregister(&mlxsw_sp_driver); unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); + unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); } module_init(mlxsw_sp_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 28feb745a38a..2a2472a09d8c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -391,8 +391,12 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); +int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_inet6addr_event(struct notifier_block *unused, unsigned long event, void *ptr); +int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device 
*l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3330120f2f8e..66bab9ce2881 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5781,6 +5781,32 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, struct mlxsw_sp_rif *rif; int err = 0; + /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */ + if (event == NETDEV_UP) + goto out; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(dev, event); +out: + return notifier_from_errno(err); +} + +int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_validator_info *ivi = (struct in_validator_info *) ptr; + struct net_device *dev = ivi->ivi_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) goto out; @@ -5833,6 +5859,10 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused, struct mlxsw_sp_inet6addr_event_work *inet6addr_work; struct net_device *dev = if6->idev->dev; + /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */ + if (event == NETDEV_UP) + return NOTIFY_DONE; + if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) return NOTIFY_DONE; @@ -5849,6 +5879,28 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused, return NOTIFY_DONE; } +int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr; + struct net_device *dev = i6vi->i6vi_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(dev, event); +out: + return notifier_from_errno(err); +} + static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, const char *mac, int mtu) { From f8fa9b4e6da77311791c7150a6ecc9368396df3b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 09:56:56 -0700 Subject: [PATCH 1206/2177] mlxsw: spectrum_router: Add extack message for RIF and VRF overflow Add extack argument down to mlxsw_sp_rif_create and mlxsw_sp_vr_create to set an error message on RIF or VR overflow. Now on overflow of either resource the user gets an informative message as opposed to failing with EBUSY. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 114 +++++++++++------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 66bab9ce2881..2420f69797a9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -731,14 +731,17 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, } static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, - u32 tb_id) + u32 tb_id, + struct netlink_ext_ack *extack) { struct mlxsw_sp_vr *vr; int err; vr = mlxsw_sp_vr_find_unused(mlxsw_sp); - if (!vr) + if (!vr) { + NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers"); return ERR_PTR(-EBUSY); + } vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr->fib4)) return ERR_CAST(vr->fib4); @@ -775,14 +778,15 @@ static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) vr->fib4 = NULL; } -static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) +static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct netlink_ext_ack *extack) { struct mlxsw_sp_vr *vr; tb_id = mlxsw_sp_fix_tb_id(tb_id); vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); if (!vr) - vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id); + vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack); return vr; } @@ -948,7 +952,8 @@ static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif_params *params); + const struct mlxsw_sp_rif_params *params, + struct netlink_ext_ack *extack); static struct mlxsw_sp_rif_ipip_lb * mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, @@ -966,7 +971,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), }; - rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common); + rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, NULL); if (IS_ERR(rif)) return ERR_CAST(rif); return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); @@ -3836,7 +3841,7 @@ mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL); if (IS_ERR(vr)) return ERR_CAST(vr); fib = mlxsw_sp_vr_fib(vr, proto); @@ -4875,7 +4880,7 @@ static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp->router->aborted) return 0; - vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL); if (IS_ERR(vr)) return PTR_ERR(vr); @@ -4908,7 +4913,7 @@ mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp->router->aborted) return 0; - vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL); if (IS_ERR(vr)) return PTR_ERR(vr); @@ -5471,7 +5476,8 @@ const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif_params *params) + const struct mlxsw_sp_rif_params *params, + struct netlink_ext_ack *extack) { u32 tb_id = l3mdev_fib_table(params->dev); const struct mlxsw_sp_rif_ops *ops; @@ -5485,14 +5491,16 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, type = mlxsw_sp_dev_rif_type(mlxsw_sp, 
params->dev); ops = mlxsw_sp->router->rif_ops_arr[type]; - vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack); if (IS_ERR(vr)) return ERR_CAST(vr); vr->rif_count++; err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces"); goto err_rif_index_alloc; + } rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); if (!rif) { @@ -5579,7 +5587,8 @@ mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, static int mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct net_device *l3_dev) + struct net_device *l3_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -5595,7 +5604,7 @@ mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, }; mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan); - rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms); + rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); if (IS_ERR(rif)) return PTR_ERR(rif); } @@ -5650,7 +5659,8 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, struct net_device *port_dev, - unsigned long event, u16 vid) + unsigned long event, u16 vid, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; @@ -5662,7 +5672,7 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, switch (event) { case NETDEV_UP: return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, - l3_dev); + l3_dev, extack); case NETDEV_DOWN: mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); break; @@ -5672,19 +5682,22 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev) || netif_is_ovs_port(port_dev)) return 0; - return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1); + return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1, + extack); } static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, struct net_device *lag_dev, - unsigned long event, u16 vid) + unsigned long event, u16 vid, + struct netlink_ext_ack *extack) { struct net_device *port_dev; struct list_head *iter; @@ -5694,7 +5707,8 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, if (mlxsw_sp_port_dev_check(port_dev)) { err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev, port_dev, - event, vid); + event, vid, + extack); if (err) return err; } @@ -5704,16 +5718,19 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { if (netif_is_bridge_port(lag_dev)) return 0; - return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1, + extack); } static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = 
mlxsw_sp_lower_get(l3_dev); struct mlxsw_sp_rif_params params = { @@ -5723,7 +5740,7 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, switch (event) { case NETDEV_UP: - rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms); + rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); if (IS_ERR(rif)) return PTR_ERR(rif); break; @@ -5737,7 +5754,8 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); u16 vid = vlan_dev_vlan_id(vlan_dev); @@ -5747,27 +5765,28 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, if (mlxsw_sp_port_dev_check(real_dev)) return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev, - event, vid); + event, vid, extack); else if (netif_is_lag_master(real_dev)) return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, - vid); + vid, extack); else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev)) - return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event); + return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack); return 0; } static int __mlxsw_sp_inetaddr_event(struct net_device *dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { if (mlxsw_sp_port_dev_check(dev)) - return mlxsw_sp_inetaddr_port_event(dev, event); + return mlxsw_sp_inetaddr_port_event(dev, event, extack); else if (netif_is_lag_master(dev)) - return mlxsw_sp_inetaddr_lag_event(dev, event); + return mlxsw_sp_inetaddr_lag_event(dev, event, extack); else if (netif_is_bridge_master(dev)) - return mlxsw_sp_inetaddr_bridge_event(dev, event); + return mlxsw_sp_inetaddr_bridge_event(dev, event, extack); else if (is_vlan_dev(dev)) - return mlxsw_sp_inetaddr_vlan_event(dev, event); + return mlxsw_sp_inetaddr_vlan_event(dev, event, extack); else return 0; } @@ -5793,7 +5812,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(dev, event); + err = __mlxsw_sp_inetaddr_event(dev, event, NULL); out: return notifier_from_errno(err); } @@ -5815,7 +5834,7 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(dev, event); + err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack); out: return notifier_from_errno(err); } @@ -5844,7 +5863,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - __mlxsw_sp_inetaddr_event(dev, event); + __mlxsw_sp_inetaddr_event(dev, event, NULL); out: rtnl_unlock(); dev_put(dev); @@ -5896,7 +5915,7 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(dev, event); + err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack); out: return notifier_from_errno(err); } @@ -5973,7 +5992,8 @@ err_rif_edit: } static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev) + struct net_device *l3_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_rif *rif; @@ -5982,9 +6002,9 @@ static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, */ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (rif) - __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN); + 
__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack); - return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP); + return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack); } static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, @@ -5995,7 +6015,7 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (!rif) return; - __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN); + __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL); } int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, @@ -6011,10 +6031,14 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, case NETDEV_PRECHANGEUPPER: return 0; case NETDEV_CHANGEUPPER: - if (info->linking) - err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev); - else + if (info->linking) { + struct netlink_ext_ack *extack; + + extack = netdev_notifier_info_to_extack(&info->info); + err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack); + } else { mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev); + } break; } @@ -6321,7 +6345,7 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) struct mlxsw_sp_vr *ul_vr; int err; - ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id); + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL); if (IS_ERR(ul_vr)) return PTR_ERR(ul_vr); From 1fba70e5b6bed53496ba1f1f16127f5be01b5fb6 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Oct 2017 11:22:51 -0700 Subject: [PATCH 1207/2177] tcp: socket option to set TCP fast open key New socket option TCP_FASTOPEN_KEY to allow different keys per listener. The listener by default uses the global key until the socket option is set. The key is a 16 bytes long binary data. This option has no effect on regular non-listener TCP sockets. Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Christoph Paasch Signed-off-by: David S. 
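For illustration, a minimal userspace sketch (not part of the patch) of installing a per-listener key with the new option. It assumes libc headers new enough to carry TCP_FASTOPEN_KEY; otherwise the value 33 added by this patch can be defined locally. The key contents and queue length are placeholders.

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_FASTOPEN_KEY
#define TCP_FASTOPEN_KEY 33	/* value introduced by this patch */
#endif

/* Install a 16-byte per-listener key, then enable Fast Open as usual. */
static int set_listener_tfo_key(int lfd, const unsigned char key[16])
{
	int qlen = 128;	/* example TFO queue length */

	if (setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN_KEY, key, 16) < 0)
		return -1;
	return setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
}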
Miller --- include/net/request_sock.h | 2 ++ include/net/tcp.h | 5 ++-- include/uapi/linux/tcp.h | 1 + net/ipv4/sysctl_net_ipv4.c | 3 +- net/ipv4/tcp.c | 33 ++++++++++++++++++++++ net/ipv4/tcp_fastopen.c | 56 +++++++++++++++++++++++++++----------- net/ipv4/tcp_ipv4.c | 1 + 7 files changed, 82 insertions(+), 19 deletions(-) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 23e22054aa60..347015515a7d 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -150,6 +150,8 @@ struct fastopen_queue { spinlock_t lock; int qlen; /* # of pending (TCP_SYN_RECV) reqs */ int max_qlen; /* != 0 iff TFO is currently enabled */ + + struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */ }; /** struct request_sock_queue - queue of request_socks diff --git a/include/net/tcp.h b/include/net/tcp.h index 3b3b9b968e2d..1efe8365cb28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1555,9 +1555,10 @@ struct tcp_fastopen_request { int copied; /* queued in tcp_connect() */ }; void tcp_free_fastopen_req(struct tcp_sock *tp); - +void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); -int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len); +int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, + void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 15c25eccab2b..69c7493e42f8 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -119,6 +119,7 @@ enum { #define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ #define TCP_ULP 31 /* Attach a ULP to a TCP connection */ #define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ +#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */ struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cac8dd309f39..81d218346cf7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -284,7 +284,8 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, ret = -EINVAL; goto bad_key; } - tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH); + tcp_fastopen_reset_cipher(net, NULL, user_key, + TCP_FASTOPEN_KEY_LENGTH); } bad_key: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b34850d361f..8b1fa4dd4538 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2571,6 +2571,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } + case TCP_FASTOPEN_KEY: { + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + + if (optlen != sizeof(key)) + return -EINVAL; + + if (copy_from_user(key, optval, optlen)) + return -EFAULT; + + return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key)); + } default: /* fallthru */ break; @@ -3157,6 +3168,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; + case TCP_FASTOPEN_KEY: { + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct tcp_fastopen_context *ctx; + + if (get_user(len, optlen)) + return -EFAULT; + + rcu_read_lock(); + ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); + if (ctx) + memcpy(key, ctx->key, sizeof(key)); + else + len = 0; + rcu_read_unlock(); + + len = min_t(unsigned int, len, sizeof(key)); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, key, len)) + return 
-EFAULT; + return 0; + } case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 7ee4aadcdd71..21075ce19cb6 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -29,7 +29,7 @@ void tcp_fastopen_init_key_once(struct net *net) * for a valid cookie, so this is an acceptable risk. */ get_random_bytes(key, sizeof(key)); - tcp_fastopen_reset_cipher(net, key, sizeof(key)); + tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key)); } static void tcp_fastopen_ctx_free(struct rcu_head *head) @@ -40,6 +40,16 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head) kfree(ctx); } +void tcp_fastopen_destroy_cipher(struct sock *sk) +{ + struct tcp_fastopen_context *ctx; + + ctx = rcu_dereference_protected( + inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1); + if (ctx) + call_rcu(&ctx->rcu, tcp_fastopen_ctx_free); +} + void tcp_fastopen_ctx_destroy(struct net *net) { struct tcp_fastopen_context *ctxt; @@ -55,10 +65,12 @@ void tcp_fastopen_ctx_destroy(struct net *net) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); } -int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len) +int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, + void *key, unsigned int len) { - int err; struct tcp_fastopen_context *ctx, *octx; + struct fastopen_queue *q; + int err; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -79,27 +91,39 @@ error: kfree(ctx); } memcpy(ctx->key, key, len); - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); - octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); - spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); + if (sk) { + q = &inet_csk(sk)->icsk_accept_queue.fastopenq; + spin_lock_bh(&q->lock); + octx = rcu_dereference_protected(q->ctx, + lockdep_is_held(&q->lock)); + rcu_assign_pointer(q->ctx, ctx); + spin_unlock_bh(&q->lock); + } else { + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); + octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); + rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); + spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); + } if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); return err; } -static bool __tcp_fastopen_cookie_gen(struct net *net, - const void *path, +static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path, struct tcp_fastopen_cookie *foc) { struct tcp_fastopen_context *ctx; bool ok = false; rcu_read_lock(); - ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + + ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); + if (!ctx) + ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); + if (ctx) { crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); foc->len = TCP_FASTOPEN_COOKIE_SIZE; @@ -115,7 +139,7 @@ static bool __tcp_fastopen_cookie_gen(struct net *net, * * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. 
*/ -static bool tcp_fastopen_cookie_gen(struct net *net, +static bool tcp_fastopen_cookie_gen(struct sock *sk, struct request_sock *req, struct sk_buff *syn, struct tcp_fastopen_cookie *foc) @@ -124,7 +148,7 @@ static bool tcp_fastopen_cookie_gen(struct net *net, const struct iphdr *iph = ip_hdr(syn); __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; - return __tcp_fastopen_cookie_gen(net, path, foc); + return __tcp_fastopen_cookie_gen(sk, path, foc); } #if IS_ENABLED(CONFIG_IPV6) @@ -132,13 +156,13 @@ static bool tcp_fastopen_cookie_gen(struct net *net, const struct ipv6hdr *ip6h = ipv6_hdr(syn); struct tcp_fastopen_cookie tmp; - if (__tcp_fastopen_cookie_gen(net, &ip6h->saddr, &tmp)) { + if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) { struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; - return __tcp_fastopen_cookie_gen(net, buf, foc); + return __tcp_fastopen_cookie_gen(sk, buf, foc); } } #endif @@ -313,7 +337,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, goto fastopen; if (foc->len >= 0 && /* Client presents or requests a cookie */ - tcp_fastopen_cookie_gen(sock_net(sk), req, skb, &valid_foc) && + tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) && foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ecee4ddb24c5..28ca4e177047 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1893,6 +1893,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); + tcp_fastopen_destroy_cipher(sk); tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); From 6eba87c781aaa02f6bf1b64df2f8b12833eee521 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 11:39:13 -0700 Subject: [PATCH 1208/2177] net: ipv4: Change fib notifiers to take a fib_alias All of the notifier data (fib_info, tos, type and table id) are contained in the fib_alias. Pass it to the notifier instead of each data separately shortening the argument list by 3. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. 
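Note that consumers of the FIB notifier chain are unaffected by this refactor: they still receive a fully populated fib_entry_notifier_info. A hypothetical handler, for illustration only:

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <net/fib_notifier.h>
#include <net/ip_fib.h>

static int example_fib_event(struct notifier_block *nb,
			     unsigned long event, void *ptr)
{
	struct fib_entry_notifier_info *fen_info;

	if (event != FIB_EVENT_ENTRY_ADD && event != FIB_EVENT_ENTRY_DEL)
		return NOTIFY_DONE;

	fen_info = container_of(ptr, struct fib_entry_notifier_info, info);
	/* fen_info->dst, ->dst_len, ->fi, ->tos, ->type and ->tb_id are
	 * populated exactly as before this change */
	return NOTIFY_OK;
}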
Miller --- net/ipv4/fib_trie.c | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index c636650a6a70..aaa1ba09afaa 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -87,32 +87,30 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, u32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) + int dst_len, struct fib_alias *fa) { struct fib_entry_notifier_info info = { .dst = dst, .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .tb_id = tb_id, + .fi = fa->fa_info, + .tos = fa->fa_tos, + .type = fa->fa_type, + .tb_id = fa->tb_id, }; return call_fib4_notifier(nb, net, event_type, &info.info); } static int call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) + int dst_len, struct fib_alias *fa) { struct fib_entry_notifier_info info = { .dst = dst, .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .tb_id = tb_id, + .fi = fa->fa_info, + .tos = fa->fa_tos, + .type = fa->fa_type, + .tb_id = fa->tb_id, }; return call_fib4_notifiers(net, event_type, &info.info); } @@ -1216,9 +1214,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->fa_default = -1; call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, - key, plen, fi, - new_fa->fa_tos, cfg->fc_type, - tb->tb_id); + key, plen, new_fa); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1273,8 +1269,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type, - tb->tb_id); + call_fib_entry_notifiers(net, event, key, plen, new_fa); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1574,8 +1569,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, return -ESRCH; call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, - fa_to_delete->fa_info, tos, - fa_to_delete->fa_type, tb->tb_id); + fa_to_delete); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1892,9 +1886,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb) call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, n->key, - KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); + KEYLENGTH - fa->fa_slen, fa); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -1932,8 +1924,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, continue; call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, - KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, - fa->fa_type, fa->tb_id); + KEYLENGTH - fa->fa_slen, fa); } } From aec72f3392b1d598a979e89c4fdb131965ae0ab3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Oct 2017 12:12:09 -0700 Subject: [PATCH 1209/2177] net-tun: fix panics at dismantle time syzkaller got crashes at dismantle time [1] It is not correct to test (tun->flags & IFF_NAPI) in tun_napi_disable() and tun_napi_del() : Each tun_file can have different mode, depending on how they were created. Similarly I have changed tun_get_user() and tun_poll_controller() to use the new tfile->napi_enabled boolean. 
[ 154.331360] BUG: unable to handle kernel NULL pointer dereference at (null) [ 154.339220] IP: [] hrtimer_active+0x26/0x60 [ 154.344983] PGD 0 [ 154.347009] Oops: 0000 [#1] SMP [ 154.350680] gsmi: Log Shutdown Reason 0x03 [ 154.379572] task: ffff994719150dc0 ti: ffff99475c0ae000 task.ti: ffff99475c0ae000 [ 154.387043] RIP: 0010:[] [] hrtimer_active+0x26/0x60 [ 154.395232] RSP: 0018:ffff99475c0afce8 EFLAGS: 00010246 [ 154.400542] RAX: ffff994754850ac0 RBX: ffff994753e65408 RCX: ffff994753e65388 [ 154.407666] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff994753e65408 [ 154.414790] RBP: ffff99475c0afce8 R08: 0000000000000000 R09: 0000000000000000 [ 154.421921] R10: ffff99475f6f5910 R11: 0000000000000001 R12: 0000000000000000 [ 154.429044] R13: ffff99417deab668 R14: ffff99417deaa780 R15: ffff99475f45dde0 [ 154.436174] FS: 0000000000000000(0000) GS:ffff994767a00000(0000) knlGS:0000000000000000 [ 154.444249] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 154.449986] CR2: 0000000000000000 CR3: 00000005a8a0e000 CR4: 0000000000022670 [ 154.457110] Stack: [ 154.459120] ffff99475c0afd28 ffffffff9634d614 1000000000000000 0000000000000000 [ 154.466598] ffffe54240000000 ffff994753e65408 ffff994753e653a8 ffff99417deab668 [ 154.474067] ffff99475c0afd48 ffffffff9634d6fd ffff99474c2be678 ffff994753e65398 [ 154.481537] Call Trace: [ 154.483985] [] hrtimer_try_to_cancel+0x24/0xf0 [ 154.490074] [] hrtimer_cancel+0x1d/0x30 [ 154.495563] [] napi_disable+0x3c/0x70 [ 154.500875] [] __tun_detach+0xd2/0x360 [ 154.506272] [] tun_chr_close+0x27/0x40 [ 154.511669] [] __fput+0xd6/0x1e0 [ 154.516548] [] ____fput+0xe/0x10 [ 154.521429] [] task_work_run+0x72/0x90 [ 154.526827] [] do_exit+0x317/0xb60 [ 154.531879] [] do_group_exit+0x3f/0xa0 [ 154.537275] [] SyS_exit_group+0x17/0x20 [ 154.542769] [] entry_SYSCALL_64_fastpath+0x12/0x17 Fixes: 943170998b20 ("net-tun: enable NAPI for TUN/TAP driver") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/tun.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index c64ec19af9b7..3b41c36eae90 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -175,6 +175,7 @@ struct tun_file { unsigned int ifindex; }; struct napi_struct napi; + bool napi_enabled; struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; @@ -276,6 +277,7 @@ static int tun_napi_poll(struct napi_struct *napi, int budget) static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, bool napi_en) { + tfile->napi_enabled = napi_en; if (napi_en) { netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, NAPI_POLL_WEIGHT); @@ -286,13 +288,13 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile) { - if (tun->flags & IFF_NAPI) + if (tfile->napi_enabled) napi_disable(&tfile->napi); } static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) { - if (tun->flags & IFF_NAPI) + if (tfile->napi_enabled) netif_napi_del(&tfile->napi); } @@ -1055,7 +1057,8 @@ static void tun_poll_controller(struct net_device *dev) rcu_read_lock(); for (i = 0; i < tun->numqueues; i++) { tfile = rcu_dereference(tun->tfiles[i]); - napi_schedule(&tfile->napi); + if (tfile->napi_enabled) + napi_schedule(&tfile->napi); } rcu_read_unlock(); } @@ -1749,7 +1752,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, napi_gro_frags(&tfile->napi); local_bh_enable(); mutex_unlock(&tfile->napi_mutex); - } else if (tun->flags & IFF_NAPI) { + } else if (tfile->napi_enabled) { struct sk_buff_head *queue = &tfile->sk.sk_write_queue; int queue_len; From 6e71b04a82248ccf13a94b85cbc674a9fefe53f5 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:22 -0700 Subject: [PATCH 1210/2177] bpf: Add file mode configuration into bpf maps Introduce the map read/write flags to the eBPF syscalls that returns the map fd. The flags is used to set up the file mode when construct a new file descriptor for bpf maps. To not break the backward capability, the f_flags is set to O_RDWR if the flag passed by syscall is 0. Otherwise it should be O_RDONLY or O_WRONLY. When the userspace want to modify or read the map content, it will check the file mode to see if it is allowed to make the change. Signed-off-by: Chenbo Feng Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
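For illustration, a userspace sketch (not part of the patch) of fetching a read-only fd for a pinned map through the extended BPF_OBJ_GET command. It assumes uapi headers that already contain the file_flags field and the BPF_F_RDONLY flag added here; the pin path is whatever the map was pinned to. Update or delete calls through such an fd are rejected with EPERM.

#include <linux/bpf.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Retrieve a read-only fd for a map pinned in bpffs. */
static int bpf_obj_get_rdonly(const char *path)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.pathname = (uint64_t)(unsigned long)path;
	attr.file_flags = BPF_F_RDONLY;

	return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}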
Miller --- include/linux/bpf.h | 8 ++-- include/uapi/linux/bpf.h | 6 +++ kernel/bpf/arraymap.c | 6 ++- kernel/bpf/devmap.c | 5 ++- kernel/bpf/hashtab.c | 5 ++- kernel/bpf/inode.c | 15 ++++--- kernel/bpf/lpm_trie.c | 3 +- kernel/bpf/sockmap.c | 5 ++- kernel/bpf/stackmap.c | 5 ++- kernel/bpf/syscall.c | 88 +++++++++++++++++++++++++++++++++++----- net/netfilter/xt_bpf.c | 2 +- 11 files changed, 122 insertions(+), 26 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d67ccdc0099f..3e5508f2fa87 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -315,11 +315,11 @@ void bpf_map_area_free(void *base); extern int sysctl_unprivileged_bpf_disabled; -int bpf_map_new_fd(struct bpf_map *map); +int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); -int bpf_obj_get_user(const char __user *pathname); +int bpf_obj_get_user(const char __user *pathname, int flags); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); @@ -338,6 +338,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); +int bpf_get_file_flag(int flags); + /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. * Best-effort only. No barriers here, since it _will_ race with concurrent @@ -421,7 +423,7 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } -static inline int bpf_obj_get_user(const char __user *pathname) +static inline int bpf_obj_get_user(const char __user *pathname, int flags) { return -EOPNOTSUPP; } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4303fb6c3817..d83f95ea6a1b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -218,6 +218,10 @@ enum bpf_attach_type { #define BPF_OBJ_NAME_LEN 16U +/* Flags for accessing BPF object */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -260,6 +264,7 @@ union bpf_attr { struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; + __u32 file_flags; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ @@ -287,6 +292,7 @@ union bpf_attr { __u32 map_id; }; __u32 next_id; + __u32 open_flags; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 68d866628be0..988c04c91e10 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -19,6 +19,9 @@ #include "map_in_map.h" +#define ARRAY_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + static void bpf_array_free_percpu(struct bpf_array *array) { int i; @@ -56,7 +59,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || - attr->value_size == 0 || attr->map_flags & ~BPF_F_NUMA_NODE || + attr->value_size == 0 || + attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || (percpu && numa_node != NUMA_NO_NODE)) return ERR_PTR(-EINVAL); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index e093d9a2c4dd..e5d3de7cff2e 100644 --- 
a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -50,6 +50,9 @@ #include #include +#define DEV_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + struct bpf_dtab_netdev { struct net_device *dev; struct bpf_dtab *dtab; @@ -80,7 +83,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || - attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) + attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); dtab = kzalloc(sizeof(*dtab), GFP_USER); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 431126f31ea3..919955236e63 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -18,8 +18,9 @@ #include "bpf_lru_list.h" #include "map_in_map.h" -#define HTAB_CREATE_FLAG_MASK \ - (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE) +#define HTAB_CREATE_FLAG_MASK \ + (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ + BPF_F_RDONLY | BPF_F_WRONLY) struct bucket { struct hlist_nulls_head head; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index be1dde967208..01aaef1a77c5 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -295,7 +295,7 @@ out: } static void *bpf_obj_do_get(const struct filename *pathname, - enum bpf_type *type) + enum bpf_type *type, int flags) { struct inode *inode; struct path path; @@ -307,7 +307,7 @@ static void *bpf_obj_do_get(const struct filename *pathname, return ERR_PTR(ret); inode = d_backing_inode(path.dentry); - ret = inode_permission(inode, MAY_WRITE); + ret = inode_permission(inode, ACC_MODE(flags)); if (ret) goto out; @@ -326,18 +326,23 @@ out: return ERR_PTR(ret); } -int bpf_obj_get_user(const char __user *pathname) +int bpf_obj_get_user(const char __user *pathname, int flags) { enum bpf_type type = BPF_TYPE_UNSPEC; struct filename *pname; int ret = -ENOENT; + int f_flags; void *raw; + f_flags = bpf_get_file_flag(flags); + if (f_flags < 0) + return f_flags; + pname = getname(pathname); if (IS_ERR(pname)) return PTR_ERR(pname); - raw = bpf_obj_do_get(pname, &type); + raw = bpf_obj_do_get(pname, &type, f_flags); if (IS_ERR(raw)) { ret = PTR_ERR(raw); goto out; @@ -346,7 +351,7 @@ int bpf_obj_get_user(const char __user *pathname) if (type == BPF_TYPE_PROG) ret = bpf_prog_new_fd(raw); else if (type == BPF_TYPE_MAP) - ret = bpf_map_new_fd(raw); + ret = bpf_map_new_fd(raw, f_flags); else goto out; diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 34d8a690ea05..885e45479680 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -495,7 +495,8 @@ out: #define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) -#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE) +#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \ + BPF_F_RDONLY | BPF_F_WRONLY) static struct bpf_map *trie_alloc(union bpf_attr *attr) { diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index a298d6666698..86ec846f2d5e 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -40,6 +40,9 @@ #include #include +#define SOCK_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + struct bpf_stab { struct bpf_map map; struct sock **sock_map; @@ -489,7 +492,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || - attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) + 
attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); if (attr->value_size > KMALLOC_MAX_SIZE) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 135be433e9a0..a15bc636cc98 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -11,6 +11,9 @@ #include #include "percpu_freelist.h" +#define STACK_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + struct stack_map_bucket { struct pcpu_freelist_node fnode; u32 hash; @@ -60,7 +63,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - if (attr->map_flags & ~BPF_F_NUMA_NODE) + if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); /* check sanity of attributes */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0e893cac6795..676a06e6b322 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -34,6 +34,8 @@ #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) +#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) + DEFINE_PER_CPU(int, bpf_prog_active); static DEFINE_IDR(prog_idr); static DEFINE_SPINLOCK(prog_idr_lock); @@ -294,17 +296,48 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) } #endif +static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, + loff_t *ppos) +{ + /* We need this handler such that alloc_file() enables + * f_mode with FMODE_CAN_READ. + */ + return -EINVAL; +} + +static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, + size_t siz, loff_t *ppos) +{ + /* We need this handler such that alloc_file() enables + * f_mode with FMODE_CAN_WRITE. + */ + return -EINVAL; +} + static const struct file_operations bpf_map_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_map_show_fdinfo, #endif .release = bpf_map_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, }; -int bpf_map_new_fd(struct bpf_map *map) +int bpf_map_new_fd(struct bpf_map *map, int flags) { return anon_inode_getfd("bpf-map", &bpf_map_fops, map, - O_RDWR | O_CLOEXEC); + flags | O_CLOEXEC); +} + +int bpf_get_file_flag(int flags) +{ + if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) + return -EINVAL; + if (flags & BPF_F_RDONLY) + return O_RDONLY; + if (flags & BPF_F_WRONLY) + return O_WRONLY; + return O_RDWR; } /* helper macro to check that unused fields 'union bpf_attr' are zero */ @@ -344,12 +377,17 @@ static int map_create(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); struct bpf_map *map; + int f_flags; int err; err = CHECK_ATTR(BPF_MAP_CREATE); if (err) return -EINVAL; + f_flags = bpf_get_file_flag(attr->map_flags); + if (f_flags < 0) + return f_flags; + if (numa_node != NUMA_NO_NODE && ((unsigned int)numa_node >= nr_node_ids || !node_online(numa_node))) @@ -375,7 +413,7 @@ static int map_create(union bpf_attr *attr) if (err) goto free_map; - err = bpf_map_new_fd(map); + err = bpf_map_new_fd(map, f_flags); if (err < 0) { /* failed to allocate fd. 
* bpf_map_put() is needed because the above @@ -490,6 +528,11 @@ static int map_lookup_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (!(f.file->f_mode & FMODE_CAN_READ)) { + err = -EPERM; + goto err_put; + } + key = memdup_user(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -570,6 +613,11 @@ static int map_update_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + key = memdup_user(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -659,6 +707,11 @@ static int map_delete_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + key = memdup_user(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -702,6 +755,11 @@ static int map_get_next_key(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (!(f.file->f_mode & FMODE_CAN_READ)) { + err = -EPERM; + goto err_put; + } + if (ukey) { key = memdup_user(ukey, map->key_size); if (IS_ERR(key)) { @@ -908,6 +966,8 @@ static const struct file_operations bpf_prog_fops = { .show_fdinfo = bpf_prog_show_fdinfo, #endif .release = bpf_prog_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, }; int bpf_prog_new_fd(struct bpf_prog *prog) @@ -1117,11 +1177,11 @@ free_prog_nouncharge: return err; } -#define BPF_OBJ_LAST_FIELD bpf_fd +#define BPF_OBJ_LAST_FIELD file_flags static int bpf_obj_pin(const union bpf_attr *attr) { - if (CHECK_ATTR(BPF_OBJ)) + if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) return -EINVAL; return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); @@ -1129,10 +1189,12 @@ static int bpf_obj_pin(const union bpf_attr *attr) static int bpf_obj_get(const union bpf_attr *attr) { - if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) + if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || + attr->file_flags & ~BPF_OBJ_FLAG_MASK) return -EINVAL; - return bpf_obj_get_user(u64_to_user_ptr(attr->pathname)); + return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), + attr->file_flags); } #ifdef CONFIG_CGROUP_BPF @@ -1392,20 +1454,26 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) return fd; } -#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id +#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags static int bpf_map_get_fd_by_id(const union bpf_attr *attr) { struct bpf_map *map; u32 id = attr->map_id; + int f_flags; int fd; - if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID)) + if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || + attr->open_flags & ~BPF_OBJ_FLAG_MASK) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + f_flags = bpf_get_file_flag(attr->open_flags); + if (f_flags < 0) + return f_flags; + spin_lock_bh(&map_idr_lock); map = idr_find(&map_idr, id); if (map) @@ -1417,7 +1485,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - fd = bpf_map_new_fd(map); + fd = bpf_map_new_fd(map, f_flags); if (fd < 0) bpf_map_put(map); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 29123934887b..041da0d9c06f 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -56,7 +56,7 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) int retval, fd; set_fs(KERNEL_DS); - fd = bpf_obj_get_user(path); + fd = bpf_obj_get_user(path, 0); set_fs(oldfs); if (fd < 0) return fd; From e043325b308745d6968673e7b53080bd7cc39f08 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:23 
-0700 Subject: [PATCH 1211/2177] bpf: Add tests for eBPF file mode Two related tests are added into bpf selftest to test read only map and write only map. The tests verified the read only and write only flags are working on hash maps. Signed-off-by: Chenbo Feng Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_maps.c | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index fe3a443a1102..896f23cfe918 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1033,6 +1033,51 @@ static void test_map_parallel(void) assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); } +static void test_map_rdonly(void) +{ + int i, fd, key = 0, value = 0; + + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE, map_flags | BPF_F_RDONLY); + if (fd < 0) { + printf("Failed to create map for read only test '%s'!\n", + strerror(errno)); + exit(1); + } + + key = 1; + value = 1234; + /* Insert key=1 element. */ + assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 && + errno == EPERM); + + /* Check that key=2 is not found. */ + assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT); +} + +static void test_map_wronly(void) +{ + int i, fd, key = 0, value = 0; + + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE, map_flags | BPF_F_WRONLY); + if (fd < 0) { + printf("Failed to create map for read only test '%s'!\n", + strerror(errno)); + exit(1); + } + + key = 1; + value = 1234; + /* Insert key=1 element. */ + assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0) + + /* Check that key=2 is not found. */ + assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM); + assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM); +} + static void run_all_tests(void) { test_hashmap(0, NULL); @@ -1050,6 +1095,9 @@ static void run_all_tests(void) test_map_large(); test_map_parallel(); test_map_stress(); + + test_map_rdonly(); + test_map_wronly(); } int main(void) From afdb09c720b62b8090584c11151d856df330e57d Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:24 -0700 Subject: [PATCH 1212/2177] security: bpf: Add LSM hooks for bpf object related syscall Introduce several LSM hooks for the syscalls that will allow the userspace to access to eBPF object such as eBPF programs and eBPF maps. The security check is aimed to enforce a per object security protection for eBPF object so only processes with the right priviliges can read/write to a specific map or use a specific eBPF program. Besides that, a general security hook is added before the multiplexer of bpf syscall to check the cmd and the attribute used for the command. The actual security module can decide which command need to be checked and how the cmd should be checked. Signed-off-by: Chenbo Feng Acked-by: James Morris Signed-off-by: David S. 
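For illustration, a sketch of how a hypothetical security module could consume these hooks. It follows the hook prototypes added in this patch and the registration pattern used by SELinux later in the series, but it is not part of the series itself.

#include <linux/bpf.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/lsm_hooks.h>

static int example_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	/* e.g. restrict program loading to CAP_SYS_ADMIN callers */
	if (cmd == BPF_PROG_LOAD && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}

static int example_bpf_map(struct bpf_map *map, fmode_t fmode)
{
	/* invoked whenever a map fd is handed out; fmode carries the
	 * requested access (FMODE_READ/FMODE_WRITE) */
	return 0;
}

static struct security_hook_list example_hooks[] __lsm_ro_after_init = {
	LSM_HOOK_INIT(bpf, example_bpf),
	LSM_HOOK_INIT(bpf_map, example_bpf_map),
};

static int __init example_lsm_init(void)
{
	security_add_hooks(example_hooks, ARRAY_SIZE(example_hooks), "example");
	return 0;
}
security_initcall(example_lsm_init);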
Miller --- include/linux/bpf.h | 6 +++++ include/linux/lsm_hooks.h | 54 +++++++++++++++++++++++++++++++++++++++ include/linux/security.h | 45 ++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 34 ++++++++++++++++++++++-- security/security.c | 32 +++++++++++++++++++++++ 5 files changed, 169 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3e5508f2fa87..84c192da3e0b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -57,6 +57,9 @@ struct bpf_map { atomic_t usercnt; struct bpf_map *inner_map_meta; char name[BPF_OBJ_NAME_LEN]; +#ifdef CONFIG_SECURITY + void *security; +#endif }; /* function argument constraints */ @@ -193,6 +196,9 @@ struct bpf_prog_aux { struct user_struct *user; u64 load_time; /* ns since boottime */ char name[BPF_OBJ_NAME_LEN]; +#ifdef CONFIG_SECURITY + void *security; +#endif union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c9258124e417..7161d8e7ee79 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1351,6 +1351,40 @@ * @inode we wish to get the security context of. * @ctx is a pointer in which to place the allocated security context. * @ctxlen points to the place to put the length of @ctx. + * + * Security hooks for using the eBPF maps and programs functionalities through + * eBPF syscalls. + * + * @bpf: + * Do a initial check for all bpf syscalls after the attribute is copied + * into the kernel. The actual security module can implement their own + * rules to check the specific cmd they need. + * + * @bpf_map: + * Do a check when the kernel generate and return a file descriptor for + * eBPF maps. + * + * @map: bpf map that we want to access + * @mask: the access flags + * + * @bpf_prog: + * Do a check when the kernel generate and return a file descriptor for + * eBPF programs. + * + * @prog: bpf prog that userspace want to use. + * + * @bpf_map_alloc_security: + * Initialize the security field inside bpf map. + * + * @bpf_map_free_security: + * Clean up the security information stored inside bpf map. + * + * @bpf_prog_alloc_security: + * Initialize the security field inside bpf program. + * + * @bpf_prog_free_security: + * Clean up the security information stored inside bpf prog. 
+ * */ union security_list_options { int (*binder_set_context_mgr)(struct task_struct *mgr); @@ -1682,6 +1716,17 @@ union security_list_options { struct audit_context *actx); void (*audit_rule_free)(void *lsmrule); #endif /* CONFIG_AUDIT */ + +#ifdef CONFIG_BPF_SYSCALL + int (*bpf)(int cmd, union bpf_attr *attr, + unsigned int size); + int (*bpf_map)(struct bpf_map *map, fmode_t fmode); + int (*bpf_prog)(struct bpf_prog *prog); + int (*bpf_map_alloc_security)(struct bpf_map *map); + void (*bpf_map_free_security)(struct bpf_map *map); + int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); + void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); +#endif /* CONFIG_BPF_SYSCALL */ }; struct security_hook_heads { @@ -1901,6 +1946,15 @@ struct security_hook_heads { struct list_head audit_rule_match; struct list_head audit_rule_free; #endif /* CONFIG_AUDIT */ +#ifdef CONFIG_BPF_SYSCALL + struct list_head bpf; + struct list_head bpf_map; + struct list_head bpf_prog; + struct list_head bpf_map_alloc_security; + struct list_head bpf_map_free_security; + struct list_head bpf_prog_alloc_security; + struct list_head bpf_prog_free_security; +#endif /* CONFIG_BPF_SYSCALL */ } __randomize_layout; /* diff --git a/include/linux/security.h b/include/linux/security.h index ce6265960d6c..18800b0911e5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,6 +31,7 @@ #include #include #include +#include struct linux_binprm; struct cred; @@ -1730,6 +1731,50 @@ static inline void securityfs_remove(struct dentry *dentry) #endif +#ifdef CONFIG_BPF_SYSCALL +#ifdef CONFIG_SECURITY +extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); +extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); +extern int security_bpf_prog(struct bpf_prog *prog); +extern int security_bpf_map_alloc(struct bpf_map *map); +extern void security_bpf_map_free(struct bpf_map *map); +extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); +extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +#else +static inline int security_bpf(int cmd, union bpf_attr *attr, + unsigned int size) +{ + return 0; +} + +static inline int security_bpf_map(struct bpf_map *map, fmode_t fmode) +{ + return 0; +} + +static inline int security_bpf_prog(struct bpf_prog *prog) +{ + return 0; +} + +static inline int security_bpf_map_alloc(struct bpf_map *map) +{ + return 0; +} + +static inline void security_bpf_map_free(struct bpf_map *map) +{ } + +static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +{ + return 0; +} + +static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +{ } +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_BPF_SYSCALL */ + #ifdef CONFIG_SECURITY static inline char *alloc_secdata(void) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 676a06e6b322..5cb56d06b48d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -212,6 +212,7 @@ static void bpf_map_free_deferred(struct work_struct *work) struct bpf_map *map = container_of(work, struct bpf_map, work); bpf_map_uncharge_memlock(map); + security_bpf_map_free(map); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -325,6 +326,12 @@ static const struct file_operations bpf_map_fops = { int bpf_map_new_fd(struct bpf_map *map, int flags) { + int ret; + + ret = security_bpf_map(map, OPEN_FMODE(flags)); + if (ret < 0) + return ret; + return anon_inode_getfd("bpf-map", &bpf_map_fops, map, flags | O_CLOEXEC); } @@ -405,10 +412,14 @@ static int map_create(union bpf_attr 
*attr) atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); - err = bpf_map_charge_memlock(map); + err = security_bpf_map_alloc(map); if (err) goto free_map_nouncharge; + err = bpf_map_charge_memlock(map); + if (err) + goto free_map_sec; + err = bpf_map_alloc_id(map); if (err) goto free_map; @@ -430,6 +441,8 @@ static int map_create(union bpf_attr *attr) free_map: bpf_map_uncharge_memlock(map); +free_map_sec: + security_bpf_map_free(map); free_map_nouncharge: map->ops->map_free(map); return err; @@ -914,6 +927,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) free_used_maps(aux); bpf_prog_uncharge_memlock(aux->prog); + security_bpf_prog_free(aux); bpf_prog_free(aux->prog); } @@ -972,6 +986,12 @@ static const struct file_operations bpf_prog_fops = { int bpf_prog_new_fd(struct bpf_prog *prog) { + int ret; + + ret = security_bpf_prog(prog); + if (ret < 0) + return ret; + return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); } @@ -1111,10 +1131,14 @@ static int bpf_prog_load(union bpf_attr *attr) if (!prog) return -ENOMEM; - err = bpf_prog_charge_memlock(prog); + err = security_bpf_prog_alloc(prog->aux); if (err) goto free_prog_nouncharge; + err = bpf_prog_charge_memlock(prog); + if (err) + goto free_prog_sec; + prog->len = attr->insn_cnt; err = -EFAULT; @@ -1172,6 +1196,8 @@ free_used_maps: free_used_maps(prog->aux); free_prog: bpf_prog_uncharge_memlock(prog); +free_prog_sec: + security_bpf_prog_free(prog->aux); free_prog_nouncharge: bpf_prog_free(prog); return err; @@ -1640,6 +1666,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz if (copy_from_user(&attr, uattr, size) != 0) return -EFAULT; + err = security_bpf(cmd, &attr, size); + if (err < 0) + return err; + switch (cmd) { case BPF_MAP_CREATE: err = map_create(&attr); diff --git a/security/security.c b/security/security.c index 4bf0f571b4ef..1cd8526cb0b7 100644 --- a/security/security.c +++ b/security/security.c @@ -12,6 +12,7 @@ * (at your option) any later version. */ +#include #include #include #include @@ -1703,3 +1704,34 @@ int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule, actx); } #endif /* CONFIG_AUDIT */ + +#ifdef CONFIG_BPF_SYSCALL +int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) +{ + return call_int_hook(bpf, 0, cmd, attr, size); +} +int security_bpf_map(struct bpf_map *map, fmode_t fmode) +{ + return call_int_hook(bpf_map, 0, map, fmode); +} +int security_bpf_prog(struct bpf_prog *prog) +{ + return call_int_hook(bpf_prog, 0, prog); +} +int security_bpf_map_alloc(struct bpf_map *map) +{ + return call_int_hook(bpf_map_alloc_security, 0, map); +} +int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +{ + return call_int_hook(bpf_prog_alloc_security, 0, aux); +} +void security_bpf_map_free(struct bpf_map *map) +{ + call_void_hook(bpf_map_free_security, map); +} +void security_bpf_prog_free(struct bpf_prog_aux *aux) +{ + call_void_hook(bpf_prog_free_security, aux); +} +#endif /* CONFIG_BPF_SYSCALL */ From ec27c3568a34c7fe5fcf4ac0a354eda77687f7eb Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:25 -0700 Subject: [PATCH 1213/2177] selinux: bpf: Add selinux check for eBPF syscall operations Implement the actual checks introduced to eBPF related syscalls. This implementation use the security field inside bpf object to store a sid that identify the bpf object. And when processes try to access the object, selinux will check if processes have the right privileges. 
The creation of eBPF object are also checked at the general bpf check hook and new cmd introduced to eBPF domain can also be checked there. Signed-off-by: Chenbo Feng Acked-by: Alexei Starovoitov Reviewed-by: James Morris Signed-off-by: David S. Miller --- security/selinux/hooks.c | 111 ++++++++++++++++++++++++++++ security/selinux/include/classmap.h | 2 + security/selinux/include/objsec.h | 4 + 3 files changed, 117 insertions(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f5d304736852..12cf7de8cbed 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -85,6 +85,7 @@ #include #include #include +#include #include "avc.h" #include "objsec.h" @@ -6252,6 +6253,106 @@ static void selinux_ib_free_security(void *ib_sec) } #endif +#ifdef CONFIG_BPF_SYSCALL +static int selinux_bpf(int cmd, union bpf_attr *attr, + unsigned int size) +{ + u32 sid = current_sid(); + int ret; + + switch (cmd) { + case BPF_MAP_CREATE: + ret = avc_has_perm(sid, sid, SECCLASS_BPF, BPF__MAP_CREATE, + NULL); + break; + case BPF_PROG_LOAD: + ret = avc_has_perm(sid, sid, SECCLASS_BPF, BPF__PROG_LOAD, + NULL); + break; + default: + ret = 0; + break; + } + + return ret; +} + +static u32 bpf_map_fmode_to_av(fmode_t fmode) +{ + u32 av = 0; + + if (fmode & FMODE_READ) + av |= BPF__MAP_READ; + if (fmode & FMODE_WRITE) + av |= BPF__MAP_WRITE; + return av; +} + +static int selinux_bpf_map(struct bpf_map *map, fmode_t fmode) +{ + u32 sid = current_sid(); + struct bpf_security_struct *bpfsec; + + bpfsec = map->security; + return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + bpf_map_fmode_to_av(fmode), NULL); +} + +static int selinux_bpf_prog(struct bpf_prog *prog) +{ + u32 sid = current_sid(); + struct bpf_security_struct *bpfsec; + + bpfsec = prog->aux->security; + return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + BPF__PROG_RUN, NULL); +} + +static int selinux_bpf_map_alloc(struct bpf_map *map) +{ + struct bpf_security_struct *bpfsec; + + bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL); + if (!bpfsec) + return -ENOMEM; + + bpfsec->sid = current_sid(); + map->security = bpfsec; + + return 0; +} + +static void selinux_bpf_map_free(struct bpf_map *map) +{ + struct bpf_security_struct *bpfsec = map->security; + + map->security = NULL; + kfree(bpfsec); +} + +static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux) +{ + struct bpf_security_struct *bpfsec; + + bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL); + if (!bpfsec) + return -ENOMEM; + + bpfsec->sid = current_sid(); + aux->security = bpfsec; + + return 0; +} + +static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) +{ + struct bpf_security_struct *bpfsec = aux->security; + + aux->security = NULL; + kfree(bpfsec); +} +#endif + static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), @@ -6471,6 +6572,16 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(audit_rule_match, selinux_audit_rule_match), LSM_HOOK_INIT(audit_rule_free, selinux_audit_rule_free), #endif + +#ifdef CONFIG_BPF_SYSCALL + LSM_HOOK_INIT(bpf, selinux_bpf), + LSM_HOOK_INIT(bpf_map, selinux_bpf_map), + LSM_HOOK_INIT(bpf_prog, selinux_bpf_prog), + LSM_HOOK_INIT(bpf_map_alloc_security, selinux_bpf_map_alloc), + LSM_HOOK_INIT(bpf_prog_alloc_security, selinux_bpf_prog_alloc), + LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), + LSM_HOOK_INIT(bpf_prog_free_security, 
selinux_bpf_prog_free), +#endif }; static __init int selinux_init(void) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 35ffb29a69cb..0a7023b5f000 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -237,6 +237,8 @@ struct security_class_mapping secclass_map[] = { { "access", NULL } }, { "infiniband_endport", { "manage_subnet", NULL } }, + { "bpf", + {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, { NULL } }; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 1649cd18eb0b..3d54468ce334 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -150,6 +150,10 @@ struct pkey_security_struct { u32 sid; /* SID of pkey */ }; +struct bpf_security_struct { + u32 sid; /*SID of bpf obj creater*/ +}; + extern unsigned int selinux_checkreqprot; #endif /* _SELINUX_OBJSEC_H_ */ From f66e448cfda021b0bcd884f26709796fe19c7cc1 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:26 -0700 Subject: [PATCH 1214/2177] selinux: bpf: Add addtional check for bpf object file receive Introduce a bpf object related check when sending and receiving files through unix domain socket as well as binder. It checks if the receiving process have privilege to read/write the bpf map or use the bpf program. This check is necessary because the bpf maps and programs are using a anonymous inode as their shared inode so the normal way of checking the files and sockets when passing between processes cannot work properly on eBPF object. This check only works when the BPF_SYSCALL is configured. Signed-off-by: Chenbo Feng Acked-by: Stephen Smalley Reviewed-by: James Morris Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ kernel/bpf/syscall.c | 4 ++-- security/selinux/hooks.c | 49 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 84c192da3e0b..1e334b248ff6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -288,6 +288,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +extern const struct file_operations bpf_map_fops; +extern const struct file_operations bpf_prog_fops; + #define BPF_PROG_TYPE(_id, _name) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ extern const struct bpf_verifier_ops _name ## _verifier_ops; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5cb56d06b48d..323be2473c4b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -315,7 +315,7 @@ static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, return -EINVAL; } -static const struct file_operations bpf_map_fops = { +const struct file_operations bpf_map_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_map_show_fdinfo, #endif @@ -975,7 +975,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) } #endif -static const struct file_operations bpf_prog_fops = { +const struct file_operations bpf_prog_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_prog_show_fdinfo, #endif diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 12cf7de8cbed..2e3a627fc0b1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1815,6 +1815,10 @@ static inline int file_path_has_perm(const struct cred *cred, return inode_has_perm(cred, file_inode(file), av, &ad); } +#ifdef 
CONFIG_BPF_SYSCALL +static int bpf_fd_pass(struct file *file, u32 sid); +#endif + /* Check whether a task can use an open file descriptor to access an inode in a given way. Check access to the descriptor itself, and then use dentry_has_perm to @@ -1845,6 +1849,12 @@ static int file_has_perm(const struct cred *cred, goto out; } +#ifdef CONFIG_BPF_SYSCALL + rc = bpf_fd_pass(file, cred_sid(cred)); + if (rc) + return rc; +#endif + /* av is zero if only checking access to the descriptor. */ rc = 0; if (av) @@ -2165,6 +2175,12 @@ static int selinux_binder_transfer_file(struct task_struct *from, return rc; } +#ifdef CONFIG_BPF_SYSCALL + rc = bpf_fd_pass(file, sid); + if (rc) + return rc; +#endif + if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; @@ -6288,6 +6304,39 @@ static u32 bpf_map_fmode_to_av(fmode_t fmode) return av; } +/* This function will check the file pass through unix socket or binder to see + * if it is a bpf related object. And apply correspinding checks on the bpf + * object based on the type. The bpf maps and programs, not like other files and + * socket, are using a shared anonymous inode inside the kernel as their inode. + * So checking that inode cannot identify if the process have privilege to + * access the bpf object and that's why we have to add this additional check in + * selinux_file_receive and selinux_binder_transfer_files. + */ +static int bpf_fd_pass(struct file *file, u32 sid) +{ + struct bpf_security_struct *bpfsec; + struct bpf_prog *prog; + struct bpf_map *map; + int ret; + + if (file->f_op == &bpf_map_fops) { + map = file->private_data; + bpfsec = map->security; + ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + bpf_map_fmode_to_av(file->f_mode), NULL); + if (ret) + return ret; + } else if (file->f_op == &bpf_prog_fops) { + prog = file->private_data; + bpfsec = prog->aux->security; + ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + BPF__PROG_RUN, NULL); + if (ret) + return ret; + } + return 0; +} + static int selinux_bpf_map(struct bpf_map *map, fmode_t fmode) { u32 sid = current_sid(); From b886d5f2f2906a866e33212734d204dfe35d50d9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 19 Oct 2017 16:07:10 +0200 Subject: [PATCH 1215/2177] ipv6: start fib6 gc on RTF_CACHE dst creation After the commit 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache"), the fib6 gc is not started after the creation of a RTF_CACHE via a redirect or pmtu update, since fib6_add() isn't invoked anymore for such dsts. We need the fib6 gc to run periodically to clean the RTF_CACHE, or the dst will stay there forever. Fix it by explicitly calling fib6_force_start_gc() on successful exception creation. gc_args->more accounting will ensure that the gc timer will run for whatever time needed to properly clean the table. v2 -> v3: - clarified the commit message Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache") Signed-off-by: Paolo Abeni Acked-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2e8842fa6450..efecdcff5055 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1340,8 +1340,10 @@ out: spin_unlock_bh(&rt6_exception_lock); /* Update fn->fn_sernum to invalidate all cached dst */ - if (!err) + if (!err) { fib6_update_sernum(ort); + fib6_force_start_gc(net); + } return err; } From 1859bac04fb696b858dbbff59503c945ec871bd9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 19 Oct 2017 16:07:11 +0200 Subject: [PATCH 1216/2177] ipv6: remove from fib tree aged out RTF_CACHE dst The commit 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache") partially reverted the commit 1e2ea8ad37be ("ipv6: set dst.obsolete when a cached route has expired"). As a result, RTF_CACHE dst referenced outside the fib tree will not be removed until the next sernum change; dst_check() does not fail on aged-out dst, and dst->__refcnt can't decrease: the aged out dst will stay valid for a potentially unlimited time after the timeout expiration. This change explicitly removes RTF_CACHE dst from the fib tree when aged out. The rt6_remove_exception() logic will then obsolete the dst and other entities will drop the related reference on next dst_check(). pMTU exceptions are not aged-out, and are removed from the exception table only when the - usually considerably longer - ip6_rt_mtu_expires timeout expires. v1 -> v2: - do not touch dst.obsolete in rt6_remove_exception(), not needed v2 -> v3: - take care of pMTU exceptions, too Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache") Signed-off-by: Paolo Abeni Acked-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index efecdcff5055..074fac966018 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1575,7 +1575,13 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, { struct rt6_info *rt = rt6_ex->rt6i; - if (atomic_read(&rt->dst.__refcnt) == 1 && + /* we are pruning and obsoleting aged-out and non gateway exceptions + * even if others have still references to them, so that on next + * dst_check() such references can be dropped. + * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when + * expired, independently from their aging, as per RFC 8201 section 4 + */ + if (!(rt->rt6i_flags & RTF_EXPIRES) && time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { RT6_TRACE("aging clone %p\n", rt); rt6_remove_exception(bucket, rt6_ex); @@ -1595,6 +1601,10 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, rt6_remove_exception(bucket, rt6_ex); return; } + } else if (__rt6_check_expired(rt)) { + RT6_TRACE("purging expired route %p\n", rt); + rt6_remove_exception(bucket, rt6_ex); + return; } gc_args->more++; } From ba233b34741a1dc88d1e94db7deeb7b079ef4b9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Oct 2017 14:20:30 -0700 Subject: [PATCH 1217/2177] tcp: fix tcp_send_syn_data() syn_data was allocated by sk_stream_alloc_skb(), meaning its destructor and _skb_refdst fields are mangled. We need to call tcp_skb_tsorted_anchor_cleanup() before calling kfree_skb() or kernel crashes. Bug was reported by syzkaller bot. 
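The fix is a single call, but the hazard behind it is easy to miss: commit e2080072ed2d made the skb's tcp_tsorted_anchor list_head overlay the destructor and _skb_refdst fields in a union, so once the skb has carried list pointers those two fields no longer hold what the generic free path expects, and kfree_skb() would dereference them. The sketch below is a minimal, self-contained userspace illustration of that aliasing pattern and of why the cleanup must run before the free; it is not kernel code, and every name in it (struct buf, buf_anchor_cleanup, buf_free) is invented purely for illustration.

	/* Userspace sketch of the aliasing hazard: a list anchor shares
	 * storage with a destructor pointer, so the anchor must be
	 * re-initialized before the generic free path runs.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	struct list_node { struct list_node *next, *prev; };

	struct buf {
		union {
			struct {
				void (*destructor)(struct buf *b); /* like skb->destructor  */
				void *refdst;                      /* like skb->_skb_refdst */
			};
			struct list_node anchor;                   /* like tcp_tsorted_anchor */
		};
		char data[64];
	};

	/* Undo the aliasing before handing the buffer to the free path,
	 * in the spirit of tcp_skb_tsorted_anchor_cleanup().
	 */
	static void buf_anchor_cleanup(struct buf *b)
	{
		b->destructor = NULL;
		b->refdst = NULL;
	}

	/* Generic free path: it trusts destructor/refdst, the way
	 * kfree_skb() trusts the skb fields.
	 */
	static void buf_free(struct buf *b)
	{
		if (b->destructor)	/* wild list pointer if cleanup was skipped */
			b->destructor(b);
		free(b);
	}

	int main(void)
	{
		struct buf *b = calloc(1, sizeof(*b));

		if (!b)
			return 1;
		/* Pretend the buffer sat on a list: the anchor now holds
		 * pointers that overlay destructor/refdst.
		 */
		b->anchor.next = &b->anchor;
		b->anchor.prev = &b->anchor;

		buf_anchor_cleanup(b);	/* must precede buf_free() */
		buf_free(b);
		puts("freed safely");
		return 0;
	}

The ordering is the whole point: buf_anchor_cleanup() runs after the last use of the anchor and before buf_free(), exactly as tcp_skb_tsorted_anchor_cleanup() is placed immediately before kfree_skb() in the hunk below.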
Fixes: e2080072ed2d ("tcp: new list for sent but unacked skbs for RACK recovery") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53dc1267c85e..988733f289c8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3383,6 +3383,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) int copied = copy_from_iter(skb_put(syn_data, space), space, &fo->data->msg_iter); if (unlikely(!copied)) { + tcp_skb_tsorted_anchor_cleanup(syn_data); kfree_skb(syn_data); goto fallback; } From 3c75f9b1b4de0a14f11b7e71a6bdc30b65648f20 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 15:01:38 -0700 Subject: [PATCH 1218/2177] spectrum: Convert fib event handlers to use container_of on info arg Use container_of to convert the generic fib_notifier_info into the event specific data structure. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2420f69797a9..12d471d2a90b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5209,25 +5209,35 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { + struct fib_entry_notifier_info *fen_info; + struct fib_rule_notifier_info *fr_info; + struct fib_nh_notifier_info *fnh_info; + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info)); - /* Take referece on fib_info to prevent it from being + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fib_work->fen_info = *fen_info; + /* Take reference on fib_info to prevent it from being * freed while work is queued. Release it afterwards. 
*/ fib_info_hold(fib_work->fen_info.fi); break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fr_info = container_of(info, struct fib_rule_notifier_info, + info); + fib_work->fr_info = *fr_info; fib_rule_get(fib_work->fr_info.rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: - memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info)); + fnh_info = container_of(info, struct fib_nh_notifier_info, + info); + fib_work->fnh_info = *fnh_info; fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); break; } @@ -5236,16 +5246,23 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { + struct fib6_entry_notifier_info *fen6_info; + struct fib_rule_notifier_info *fr_info; + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info)); + fen6_info = container_of(info, struct fib6_entry_notifier_info, + info); + fib_work->fen6_info = *fen6_info; rt6_hold(fib_work->fen6_info.rt); break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fr_info = container_of(info, struct fib_rule_notifier_info, + info); + fib_work->fr_info = *fr_info; fib_rule_get(fib_work->fr_info.rule); break; } From 164a5e7ad531e181334a3d3f03d0d5ad20d6faea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Oct 2017 17:02:03 -0700 Subject: [PATCH 1219/2177] ipv4: ipv4_default_advmss() should use route mtu ipv4_default_advmss() incorrectly uses the device MTU instead of the route provided one. IPv6 has the proper behavior, lets harmonize the two protocols. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4306db827374..bc40bd411196 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1250,7 +1250,7 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); - unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size, + unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, ip_rt_min_advmss); return min(advmss, IPV4_MAX_PMTU - header_size); From b65f164d37cf6d4aac59b0e13c2e5c4cfe293fd2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 19 Oct 2017 09:31:43 +0200 Subject: [PATCH 1220/2177] ipv6: let trace_fib6_table_lookup() dereference the fib table The perf traces for ipv6 routing code show a relevant cost around trace_fib6_table_lookup(), even if no trace is enabled. This is due to the fib6_table de-referencing currently performed by the caller. Let's the tracing code pay this overhead, passing to the trace helper the table pointer. This gives small but measurable performance improvement under UDP flood. Signed-off-by: Paolo Abeni Acked-by: Steven Rostedt (VMware) Acked-by: David Ahern Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- include/trace/events/fib6.h | 6 +++--- net/ipv6/route.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index d60096cddb2a..b34bed17abc7 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -12,9 +12,9 @@ TRACE_EVENT(fib6_table_lookup, TP_PROTO(const struct net *net, const struct rt6_info *rt, - u32 tb_id, const struct flowi6 *flp), + struct fib6_table *table, const struct flowi6 *flp), - TP_ARGS(net, rt, tb_id, flp), + TP_ARGS(net, rt, table, flp), TP_STRUCT__entry( __field( u32, tb_id ) @@ -34,7 +34,7 @@ TRACE_EVENT(fib6_table_lookup, TP_fast_assign( struct in6_addr *in6; - __entry->tb_id = tb_id; + __entry->tb_id = table->tb6_id; __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; __entry->tos = ip6_tclass(flp->flowlabel); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 074fac966018..46c59a53c53f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -945,7 +945,7 @@ restart: rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + trace_fib6_table_lookup(net, rt, table, fl6); return rt; @@ -1682,7 +1682,7 @@ redo_rt6_select: if (rt == net->ipv6.ip6_null_entry) { rcu_read_unlock(); dst_hold(&rt->dst); - trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (rt->rt6i_flags & RTF_CACHE) { if (ip6_hold_safe(net, &rt, true)) { @@ -1690,7 +1690,7 @@ redo_rt6_select: rt6_dst_from_metrics_check(rt); } rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(rt->rt6i_flags & RTF_GATEWAY))) { @@ -1726,7 +1726,7 @@ redo_rt6_select: } uncached_rt_out: - trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); + trace_fib6_table_lookup(net, uncached_rt, table, fl6); return uncached_rt; } else { @@ -1754,7 +1754,7 @@ uncached_rt_out: } local_bh_enable(); rcu_read_unlock(); - trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); + trace_fib6_table_lookup(net, pcpu_rt, table, fl6); return pcpu_rt; } } @@ -2195,7 +2195,7 @@ out: rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + trace_fib6_table_lookup(net, rt, table, fl6); return rt; }; From 8c4083b30e56fc71b0e94c26374b32d95d5ea461 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:29 +0200 Subject: [PATCH 1221/2177] net: sched: add block bind/unbind notif. and extended block_get/put Introduce new type of ndo_setup_tc message to propage binding/unbinding of a block to driver. Call this ndo whenever qdisc gets/puts a block. Alongside with this, there's need to propagate binder type from qdisc code down to the notifier. So introduce extended variants of block_get/put in order to pass this info. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + include/net/pkt_cls.h | 40 ++++++++++++++++++++++++++++ net/sched/cls_api.c | 56 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 94 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf014afcb914..4de5b08ee0fb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -775,6 +775,7 @@ enum tc_setup_type { TC_SETUP_CLSFLOWER, TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, + TC_SETUP_BLOCK, }; /* These structures hold the attributes of xdp state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 49a143e0fe65..41bc7d774047 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -17,13 +17,27 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +enum tcf_block_binder_type { + TCF_BLOCK_BINDER_TYPE_UNSPEC, +}; + +struct tcf_block_ext_info { + enum tcf_block_binder_type binder_type; +}; + #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); +int tcf_block_get_ext(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei); void tcf_block_put(struct tcf_block *block); +void tcf_block_put_ext(struct tcf_block *block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei); static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { @@ -46,10 +60,25 @@ int tcf_block_get(struct tcf_block **p_block, return 0; } +static inline +int tcf_block_get_ext(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + return 0; +} + static inline void tcf_block_put(struct tcf_block *block) { } +static inline +void tcf_block_put_ext(struct tcf_block *block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ +} + static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { return NULL; @@ -434,6 +463,17 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type, void *type_data, bool err_stop); +enum tc_block_command { + TC_BLOCK_BIND, + TC_BLOCK_UNBIND, +}; + +struct tc_block_offload { + enum tc_block_command command; + enum tcf_block_binder_type binder_type; + struct tcf_block *block; +}; + struct tc_cls_common_offload { u32 chain_index; __be16 protocol; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2e8e87fd9d97..92dce26d10e3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -240,8 +240,36 @@ tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, chain->p_filter_chain = p_filter_chain; } -int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) +static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei, + enum tc_block_command command) +{ + struct net_device *dev = q->dev_queue->dev; + struct tc_block_offload bo = {}; + + if (!tc_can_offload(dev)) + return; + bo.command = command; + bo.binder_type = ei->binder_type; + bo.block = block; + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); +} + +static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc 
*q, + struct tcf_block_ext_info *ei) +{ + tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND); +} + +static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND); +} + +int tcf_block_get_ext(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei) { struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); struct tcf_chain *chain; @@ -259,6 +287,7 @@ int tcf_block_get(struct tcf_block **p_block, tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); block->net = qdisc_net(q); block->q = q; + tcf_block_offload_bind(block, q, ei); *p_block = block; return 0; @@ -266,15 +295,28 @@ err_chain_create: kfree(block); return err; } +EXPORT_SYMBOL(tcf_block_get_ext); + +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) +{ + struct tcf_block_ext_info ei = {0, }; + + return tcf_block_get_ext(p_block, p_filter_chain, q, &ei); +} EXPORT_SYMBOL(tcf_block_get); -void tcf_block_put(struct tcf_block *block) +void tcf_block_put_ext(struct tcf_block *block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct tcf_block_ext_info *ei) { struct tcf_chain *chain, *tmp; if (!block) return; + tcf_block_offload_unbind(block, q, ei); + /* XXX: Standalone actions are not allowed to jump to any chain, and * bound actions should be all removed after flushing. However, * filters are destroyed in RCU callbacks, we have to hold the chains @@ -302,6 +344,14 @@ void tcf_block_put(struct tcf_block *block) tcf_chain_put(chain); kfree(block); } +EXPORT_SYMBOL(tcf_block_put_ext); + +void tcf_block_put(struct tcf_block *block) +{ + struct tcf_block_ext_info ei = {0, }; + + tcf_block_put_ext(block, NULL, block->q, &ei); +} EXPORT_SYMBOL(tcf_block_put); /* Main classifier routine: scans classifier chain attached From 6e40cf2d4dee9dc22ff398041ce876bef8172dea Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:30 +0200 Subject: [PATCH 1222/2177] net: sched: use extended variants of block_get/put in ingress and clsact qdiscs Use previously introduced extended variants of block get and put functions. This allows to specify a binder types specific to clsact ingress/egress which is useful for drivers to distinguish who actually got the block. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 2 ++ net/sched/sch_ingress.c | 36 +++++++++++++++++++++++++++++------- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 41bc7d774047..5c50af8f7183 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -19,6 +19,8 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); enum tcf_block_binder_type { TCF_BLOCK_BINDER_TYPE_UNSPEC, + TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS, + TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS, }; struct tcf_block_ext_info { diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 9ccc1b89b0d9..b599db26d34b 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -20,6 +20,7 @@ struct ingress_sched_data { struct tcf_block *block; + struct tcf_block_ext_info block_info; }; static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) @@ -59,7 +60,10 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; - err = tcf_block_get(&q->block, &dev->ingress_cl_list, sch); + q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + + err = tcf_block_get_ext(&q->block, &dev->ingress_cl_list, + sch, &q->block_info); if (err) return err; @@ -72,8 +76,10 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); - tcf_block_put(q->block); + tcf_block_put_ext(q->block, &dev->ingress_cl_list, + sch, &q->block_info); net_dec_ingress_queue(); } @@ -114,6 +120,8 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { struct clsact_sched_data { struct tcf_block *ingress_block; struct tcf_block *egress_block; + struct tcf_block_ext_info ingress_block_info; + struct tcf_block_ext_info egress_block_info; }; static unsigned long clsact_find(struct Qdisc *sch, u32 classid) @@ -153,13 +161,19 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; - err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list, sch); + q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + + err = tcf_block_get_ext(&q->ingress_block, &dev->ingress_cl_list, + sch, &q->ingress_block_info); if (err) return err; - err = tcf_block_get(&q->egress_block, &dev->egress_cl_list, sch); + q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS; + + err = tcf_block_get_ext(&q->egress_block, &dev->egress_cl_list, + sch, &q->egress_block_info); if (err) - return err; + goto err_egress_block_get; net_inc_ingress_queue(); net_inc_egress_queue(); @@ -167,14 +181,22 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_CPUSTATS; return 0; + +err_egress_block_get: + tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list, + sch, &q->ingress_block_info); + return err; } static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); - tcf_block_put(q->egress_block); - tcf_block_put(q->ingress_block); + tcf_block_put_ext(q->egress_block, &dev->egress_cl_list, + sch, &q->egress_block_info); + tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list, + sch, &q->ingress_block_info); net_dec_ingress_queue(); net_dec_egress_queue(); From acb674428c3d57bccbe3f4a1a7a009f6d73e9f41 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:31 +0200 Subject: [PATCH 
1223/2177] net: sched: introduce per-block callbacks Introduce infrastructure that allows drivers to register callbacks that are called whenever tc would offload inserted rule for a specific block. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 81 +++++++++++++++++++++++++++++ include/net/sch_generic.h | 1 + net/sched/cls_api.c | 105 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 187 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 5c50af8f7183..4bc6b1cc245d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -27,6 +27,8 @@ struct tcf_block_ext_info { enum tcf_block_binder_type binder_type; }; +struct tcf_block_cb; + #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); @@ -51,6 +53,21 @@ static inline struct net_device *tcf_block_dev(struct tcf_block *block) return tcf_block_q(block)->dev_queue->dev; } +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb); +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident); +void tcf_block_cb_incref(struct tcf_block_cb *block_cb); +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb); +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv); +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv); +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb); +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident); + int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -91,6 +108,70 @@ static inline struct net_device *tcf_block_dev(struct tcf_block *block) return NULL; } +static inline +int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb, + void *cb_priv) +{ + return 0; +} + +static inline +void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb, + void *cb_priv) +{ +} + +static inline +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb) +{ + return NULL; +} + +static inline +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ + return NULL; +} + +static inline +void tcf_block_cb_incref(struct tcf_block_cb *block_cb) +{ +} + +static inline +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb) +{ + return 0; +} + +static inline +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + return NULL; +} + +static inline +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + return 0; +} + +static inline +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb) +{ +} + +static inline +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ +} + static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0aea9e23e97a..031dffd5836c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -272,6 +272,7 @@ struct tcf_block { struct list_head chain_list; struct net *net; struct Qdisc *q; + struct list_head cb_list; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) diff --git 
a/net/sched/cls_api.c b/net/sched/cls_api.c index 92dce26d10e3..b16c79c47be5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -278,6 +278,8 @@ int tcf_block_get_ext(struct tcf_block **p_block, if (!block) return -ENOMEM; INIT_LIST_HEAD(&block->chain_list); + INIT_LIST_HEAD(&block->cb_list); + /* Create chain 0 by default, it has to be always present. */ chain = tcf_chain_create(block, 0); if (!chain) { @@ -354,6 +356,109 @@ void tcf_block_put(struct tcf_block *block) } EXPORT_SYMBOL(tcf_block_put); +struct tcf_block_cb { + struct list_head list; + tc_setup_cb_t *cb; + void *cb_ident; + void *cb_priv; + unsigned int refcnt; +}; + +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb) +{ + return block_cb->cb_priv; +} +EXPORT_SYMBOL(tcf_block_cb_priv); + +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ struct tcf_block_cb *block_cb; + + list_for_each_entry(block_cb, &block->cb_list, list) + if (block_cb->cb == cb && block_cb->cb_ident == cb_ident) + return block_cb; + return NULL; +} +EXPORT_SYMBOL(tcf_block_cb_lookup); + +void tcf_block_cb_incref(struct tcf_block_cb *block_cb) +{ + block_cb->refcnt++; +} +EXPORT_SYMBOL(tcf_block_cb_incref); + +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb) +{ + return --block_cb->refcnt; +} +EXPORT_SYMBOL(tcf_block_cb_decref); + +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + struct tcf_block_cb *block_cb; + + block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL); + if (!block_cb) + return NULL; + block_cb->cb = cb; + block_cb->cb_ident = cb_ident; + block_cb->cb_priv = cb_priv; + list_add(&block_cb->list, &block->cb_list); + return block_cb; +} +EXPORT_SYMBOL(__tcf_block_cb_register); + +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + struct tcf_block_cb *block_cb; + + block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv); + return block_cb ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(tcf_block_cb_register); + +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb) +{ + list_del(&block_cb->list); + kfree(block_cb); +} +EXPORT_SYMBOL(__tcf_block_cb_unregister); + +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ + struct tcf_block_cb *block_cb; + + block_cb = tcf_block_cb_lookup(block, cb, cb_ident); + if (!block_cb) + return; + __tcf_block_cb_unregister(block_cb); +} +EXPORT_SYMBOL(tcf_block_cb_unregister); + +static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) +{ + struct tcf_block_cb *block_cb; + int ok_count = 0; + int err; + + list_for_each_entry(block_cb, &block->cb_list, list) { + err = block_cb->cb(type, type_data, block_cb->cb_priv); + if (err) { + if (err_stop) + return err; + } else { + ok_count++; + } + } + return ok_count; +} + /* Main classifier routine: scans classifier chain attached * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. From 208c0f4b5237f1d6611b2c679a8022d6901577d6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:32 +0200 Subject: [PATCH 1224/2177] net: sched: use tc_setup_cb_call to call per-block callbacks Extend the tc_setup_cb_call entrypoint function originally used only for action egress devices callbacks to call per-block callbacks as well. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 4 ++-- net/sched/cls_api.c | 21 ++++++++++++++++++--- net/sched/cls_flower.c | 9 ++++++--- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4bc6b1cc245d..fcca5a9d9880 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -543,8 +543,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ -int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type, - void *type_data, bool err_stop); +int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, + enum tc_setup_type type, void *type_data, bool err_stop); enum tc_block_command { TC_BLOCK_BIND, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b16c79c47be5..cdfdc24b89cf 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1206,10 +1206,25 @@ static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, return ok_count; } -int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type, - void *type_data, bool err_stop) +int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, + enum tc_setup_type type, void *type_data, bool err_stop) { - return tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); + int ok_count; + int ret; + + ret = tcf_block_cb_call(block, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count = ret; + + if (!exts) + return ok_count; + ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count += ret; + + return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5b7bb968d1d4..76b4e0a1c92f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -201,6 +201,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) { struct tc_cls_flower_offload cls_flower = {}; struct net_device *dev = tp->q->dev_queue->dev; + struct tcf_block *block = tp->chain->block; tc_cls_common_offload_init(&cls_flower.common, tp); cls_flower.command = TC_CLSFLOWER_DESTROY; @@ -209,7 +210,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) if (tc_can_offload(dev)) dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); - tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER, + tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, false); } @@ -220,6 +221,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(f->flags); int err; @@ -242,7 +244,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, } } - err = tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER, + err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); if (err < 0) { fl_hw_destroy_filter(tp, f); @@ -261,6 +263,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) { struct tc_cls_flower_offload cls_flower = {}; struct net_device *dev = tp->q->dev_queue->dev; + struct tcf_block *block = tp->chain->block; tc_cls_common_offload_init(&cls_flower.common, tp); cls_flower.command = TC_CLSFLOWER_STATS; @@ -270,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) if (tc_can_offload(dev)) dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); - tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER, + tc_setup_cb_call(block, 
&f->exts, TC_SETUP_CLSFLOWER, &cls_flower, false); } From 2447a96f88ee3c082603c2dd38ae51f66977c11d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:33 +0200 Subject: [PATCH 1225/2177] net: sched: cls_matchall: call block callbacks for offload Use the newly introduced callbacks infrastructure and call block callbacks alongside with the existing per-netdev ndo_setup_tc. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 72 +++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index eeac606c95ab..5278534c7e87 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -50,12 +50,32 @@ static void mall_destroy_rcu(struct rcu_head *rcu) kfree(head); } +static void mall_destroy_hw_filter(struct tcf_proto *tp, + struct cls_mall_head *head, + unsigned long cookie) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + + tc_cls_common_offload_init(&cls_mall.common, tp); + cls_mall.command = TC_CLSMATCHALL_DESTROY; + cls_mall.cookie = cookie; + + if (tc_can_offload(dev)) + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, + &cls_mall); + tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false); +} + static int mall_replace_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, unsigned long cookie) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + bool skip_sw = tc_skip_sw(head->flags); int err; tc_cls_common_offload_init(&cls_mall.common, tp); @@ -63,37 +83,40 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.exts = &head->exts; cls_mall.cookie = cookie; - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, - &cls_mall); - if (!err) + if (tc_can_offload(dev)) { + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, + &cls_mall); + if (err) { + if (skip_sw) + return err; + } else { + head->flags |= TCA_CLS_FLAGS_IN_HW; + } + } + + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, + &cls_mall, skip_sw); + if (err < 0) { + mall_destroy_hw_filter(tp, head, cookie); + return err; + } else if (err > 0) { head->flags |= TCA_CLS_FLAGS_IN_HW; + } - return err; -} + if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; -static void mall_destroy_hw_filter(struct tcf_proto *tp, - struct cls_mall_head *head, - unsigned long cookie) -{ - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_matchall_offload cls_mall = {}; - - tc_cls_common_offload_init(&cls_mall.common, tp); - cls_mall.command = TC_CLSMATCHALL_DESTROY; - cls_mall.cookie = cookie; - - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &cls_mall); + return 0; } static void mall_destroy(struct tcf_proto *tp) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct net_device *dev = tp->q->dev_queue->dev; if (!head) return; - if (tc_should_offload(dev, head->flags)) + if (!tc_skip_hw(head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); call_rcu(&head->rcu, mall_destroy_rcu); @@ -133,7 +156,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, void **arg, bool ovr) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct net_device *dev = tp->q->dev_queue->dev; struct nlattr *tb[TCA_MATCHALL_MAX + 1]; struct cls_mall_head *new; u32 flags = 
0; @@ -173,14 +195,10 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (err) goto err_set_parms; - if (tc_should_offload(dev, flags)) { + if (!tc_skip_hw(new->flags)) { err = mall_replace_hw_filter(tp, new, (unsigned long) new); - if (err) { - if (tc_skip_sw(flags)) - goto err_replace_hw_filter; - else - err = 0; - } + if (err) + goto err_replace_hw_filter; } if (!tc_in_hw(new->flags)) From 77460411929d8904823419fca73bdad666a659d3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:34 +0200 Subject: [PATCH 1226/2177] net: sched: cls_u32: swap u32_remove_hw_knode and u32_remove_hw_hnode Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 49d96b45a8ce..c3ea684b8e91 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -462,7 +462,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } -static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload cls_u32 = {}; @@ -471,8 +471,10 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) return; tc_cls_common_offload_init(&cls_u32.common, tp); - cls_u32.command = TC_CLSU32_DELETE_KNODE; - cls_u32.knode.handle = handle; + cls_u32.command = TC_CLSU32_DELETE_HNODE; + cls_u32.hnode.divisor = h->divisor; + cls_u32.hnode.handle = h->handle; + cls_u32.hnode.prio = h->prio; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); } @@ -500,7 +502,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, return 0; } -static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload cls_u32 = {}; @@ -509,10 +511,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) return; tc_cls_common_offload_init(&cls_u32.common, tp); - cls_u32.command = TC_CLSU32_DELETE_HNODE; - cls_u32.hnode.divisor = h->divisor; - cls_u32.hnode.handle = h->handle; - cls_u32.hnode.prio = h->prio; + cls_u32.command = TC_CLSU32_DELETE_KNODE; + cls_u32.knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); } From 245dc5121a9bf6a0a12ac1e72f47822fc3fa8cae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:35 +0200 Subject: [PATCH 1227/2177] net: sched: cls_u32: call block callbacks for offload Use the newly introduced callbacks infrastructure and call block callbacks alongside with the existing per-netdev ndo_setup_tc. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/cls_u32.c | 72 ++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 20 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c3ea684b8e91..d53da7968eda 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -465,39 +465,57 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) { struct net_device *dev = tp->q->dev_queue->dev; + struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - if (!tc_should_offload(dev, 0)) - return; - tc_cls_common_offload_init(&cls_u32.common, tp); cls_u32.command = TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); + if (tc_can_offload(dev)) + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); + tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; + struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; + bool skip_sw = tc_skip_sw(flags); + bool offloaded = false; int err; - if (!tc_should_offload(dev, flags)) - return tc_skip_sw(flags) ? -EINVAL : 0; - tc_cls_common_offload_init(&cls_u32.common, tp); cls_u32.command = TC_CLSU32_NEW_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); - if (tc_skip_sw(flags)) + if (tc_can_offload(dev)) { + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, + &cls_u32); + if (err) { + if (skip_sw) + return err; + } else { + offloaded = true; + } + } + + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); + if (err < 0) { + u32_clear_hw_hnode(tp, h); return err; + } else if (err > 0) { + offloaded = true; + } + + if (skip_sw && !offloaded) + return -EINVAL; return 0; } @@ -505,28 +523,27 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) { struct net_device *dev = tp->q->dev_queue->dev; + struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - if (!tc_should_offload(dev, 0)) - return; - tc_cls_common_offload_init(&cls_u32.common, tp); cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = handle; - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); + if (tc_can_offload(dev)) + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); + tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; + struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; + bool skip_sw = tc_skip_sw(flags); int err; - if (!tc_should_offload(dev, flags)) - return tc_skip_sw(flags) ? 
-EINVAL : 0; - tc_cls_common_offload_init(&cls_u32.common, tp); cls_u32.command = TC_CLSU32_REPLACE_KNODE; cls_u32.knode.handle = n->handle; @@ -543,13 +560,28 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, if (n->ht_down) cls_u32.knode.link_handle = n->ht_down->handle; - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); - if (!err) - n->flags |= TCA_CLS_FLAGS_IN_HW; + if (tc_can_offload(dev)) { + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, + &cls_u32); + if (err) { + if (skip_sw) + return err; + } else { + n->flags |= TCA_CLS_FLAGS_IN_HW; + } + } - if (tc_skip_sw(flags)) + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); + if (err < 0) { + u32_remove_hw_knode(tp, n->handle); return err; + } else if (err > 0) { + n->flags |= TCA_CLS_FLAGS_IN_HW; + } + + if (skip_sw && !(n->flags && TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; return 0; } From 3f7889c4c79b0afc012026431d4db4bad4e84473 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:36 +0200 Subject: [PATCH 1228/2177] net: sched: cls_bpf: call block callbacks for offload Use the newly introduced callbacks infrastructure and call block callbacks alongside with the existing per-netdev ndo_setup_tc. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6c6b21f6ba62..e379fdf928bd 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -147,7 +147,10 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd) { + bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE; struct net_device *dev = tp->q->dev_queue->dev; + struct tcf_block *block = tp->chain->block; + bool skip_sw = tc_skip_sw(prog->gen_flags); struct tc_cls_bpf_offload cls_bpf = {}; int err; @@ -159,17 +162,38 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf.exts_integrated = prog->exts_integrated; cls_bpf.gen_flags = prog->gen_flags; - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &cls_bpf); - if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) - prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + if (tc_can_offload(dev)) { + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, + &cls_bpf); + if (addorrep) { + if (err) { + if (skip_sw) + return err; + } else { + prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + } + } + } - return err; + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); + if (addorrep) { + if (err < 0) { + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + return err; + } else if (err > 0) { + prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + } + } + + if (addorrep && skip_sw && !(prog->gen_flags && TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; + + return 0; } static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct cls_bpf_prog *oldprog) { - struct net_device *dev = tp->q->dev_queue->dev; struct cls_bpf_prog *obj = prog; enum tc_clsbpf_command cmd; bool skip_sw; @@ -179,7 +203,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, (oldprog && tc_skip_sw(oldprog->gen_flags)); if (oldprog && oldprog->offloaded) { - if (tc_should_offload(dev, prog->gen_flags)) { + if (!tc_skip_hw(prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; } else if 
(!tc_skip_sw(prog->gen_flags)) { obj = oldprog; @@ -188,14 +212,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, return -EINVAL; } } else { - if (!tc_should_offload(dev, prog->gen_flags)) + if (tc_skip_hw(prog->gen_flags)) return skip_sw ? -EINVAL : 0; cmd = TC_CLSBPF_ADD; } ret = cls_bpf_offload_cmd(tp, obj, cmd); if (ret) - return skip_sw ? ret : 0; + return ret; obj->offloaded = true; if (oldprog) From eb49cfaa6bfc164e53db72cde0058ef4768e05a8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:37 +0200 Subject: [PATCH 1229/2177] mlxsw: spectrum: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for matchall and flower offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 82 ++++++++++++++----- 1 file changed, 60 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e6519f2906a4..7b60c75d96f4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1697,17 +1697,9 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_cls_matchall_offload *f) + struct tc_cls_matchall_offload *f, + bool ingress) { - bool ingress; - - if (is_classid_clsact_ingress(f->common.classid)) - ingress = true; - else if (is_classid_clsact_egress(f->common.classid)) - ingress = false; - else - return -EOPNOTSUPP; - if (f->common.chain_index) return -EOPNOTSUPP; @@ -1725,17 +1717,9 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + bool ingress) { - bool ingress; - - if (is_classid_clsact_ingress(f->common.classid)) - ingress = true; - else if (is_classid_clsact_egress(f->common.classid)) - ingress = false; - else - return -EOPNOTSUPP; - switch (f->command) { case TC_CLSFLOWER_REPLACE: return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f); @@ -1749,6 +1733,59 @@ mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port, } } +static int mlxsw_sp_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv, bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port = cb_priv; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data, + ingress); + case TC_SETUP_CLSFLOWER: + return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data, + ingress); + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return mlxsw_sp_setup_tc_block_cb(type, type_data, cb_priv, true); +} + +static int mlxsw_sp_setup_tc_block_cb_eg(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return mlxsw_sp_setup_tc_block_cb(type, type_data, cb_priv, false); +} + +static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_block_offload *f) +{ + tc_setup_cb_t *cb; + + if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + cb = mlxsw_sp_setup_tc_block_cb_ig; + else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + cb = mlxsw_sp_setup_tc_block_cb_eg; + else + 
return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, cb, mlxsw_sp_port, + mlxsw_sp_port); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -1756,9 +1793,10 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_CLSMATCHALL: - return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data); case TC_SETUP_CLSFLOWER: - return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data); + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } From d6c862baaf9fb27a385a101c7e86b6112bfd8f07 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:38 +0200 Subject: [PATCH 1230/2177] mlx5e: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for flower offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 45 ++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 24 +++++----- 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ca8845b641c0..e613ce02216d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1056,8 +1056,8 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); -int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data); +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv); /* mlx5e generic netdev management API */ struct net_device* diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3a1969a6d509..e8108688a7cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3083,13 +3083,10 @@ out: } #ifdef CONFIG_MLX5_ESWITCH -static int mlx5e_setup_tc_cls_flower(struct net_device *dev, +static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *cls_flower) { - struct mlx5e_priv *priv = netdev_priv(dev); - - if (!is_classid_clsact_ingress(cls_flower->common.classid) || - cls_flower->common.chain_index) + if (cls_flower->common.chain_index) return -EOPNOTSUPP; switch (cls_flower->command) { @@ -3103,6 +3100,40 @@ static int mlx5e_setup_tc_cls_flower(struct net_device *dev, return -EOPNOTSUPP; } } + +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct mlx5e_priv *priv = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_setup_tc_cls_flower(priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return 
tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb, + priv, priv); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb, + priv); + return 0; + default: + return -EOPNOTSUPP; + } +} #endif int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, @@ -3111,7 +3142,9 @@ int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { #ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(dev, type_data); + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return mlx5e_setup_tc_block(dev, type_data); #endif case TC_SETUP_MQPRIO: return mlx5e_setup_tc_mqprio(dev, type_data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 765fc74fbb1b..4edd92d9e1de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -691,14 +691,6 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, } } -static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - struct net_device *dev = cb_priv; - - return mlx5e_setup_tc(dev, type, type_data); -} - bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -987,6 +979,7 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct mlx5e_rep_priv *rpriv; struct net_device *netdev; + struct mlx5e_priv *upriv; int err; rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); @@ -1018,8 +1011,9 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } - err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb, - mlx5_eswitch_get_uplink_netdev(esw)); + upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(esw)); + err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb, + upriv); if (err) goto err_neigh_cleanup; @@ -1033,8 +1027,8 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) return 0; err_egdev_cleanup: - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb, - mlx5_eswitch_get_uplink_netdev(esw)); + tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb, + upriv); err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1055,10 +1049,12 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; void *ppriv = priv->ppriv; + struct mlx5e_priv *upriv; unregister_netdev(rep->netdev); - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb, - mlx5_eswitch_get_uplink_netdev(esw)); + upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(esw)); + tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb, + upriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); mlx5e_destroy_netdev(priv); From 9e0fd15dd6c981931a9e9f11dc0c940d17d6e051 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:39 +0200 Subject: [PATCH 1231/2177] bnxt: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for flower offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 37 +++++++++++++--- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 3 +- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 43 +++++++++++++++++-- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5ba49938ba55..4dde2b816092 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7295,15 +7295,40 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) return 0; } -static int bnxt_setup_flower(struct net_device *dev, - struct tc_cls_flower_offload *cls_flower) +static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) { - struct bnxt *bp = netdev_priv(dev); + struct bnxt *bp = cb_priv; if (BNXT_VF(bp)) return -EOPNOTSUPP; - return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, cls_flower); + switch (type) { + case TC_SETUP_CLSFLOWER: + return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int bnxt_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct bnxt *bp = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb, + bp, bp); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp); + return 0; + default: + return -EOPNOTSUPP; + } } static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, @@ -7311,7 +7336,9 @@ static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, { switch (type) { case TC_SETUP_CLSFLOWER: - return bnxt_setup_flower(dev, type_data); + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return bnxt_setup_tc_block(dev, type_data); case TC_SETUP_MQPRIO: { struct tc_mqprio_qopt *mqprio = type_data; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 4730c048ed9b..a9cb653b4d29 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -748,8 +748,7 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, { int rc = 0; - if (!is_classid_clsact_ingress(cls_flower->common.classid) || - cls_flower->common.chain_index) + if (cls_flower->common.chain_index) return -EOPNOTSUPP; switch (cls_flower->command) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index e75db04c6cdc..cc278d7b56a4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -115,10 +115,11 @@ bnxt_vf_rep_get_stats64(struct net_device *dev, stats->tx_bytes = vf_rep->tx_stats.bytes; } -static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, + void *cb_priv) { - struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + struct bnxt_vf_rep *vf_rep = cb_priv; struct bnxt *bp = vf_rep->bp; int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid; @@ -130,6 +131,42 @@ static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static int bnxt_vf_rep_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + + if (f->binder_type != 
TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, + bnxt_vf_rep_setup_tc_block_cb, + vf_rep, vf_rep); + return 0; + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + bnxt_vf_rep_setup_tc_block_cb, vf_rep); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_CLSFLOWER: + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return bnxt_vf_rep_setup_tc_block(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code) { u16 vf_idx; From cd019e91a837d83f599e0216794bdf1ad6fec053 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:40 +0200 Subject: [PATCH 1232/2177] cxgb4: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for flower and u32 offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c478291db93f..3cd82aea3bb6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2889,8 +2889,7 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) static int cxgb_setup_tc_flower(struct net_device *dev, struct tc_cls_flower_offload *cls_flower) { - if (!is_classid_clsact_ingress(cls_flower->common.classid) || - cls_flower->common.chain_index) + if (cls_flower->common.chain_index) return -EOPNOTSUPP; switch (cls_flower->command) { @@ -2908,8 +2907,7 @@ static int cxgb_setup_tc_flower(struct net_device *dev, static int cxgb_setup_tc_cls_u32(struct net_device *dev, struct tc_cls_u32_offload *cls_u32) { - if (!is_classid_clsact_ingress(cls_u32->common.classid) || - cls_u32->common.chain_index) + if (cls_u32->common.chain_index) return -EOPNOTSUPP; switch (cls_u32->command) { @@ -2923,9 +2921,10 @@ static int cxgb_setup_tc_cls_u32(struct net_device *dev, } } -static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) { + struct net_device *dev = cb_priv; struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = netdev2adap(dev); @@ -2946,6 +2945,40 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static int cxgb_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct port_info *pi = netdev2pinfo(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb, + pi, dev); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_CLSU32: + case TC_SETUP_CLSFLOWER: + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return 
cxgb_setup_tc_block(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + static netdev_features_t cxgb_fix_features(struct net_device *dev, netdev_features_t features) { From 6ea30f8a972c063c1072d7e7accc6a550a36770a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:41 +0200 Subject: [PATCH 1233/2177] ixgbe: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for u32 offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3e83edd10e23..38e01e0c8314 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9365,13 +9365,10 @@ free_jump: return err; } -static int ixgbe_setup_tc_cls_u32(struct net_device *dev, +static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter, struct tc_cls_u32_offload *cls_u32) { - struct ixgbe_adapter *adapter = netdev_priv(dev); - - if (!is_classid_clsact_ingress(cls_u32->common.classid) || - cls_u32->common.chain_index) + if (cls_u32->common.chain_index) return -EOPNOTSUPP; switch (cls_u32->command) { @@ -9390,6 +9387,40 @@ static int ixgbe_setup_tc_cls_u32(struct net_device *dev, } } +static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct ixgbe_adapter *adapter = cb_priv; + + switch (type) { + case TC_SETUP_CLSU32: + return ixgbe_setup_tc_cls_u32(adapter, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int ixgbe_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb, + adapter, adapter); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb, + adapter); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int ixgbe_setup_tc_mqprio(struct net_device *dev, struct tc_mqprio_qopt *mqprio) { @@ -9402,7 +9433,9 @@ static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type, { switch (type) { case TC_SETUP_CLSU32: - return ixgbe_setup_tc_cls_u32(dev, type_data); + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return ixgbe_setup_tc_block(dev, type_data); case TC_SETUP_MQPRIO: return ixgbe_setup_tc_mqprio(dev, type_data); default: From 855afa0932086851e6c7df39ac3d2cd2247cc8ee Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:42 +0200 Subject: [PATCH 1234/2177] mlx5e_rep: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for flower offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 44 ++++++++++++++++--- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 4edd92d9e1de..f59d81aa8a0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -659,13 +659,10 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, } static int -mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, +mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *cls_flower) { - struct mlx5e_priv *priv = netdev_priv(dev); - - if (!is_classid_clsact_ingress(cls_flower->common.classid) || - cls_flower->common.chain_index) + if (cls_flower->common.chain_index) return -EOPNOTSUPP; switch (cls_flower->command) { @@ -680,12 +677,47 @@ mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, } } +static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct mlx5e_priv *priv = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_setup_tc_cls_flower(priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb, + priv, priv); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(dev, type_data); + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return mlx5e_rep_setup_tc_block(dev, type_data); default: return -EOPNOTSUPP; } From 363fc53b8b5803ce43debc1e66cebaaf509fd20a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:43 +0200 Subject: [PATCH 1235/2177] nfp: flower: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for flower offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/netronome/nfp/flower/offload.c | 56 ++++++++++++++++--- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 6f239c27964e..f8523df827a6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -449,6 +449,10 @@ static int nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, struct tc_cls_flower_offload *flower) { + if (!eth_proto_is_802_3(flower->common.protocol) || + flower->common.chain_index) + return -EOPNOTSUPP; + switch (flower->command) { case TC_CLSFLOWER_REPLACE: return nfp_flower_add_offload(app, netdev, flower); @@ -461,16 +465,52 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } +static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct nfp_net *nn = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return nfp_flower_repr_offload(nn->app, nn->port->netdev, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_flower_setup_tc_block(struct net_device *netdev, + struct tc_block_offload *f) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, + nfp_flower_setup_tc_block_cb, + nn, nn); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + nfp_flower_setup_tc_block_cb, + nn); + return 0; + default: + return -EOPNOTSUPP; + } +} + int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data) { - struct tc_cls_flower_offload *cls_flower = type_data; - - if (type != TC_SETUP_CLSFLOWER || - !is_classid_clsact_ingress(cls_flower->common.classid) || - !eth_proto_is_802_3(cls_flower->common.protocol) || - cls_flower->common.chain_index) + switch (type) { + case TC_SETUP_CLSFLOWER: + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return nfp_flower_setup_tc_block(netdev, type_data); + default: return -EOPNOTSUPP; - - return nfp_flower_repr_offload(app, netdev, cls_flower); + } } From 90d97315b3e774450f06c035903fed246781fe35 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:44 +0200 Subject: [PATCH 1236/2177] nfp: bpf: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for bpf offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 60 +++++++++++++++---- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 6e74f8db1cc1..64f97b3f8949 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -114,22 +114,58 @@ static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) kfree(nn->app_priv); } +static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct tc_cls_bpf_offload *cls_bpf = type_data; + struct nfp_net *nn = cb_priv; + + switch (type) { + case TC_SETUP_CLSBPF: + if (!nfp_net_ebpf_capable(nn) || + cls_bpf->common.protocol != htons(ETH_P_ALL) || + cls_bpf->common.chain_index) + return -EOPNOTSUPP; + return nfp_net_bpf_offload(nn, cls_bpf); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_bpf_setup_tc_block(struct net_device *netdev, + struct tc_block_offload *f) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, + nfp_bpf_setup_tc_block_cb, + nn, nn); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + nfp_bpf_setup_tc_block_cb, + nn); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data) { - struct tc_cls_bpf_offload *cls_bpf = type_data; - struct nfp_net *nn = netdev_priv(netdev); - - if (type != TC_SETUP_CLSBPF || !nfp_net_ebpf_capable(nn) || - !is_classid_clsact_ingress(cls_bpf->common.classid) || - cls_bpf->common.protocol != htons(ETH_P_ALL) || - cls_bpf->common.chain_index) + switch (type) { + case TC_SETUP_CLSBPF: + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return nfp_bpf_setup_tc_block(netdev, type_data); + default: return -EOPNOTSUPP; - - if (nn->dp.bpf_offload_xdp) - return -EBUSY; - - return nfp_net_bpf_offload(nn, cls_bpf); + } } static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn) From 6b3eb752b4b9481868b3393f06a236a1aedfa43f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:45 +0200 Subject: [PATCH 1237/2177] dsa: Convert ndo_setup_tc offloads to block callbacks Benefit from the newly introduced block callback infrastructure and convert ndo_setup_tc calls for matchall offloads to block callbacks. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 64 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6906de0f0050..80142918d5d1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -777,17 +777,9 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, } static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, - struct tc_cls_matchall_offload *cls) + struct tc_cls_matchall_offload *cls, + bool ingress) { - bool ingress; - - if (is_classid_clsact_ingress(cls->common.classid)) - ingress = true; - else if (is_classid_clsact_egress(cls->common.classid)) - ingress = false; - else - return -EOPNOTSUPP; - if (cls->common.chain_index) return -EOPNOTSUPP; @@ -802,12 +794,62 @@ static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, } } +static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv, bool ingress) +{ + struct net_device *dev = cb_priv; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress); + default: + return -EOPNOTSUPP; + } +} + +static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true); +} + +static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false); +} + +static int dsa_slave_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + tc_setup_cb_t *cb; + + if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + cb = dsa_slave_setup_tc_block_cb_ig; + else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + cb = dsa_slave_setup_tc_block_cb_eg; + else + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, cb, dev, dev); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, cb, dev); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { case TC_SETUP_CLSMATCHALL: - return dsa_slave_setup_tc_cls_matchall(dev, type_data); + return 0; /* will be removed after conversion from ndo */ + case TC_SETUP_BLOCK: + return dsa_slave_setup_tc_block(dev, type_data); default: return -EOPNOTSUPP; } From 8d26d5636dff9fca30816579910aaa9a55b4d96d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:46 +0200 Subject: [PATCH 1238/2177] net: sched: avoid ndo_setup_tc calls for TC_SETUP_CLS* All drivers are converted to use block callbacks for TC_SETUP_CLS*. So it is now safe to remove the calls to ndo_setup_tc from cls_* Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 -- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 2 -- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 -- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 -- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 -- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 2 -- .../net/ethernet/mellanox/mlxsw/spectrum.c | 3 -- drivers/net/ethernet/netronome/nfp/bpf/main.c | 2 -- .../ethernet/netronome/nfp/flower/offload.c | 2 -- net/dsa/slave.c | 2 -- net/sched/cls_bpf.c | 14 --------- net/sched/cls_flower.c | 20 ------------ net/sched/cls_matchall.c | 16 ---------- net/sched/cls_u32.c | 31 ------------------- 14 files changed, 103 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4dde2b816092..22a94b16ebfa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7335,8 +7335,6 @@ static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSFLOWER: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return bnxt_setup_tc_block(dev, type_data); case TC_SETUP_MQPRIO: { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index cc278d7b56a4..6dff5aa57f16 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -158,8 +158,6 @@ static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSFLOWER: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return bnxt_vf_rep_setup_tc_block(dev, type_data); default: diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 3cd82aea3bb6..e16078ddb39f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2969,9 +2969,6 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSU32: - case TC_SETUP_CLSFLOWER: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return cxgb_setup_tc_block(dev, type_data); default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 38e01e0c8314..7f503d35eb1c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9432,8 +9432,6 @@ static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSU32: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return ixgbe_setup_tc_block(dev, type_data); case TC_SETUP_MQPRIO: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e8108688a7cf..560b208c0483 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3141,8 +3141,6 @@ int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, { switch (type) { #ifdef CONFIG_MLX5_ESWITCH - case TC_SETUP_CLSFLOWER: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return mlx5e_setup_tc_block(dev, type_data); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f59d81aa8a0c..0edb7065d811 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -714,8 +714,6 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSFLOWER: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return mlx5e_rep_setup_tc_block(dev, type_data); default: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7b60c75d96f4..4d73a6f7759e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1792,9 +1792,6 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); switch (type) { - case TC_SETUP_CLSMATCHALL: - case TC_SETUP_CLSFLOWER: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data); default: diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 64f97b3f8949..fa0ac90ed956 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -159,8 +159,6 @@ static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSBPF: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return nfp_bpf_setup_tc_block(netdev, type_data); default: diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index f8523df827a6..c47753fdb55b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -506,8 +506,6 @@ int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSFLOWER: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return nfp_flower_setup_tc_block(netdev, type_data); default: diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 80142918d5d1..d0ae7010ea45 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -846,8 +846,6 @@ static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSMATCHALL: - return 0; /* will be removed after conversion from ndo */ case TC_SETUP_BLOCK: return dsa_slave_setup_tc_block(dev, type_data); default: diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index e379fdf928bd..0f8b51061c39 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -148,7 +148,6 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd) { bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE; - struct net_device *dev = tp->q->dev_queue->dev; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(prog->gen_flags); struct tc_cls_bpf_offload cls_bpf = {}; @@ -162,19 +161,6 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf.exts_integrated = prog->exts_integrated; cls_bpf.gen_flags = prog->gen_flags; - if (tc_can_offload(dev)) { - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, - &cls_bpf); - if 
(addorrep) { - if (err) { - if (skip_sw) - return err; - } else { - prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; - } - } - } - err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); if (addorrep) { if (err < 0) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 76b4e0a1c92f..16f58abaa697 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -200,16 +200,12 @@ static void fl_destroy_filter(struct rcu_head *head) static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) { struct tc_cls_flower_offload cls_flower = {}; - struct net_device *dev = tp->q->dev_queue->dev; struct tcf_block *block = tp->chain->block; tc_cls_common_offload_init(&cls_flower.common, tp); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; - if (tc_can_offload(dev)) - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, - &cls_flower); tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, false); } @@ -219,7 +215,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, struct fl_flow_key *mask, struct cls_fl_filter *f) { - struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(f->flags); @@ -233,17 +228,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.key = &f->mkey; cls_flower.exts = &f->exts; - if (tc_can_offload(dev)) { - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, - &cls_flower); - if (err) { - if (skip_sw) - return err; - } else { - f->flags |= TCA_CLS_FLAGS_IN_HW; - } - } - err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); if (err < 0) { @@ -262,7 +246,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) { struct tc_cls_flower_offload cls_flower = {}; - struct net_device *dev = tp->q->dev_queue->dev; struct tcf_block *block = tp->chain->block; tc_cls_common_offload_init(&cls_flower.common, tp); @@ -270,9 +253,6 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) cls_flower.cookie = (unsigned long) f; cls_flower.exts = &f->exts; - if (tc_can_offload(dev)) - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, - &cls_flower); tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, false); } diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 5278534c7e87..70e78d74f6d3 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -54,7 +54,6 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, unsigned long cookie) { - struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_matchall_offload cls_mall = {}; struct tcf_block *block = tp->chain->block; @@ -62,9 +61,6 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; - if (tc_can_offload(dev)) - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, - &cls_mall); tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false); } @@ -72,7 +68,6 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, unsigned long cookie) { - struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_matchall_offload cls_mall = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(head->flags); @@ -83,17 +78,6 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, 
cls_mall.exts = &head->exts; cls_mall.cookie = cookie; - if (tc_can_offload(dev)) { - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, - &cls_mall); - if (err) { - if (skip_sw) - return err; - } else { - head->flags |= TCA_CLS_FLAGS_IN_HW; - } - } - err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); if (err < 0) { diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d53da7968eda..9ff17159fb61 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -464,7 +464,6 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) { - struct net_device *dev = tp->q->dev_queue->dev; struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; @@ -474,15 +473,12 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - if (tc_can_offload(dev)) - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags) { - struct net_device *dev = tp->q->dev_queue->dev; struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; bool skip_sw = tc_skip_sw(flags); @@ -495,17 +491,6 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - if (tc_can_offload(dev)) { - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, - &cls_u32); - if (err) { - if (skip_sw) - return err; - } else { - offloaded = true; - } - } - err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { u32_clear_hw_hnode(tp, h); @@ -522,7 +507,6 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) { - struct net_device *dev = tp->q->dev_queue->dev; struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; @@ -530,15 +514,12 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = handle; - if (tc_can_offload(dev)) - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { - struct net_device *dev = tp->q->dev_queue->dev; struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; bool skip_sw = tc_skip_sw(flags); @@ -560,18 +541,6 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, if (n->ht_down) cls_u32.knode.link_handle = n->ht_down->handle; - - if (tc_can_offload(dev)) { - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, - &cls_u32); - if (err) { - if (skip_sw) - return err; - } else { - n->flags |= TCA_CLS_FLAGS_IN_HW; - } - } - err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { u32_remove_hw_knode(tp, n->handle); From d58d31a118690b578897749feda48416ac10ca43 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:47 +0200 Subject: [PATCH 1239/2177] net: sched: remove unused classid field from tc_cls_common_offload It is no longer used by the drivers, so remove it. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index fcca5a9d9880..04caa246e747 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -561,7 +561,6 @@ struct tc_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; - u32 classid; }; static inline void @@ -571,7 +570,6 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; - cls_common->classid = tp->classid; } struct tc_cls_u32_knode { From fa71212e91811ac67014ad19d4fc3b3c3446ccf7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:48 +0200 Subject: [PATCH 1240/2177] net: sched: remove unused is_classid_clsact_ingress/egress helpers These helpers are no longer in use by drivers, so remove them. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2d234af15f3e..b8ecafce4ba1 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -135,19 +135,6 @@ static inline unsigned int psched_mtu(const struct net_device *dev) return dev->mtu + dev->hard_header_len; } -static inline bool is_classid_clsact_ingress(u32 classid) -{ - /* This also returns true for ingress qdisc */ - return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && - TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS); -} - -static inline bool is_classid_clsact_egress(u32 classid) -{ - return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && - TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS); -} - static inline struct net *qdisc_net(struct Qdisc *q) { return dev_net(q->dev_queue->dev); From cb4dc41eaad0cb336bb5ddd379ae0d2cc89cb62b Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 19 Oct 2017 16:42:04 +0200 Subject: [PATCH 1241/2177] tipc: fix broken tipc_poll() function In commit ae236fb208a6 ("tipc: receive group membership events via member socket") we broke the tipc_poll() function by checking the state of the receive queue before the call to sock_poll_wait(), while relying on that state afterwards, when it might have changed. We fix this in this commit by checking the receive queue state at the point where it is actually used. Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2bbab4fe2f53..357954ceb25c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -714,7 +714,6 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; u32 revents = 0; @@ -733,7 +732,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, /* fall thru' */ case TIPC_LISTEN: case TIPC_CONNECTING: - if (skb) + if (!skb_queue_empty(&sk->sk_receive_queue)) revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: @@ -742,7 +741,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, revents |= POLLOUT; if (!tipc_sk_type_connectionless(sk)) break; - if (!skb) + if (skb_queue_empty(&sk->sk_receive_queue)) break; revents |= POLLIN | POLLRDNORM; break; From 3c467bf399106030d5a97d844ee119caec04e817 Mon Sep 17 00:00:00 2001 From: Steve Lin Date: Thu, 19 Oct 2017 10:45:56 -0400 Subject: [PATCH 1242/2177] bnxt: Move generic devlink code to new file Moving generic devlink code (registration) out of VF-R code into new bnxt_devlink file, in preparation for future work to add additional devlink functionality to bnxt. Signed-off-by: Steve Lin Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/Makefile | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 65 +++++++++++++++++++ .../net/ethernet/broadcom/bnxt/bnxt_devlink.h | 39 +++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 53 +-------------- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h | 37 +---------- 6 files changed, 112 insertions(+), 85 deletions(-) create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index 457201f409a7..59c8ec9c1cad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile +++ b/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_BNXT) += bnxt_en.o -bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o +bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 22a94b16ebfa..b31bdec26fce 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -61,6 +61,7 @@ #include "bnxt_xdp.h" #include "bnxt_vfr.h" #include "bnxt_tc.h" +#include "bnxt_devlink.h" #define BNXT_TX_TIMEOUT (5 * HZ) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c new file mode 100644 index 000000000000..f3f6aa868d6c --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -0,0 +1,65 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2017 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
+ */ + +#include +#include +#include "bnxt_hsi.h" +#include "bnxt.h" +#include "bnxt_vfr.h" +#include "bnxt_devlink.h" + +static const struct devlink_ops bnxt_dl_ops = { +#ifdef CONFIG_BNXT_SRIOV + .eswitch_mode_set = bnxt_dl_eswitch_mode_set, + .eswitch_mode_get = bnxt_dl_eswitch_mode_get, +#endif /* CONFIG_BNXT_SRIOV */ +}; + +int bnxt_dl_register(struct bnxt *bp) +{ + struct devlink *dl; + int rc; + + if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV)) + return 0; + + if (bp->hwrm_spec_code < 0x10800) { + netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n"); + return -ENOTSUPP; + } + + dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); + if (!dl) { + netdev_warn(bp->dev, "devlink_alloc failed"); + return -ENOMEM; + } + + bnxt_link_bp_to_dl(bp, dl); + bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; + rc = devlink_register(dl, &bp->pdev->dev); + if (rc) { + bnxt_link_bp_to_dl(bp, NULL); + devlink_free(dl); + netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc); + return rc; + } + + return 0; +} + +void bnxt_dl_unregister(struct bnxt *bp) +{ + struct devlink *dl = bp->dl; + + if (!dl) + return; + + devlink_unregister(dl); + devlink_free(dl); +} diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h new file mode 100644 index 000000000000..e92a35d8b642 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -0,0 +1,39 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2017 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#ifndef BNXT_DEVLINK_H +#define BNXT_DEVLINK_H + +/* Struct to hold housekeeping info needed by devlink interface */ +struct bnxt_dl { + struct bnxt *bp; /* back ptr to the controlling dev */ +}; + +static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl) +{ + return ((struct bnxt_dl *)devlink_priv(dl))->bp; +} + +/* To clear devlink pointer from bp, pass NULL dl */ +static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl) +{ + bp->dl = dl; + + /* add a back pointer in dl to bp */ + if (dl) { + struct bnxt_dl *bp_dl = devlink_priv(dl); + + bp_dl->bp = bp; + } +} + +int bnxt_dl_register(struct bnxt *bp); +void bnxt_dl_unregister(struct bnxt *bp); + +#endif /* BNXT_DEVLINK_H */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 6dff5aa57f16..4ae935999ebe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -16,6 +16,7 @@ #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_vfr.h" +#include "bnxt_devlink.h" #include "bnxt_tc.h" #ifdef CONFIG_BNXT_SRIOV @@ -451,7 +452,7 @@ err: } /* Devlink related routines */ -static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode) +int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct bnxt *bp = bnxt_get_bp_from_dl(devlink); @@ -459,7 +460,7 @@ static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode) return 0; } -static int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode) +int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode) { struct bnxt *bp = bnxt_get_bp_from_dl(devlink); int rc = 0; @@ -497,52 +498,4 @@ done: return rc; } -static const struct devlink_ops bnxt_dl_ops = { - .eswitch_mode_set = 
bnxt_dl_eswitch_mode_set, - .eswitch_mode_get = bnxt_dl_eswitch_mode_get -}; - -int bnxt_dl_register(struct bnxt *bp) -{ - struct devlink *dl; - int rc; - - if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV)) - return 0; - - if (bp->hwrm_spec_code < 0x10800) { - netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n"); - return -ENOTSUPP; - } - - dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); - if (!dl) { - netdev_warn(bp->dev, "devlink_alloc failed"); - return -ENOMEM; - } - - bnxt_link_bp_to_dl(bp, dl); - bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; - rc = devlink_register(dl, &bp->pdev->dev); - if (rc) { - bnxt_link_bp_to_dl(bp, NULL); - devlink_free(dl); - netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc); - return rc; - } - - return 0; -} - -void bnxt_dl_unregister(struct bnxt *bp) -{ - struct devlink *dl = bp->dl; - - if (!dl) - return; - - devlink_unregister(dl); - devlink_free(dl); -} - #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h index 7787cd24606a..fb06bbe70e42 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h @@ -14,31 +14,6 @@ #define MAX_CFA_CODE 65536 -/* Struct to hold housekeeping info needed by devlink interface */ -struct bnxt_dl { - struct bnxt *bp; /* back ptr to the controlling dev */ -}; - -static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl) -{ - return ((struct bnxt_dl *)devlink_priv(dl))->bp; -} - -/* To clear devlink pointer from bp, pass NULL dl */ -static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl) -{ - bp->dl = dl; - - /* add a back pointer in dl to bp */ - if (dl) { - struct bnxt_dl *bp_dl = devlink_priv(dl); - - bp_dl->bp = bp; - } -} - -int bnxt_dl_register(struct bnxt *bp); -void bnxt_dl_unregister(struct bnxt *bp); void bnxt_vf_reps_destroy(struct bnxt *bp); void bnxt_vf_reps_close(struct bnxt *bp); void bnxt_vf_reps_open(struct bnxt *bp); @@ -53,17 +28,11 @@ static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev) return bp->pf.vf[vf_rep->vf_idx].fw_fid; } +int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode); +int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode); + #else -static inline int bnxt_dl_register(struct bnxt *bp) -{ - return 0; -} - -static inline void bnxt_dl_unregister(struct bnxt *bp) -{ -} - static inline void bnxt_vf_reps_close(struct bnxt *bp) { } From e28101a37c3e6b74aeebe4a340636b1782deb830 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 19 Oct 2017 12:43:08 -0500 Subject: [PATCH 1243/2177] net: netrom: nr_in: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/netrom/nr_in.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 80dbd0beb516..fbfdae452ff9 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -125,7 +125,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, case NR_DISCREQ: nr_write_internal(sk, NR_DISCACK); - + /* fall through */ case NR_DISCACK: nr_disconnect(sk, 0); break; From 279badc2a85be83e0187b8c566e3b476b76a87a2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Thu, 19 Oct 2017 12:55:03 -0500 Subject: [PATCH 1244/2177] openvswitch: conntrack: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case I placed a "fall through" comment on its own line, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index fe861e2f0deb..b27c5c6d9cab 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -752,6 +752,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, } } /* Non-ICMP, fall thru to initialize if needed. */ + /* fall through */ case IP_CT_NEW: /* Seen it before? This can happen for loopback, retrans, * or local packets. From a05b8c43ac3cc156600a60bf6dcd30109bd42a73 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 19 Oct 2017 13:03:51 -0500 Subject: [PATCH 1245/2177] net: rose: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/rose/rose_in.c | 1 + net/rose/rose_route.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 0a6394754e81..9bbbfe325c5a 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -219,6 +219,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety switch (frametype) { case ROSE_RESET_REQUEST: rose_write_internal(sk, ROSE_RESET_CONFIRMATION); + /* fall through */ case ROSE_RESET_CONFIRMATION: rose_stop_timer(sk); rose_start_idletimer(sk); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 65921cd10323..8ca3124df83f 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -346,6 +346,7 @@ static int rose_del_node(struct rose_route_struct *rose_route, case 0: rose_node->neighbour[0] = rose_node->neighbour[1]; + /* fall through */ case 1: rose_node->neighbour[1] = rose_node->neighbour[2]; @@ -507,6 +508,7 @@ void rose_rt_device_down(struct net_device *dev) switch (i) { case 0: t->neighbour[0] = t->neighbour[1]; + /* fall through */ case 1: t->neighbour[1] = t->neighbour[2]; case 2: From f3ae608edb3be2e9a3f668d47aced3553eaf6c14 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 19 Oct 2017 16:28:24 -0500 Subject: [PATCH 1246/2177] net: sched: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/sched/sch_cbq.c | 1 + net/sched/sch_drr.c | 1 + net/sched/sch_fq_codel.c | 1 + net/sched/sch_hfsc.c | 1 + net/sched/sch_htb.c | 1 + net/sched/sch_multiq.c | 1 + net/sched/sch_prio.c | 1 + net/sched/sch_qfq.c | 1 + net/sched/sch_sfb.c | 1 + net/sched/sch_sfq.c | 1 + 10 files changed, 10 insertions(+) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index c3b92d62190e..6361be7881f1 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -255,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; case TC_ACT_RECLASSIFY: diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 753dc7a77b60..5bbcef3dcd8c 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -321,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 3c40edeff1e8..0305d791ea94 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return 0; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index a692184bd333..d04068a97d81 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1144,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 57be73c0e1d2..fa0380730ff0 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -244,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 31e0a284eeff..012216386c0b 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 95fad348c8d7..2c79559a0d31 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8694c7b6d2b1..6962b37a3ad3 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -709,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 487d375f5a06..0678debdd856 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -268,6 +268,7 @@ 
static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return false; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 5a9c95149c5c..890f4a4564e7 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -190,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return 0; } From 9cbe1f581d17baff7e93936feb041c90b29eb6a8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 19 Oct 2017 15:46:19 -0700 Subject: [PATCH 1247/2177] tools: bpftool: add pointer to file argument to print_hex() Make print_hex() able to print to any file instead of standard output only, and rename it to fprint_hex(). The function can now be called with the info() macro, for example, without splitting the output between standard and error outputs. Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/main.c | 8 ++++---- tools/bpf/bpftool/main.h | 2 +- tools/bpf/bpftool/map.c | 20 ++++++++++---------- tools/bpf/bpftool/prog.c | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index e02d00d6e00b..8662199ee050 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -100,7 +100,7 @@ bool is_prefix(const char *pfx, const char *str) return !memcmp(str, pfx, strlen(pfx)); } -void print_hex(void *arg, unsigned int n, const char *sep) +void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep) { unsigned char *data = arg; unsigned int i; @@ -111,13 +111,13 @@ void print_hex(void *arg, unsigned int n, const char *sep) if (!i) /* nothing */; else if (!(i % 16)) - printf("\n"); + fprintf(f, "\n"); else if (!(i % 8)) - printf(" "); + fprintf(f, " "); else pfx = sep; - printf("%s%02hhx", i ? pfx : "", data[i]); + fprintf(f, "%s%02hhx", i ? pfx : "", data[i]); } } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 844e4ef6db56..41e6c7d3fcad 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -67,7 +67,7 @@ enum bpf_obj_type { extern const char *bin_name; bool is_prefix(const char *pfx, const char *str); -void print_hex(void *arg, unsigned int n, const char *sep); +void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __attribute__((noreturn)); struct cmd { diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 0528a5379e6c..b1dad76215ed 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -216,12 +216,12 @@ static void print_entry(struct bpf_map_info *info, unsigned char *key, !break_names; printf("key:%c", break_names ? '\n' : ' '); - print_hex(key, info->key_size, " "); + fprint_hex(stdout, key, info->key_size, " "); printf(single_line ? " " : "\n"); printf("value:%c", break_names ? '\n' : ' '); - print_hex(value, info->value_size, " "); + fprint_hex(stdout, value, info->value_size, " "); printf("\n"); } else { @@ -230,13 +230,13 @@ static void print_entry(struct bpf_map_info *info, unsigned char *key, n = get_possible_cpus(); printf("key:\n"); - print_hex(key, info->key_size, " "); + fprint_hex(stdout, key, info->key_size, " "); printf("\n"); for (i = 0; i < n; i++) { printf("value (CPU %02d):%c", i, info->value_size > 16 ? 
'\n' : ' '); - print_hex(value + i * info->value_size, - info->value_size, " "); + fprint_hex(stdout, value + i * info->value_size, + info->value_size, " "); printf("\n"); } } @@ -492,8 +492,8 @@ static int do_dump(int argc, char **argv) print_entry(&info, key, value); } else { info("can't lookup element with key: "); - print_hex(key, info.key_size, " "); - printf("\n"); + fprint_hex(stderr, key, info.key_size, " "); + fprintf(stderr, "\n"); } prev_key = key; @@ -587,7 +587,7 @@ static int do_lookup(int argc, char **argv) print_entry(&info, key, value); } else if (errno == ENOENT) { printf("key:\n"); - print_hex(key, info.key_size, " "); + fprint_hex(stdout, key, info.key_size, " "); printf("\n\nNot found\n"); } else { err("lookup failed: %s\n", strerror(errno)); @@ -642,14 +642,14 @@ static int do_getnext(int argc, char **argv) if (key) { printf("key:\n"); - print_hex(key, info.key_size, " "); + fprint_hex(stdout, key, info.key_size, " "); printf("\n"); } else { printf("key: None\n"); } printf("next key:\n"); - print_hex(nextkey, info.key_size, " "); + fprint_hex(stdout, nextkey, info.key_size, " "); printf("\n"); exit_free: diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d60f5307b6e2..aa6d72ea3807 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -224,7 +224,7 @@ static int show_prog(int fd) printf("name %s ", info.name); printf("tag "); - print_hex(info.tag, BPF_TAG_SIZE, ""); + fprint_hex(stdout, info.tag, BPF_TAG_SIZE, ""); printf("\n"); if (info.load_time) { @@ -319,7 +319,7 @@ static void dump_xlated(void *buf, unsigned int len, bool opcodes) if (opcodes) { printf(" "); - print_hex(insn + i, 8, " "); + fprint_hex(stdout, insn + i, 8, " "); printf("\n"); } From 1739c26da72c4170c86c368c75133adbb740efef Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 19 Oct 2017 15:46:20 -0700 Subject: [PATCH 1248/2177] tools: bpftool: fix return value when all eBPF programs have been shown Change the program to have a more consistent return code. Specifically, do not make bpftool return an error code simply because it reaches the end of the list of the eBPF programs to show. Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/prog.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index aa6d72ea3807..ede7957adcd9 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -275,8 +275,10 @@ static int do_show(int argc, char **argv) while (true) { err = bpf_prog_get_next_id(id, &id); if (err) { - if (errno == ENOENT) + if (errno == ENOENT) { + err = 0; break; + } err("can't get next program: %s\n", strerror(errno)); if (errno == EINVAL) err("kernel too old?\n"); From 1d84487e2a2b98892c3dec7934604e9b76577aa6 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 19 Oct 2017 15:46:21 -0700 Subject: [PATCH 1249/2177] tools: bpftool: use err() instead of info() if there are too many insns Make error messages and return codes more consistent. Specifically, replace the use of info() macro with err() when too many eBPF instructions are received to be dumped, given that bpftool returns with a non-null exit value in that case. Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index ede7957adcd9..6c03d2ea3f79 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -416,7 +416,7 @@ static int do_dump(int argc, char **argv) } if (*member_len > buf_size) { - info("too many instructions returned\n"); + err("too many instructions returned\n"); goto err_free; } From 9f606179c84930ada1e347b6d84bb913c8492fec Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 19 Oct 2017 15:46:22 -0700 Subject: [PATCH 1250/2177] tools: bpftool: add `bpftool prog help` as real command i.r.t exit code Make error messages and return codes more consistent. Specifically, make `bpftool prog help` a real command, instead of printing usage by default for a non-recognized "help" command. Output is the same, but this makes bpftool return with a success value instead of an error. Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/prog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 6c03d2ea3f79..355c14325622 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -475,6 +475,7 @@ static int do_help(int argc, char **argv) static const struct cmd cmds[] = { { "show", do_show }, + { "help", do_help }, { "dump", do_dump }, { "pin", do_pin }, { 0 } From d9c0b48db9f4b870535b6d7255638347e770633f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 19 Oct 2017 15:46:23 -0700 Subject: [PATCH 1251/2177] tools: bpftool: print only one error message on byte parsing failure Make error messages more consistent. Specifically, when bpftool fails at parsing map key bytes, make it print a single error message to stderr and return from the function, instead of (always) printing a second error message afterwards. Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index b1dad76215ed..e1004d825392 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -252,7 +252,7 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val, val[i] = strtoul(argv[i], &endptr, 0); if (*endptr) { err("error parsing byte: %s\n", argv[i]); - break; + return NULL; } i++; } From 9e2308c133a92ff98d1397149c8483858bcf8fc0 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 19 Oct 2017 15:46:24 -0700 Subject: [PATCH 1252/2177] tools: bpftool: print all relevant byte opcodes for "load double word" The eBPF instruction permitting to load double words (8 bytes) into a register need 8-byte long "immediate" field, and thus occupy twice the space of other instructions. bpftool was aware of this and would increment the instruction counter only once on meeting such instruction, but it would only print the first four bytes of the immediate value to load. Make it able to dump the whole 16 byte-long double instruction instead (as would `llvm-objdump -d `). Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/prog.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 355c14325622..57edbea2fbe8 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -313,20 +313,29 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) static void dump_xlated(void *buf, unsigned int len, bool opcodes) { struct bpf_insn *insn = buf; + bool double_insn = false; unsigned int i; for (i = 0; i < len / sizeof(*insn); i++) { + if (double_insn) { + double_insn = false; + continue; + } + + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); + printf("% 4d: ", i); print_bpf_insn(print_insn, NULL, insn + i, true); if (opcodes) { printf(" "); fprint_hex(stdout, insn + i, 8, " "); + if (double_insn && i < len - 1) { + printf(" "); + fprint_hex(stdout, insn + i + 1, 8, " "); + } printf("\n"); } - - if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) - i++; } } From 8dfbc6d1d213df340e5dcfdcdc76ad9407a29273 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 19 Oct 2017 15:46:25 -0700 Subject: [PATCH 1253/2177] tools: bpftool: show that `opcodes` or `file FILE` should be exclusive For the `bpftool prog dump { jited | xlated } ...` command, adding `opcodes` keyword (to request opcodes to be printed) will have no effect if `file FILE` (to write binary output to FILE) is provided. The manual page and the help message to be displayed in the terminal should reflect that, and indicate that these options should be mutually exclusive. Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 8 ++++---- tools/bpf/bpftool/prog.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 3968f0bd37db..69b3770370c8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -11,8 +11,8 @@ SYNOPSIS ======== | **bpftool** prog show [*PROG*] -| **bpftool** prog dump xlated *PROG* [file *FILE*] [opcodes] -| **bpftool** prog dump jited *PROG* [file *FILE*] [opcodes] +| **bpftool** prog dump xlated *PROG* [{file *FILE* | opcodes }] +| **bpftool** prog dump jited *PROG* [{file *FILE* | opcodes }] | **bpftool** prog pin *PROG* *FILE* | **bpftool** prog help | @@ -28,14 +28,14 @@ DESCRIPTION Output will start with program ID followed by program type and zero or more named attributes (depending on kernel version). - **bpftool prog dump xlated** *PROG* [**file** *FILE*] [**opcodes**] + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** }] Dump eBPF instructions of the program from the kernel. If *FILE* is specified image will be written to a file, otherwise it will be disassembled and printed to stdout. **opcodes** controls if raw opcodes will be printed. - **bpftool prog dump jited** *PROG* [**file** *FILE*] [**opcodes**] + **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }] Dump jited image (host machine code) of the program. If *FILE* is specified image will be written to a file, otherwise it will be disassembled and printed to stdout. 
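For reference, the special handling added to dump_xlated() above exists because a 64-bit immediate load (BPF_LD | BPF_IMM | BPF_DW, "ldimm64") is encoded as two consecutive struct bpf_insn slots: the imm field of the first slot carries the low 32 bits and the imm field of the second slot carries the high 32 bits. A minimal user-space sketch of such a walk, simplified and not the bpftool code itself:

        #include <stdbool.h>
        #include <stdio.h>
        #include <linux/bpf.h>

        /* Walk a buffer of translated instructions; an ldimm64 occupies two
         * struct bpf_insn slots, so the second slot is consumed together with
         * the first instead of being decoded on its own.
         */
        static void walk_insns(const struct bpf_insn *insn, unsigned int cnt)
        {
                bool second_half = false;
                unsigned int i;

                for (i = 0; i < cnt; i++) {
                        if (second_half) {
                                second_half = false;
                                continue;
                        }

                        if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
                            i + 1 < cnt) {
                                /* low 32 bits in slot i, high 32 bits in slot i + 1 */
                                unsigned long long imm64 =
                                        (unsigned long long)(__u32)insn[i].imm |
                                        (unsigned long long)(__u32)insn[i + 1].imm << 32;

                                printf("%4u: r%d = 0x%llx ll\n",
                                       i, insn[i].dst_reg, imm64);
                                second_half = true;
                        } else {
                                printf("%4u: code 0x%02x\n", i, insn[i].code);
                        }
                }
        }

With the change above, requesting opcodes now prints both 8-byte slots of such an instruction instead of only the first one.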
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 57edbea2fbe8..7838206a455b 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -469,8 +469,8 @@ static int do_help(int argc, char **argv) { fprintf(stderr, "Usage: %s %s show [PROG]\n" - " %s %s dump xlated PROG [file FILE] [opcodes]\n" - " %s %s dump jited PROG [file FILE] [opcodes]\n" + " %s %s dump xlated PROG [{ file FILE | opcodes }]\n" + " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" " %s %s help\n" "\n" From 821cfbb0dcfbb24506dc6958361ca2b80b928049 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 19 Oct 2017 15:46:26 -0700 Subject: [PATCH 1254/2177] tools: bpftool: add a command to display bpftool version This command can be used to print the version of the tool, which is in fact the version from Linux taken from usr/include/linux/version.h. Example usage: $ bpftool version bpftool v4.14.0 Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/Documentation/bpftool.rst | 2 ++ tools/bpf/bpftool/main.c | 14 +++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index f1df1893fb54..45ad8baf1915 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -14,6 +14,8 @@ SYNOPSIS **bpftool** batch file *FILE* + **bpftool** version + *OBJECT* := { **map** | **program** } *MAP-COMMANDS* := diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 8662199ee050..814d19e1b53f 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -62,13 +63,23 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s OBJECT { COMMAND | help }\n" " %s batch file FILE\n" + " %s version\n" "\n" " OBJECT := { prog | map }\n", - bin_name, bin_name); + bin_name, bin_name, bin_name); return 0; } +static int do_version(int argc, char **argv) +{ + printf("%s v%d.%d.%d\n", bin_name, + LINUX_VERSION_CODE >> 16, + LINUX_VERSION_CODE >> 8 & 0xf, + LINUX_VERSION_CODE & 0xf); + return 0; +} + int cmd_select(const struct cmd *cmds, int argc, char **argv, int (*help)(int argc, char **argv)) { @@ -128,6 +139,7 @@ static const struct cmd cmds[] = { { "batch", do_batch }, { "prog", do_prog }, { "map", do_map }, + { "version", do_version }, { 0 } }; From d43e5aca8780e4084cad0969c71669cf99dc6030 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 20 Oct 2017 10:19:21 +0800 Subject: [PATCH 1255/2177] net: hns3: Refactor the skb receiving and transmitting function This patch refactors the skb receiving and transmitting functions and export them in order to support the ethtool's mac loopback selftest. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 17 ++++++++++++----- .../ethernet/hisilicon/hns3/hns3pf/hns3_enet.h | 4 ++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 8fa4e658b273..8383d6726ae4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -900,8 +900,7 @@ static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig) } } -static netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, - struct net_device *netdev) +netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hns3_nic_ring_data *ring_data = @@ -1943,6 +1942,11 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, } } +static void hns3_rx_skb(struct hns3_enet_ring *ring, struct sk_buff *skb) +{ + napi_gro_receive(&ring->tqp_vector->napi, skb); +} + static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, struct sk_buff **out_skb, int *out_bnum) { @@ -2077,7 +2081,9 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, return 0; } -static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget) +int hns3_clean_rx_ring( + struct hns3_enet_ring *ring, int budget, + void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *)) { #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16 struct net_device *netdev = ring->tqp->handle->kinfo.netdev; @@ -2115,7 +2121,7 @@ static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget) /* Do update ip stack process */ skb->protocol = eth_type_trans(skb, netdev); - (void)napi_gro_receive(&ring->tqp_vector->napi, skb); + rx_fn(ring, skb); recv_pkts++; } @@ -2258,7 +2264,8 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget) rx_budget = max(budget / tqp_vector->num_tqps, 1); hns3_for_each_ring(ring, tqp_vector->rx_group) { - int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget); + int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget, + hns3_rx_skb); if (rx_cleaned >= rx_budget) clean_complete = false; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h index 66599890b4d4..6228b2603d93 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h @@ -597,6 +597,10 @@ void hns3_ethtool_set_ops(struct net_device *netdev); int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); int hns3_init_all_ring(struct hns3_nic_priv *priv); int hns3_uninit_all_ring(struct hns3_nic_priv *priv); +netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev); +int hns3_clean_rx_ring( + struct hns3_enet_ring *ring, int budget, + void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *)); #ifdef CONFIG_HNS3_DCB void hns3_dcbnl_setup(struct hnae3_handle *handle); From c39c4d98dc658f5d44b96982333f3611d9cc2be7 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 20 Oct 2017 10:19:22 +0800 Subject: [PATCH 1256/2177] net: hns3: Add mac loopback selftest support in hns3 driver This patch adds mac loopback selftest support for ethtool cmd by checking if a transmitted packet can be received correctly when mac loopback is enabled. Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 54 ++++ .../hisilicon/hns3/hns3pf/hns3_ethtool.c | 273 ++++++++++++++++++ 2 files changed, 327 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 6e93943c489a..8508521c26e8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3149,6 +3149,59 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) "mac enable fail, ret =%d.\n", ret); } +static int hclge_set_loopback(struct hnae3_handle *handle, + enum hnae3_loop loop_mode, bool en) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_config_mac_mode_cmd *req; + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + u32 loop_en; + int ret; + + switch (loop_mode) { + case HNAE3_MAC_INTER_LOOP_MAC: + req = (struct hclge_config_mac_mode_cmd *)&desc.data[0]; + /* 1 Read out the MAC mode config at first */ + hclge_cmd_setup_basic_desc(&desc, + HCLGE_OPC_CONFIG_MAC_MODE, + true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "mac loopback get fail, ret =%d.\n", + ret); + return ret; + } + + /* 2 Then setup the loopback flag */ + loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en); + if (en) + hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 1); + else + hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0); + + req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en); + + /* 3 Config mac work mode with loopback flag + * and its original configure parameters + */ + hclge_cmd_reuse_desc(&desc, false); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "mac loopback set fail, ret =%d.\n", ret); + break; + default: + ret = -ENOTSUPP; + dev_err(&hdev->pdev->dev, + "loop_mode %d is not supported\n", loop_mode); + break; + } + + return ret; +} + static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id, int stream_id, bool enable) { @@ -4485,6 +4538,7 @@ static const struct hnae3_ae_ops hclge_ops = { .unmap_ring_from_vector = hclge_unmap_ring_from_vector, .get_vector = hclge_get_vector, .set_promisc_mode = hclge_set_promisc_mode, + .set_loopback = hclge_set_loopback, .start = hclge_ae_start, .stop = hclge_ae_stop, .get_status = hclge_get_status, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index ddbd7f30c6a4..6c469e49a04f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -59,6 +59,16 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_TQP_STATS_COUNT (HNS3_TXQ_STATS_COUNT + HNS3_RXQ_STATS_COUNT) +#define HNS3_SELF_TEST_TPYE_NUM 1 +#define HNS3_NIC_LB_TEST_PKT_NUM 1 +#define HNS3_NIC_LB_TEST_RING_ID 0 +#define HNS3_NIC_LB_TEST_PACKET_SIZE 128 + +/* Nic loopback test err */ +#define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 +#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 +#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 + struct hns3_link_mode_mapping { u32 hns3_link_mode; u32 ethtool_link_mode; @@ -77,6 +87,268 @@ static const struct hns3_link_mode_mapping hns3_lm_map[] = { {HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT}, }; +static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + int ret; + + if (!h->ae_algo->ops->set_loopback || + !h->ae_algo->ops->set_promisc_mode) + return 
-EOPNOTSUPP; + + switch (loop) { + case HNAE3_MAC_INTER_LOOP_MAC: + ret = h->ae_algo->ops->set_loopback(h, loop, true); + break; + case HNAE3_MAC_LOOP_NONE: + ret = h->ae_algo->ops->set_loopback(h, + HNAE3_MAC_INTER_LOOP_MAC, false); + break; + default: + ret = -ENOTSUPP; + break; + } + + if (ret) + return ret; + + if (loop == HNAE3_MAC_LOOP_NONE) + h->ae_algo->ops->set_promisc_mode(h, ndev->flags & IFF_PROMISC); + else + h->ae_algo->ops->set_promisc_mode(h, 1); + + return ret; +} + +static int hns3_lp_up(struct net_device *ndev, enum hnae3_loop loop_mode) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + int ret; + + if (!h->ae_algo->ops->start) + return -EOPNOTSUPP; + + ret = h->ae_algo->ops->start(h); + if (ret) { + netdev_err(ndev, + "hns3_lb_up ae start return error: %d\n", ret); + return ret; + } + + ret = hns3_lp_setup(ndev, loop_mode); + usleep_range(10000, 20000); + + return ret; +} + +static int hns3_lp_down(struct net_device *ndev) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + int ret; + + if (!h->ae_algo->ops->stop) + return -EOPNOTSUPP; + + ret = hns3_lp_setup(ndev, HNAE3_MAC_LOOP_NONE); + if (ret) { + netdev_err(ndev, "lb_setup return error: %d\n", ret); + return ret; + } + + h->ae_algo->ops->stop(h); + usleep_range(10000, 20000); + + return 0; +} + +static void hns3_lp_setup_skb(struct sk_buff *skb) +{ + struct net_device *ndev = skb->dev; + unsigned char *packet; + struct ethhdr *ethh; + unsigned int i; + + skb_reserve(skb, NET_IP_ALIGN); + ethh = skb_put(skb, sizeof(struct ethhdr)); + packet = skb_put(skb, HNS3_NIC_LB_TEST_PACKET_SIZE); + + memcpy(ethh->h_dest, ndev->dev_addr, ETH_ALEN); + eth_zero_addr(ethh->h_source); + ethh->h_proto = htons(ETH_P_ARP); + skb_reset_mac_header(skb); + + for (i = 0; i < HNS3_NIC_LB_TEST_PACKET_SIZE; i++) + packet[i] = (unsigned char)(i & 0xff); +} + +static void hns3_lb_check_skb_data(struct hns3_enet_ring *ring, + struct sk_buff *skb) +{ + struct hns3_enet_tqp_vector *tqp_vector = ring->tqp_vector; + unsigned char *packet = skb->data; + u32 i; + + for (i = 0; i < skb->len; i++) + if (packet[i] != (unsigned char)(i & 0xff)) + break; + + /* The packet is correctly received */ + if (i == skb->len) + tqp_vector->rx_group.total_packets++; + else + print_hex_dump(KERN_ERR, "selftest:", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, true); + + dev_kfree_skb_any(skb); +} + +static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget) +{ + struct hnae3_handle *h = priv->ae_handle; + struct hnae3_knic_private_info *kinfo; + u32 i, rcv_good_pkt_total = 0; + + kinfo = &h->kinfo; + for (i = kinfo->num_tqps; i < kinfo->num_tqps * 2; i++) { + struct hns3_enet_ring *ring = priv->ring_data[i].ring; + struct hns3_enet_ring_group *rx_group; + u64 pre_rx_pkt; + + rx_group = &ring->tqp_vector->rx_group; + pre_rx_pkt = rx_group->total_packets; + + hns3_clean_rx_ring(ring, budget, hns3_lb_check_skb_data); + + rcv_good_pkt_total += (rx_group->total_packets - pre_rx_pkt); + rx_group->total_packets = pre_rx_pkt; + } + return rcv_good_pkt_total; +} + +static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid, + u32 end_ringid, u32 budget) +{ + u32 i; + + for (i = start_ringid; i <= end_ringid; i++) { + struct hns3_enet_ring *ring = priv->ring_data[i].ring; + + hns3_clean_tx_ring(ring, budget); + } +} + +/** + * hns3_lp_run_test - run loopback test + * @ndev: net device + * @mode: loopback type + */ +static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode) +{ + struct hns3_nic_priv *priv = 
netdev_priv(ndev); + struct sk_buff *skb; + u32 i, good_cnt; + int ret_val = 0; + + skb = alloc_skb(HNS3_NIC_LB_TEST_PACKET_SIZE + ETH_HLEN + NET_IP_ALIGN, + GFP_KERNEL); + if (!skb) + return HNS3_NIC_LB_TEST_NO_MEM_ERR; + + skb->dev = ndev; + hns3_lp_setup_skb(skb); + skb->queue_mapping = HNS3_NIC_LB_TEST_RING_ID; + + good_cnt = 0; + for (i = 0; i < HNS3_NIC_LB_TEST_PKT_NUM; i++) { + netdev_tx_t tx_ret; + + skb_get(skb); + tx_ret = hns3_nic_net_xmit(skb, ndev); + if (tx_ret == NETDEV_TX_OK) + good_cnt++; + else + netdev_err(ndev, "hns3_lb_run_test xmit failed: %d\n", + tx_ret); + } + if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) { + ret_val = HNS3_NIC_LB_TEST_TX_CNT_ERR; + netdev_err(ndev, "mode %d sent fail, cnt=0x%x, budget=0x%x\n", + mode, good_cnt, HNS3_NIC_LB_TEST_PKT_NUM); + goto out; + } + + /* Allow 200 milliseconds for packets to go from Tx to Rx */ + msleep(200); + + good_cnt = hns3_lb_check_rx_ring(priv, HNS3_NIC_LB_TEST_PKT_NUM); + if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) { + ret_val = HNS3_NIC_LB_TEST_RX_CNT_ERR; + netdev_err(ndev, "mode %d recv fail, cnt=0x%x, budget=0x%x\n", + mode, good_cnt, HNS3_NIC_LB_TEST_PKT_NUM); + } + +out: + hns3_lb_clear_tx_ring(priv, HNS3_NIC_LB_TEST_RING_ID, + HNS3_NIC_LB_TEST_RING_ID, + HNS3_NIC_LB_TEST_PKT_NUM); + + kfree_skb(skb); + return ret_val; +} + +/** + * hns3_nic_self_test - self test + * @ndev: net device + * @eth_test: test cmd + * @data: test result + */ +static void hns3_self_test(struct net_device *ndev, + struct ethtool_test *eth_test, u64 *data) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + int st_param[HNS3_SELF_TEST_TPYE_NUM][2]; + bool if_running = netif_running(ndev); + int test_index = 0; + u32 i; + + /* Only do offline selftest, or pass by default */ + if (eth_test->flags != ETH_TEST_FL_OFFLINE) + return; + + st_param[HNAE3_MAC_INTER_LOOP_MAC][0] = HNAE3_MAC_INTER_LOOP_MAC; + st_param[HNAE3_MAC_INTER_LOOP_MAC][1] = + h->flags & HNAE3_SUPPORT_MAC_LOOPBACK; + + if (if_running) + dev_close(ndev); + + set_bit(HNS3_NIC_STATE_TESTING, &priv->state); + + for (i = 0; i < HNS3_SELF_TEST_TPYE_NUM; i++) { + enum hnae3_loop loop_type = (enum hnae3_loop)st_param[i][0]; + + if (!st_param[i][1]) + continue; + + data[test_index] = hns3_lp_up(ndev, loop_type); + if (!data[test_index]) { + data[test_index] = hns3_lp_run_test(ndev, loop_type); + hns3_lp_down(ndev); + } + + if (data[test_index]) + eth_test->flags |= ETH_TEST_FL_FAILED; + + test_index++; + } + + clear_bit(HNS3_NIC_STATE_TESTING, &priv->state); + + if (if_running) + dev_open(ndev); +} + static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd, bool is_advertised) { @@ -553,6 +825,7 @@ static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) } static const struct ethtool_ops hns3_ethtool_ops = { + .self_test = hns3_self_test, .get_drvinfo = hns3_get_drvinfo, .get_link = hns3_get_link, .get_ringparam = hns3_get_ringparam, From eaf6ab76430881c30695a9195ce37d6b11b04997 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:35 +0300 Subject: [PATCH 1257/2177] drivers, net, ethernet: convert clip_entry.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, 
dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable clip_entry.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c | 13 ++++++------- drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h | 4 +++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index 3103ef9b561d..290039026ece 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -96,7 +96,8 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) if (!ret) { ce = cte; read_unlock_bh(&ctbl->lock); - goto found; + refcount_inc(&ce->refcnt); + return 0; } } read_unlock_bh(&ctbl->lock); @@ -108,7 +109,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) list_del(&ce->list); INIT_LIST_HEAD(&ce->list); spin_lock_init(&ce->lock); - atomic_set(&ce->refcnt, 0); + refcount_set(&ce->refcnt, 0); atomic_dec(&ctbl->nfree); list_add_tail(&ce->list, &ctbl->hash_list[hash]); if (v6) { @@ -138,9 +139,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) return -ENOMEM; } write_unlock_bh(&ctbl->lock); -found: - atomic_inc(&ce->refcnt); - + refcount_set(&ce->refcnt, 1); return 0; } EXPORT_SYMBOL(cxgb4_clip_get); @@ -179,7 +178,7 @@ void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6) found: write_lock_bh(&ctbl->lock); spin_lock_bh(&ce->lock); - if (atomic_dec_and_test(&ce->refcnt)) { + if (refcount_dec_and_test(&ce->refcnt)) { list_del(&ce->list); INIT_LIST_HEAD(&ce->list); list_add_tail(&ce->list, &ctbl->ce_free_head); @@ -266,7 +265,7 @@ int clip_tbl_show(struct seq_file *seq, void *v) ip[0] = '\0'; sprintf(ip, "%pISc", &ce->addr); seq_printf(seq, "%-25s %u\n", ip, - atomic_read(&ce->refcnt)); + refcount_read(&ce->refcnt)); } } seq_printf(seq, "Free clip entries : %d\n", atomic_read(&ctbl->nfree)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h index 35eb43c6bcbb..a0e0ae19649f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h @@ -10,9 +10,11 @@ * release for licensing terms and conditions. */ +#include + struct clip_entry { spinlock_t lock; /* Hold while modifying clip reference */ - atomic_t refcnt; + refcount_t refcnt; struct list_head list; union { struct sockaddr_in addr; From c6d4e63e065e796d2f2734c1e4e13f85f1c1c5e4 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:36 +0300 Subject: [PATCH 1258/2177] drivers, net, ethernet: convert mtk_eth.dma_refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) 
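The conversion applied throughout this series follows one pattern; roughly, with a hypothetical struct foo standing in for the driver object (a generic sketch, not any single driver's code):

        #include <linux/refcount.h>
        #include <linux/slab.h>

        struct foo {                            /* hypothetical object */
                refcount_t refcnt;
                /* ... driver-specific fields ... */
        };

        static void foo_init(struct foo *f)
        {
                refcount_set(&f->refcnt, 1);    /* was: atomic_set(&f->refcnt, 1) */
        }

        static struct foo *foo_get(struct foo *f)
        {
                /* was: atomic_inc_not_zero(); fails once the count has hit zero */
                return refcount_inc_not_zero(&f->refcnt) ? f : NULL;
        }

        static void foo_put(struct foo *f)
        {
                /* was: atomic_dec_and_test(); free only on the last reference */
                if (refcount_dec_and_test(&f->refcnt))
                        kfree(f);
        }

Unlike atomic_t, refcount_t saturates instead of wrapping on overflow and warns on underflow, which is what makes the accidental-misuse cases detectable.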
Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mtk_eth.dma_refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 +++++--- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 5e81a7263654..54adfd967858 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1817,7 +1817,7 @@ static int mtk_open(struct net_device *dev) struct mtk_eth *eth = mac->hw; /* we run 2 netdevs on the same dma ring so we only bring it up once */ - if (!atomic_read(ð->dma_refcnt)) { + if (!refcount_read(ð->dma_refcnt)) { int err = mtk_start_dma(eth); if (err) @@ -1827,8 +1827,10 @@ static int mtk_open(struct net_device *dev) napi_enable(ð->rx_napi); mtk_tx_irq_enable(eth, MTK_TX_DONE_INT); mtk_rx_irq_enable(eth, MTK_RX_DONE_INT); + refcount_set(ð->dma_refcnt, 1); } - atomic_inc(ð->dma_refcnt); + else + refcount_inc(ð->dma_refcnt); phy_start(dev->phydev); netif_start_queue(dev); @@ -1868,7 +1870,7 @@ static int mtk_stop(struct net_device *dev) phy_stop(dev->phydev); /* only shutdown DMA if this is the last user */ - if (!atomic_dec_and_test(ð->dma_refcnt)) + if (!refcount_dec_and_test(ð->dma_refcnt)) return 0; mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 3d3c24a28112..a3af4660de81 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -15,6 +15,8 @@ #ifndef MTK_ETH_H #define MTK_ETH_H +#include + #define MTK_QDMA_PAGE_SIZE 2048 #define MTK_MAX_RX_LENGTH 1536 #define MTK_TX_DMA_BUF_LEN 0x3fff @@ -632,7 +634,7 @@ struct mtk_eth { struct regmap *pctl; u32 chip_id; bool hwlro; - atomic_t dma_refcnt; + refcount_t dma_refcnt; struct mtk_tx_ring tx_ring; struct mtk_rx_ring rx_ring[MTK_MAX_RX_RING_NUM]; struct mtk_rx_ring rx_ring_qdma; From ff61b5e3f041c2f1aa8d7c700af3007889973889 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:37 +0300 Subject: [PATCH 1259/2177] drivers, net, mlx4: convert mlx4_cq.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mlx4_cq.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cq.c | 8 ++++---- include/linux/mlx4/device.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 72eb50cd5ecd..d8e9a323122e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -69,7 +69,7 @@ void mlx4_cq_tasklet_cb(unsigned long data) list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); mcq->tasklet_ctx.comp(mcq); - if (atomic_dec_and_test(&mcq->refcount)) + if (refcount_dec_and_test(&mcq->refcount)) complete(&mcq->free); if (time_after(jiffies, end)) break; @@ -92,7 +92,7 @@ static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) * still arrive. */ if (list_empty_careful(&cq->tasklet_ctx.list)) { - atomic_inc(&cq->refcount); + refcount_inc(&cq->refcount); kick = list_empty(&tasklet_ctx->list); list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); if (kick) @@ -344,7 +344,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq->cons_index = 0; cq->arm_sn = 1; cq->uar = uar; - atomic_set(&cq->refcount, 1); + refcount_set(&cq->refcount, 1); init_completion(&cq->free); cq->comp = mlx4_add_cq_to_tasklet; cq->tasklet_ctx.priv = @@ -386,7 +386,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) priv->eq_table.eq[MLX4_EQ_ASYNC].irq) synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); - if (atomic_dec_and_test(&cq->refcount)) + if (refcount_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b0a57e043fa3..daac2e3a1a58 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -40,7 +40,7 @@ #include #include -#include +#include #include @@ -751,7 +751,7 @@ struct mlx4_cq { int cqn; unsigned vector; - atomic_t refcount; + refcount_t refcount; struct completion free; struct { struct list_head list; From 0068895ff845c38e9e2b65c002c53c623379e436 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:38 +0300 Subject: [PATCH 1260/2177] drivers, net, mlx4: convert mlx4_qp.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mlx4_qp.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/qp.c | 8 ++++---- include/linux/mlx4/device.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 203320923340..769598f7b6c8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -55,7 +55,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) qp = __mlx4_qp_lookup(dev, qpn); if (qp) - atomic_inc(&qp->refcount); + refcount_inc(&qp->refcount); spin_unlock(&qp_table->lock); @@ -66,7 +66,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) qp->event(qp, event_type); - if (atomic_dec_and_test(&qp->refcount)) + if (refcount_dec_and_test(&qp->refcount)) complete(&qp->free); } @@ -420,7 +420,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) if (err) goto err_icm; - atomic_set(&qp->refcount, 1); + refcount_set(&qp->refcount, 1); init_completion(&qp->free); return 0; @@ -520,7 +520,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_remove); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) { - if (atomic_dec_and_test(&qp->refcount)) + if (refcount_dec_and_test(&qp->refcount)) complete(&qp->free); wait_for_completion(&qp->free); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index daac2e3a1a58..b8e19c4d6caa 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -768,7 +768,7 @@ struct mlx4_qp { int qpn; - atomic_t refcount; + refcount_t refcount; struct completion free; u8 usage; }; From 17ac99b2b8d08ed40f4525491d6eff330329a6d2 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:39 +0300 Subject: [PATCH 1261/2177] drivers, net, mlx4: convert mlx4_srq.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mlx4_srq.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/srq.c | 8 ++++---- include/linux/mlx4/device.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index bedf52126824..cbe4d9746ddf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -49,7 +49,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); rcu_read_unlock(); if (srq) - atomic_inc(&srq->refcount); + refcount_inc(&srq->refcount); else { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; @@ -57,7 +57,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) srq->event(srq, event_type); - if (atomic_dec_and_test(&srq->refcount)) + if (refcount_dec_and_test(&srq->refcount)) complete(&srq->free); } @@ -203,7 +203,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, if (err) goto err_radix; - atomic_set(&srq->refcount, 1); + refcount_set(&srq->refcount, 1); init_completion(&srq->free); return 0; @@ -232,7 +232,7 @@ void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq) radix_tree_delete(&srq_table->tree, srq->srqn); spin_unlock_irq(&srq_table->lock); - if (atomic_dec_and_test(&srq->refcount)) + if (refcount_dec_and_test(&srq->refcount)) complete(&srq->free); wait_for_completion(&srq->free); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b8e19c4d6caa..a9b5fed8f7c6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -781,7 +781,7 @@ struct mlx4_srq { int max_gs; int wqe_shift; - atomic_t refcount; + refcount_t refcount; struct completion free; }; From a4b51a9f83c6d359ff8fc0c66009283b6fdeeaf8 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:40 +0300 Subject: [PATCH 1262/2177] drivers, net, mlx5: convert mlx5_cq.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mlx5_cq.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 16 ++++++++-------- include/linux/mlx5/cq.h | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 336d4738b807..1016e05c7ec7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data) tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); mcq->tasklet_ctx.comp(mcq); - if (atomic_dec_and_test(&mcq->refcount)) + if (refcount_dec_and_test(&mcq->refcount)) complete(&mcq->free); if (time_after(jiffies, end)) break; @@ -80,7 +80,7 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) * still arrive. */ if (list_empty_careful(&cq->tasklet_ctx.list)) { - atomic_inc(&cq->refcount); + refcount_inc(&cq->refcount); list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); } spin_unlock_irqrestore(&tasklet_ctx->lock, flags); @@ -94,7 +94,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) spin_lock(&table->lock); cq = radix_tree_lookup(&table->tree, cqn); if (likely(cq)) - atomic_inc(&cq->refcount); + refcount_inc(&cq->refcount); spin_unlock(&table->lock); if (!cq) { @@ -106,7 +106,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) cq->comp(cq); - if (atomic_dec_and_test(&cq->refcount)) + if (refcount_dec_and_test(&cq->refcount)) complete(&cq->free); } @@ -119,7 +119,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) cq = radix_tree_lookup(&table->tree, cqn); if (cq) - atomic_inc(&cq->refcount); + refcount_inc(&cq->refcount); spin_unlock(&table->lock); @@ -130,7 +130,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) cq->event(cq, event_type); - if (atomic_dec_and_test(&cq->refcount)) + if (refcount_dec_and_test(&cq->refcount)) complete(&cq->free); } @@ -159,7 +159,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cqn = MLX5_GET(create_cq_out, out, cqn); cq->cons_index = 0; cq->arm_sn = 0; - atomic_set(&cq->refcount, 1); + refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) cq->comp = mlx5_add_cq_to_tasklet; @@ -222,7 +222,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) synchronize_irq(cq->irqn); mlx5_debug_cq_remove(dev, cq); - if (atomic_dec_and_test(&cq->refcount)) + if (refcount_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 95898847c7d4..6a57ec2f1ef7 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -35,7 +35,7 @@ #include #include - +#include struct mlx5_core_cq { u32 cqn; @@ -43,7 +43,7 @@ struct mlx5_core_cq { __be32 *set_ci_db; __be32 *arm_db; struct mlx5_uars_page *uar; - atomic_t refcount; + refcount_t refcount; struct completion free; unsigned vector; unsigned int irqn; From dd8e19456d60a519de1852ae4b1be7d62690d2e0 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:41 +0300 Subject: [PATCH 1263/2177] drivers, net, mlx5: convert fs_node.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, 
inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable fs_node.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 28 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 3 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f77e496f7053..c7fa1389bace 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -188,7 +188,7 @@ static void tree_init_node(struct fs_node *node, void (*del_hw_func)(struct fs_node *), void (*del_sw_func)(struct fs_node *)) { - atomic_set(&node->refcount, 1); + refcount_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); init_rwsem(&node->lock); @@ -200,7 +200,7 @@ static void tree_init_node(struct fs_node *node, static void tree_add_node(struct fs_node *node, struct fs_node *parent) { if (parent) - atomic_inc(&parent->refcount); + refcount_inc(&parent->refcount); node->parent = parent; /* Parent is the root */ @@ -212,7 +212,7 @@ static void tree_add_node(struct fs_node *node, struct fs_node *parent) static int tree_get_node(struct fs_node *node) { - return atomic_add_unless(&node->refcount, 1, 0); + return refcount_inc_not_zero(&node->refcount); } static void nested_down_read_ref_node(struct fs_node *node, @@ -220,7 +220,7 @@ static void nested_down_read_ref_node(struct fs_node *node, { if (node) { down_read_nested(&node->lock, class); - atomic_inc(&node->refcount); + refcount_inc(&node->refcount); } } @@ -229,7 +229,7 @@ static void nested_down_write_ref_node(struct fs_node *node, { if (node) { down_write_nested(&node->lock, class); - atomic_inc(&node->refcount); + refcount_inc(&node->refcount); } } @@ -237,19 +237,19 @@ static void down_write_ref_node(struct fs_node *node) { if (node) { down_write(&node->lock); - atomic_inc(&node->refcount); + refcount_inc(&node->refcount); } } static void up_read_ref_node(struct fs_node *node) { - atomic_dec(&node->refcount); + refcount_dec(&node->refcount); up_read(&node->lock); } static void up_write_ref_node(struct fs_node *node) { - atomic_dec(&node->refcount); + refcount_dec(&node->refcount); up_write(&node->lock); } @@ -257,7 +257,7 @@ static void tree_put_node(struct fs_node *node) { struct fs_node *parent_node = node->parent; - if (atomic_dec_and_test(&node->refcount)) { + if (refcount_dec_and_test(&node->refcount)) { if (node->del_hw_func) node->del_hw_func(node); if (parent_node) { @@ -280,8 +280,8 @@ static void tree_put_node(struct fs_node *node) static int tree_remove_node(struct fs_node *node) { - if (atomic_read(&node->refcount) > 1) { - atomic_dec(&node->refcount); + if (refcount_read(&node->refcount) > 1) { + refcount_dec(&node->refcount); return -EEXIST; } tree_put_node(node); @@ -1184,7 +1184,7 @@ static void destroy_flow_handle(struct fs_fte *fte, int i) { for (; --i >= 0;) { - if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) { + if 
(refcount_dec_and_test(&handle->rule[i]->node.refcount)) { fte->dests_size--; list_del(&handle->rule[i]->node.list); kfree(handle->rule[i]); @@ -1215,7 +1215,7 @@ create_flow_handle(struct fs_fte *fte, if (dest) { rule = find_flow_rule(fte, dest + i); if (rule) { - atomic_inc(&rule->node.refcount); + refcount_inc(&rule->node.refcount); goto rule_found; } } @@ -1466,7 +1466,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, trace_mlx5_fs_set_fte(fte, false); for (i = 0; i < handle->num_rules; i++) { - if (atomic_read(&handle->rule[i]->node.refcount) == 1) { + if (refcount_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 80f6f3c714c8..397d24a621a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -33,6 +33,7 @@ #ifndef _MLX5_FS_CORE_ #define _MLX5_FS_CORE_ +#include #include #include @@ -84,7 +85,7 @@ struct fs_node { struct fs_node *root; /* lock the node for writing and traversing */ struct rw_semaphore lock; - atomic_t refcount; + refcount_t refcount; bool active; void (*del_hw_func)(struct fs_node *); void (*del_sw_func)(struct fs_node *); From 956cc1e70c55ba4d5f87b9605ee727bbd225d56d Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:42 +0300 Subject: [PATCH 1264/2177] drivers, net, hamradio: convert sixpack.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable sixpack.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/net/hamradio/6pack.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index bbc7b7808a31..32f49c4ce457 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #define SIXPACK_VERSION "Revision: 0.3.0" @@ -120,7 +120,7 @@ struct sixpack { struct timer_list tx_t; struct timer_list resync_t; - atomic_t refcnt; + refcount_t refcnt; struct semaphore dead_sem; spinlock_t lock; }; @@ -381,7 +381,7 @@ static struct sixpack *sp_get(struct tty_struct *tty) read_lock(&disc_data_lock); sp = tty->disc_data; if (sp) - atomic_inc(&sp->refcnt); + refcount_inc(&sp->refcnt); read_unlock(&disc_data_lock); return sp; @@ -389,7 +389,7 @@ static struct sixpack *sp_get(struct tty_struct *tty) static void sp_put(struct sixpack *sp) { - if (atomic_dec_and_test(&sp->refcnt)) + if (refcount_dec_and_test(&sp->refcnt)) up(&sp->dead_sem); } @@ -576,7 +576,7 @@ static int sixpack_open(struct tty_struct *tty) sp->dev = dev; spin_lock_init(&sp->lock); - atomic_set(&sp->refcnt, 1); + refcount_set(&sp->refcnt, 1); sema_init(&sp->dead_sem, 0); /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ @@ -670,7 +670,7 @@ static void sixpack_close(struct tty_struct *tty) * We have now ensured that nobody can start using ap from now on, but * we have to wait for all existing users to finish. */ - if (!atomic_dec_and_test(&sp->refcnt)) + if (!refcount_dec_and_test(&sp->refcnt)) down(&sp->dead_sem); /* We must stop the queue to avoid potentially scribbling From e187246f0f7522e306d1d156a2d5edcaf82ba840 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:43 +0300 Subject: [PATCH 1265/2177] drivers, net: convert masces_rx_sa.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable masces_rx_sa.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/net/macsec.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index ccbe4eaffe4d..733e1c240bf5 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -146,7 +147,7 @@ struct macsec_rx_sa { struct macsec_key key; spinlock_t lock; u32 next_pn; - atomic_t refcnt; + refcount_t refcnt; bool active; struct macsec_rx_sa_stats __percpu *stats; struct macsec_rx_sc *sc; @@ -314,7 +315,7 @@ static struct macsec_rx_sa *macsec_rxsa_get(struct macsec_rx_sa __rcu *ptr) if (!sa || !sa->active) return NULL; - if (!atomic_inc_not_zero(&sa->refcnt)) + if (!refcount_inc_not_zero(&sa->refcnt)) return NULL; return sa; @@ -350,7 +351,7 @@ static void free_rxsa(struct rcu_head *head) static void macsec_rxsa_put(struct macsec_rx_sa *sa) { - if (atomic_dec_and_test(&sa->refcnt)) + if (refcount_dec_and_test(&sa->refcnt)) call_rcu(&sa->rcu, free_rxsa); } @@ -1339,7 +1340,7 @@ static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len, rx_sa->active = false; rx_sa->next_pn = 1; - atomic_set(&rx_sa->refcnt, 1); + refcount_set(&rx_sa->refcnt, 1); spin_lock_init(&rx_sa->lock); return 0; From 8676d76f087f9b2499f056aa0c5d857cffde6806 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:44 +0300 Subject: [PATCH 1266/2177] drivers, net: convert masces_rx_sc.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable masces_rx_sc.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- drivers/net/macsec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 733e1c240bf5..e0aeb51a5dab 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -172,7 +172,7 @@ struct macsec_rx_sc { bool active; struct macsec_rx_sa __rcu *sa[MACSEC_NUM_AN]; struct pcpu_rx_sc_stats __percpu *stats; - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu_head; }; @@ -331,12 +331,12 @@ static void free_rx_sc_rcu(struct rcu_head *head) static struct macsec_rx_sc *macsec_rxsc_get(struct macsec_rx_sc *sc) { - return atomic_inc_not_zero(&sc->refcnt) ? sc : NULL; + return refcount_inc_not_zero(&sc->refcnt) ? 
sc : NULL; } static void macsec_rxsc_put(struct macsec_rx_sc *sc) { - if (atomic_dec_and_test(&sc->refcnt)) + if (refcount_dec_and_test(&sc->refcnt)) call_rcu(&sc->rcu_head, free_rx_sc_rcu); } @@ -1411,7 +1411,7 @@ static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) rx_sc->sci = sci; rx_sc->active = true; - atomic_set(&rx_sc->refcnt, 1); + refcount_set(&rx_sc->refcnt, 1); secy = &macsec_priv(dev)->secy; rcu_assign_pointer(rx_sc->next, secy->rx_sc); From 28206cdb3bc079a97d24c33b4427395f9e406fad Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:45 +0300 Subject: [PATCH 1267/2177] drivers, net: convert masces_tx_sa.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable masces_tx_sa.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- drivers/net/macsec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e0aeb51a5dab..8948b6adc0c5 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -188,7 +188,7 @@ struct macsec_tx_sa { struct macsec_key key; spinlock_t lock; u32 next_pn; - atomic_t refcnt; + refcount_t refcnt; bool active; struct macsec_tx_sa_stats __percpu *stats; struct rcu_head rcu; @@ -362,7 +362,7 @@ static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr) if (!sa || !sa->active) return NULL; - if (!atomic_inc_not_zero(&sa->refcnt)) + if (!refcount_inc_not_zero(&sa->refcnt)) return NULL; return sa; @@ -379,7 +379,7 @@ static void free_txsa(struct rcu_head *head) static void macsec_txsa_put(struct macsec_tx_sa *sa) { - if (atomic_dec_and_test(&sa->refcnt)) + if (refcount_dec_and_test(&sa->refcnt)) call_rcu(&sa->rcu, free_txsa); } @@ -1437,7 +1437,7 @@ static int init_tx_sa(struct macsec_tx_sa *tx_sa, char *sak, int key_len, } tx_sa->active = false; - atomic_set(&tx_sa->refcnt, 1); + refcount_set(&tx_sa->refcnt, 1); spin_lock_init(&tx_sa->lock); return 0; From 313a912155c78ed87ad6fca175dc56b75fd00a58 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:46 +0300 Subject: [PATCH 1268/2177] drivers, net, ppp: convert asyncppp.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. 
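(Illustrative sketch only, not code from any of the patches in this series: the generic shape of the atomic_t to refcount_t conversion these commits apply, using a hypothetical struct foo with foo_get()/foo_put() helpers.)

#include <linux/refcount.h>
#include <linux/slab.h>

struct foo {
	refcount_t refcnt;	/* holds the object's lifetime, starts at 1 */
};

static struct foo *foo_alloc(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		refcount_set(&f->refcnt, 1);	/* was atomic_set(&f->refcnt, 1) */
	return f;
}

static struct foo *foo_get(struct foo *f)
{
	/* was atomic_inc_not_zero(); refcount_t saturates instead of wrapping */
	return refcount_inc_not_zero(&f->refcnt) ? f : NULL;
}

static void foo_put(struct foo *f)
{
	/* was atomic_dec_and_test(); an underflow warns rather than wrapping */
	if (refcount_dec_and_test(&f->refcnt))
		kfree(f);
}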
This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable asyncppp.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_async.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 814fd8fae67d..1b28e6e702f5 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -69,7 +69,7 @@ struct asyncppp { struct tasklet_struct tsk; - atomic_t refcnt; + refcount_t refcnt; struct semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; @@ -140,14 +140,14 @@ static struct asyncppp *ap_get(struct tty_struct *tty) read_lock(&disc_data_lock); ap = tty->disc_data; if (ap != NULL) - atomic_inc(&ap->refcnt); + refcount_inc(&ap->refcnt); read_unlock(&disc_data_lock); return ap; } static void ap_put(struct asyncppp *ap) { - if (atomic_dec_and_test(&ap->refcnt)) + if (refcount_dec_and_test(&ap->refcnt)) up(&ap->dead_sem); } @@ -185,7 +185,7 @@ ppp_asynctty_open(struct tty_struct *tty) skb_queue_head_init(&ap->rqueue); tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); - atomic_set(&ap->refcnt, 1); + refcount_set(&ap->refcnt, 1); sema_init(&ap->dead_sem, 0); ap->chan.private = ap; @@ -234,7 +234,7 @@ ppp_asynctty_close(struct tty_struct *tty) * our channel ops (i.e. ppp_async_send/ioctl) are in progress * by the time it returns. */ - if (!atomic_dec_and_test(&ap->refcnt)) + if (!refcount_dec_and_test(&ap->refcnt)) down(&ap->dead_sem); tasklet_kill(&ap->tsk); From d780cd44e3cea119a3346e6d7c04d35b9c50d54b Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:47 +0300 Subject: [PATCH 1269/2177] drivers, net, ppp: convert ppp_file.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable ppp_file.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/net/ppp/ppp_generic.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index e365866600ba..6566107cef84 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -84,7 +85,7 @@ struct ppp_file { struct sk_buff_head xq; /* pppd transmit queue */ struct sk_buff_head rq; /* receive queue for pppd */ wait_queue_head_t rwait; /* for poll on reading /dev/ppp */ - atomic_t refcnt; /* # refs (incl /dev/ppp attached) */ + refcount_t refcnt; /* # refs (incl /dev/ppp attached) */ int hdrlen; /* space to leave for headers */ int index; /* interface unit / channel number */ int dead; /* unit/channel has been shut down */ @@ -408,7 +409,7 @@ static int ppp_release(struct inode *unused, struct file *file) unregister_netdevice(ppp->dev); rtnl_unlock(); } - if (atomic_dec_and_test(&pf->refcnt)) { + if (refcount_dec_and_test(&pf->refcnt)) { switch (pf->kind) { case INTERFACE: ppp_destroy_interface(PF_TO_PPP(pf)); @@ -881,7 +882,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, mutex_lock(&pn->all_ppp_mutex); ppp = ppp_find_unit(pn, unit); if (ppp) { - atomic_inc(&ppp->file.refcnt); + refcount_inc(&ppp->file.refcnt); file->private_data = &ppp->file; err = 0; } @@ -896,7 +897,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, spin_lock_bh(&pn->all_channels_lock); chan = ppp_find_channel(pn, unit); if (chan) { - atomic_inc(&chan->file.refcnt); + refcount_inc(&chan->file.refcnt); file->private_data = &chan->file; err = 0; } @@ -1348,7 +1349,7 @@ static int ppp_dev_init(struct net_device *dev) * that ppp_destroy_interface() won't run before the device gets * unregistered. 
*/ - atomic_inc(&ppp->file.refcnt); + refcount_inc(&ppp->file.refcnt); return 0; } @@ -1377,7 +1378,7 @@ static void ppp_dev_priv_destructor(struct net_device *dev) struct ppp *ppp; ppp = netdev_priv(dev); - if (atomic_dec_and_test(&ppp->file.refcnt)) + if (refcount_dec_and_test(&ppp->file.refcnt)) ppp_destroy_interface(ppp); } @@ -2676,7 +2677,7 @@ ppp_unregister_channel(struct ppp_channel *chan) pch->file.dead = 1; wake_up_interruptible(&pch->file.rwait); - if (atomic_dec_and_test(&pch->file.refcnt)) + if (refcount_dec_and_test(&pch->file.refcnt)) ppp_destroy_channel(pch); } @@ -3046,7 +3047,7 @@ init_ppp_file(struct ppp_file *pf, int kind) pf->kind = kind; skb_queue_head_init(&pf->xq); skb_queue_head_init(&pf->rq); - atomic_set(&pf->refcnt, 1); + refcount_set(&pf->refcnt, 1); init_waitqueue_head(&pf->rwait); } @@ -3164,7 +3165,7 @@ ppp_connect_channel(struct channel *pch, int unit) list_add_tail(&pch->clist, &ppp->channels); ++ppp->n_channels; pch->ppp = ppp; - atomic_inc(&ppp->file.refcnt); + refcount_inc(&ppp->file.refcnt); ppp_unlock(ppp); ret = 0; @@ -3195,7 +3196,7 @@ ppp_disconnect_channel(struct channel *pch) if (--ppp->n_channels == 0) wake_up_interruptible(&ppp->file.rwait); ppp_unlock(ppp); - if (atomic_dec_and_test(&ppp->file.refcnt)) + if (refcount_dec_and_test(&ppp->file.refcnt)) ppp_destroy_interface(ppp); err = 0; } From 709c89b45b874b2f81a074b8802a736009873f48 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:48 +0300 Subject: [PATCH 1270/2177] drivers, net, ppp: convert syncppp.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable syncppp.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/net/ppp/ppp_synctty.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 7868c29071d4..7196f00f0991 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -72,7 +73,7 @@ struct syncppp { struct tasklet_struct tsk; - atomic_t refcnt; + refcount_t refcnt; struct completion dead_cmp; struct ppp_channel chan; /* interface to generic ppp layer */ }; @@ -141,14 +142,14 @@ static struct syncppp *sp_get(struct tty_struct *tty) read_lock(&disc_data_lock); ap = tty->disc_data; if (ap != NULL) - atomic_inc(&ap->refcnt); + refcount_inc(&ap->refcnt); read_unlock(&disc_data_lock); return ap; } static void sp_put(struct syncppp *ap) { - if (atomic_dec_and_test(&ap->refcnt)) + if (refcount_dec_and_test(&ap->refcnt)) complete(&ap->dead_cmp); } @@ -182,7 +183,7 @@ ppp_sync_open(struct tty_struct *tty) skb_queue_head_init(&ap->rqueue); tasklet_init(&ap->tsk, ppp_sync_process, (unsigned long) ap); - atomic_set(&ap->refcnt, 1); + refcount_set(&ap->refcnt, 1); init_completion(&ap->dead_cmp); ap->chan.private = ap; @@ -232,7 +233,7 @@ ppp_sync_close(struct tty_struct *tty) * our channel ops (i.e. ppp_sync_send/ioctl) are in progress * by the time it returns. */ - if (!atomic_dec_and_test(&ap->refcnt)) + if (!refcount_dec_and_test(&ap->refcnt)) wait_for_completion(&ap->dead_cmp); tasklet_kill(&ap->tsk); From e65f7ee39b4d7604a78b03ed35d723e1001fc241 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:49 +0300 Subject: [PATCH 1271/2177] drivers, connector: convert cn_callback_entry.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable cn_callback_entry.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. 
Miller --- drivers/connector/cn_queue.c | 4 ++-- drivers/connector/connector.c | 2 +- include/linux/connector.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c index 1f8bf054d11c..9c54fdf7acea 100644 --- a/drivers/connector/cn_queue.c +++ b/drivers/connector/cn_queue.c @@ -45,7 +45,7 @@ cn_queue_alloc_callback_entry(struct cn_queue_dev *dev, const char *name, return NULL; } - atomic_set(&cbq->refcnt, 1); + refcount_set(&cbq->refcnt, 1); atomic_inc(&dev->refcnt); cbq->pdev = dev; @@ -58,7 +58,7 @@ cn_queue_alloc_callback_entry(struct cn_queue_dev *dev, const char *name, void cn_queue_release_callback(struct cn_callback_entry *cbq) { - if (!atomic_dec_and_test(&cbq->refcnt)) + if (!refcount_dec_and_test(&cbq->refcnt)) return; atomic_dec(&cbq->pdev->refcnt); diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 25693b045371..8615594bd065 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -157,7 +157,7 @@ static int cn_call_callback(struct sk_buff *skb) spin_lock_bh(&dev->cbdev->queue_lock); list_for_each_entry(i, &dev->cbdev->queue_list, callback_entry) { if (cn_cb_equal(&i->id.id, &msg->id)) { - atomic_inc(&i->refcnt); + refcount_inc(&i->refcnt); cbq = i; break; } diff --git a/include/linux/connector.h b/include/linux/connector.h index f8fe8637d771..032102b19645 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -22,7 +22,7 @@ #define __CONNECTOR_H -#include +#include #include #include @@ -49,7 +49,7 @@ struct cn_callback_id { struct cn_callback_entry { struct list_head callback_entry; - atomic_t refcnt; + refcount_t refcnt; struct cn_queue_dev *pdev; struct cn_callback_id id; From 0d5fcebf3c370eb27f9a0e8db454625e73cd1cb4 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 20 Oct 2017 11:21:32 +0200 Subject: [PATCH 1272/2177] tipc: refactor tipc_sk_timeout() function The function tipc_sk_timeout() is more complex than necessary, and even seems to contain an undetected bug. At one of the occurences where we renew the timer we just order it with (HZ / 20), instead of (jiffies + HZ / 20); In this commit we clean up the function. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 51 ++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 357954ceb25c..b3b72d8e9543 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -48,7 +48,7 @@ #include "group.h" #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 #define TIPC_MAX_PORT 0xffffffff #define TIPC_MIN_PORT 1 @@ -1472,7 +1472,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); @@ -2533,43 +2533,40 @@ static int tipc_shutdown(struct socket *sock, int how) static void tipc_sk_timeout(unsigned long data) { struct tipc_sock *tsk = (struct tipc_sock *)data; - struct sock *sk = &tsk->sk; - struct sk_buff *skb = NULL; - u32 peer_port, peer_node; + u32 peer_port = tsk_peer_port(tsk); + u32 peer_node = tsk_peer_node(tsk); u32 own_node = tsk_own_node(tsk); + u32 own_port = tsk->portid; + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct sk_buff *skb = NULL; bh_lock_sock(sk); - if (!tipc_sk_connected(sk)) { - bh_unlock_sock(sk); + if (!tipc_sk_connected(sk)) + goto exit; + + /* Try again later if socket is busy */ + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); goto exit; } - peer_port = tsk_peer_port(tsk); - peer_node = tsk_peer_node(tsk); if (tsk->probe_unacked) { - if (!sock_owned_by_user(sk)) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), - tsk_peer_port(tsk)); - sk->sk_state_change(sk); - } else { - /* Try again later */ - sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); - } - - bh_unlock_sock(sk); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, peer_node, peer_port); + sk->sk_state_change(sk); goto exit; } - - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, - INT_H_SIZE, 0, peer_node, own_node, - peer_port, tsk->portid, TIPC_OK); + /* Send new probe */ + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, + peer_node, own_node, peer_port, own_port, + TIPC_OK); tsk->probe_unacked = true; - sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); +exit: bh_unlock_sock(sk); if (skb) - tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); -exit: + tipc_node_xmit_skb(net, skb, peer_node, own_port); sock_put(sk); } From ab335349b85229f58c6d65f058a8c98d2612b920 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Fri, 20 Oct 2017 12:19:09 +0200 Subject: [PATCH 1273/2177] net: dsa: lan9303: Add port_fast_age and port_fdb_dump methods Add DSA method port_fast_age as a step to STP support. Add low level functions for accessing the lan9303 ALR (Address Logic Resolution). Added DSA method port_fdb_dump Signed-off-by: Egil Hjelmeland Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303-core.c | 160 +++++++++++++++++++++++++++++++++ drivers/net/dsa/lan9303.h | 2 + 2 files changed, 162 insertions(+) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 09a748327fc6..48cae87bdcb7 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -124,6 +124,21 @@ #define LAN9303_MAC_RX_CFG_2 0x0c01 #define LAN9303_MAC_TX_CFG_2 0x0c40 #define LAN9303_SWE_ALR_CMD 0x1800 +# define LAN9303_ALR_CMD_MAKE_ENTRY BIT(2) +# define LAN9303_ALR_CMD_GET_FIRST BIT(1) +# define LAN9303_ALR_CMD_GET_NEXT BIT(0) +#define LAN9303_SWE_ALR_WR_DAT_0 0x1801 +#define LAN9303_SWE_ALR_WR_DAT_1 0x1802 +# define LAN9303_ALR_DAT1_VALID BIT(26) +# define LAN9303_ALR_DAT1_END_OF_TABL BIT(25) +# define LAN9303_ALR_DAT1_AGE_OVERRID BIT(25) +# define LAN9303_ALR_DAT1_STATIC BIT(24) +# define LAN9303_ALR_DAT1_PORT_BITOFFS 16 +# define LAN9303_ALR_DAT1_PORT_MASK (7 << LAN9303_ALR_DAT1_PORT_BITOFFS) +#define LAN9303_SWE_ALR_RD_DAT_0 0x1805 +#define LAN9303_SWE_ALR_RD_DAT_1 0x1806 +#define LAN9303_SWE_ALR_CMD_STS 0x1808 +# define ALR_STS_MAKE_PEND BIT(0) #define LAN9303_SWE_VLAN_CMD 0x180b # define LAN9303_SWE_VLAN_CMD_RNW BIT(5) # define LAN9303_SWE_VLAN_CMD_PVIDNVLAN BIT(4) @@ -478,6 +493,122 @@ static int lan9303_detect_phy_setup(struct lan9303 *chip) return 0; } +/* Map ALR-port bits to port bitmap, and back */ +static const int alrport_2_portmap[] = {1, 2, 4, 0, 3, 5, 6, 7 }; +static const int portmap_2_alrport[] = {3, 0, 1, 4, 2, 5, 6, 7 }; + +/* Wait a while until mask & reg == value. Otherwise return timeout. */ +static int lan9303_csr_reg_wait(struct lan9303 *chip, int regno, + int mask, char value) +{ + int i; + + for (i = 0; i < 0x1000; i++) { + u32 reg; + + lan9303_read_switch_reg(chip, regno, ®); + if ((reg & mask) == value) + return 0; + usleep_range(1000, 2000); + } + return -ETIMEDOUT; +} + +static int lan9303_alr_make_entry_raw(struct lan9303 *chip, u32 dat0, u32 dat1) +{ + lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_WR_DAT_0, dat0); + lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_WR_DAT_1, dat1); + lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, + LAN9303_ALR_CMD_MAKE_ENTRY); + lan9303_csr_reg_wait(chip, LAN9303_SWE_ALR_CMD_STS, ALR_STS_MAKE_PEND, + 0); + lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0); + + return 0; +} + +typedef void alr_loop_cb_t(struct lan9303 *chip, u32 dat0, u32 dat1, + int portmap, void *ctx); + +static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx) +{ + int i; + + lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, + LAN9303_ALR_CMD_GET_FIRST); + lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0); + + for (i = 1; i < LAN9303_NUM_ALR_RECORDS; i++) { + u32 dat0, dat1; + int alrport, portmap; + + lan9303_read_switch_reg(chip, LAN9303_SWE_ALR_RD_DAT_0, &dat0); + lan9303_read_switch_reg(chip, LAN9303_SWE_ALR_RD_DAT_1, &dat1); + if (dat1 & LAN9303_ALR_DAT1_END_OF_TABL) + break; + + alrport = (dat1 & LAN9303_ALR_DAT1_PORT_MASK) >> + LAN9303_ALR_DAT1_PORT_BITOFFS; + portmap = alrport_2_portmap[alrport]; + + cb(chip, dat0, dat1, portmap, ctx); + + lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, + LAN9303_ALR_CMD_GET_NEXT); + lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0); + } +} + +static void alr_reg_to_mac(u32 dat0, u32 dat1, u8 mac[6]) +{ + mac[0] = (dat0 >> 0) & 0xff; + mac[1] = (dat0 >> 8) & 0xff; + mac[2] = (dat0 >> 16) & 0xff; + mac[3] = (dat0 >> 24) & 0xff; + mac[4] = (dat1 >> 0) & 0xff; + mac[5] = (dat1 >> 8) & 0xff; +} + +struct 
del_port_learned_ctx { + int port; +}; + +/* Clear learned (non-static) entry on given port */ +static void alr_loop_cb_del_port_learned(struct lan9303 *chip, u32 dat0, + u32 dat1, int portmap, void *ctx) +{ + struct del_port_learned_ctx *del_ctx = ctx; + int port = del_ctx->port; + + if (((BIT(port) & portmap) == 0) || (dat1 & LAN9303_ALR_DAT1_STATIC)) + return; + + /* learned entries has only one port, we can just delete */ + dat1 &= ~LAN9303_ALR_DAT1_VALID; /* delete entry */ + lan9303_alr_make_entry_raw(chip, dat0, dat1); +} + +struct port_fdb_dump_ctx { + int port; + void *data; + dsa_fdb_dump_cb_t *cb; +}; + +static void alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0, + u32 dat1, int portmap, void *ctx) +{ + struct port_fdb_dump_ctx *dump_ctx = ctx; + u8 mac[ETH_ALEN]; + bool is_static; + + if ((BIT(dump_ctx->port) & portmap) == 0) + return; + + alr_reg_to_mac(dat0, dat1, mac); + is_static = !!(dat1 & LAN9303_ALR_DAT1_STATIC); + dump_ctx->cb(mac, 0, is_static, dump_ctx->data); +} + static int lan9303_disable_processing_port(struct lan9303 *chip, unsigned int port) { @@ -923,6 +1054,33 @@ static void lan9303_port_stp_state_set(struct dsa_switch *ds, int port, /* else: touching SWE_PORT_STATE would break port separation */ } +static void lan9303_port_fast_age(struct dsa_switch *ds, int port) +{ + struct lan9303 *chip = ds->priv; + struct del_port_learned_ctx del_ctx = { + .port = port, + }; + + dev_dbg(chip->dev, "%s(%d)\n", __func__, port); + lan9303_alr_loop(chip, alr_loop_cb_del_port_learned, &del_ctx); +} + +static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port, + dsa_fdb_dump_cb_t *cb, void *data) +{ + struct lan9303 *chip = ds->priv; + struct port_fdb_dump_ctx dump_ctx = { + .port = port, + .data = data, + .cb = cb, + }; + + dev_dbg(chip->dev, "%s(%d)\n", __func__, port); + lan9303_alr_loop(chip, alr_loop_cb_fdb_port_dump, &dump_ctx); + + return 0; +} + static const struct dsa_switch_ops lan9303_switch_ops = { .get_tag_protocol = lan9303_get_tag_protocol, .setup = lan9303_setup, @@ -937,6 +1095,8 @@ static const struct dsa_switch_ops lan9303_switch_ops = { .port_bridge_join = lan9303_port_bridge_join, .port_bridge_leave = lan9303_port_bridge_leave, .port_stp_state_set = lan9303_port_stp_state_set, + .port_fast_age = lan9303_port_fast_age, + .port_fdb_dump = lan9303_port_fdb_dump, }; static int lan9303_register_switch(struct lan9303 *chip) diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h index 68ecd544b658..4db323d65741 100644 --- a/drivers/net/dsa/lan9303.h +++ b/drivers/net/dsa/lan9303.h @@ -11,6 +11,8 @@ struct lan9303_phy_ops { int regnum, u16 val); }; +#define LAN9303_NUM_ALR_RECORDS 512 + struct lan9303 { struct device *dev; struct regmap *regmap; From 0620427ea0d6497615fb9313a9e51e8322fcc029 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Fri, 20 Oct 2017 12:19:10 +0200 Subject: [PATCH 1274/2177] net: dsa: lan9303: Add fdb/mdb manipulation Add functions for managing the lan9303 ALR (Address Logic Resolution). Implement DSA methods: port_fdb_add, port_fdb_del, port_mdb_prepare, port_mdb_add and port_mdb_del. Since the lan9303 do not offer reading specific ALR entry, the driver caches all static entries - in a flat table. Signed-off-by: Egil Hjelmeland Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303-core.c | 173 +++++++++++++++++++++++++++++++++ drivers/net/dsa/lan9303.h | 9 ++ 2 files changed, 182 insertions(+) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 48cae87bdcb7..87f919f0e641 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "lan9303.h" @@ -497,6 +498,37 @@ static int lan9303_detect_phy_setup(struct lan9303 *chip) static const int alrport_2_portmap[] = {1, 2, 4, 0, 3, 5, 6, 7 }; static const int portmap_2_alrport[] = {3, 0, 1, 4, 2, 5, 6, 7 }; +/* Return pointer to first free ALR cache entry, return NULL if none */ +static struct lan9303_alr_cache_entry * +lan9303_alr_cache_find_free(struct lan9303 *chip) +{ + int i; + struct lan9303_alr_cache_entry *entr = chip->alr_cache; + + for (i = 0; i < LAN9303_NUM_ALR_RECORDS; i++, entr++) + if (entr->port_map == 0) + return entr; + + return NULL; +} + +/* Return pointer to ALR cache entry matching MAC address */ +static struct lan9303_alr_cache_entry * +lan9303_alr_cache_find_mac(struct lan9303 *chip, const u8 *mac_addr) +{ + int i; + struct lan9303_alr_cache_entry *entr = chip->alr_cache; + + BUILD_BUG_ON_MSG(sizeof(struct lan9303_alr_cache_entry) & 1, + "ether_addr_equal require u16 alignment"); + + for (i = 0; i < LAN9303_NUM_ALR_RECORDS; i++, entr++) + if (ether_addr_equal(entr->mac_addr, mac_addr)) + return entr; + + return NULL; +} + /* Wait a while until mask & reg == value. Otherwise return timeout. */ static int lan9303_csr_reg_wait(struct lan9303 *chip, int regno, int mask, char value) @@ -609,6 +641,73 @@ static void alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0, dump_ctx->cb(mac, 0, is_static, dump_ctx->data); } +/* Set a static ALR entry. 
Delete entry if port_map is zero */ +static void lan9303_alr_set_entry(struct lan9303 *chip, const u8 *mac, + u8 port_map, bool stp_override) +{ + u32 dat0, dat1, alr_port; + + dev_dbg(chip->dev, "%s(%pM, %d)\n", __func__, mac, port_map); + dat1 = LAN9303_ALR_DAT1_STATIC; + if (port_map) + dat1 |= LAN9303_ALR_DAT1_VALID; + /* otherwise no ports: delete entry */ + if (stp_override) + dat1 |= LAN9303_ALR_DAT1_AGE_OVERRID; + + alr_port = portmap_2_alrport[port_map & 7]; + dat1 &= ~LAN9303_ALR_DAT1_PORT_MASK; + dat1 |= alr_port << LAN9303_ALR_DAT1_PORT_BITOFFS; + + dat0 = 0; + dat0 |= (mac[0] << 0); + dat0 |= (mac[1] << 8); + dat0 |= (mac[2] << 16); + dat0 |= (mac[3] << 24); + + dat1 |= (mac[4] << 0); + dat1 |= (mac[5] << 8); + + lan9303_alr_make_entry_raw(chip, dat0, dat1); +} + +/* Add port to static ALR entry, create new static entry if needed */ +static int lan9303_alr_add_port(struct lan9303 *chip, const u8 *mac, int port, + bool stp_override) +{ + struct lan9303_alr_cache_entry *entr; + + entr = lan9303_alr_cache_find_mac(chip, mac); + if (!entr) { /*New entry */ + entr = lan9303_alr_cache_find_free(chip); + if (!entr) + return -ENOSPC; + ether_addr_copy(entr->mac_addr, mac); + } + entr->port_map |= BIT(port); + entr->stp_override = stp_override; + lan9303_alr_set_entry(chip, mac, entr->port_map, stp_override); + + return 0; +} + +/* Delete static port from ALR entry, delete entry if last port */ +static int lan9303_alr_del_port(struct lan9303 *chip, const u8 *mac, int port) +{ + struct lan9303_alr_cache_entry *entr; + + entr = lan9303_alr_cache_find_mac(chip, mac); + if (!entr) + return 0; /* no static entry found */ + + entr->port_map &= ~BIT(port); + if (entr->port_map == 0) /* zero means its free again */ + eth_zero_addr(&entr->port_map); + lan9303_alr_set_entry(chip, mac, entr->port_map, entr->stp_override); + + return 0; +} + static int lan9303_disable_processing_port(struct lan9303 *chip, unsigned int port) { @@ -1065,6 +1164,32 @@ static void lan9303_port_fast_age(struct dsa_switch *ds, int port) lan9303_alr_loop(chip, alr_loop_cb_del_port_learned, &del_ctx); } +static int lan9303_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) +{ + struct lan9303 *chip = ds->priv; + + dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid); + if (vid) + return -EOPNOTSUPP; + + return lan9303_alr_add_port(chip, addr, port, false); +} + +static int lan9303_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) + +{ + struct lan9303 *chip = ds->priv; + + dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid); + if (vid) + return -EOPNOTSUPP; + lan9303_alr_del_port(chip, addr, port); + + return 0; +} + static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { @@ -1081,6 +1206,49 @@ static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port, return 0; } +static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct lan9303 *chip = ds->priv; + + dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, mdb->addr, + mdb->vid); + if (mdb->vid) + return -EOPNOTSUPP; + if (lan9303_alr_cache_find_mac(chip, mdb->addr)) + return 0; + if (!lan9303_alr_cache_find_free(chip)) + return -ENOSPC; + + return 0; +} + +static void lan9303_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct lan9303 *chip = 
ds->priv; + + dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, mdb->addr, + mdb->vid); + lan9303_alr_add_port(chip, mdb->addr, port, false); +} + +static int lan9303_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) +{ + struct lan9303 *chip = ds->priv; + + dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, mdb->addr, + mdb->vid); + if (mdb->vid) + return -EOPNOTSUPP; + lan9303_alr_del_port(chip, mdb->addr, port); + + return 0; +} + static const struct dsa_switch_ops lan9303_switch_ops = { .get_tag_protocol = lan9303_get_tag_protocol, .setup = lan9303_setup, @@ -1096,7 +1264,12 @@ static const struct dsa_switch_ops lan9303_switch_ops = { .port_bridge_leave = lan9303_port_bridge_leave, .port_stp_state_set = lan9303_port_stp_state_set, .port_fast_age = lan9303_port_fast_age, + .port_fdb_add = lan9303_port_fdb_add, + .port_fdb_del = lan9303_port_fdb_del, .port_fdb_dump = lan9303_port_fdb_dump, + .port_mdb_prepare = lan9303_port_mdb_prepare, + .port_mdb_add = lan9303_port_mdb_add, + .port_mdb_del = lan9303_port_mdb_del, }; static int lan9303_register_switch(struct lan9303 *chip) diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h index 4db323d65741..d807b1be35f2 100644 --- a/drivers/net/dsa/lan9303.h +++ b/drivers/net/dsa/lan9303.h @@ -12,6 +12,11 @@ struct lan9303_phy_ops { }; #define LAN9303_NUM_ALR_RECORDS 512 +struct lan9303_alr_cache_entry { + u8 mac_addr[ETH_ALEN]; + u8 port_map; /* Bitmap of ports. Zero if unused entry */ + u8 stp_override; /* non zero if set ALR_DAT1_AGE_OVERRID */ +}; struct lan9303 { struct device *dev; @@ -25,6 +30,10 @@ struct lan9303 { const struct lan9303_phy_ops *ops; bool is_bridged; /* true if port 1 and 2 are bridged */ u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */ + /* LAN9303 do not offer reading specific ALR entry. Cache all + * static entries in a flat table + **/ + struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS]; }; extern const struct regmap_access_table lan9303_register_set; From 3fa5f11de1ab98a6c54693b78880167a2693cee1 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 20 Oct 2017 13:31:36 +0200 Subject: [PATCH 1275/2177] geneve: Get rid of is_all_zero(), streamline is_tnl_info_zero() No need to re-invent memchr_inv() with !is_all_zero(). While at it, replace conditional and return clauses with a single return clause in is_tnl_info_zero(). Signed-off-by: Stefano Brivio Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f6404074b7b0..01f7355ad277 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1140,24 +1140,11 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, return t; } -static bool is_all_zero(const u8 *fp, size_t size) -{ - int i; - - for (i = 0; i < size; i++) - if (fp[i]) - return false; - return true; -} - static bool is_tnl_info_zero(const struct ip_tunnel_info *info) { - if (info->key.tun_id || info->key.tun_flags || info->key.tos || - info->key.ttl || info->key.label || info->key.tp_src || - !is_all_zero((const u8 *)&info->key.u, sizeof(info->key.u))) - return false; - else - return true; + return !(info->key.tun_id || info->key.tun_flags || info->key.tos || + info->key.ttl || info->key.label || info->key.tp_src || + memchr_inv(&info->key.u, 0, sizeof(info->key.u))); } static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, From 110af3acb8cfd79dcb5676a01e07cb2b6afa4c04 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Oct 2017 12:05:30 -0500 Subject: [PATCH 1276/2177] net: af_unix: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7f46bab4ce5c..a9ee634f3c42 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, */ case SOCK_RAW: sock->type = SOCK_DGRAM; + /* fall through */ case SOCK_DGRAM: sock->ops = &unix_dgram_ops; break; From 0cea8e28df721f929ee3e1fa32489c012100f5a1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Oct 2017 12:37:52 -0500 Subject: [PATCH 1277/2177] net: x25: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/x25/x25_facilities.c | 2 +- net/x25/x25_in.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 997ff7b2509b..ad1734d36ed7 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -103,7 +103,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, *vc_fac_mask |= X25_MASK_REVERSE; break; } - + /*fall through */ case X25_FAC_THROUGHPUT: facilities->throughput = p[1]; *vc_fac_mask |= X25_MASK_THROUGHPUT; diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 7ac50098a375..3c12cae32001 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -345,6 +345,7 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp case X25_RESET_REQUEST: x25_write_internal(sk, X25_RESET_CONFIRMATION); + /* fall through */ case X25_RESET_CONFIRMATION: { x25_stop_timer(sk); x25->condition = 0x00; From 62d3f60b4d065c09a3ccb9e862e71ae870c2d27b Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 20 Oct 2017 19:49:52 +0200 Subject: [PATCH 1278/2177] nfp: use struct fields for 8 bit-wide access Use direct access struct fields rather than PREP_FIELD() macros to manipulate the jump ID and length, both of which are exactly 8-bits wide. This simplifies the code somewhat. 
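For context, the shape of the change, condensed from the action.c hunks in the diff below using the POP_VLAN action (the macro the driver actually uses is FIELD_PREP()):

	/* before: jump ID and length packed into one __be16 via FIELD_PREP() */
	tmp_pop_vlan_op =
		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
		FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN);
	pop_vlan->a_op = cpu_to_be16(tmp_pop_vlan_op);

	/* after: both values fit in 8 bits, so a struct nfp_fl_act_head with
	 * two u8 fields replaces the __be16 and is assigned directly
	 */
	pop_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_POP_VLAN;
	pop_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ;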
Signed-off-by: Simon Horman Signed-off-by: Pieter Jansen van Vuuren Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/action.c | 87 ++++++------------- .../net/ethernet/netronome/nfp/flower/cmsg.h | 26 +++--- 2 files changed, 39 insertions(+), 74 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1194c47ef827..0a5fc9f8545f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -47,13 +47,9 @@ static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) { size_t act_size = sizeof(struct nfp_fl_pop_vlan); - u16 tmp_pop_vlan_op; - tmp_pop_vlan_op = - FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN); - - pop_vlan->a_op = cpu_to_be16(tmp_pop_vlan_op); + pop_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_POP_VLAN; + pop_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; pop_vlan->reserved = 0; } @@ -64,14 +60,9 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, size_t act_size = sizeof(struct nfp_fl_push_vlan); struct tcf_vlan *vlan = to_vlan(action); u16 tmp_push_vlan_tci; - u16 tmp_push_vlan_op; - tmp_push_vlan_op = - FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PUSH_VLAN); - - push_vlan->a_op = cpu_to_be16(tmp_push_vlan_op); - /* Set action push vlan parameters. */ + push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN; + push_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; push_vlan->reserved = 0; push_vlan->vlan_tpid = tcf_vlan_push_proto(action); @@ -101,16 +92,12 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, int *tun_out_cnt) { size_t act_size = sizeof(struct nfp_fl_output); - u16 tmp_output_op, tmp_flags; struct net_device *out_dev; + u16 tmp_flags; int ifindex; - /* Set action opcode to output action. */ - tmp_output_op = - FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_OUTPUT); - - output->a_op = cpu_to_be16(tmp_output_op); + output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT; + output->head.len_lw = act_size >> NFP_FL_LW_SIZ; ifindex = tcf_mirred_ifindex(action); out_dev = __dev_get_by_index(dev_net(in_dev), ifindex); @@ -161,7 +148,6 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) { size_t act_size = sizeof(struct nfp_fl_pre_tunnel); struct nfp_fl_pre_tunnel *pre_tun_act; - u16 tmp_pre_tun_op; /* Pre_tunnel action must be first on action list. * If other actions already exist they need pushed forward. @@ -173,11 +159,8 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) memset(pre_tun_act, 0, act_size); - tmp_pre_tun_op = - FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PRE_TUNNEL); - - pre_tun_act->a_op = cpu_to_be16(tmp_pre_tun_op); + pre_tun_act->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_TUNNEL; + pre_tun_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; return pre_tun_act; } @@ -190,7 +173,6 @@ nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan, struct ip_tunnel_info *vxlan = tcf_tunnel_info(action); size_t act_size = sizeof(struct nfp_fl_set_vxlan); u32 tmp_set_vxlan_type_index = 0; - u16 tmp_set_vxlan_op; /* Currently support one pre-tunnel so index is always 0. 
*/ int pretun_idx = 0; @@ -199,12 +181,8 @@ nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan, return -EOPNOTSUPP; } - tmp_set_vxlan_op = - FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, - NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL); - - set_vxlan->a_op = cpu_to_be16(tmp_set_vxlan_op); + set_vxlan->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; + set_vxlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; /* Set tunnel type and pre-tunnel index. */ tmp_set_vxlan_type_index |= @@ -240,7 +218,6 @@ static int nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off, struct nfp_fl_set_eth *set_eth) { - u16 tmp_set_eth_op; u32 exact, mask; if (off + 4 > ETH_ALEN * 2) @@ -256,11 +233,8 @@ nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off, &set_eth->eth_addr_mask[off]); set_eth->reserved = cpu_to_be16(0); - tmp_set_eth_op = FIELD_PREP(NFP_FL_ACT_LEN_LW, - sizeof(*set_eth) >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, - NFP_FL_ACTION_OPCODE_SET_ETHERNET); - set_eth->a_op = cpu_to_be16(tmp_set_eth_op); + set_eth->head.jump_id = NFP_FL_ACTION_OPCODE_SET_ETHERNET; + set_eth->head.len_lw = sizeof(*set_eth) >> NFP_FL_LW_SIZ; return 0; } @@ -269,7 +243,6 @@ static int nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, struct nfp_fl_set_ip4_addrs *set_ip_addr) { - u16 tmp_set_ipv4_op; __be32 exact, mask; /* We are expecting tcf_pedit to return a big endian value */ @@ -293,11 +266,8 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, } set_ip_addr->reserved = cpu_to_be16(0); - tmp_set_ipv4_op = FIELD_PREP(NFP_FL_ACT_LEN_LW, - sizeof(*set_ip_addr) >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, - NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS); - set_ip_addr->a_op = cpu_to_be16(tmp_set_ipv4_op); + set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS; + set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> NFP_FL_LW_SIZ; return 0; } @@ -306,16 +276,12 @@ static void nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask, struct nfp_fl_set_ipv6_addr *ip6) { - u16 tmp_set_op; - ip6->ipv6[idx % 4].mask = mask; ip6->ipv6[idx % 4].exact = exact; ip6->reserved = cpu_to_be16(0); - tmp_set_op = FIELD_PREP(NFP_FL_ACT_LEN_LW, sizeof(*ip6) >> - NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, opcode_tag); - ip6->a_op = cpu_to_be16(tmp_set_op); + ip6->head.jump_id = opcode_tag; + ip6->head.len_lw = sizeof(*ip6) >> NFP_FL_LW_SIZ; } static int @@ -352,7 +318,6 @@ nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off, struct nfp_fl_set_tport *set_tport, int opcode) { u32 exact, mask; - u16 tmp_set_op; if (off) return -EOPNOTSUPP; @@ -367,10 +332,8 @@ nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off, set_tport->tp_port_mask); set_tport->reserved = cpu_to_be16(0); - tmp_set_op = FIELD_PREP(NFP_FL_ACT_LEN_LW, - sizeof(*set_tport) >> NFP_FL_LW_SIZ); - tmp_set_op |= FIELD_PREP(NFP_FL_ACT_JMP_ID, opcode); - set_tport->a_op = cpu_to_be16(tmp_set_op); + set_tport->head.jump_id = opcode; + set_tport->head.len_lw = sizeof(*set_tport) >> NFP_FL_LW_SIZ; return 0; } @@ -428,15 +391,15 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) return err; } - if (set_eth.a_op) { + if (set_eth.head.len_lw) { act_size = sizeof(set_eth); memcpy(nfp_action, &set_eth, act_size); *a_len += act_size; - } else if (set_ip_addr.a_op) { + } else if (set_ip_addr.head.len_lw) { act_size = sizeof(set_ip_addr); memcpy(nfp_action, &set_ip_addr, act_size); *a_len += act_size; - } else if 
(set_ip6_dst.a_op && set_ip6_src.a_op) { + } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { /* TC compiles set src and dst IPv6 address as a single action, * the hardware requires this to be 2 separate actions. */ @@ -448,15 +411,15 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst, act_size); *a_len += act_size; - } else if (set_ip6_dst.a_op) { + } else if (set_ip6_dst.head.len_lw) { act_size = sizeof(set_ip6_dst); memcpy(nfp_action, &set_ip6_dst, act_size); *a_len += act_size; - } else if (set_ip6_src.a_op) { + } else if (set_ip6_src.head.len_lw) { act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; - } else if (set_tport.a_op) { + } else if (set_tport.head.len_lw) { act_size = sizeof(set_tport); memcpy(nfp_action, &set_tport, act_size); *a_len += act_size; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index f7b7242a22bc..64e87f8e7089 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -86,9 +86,6 @@ #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 #define NFP_FL_ACTION_OPCODE_NUM 32 -#define NFP_FL_ACT_JMP_ID GENMASK(15, 8) -#define NFP_FL_ACT_LEN_LW GENMASK(7, 0) - #define NFP_FL_OUT_FLAGS_LAST BIT(15) #define NFP_FL_OUT_FLAGS_USE_TUN BIT(4) #define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) @@ -113,15 +110,20 @@ enum nfp_flower_tun_type { NFP_FL_TUNNEL_VXLAN = 2, }; +struct nfp_fl_act_head { + u8 jump_id; + u8 len_lw; +}; + struct nfp_fl_set_eth { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; u8 eth_addr_mask[ETH_ALEN * 2]; u8 eth_addr_val[ETH_ALEN * 2]; }; struct nfp_fl_set_ip4_addrs { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; __be32 ipv4_src_mask; __be32 ipv4_src; @@ -130,7 +132,7 @@ struct nfp_fl_set_ip4_addrs { }; struct nfp_fl_set_ipv6_addr { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; struct { __be32 mask; @@ -139,27 +141,27 @@ struct nfp_fl_set_ipv6_addr { }; struct nfp_fl_set_tport { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; u8 tp_port_mask[4]; u8 tp_port_val[4]; }; struct nfp_fl_output { - __be16 a_op; + struct nfp_fl_act_head head; __be16 flags; __be32 port; }; struct nfp_fl_push_vlan { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; __be16 vlan_tpid; __be16 vlan_tci; }; struct nfp_fl_pop_vlan { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; }; @@ -178,7 +180,7 @@ struct nfp_flower_meta_one { }; struct nfp_fl_pre_tunnel { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; __be32 ipv4_dst; /* reserved for use with IPv6 addresses */ @@ -186,7 +188,7 @@ struct nfp_fl_pre_tunnel { }; struct nfp_fl_set_vxlan { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; __be64 tun_id; __be32 tun_type_index; From e6546ef6d86d0fc38e0e84ccae80e641f3fc0087 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 20 Oct 2017 11:05:39 -0700 Subject: [PATCH 1279/2177] bpf: add support for BPF_SOCK_OPS_BASE_RTT A congestion control algorithm can make a call to the BPF socket_ops program to request the base RTT. The base RTT can be congestion control dependent and is meant to represent a congestion threshold such that RTTs above it indicate congestion. This is especially useful for flows within a DC where the base RTT is easy to obtain. 
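(For a concrete picture of how the new op is consumed: the TCP-NV patch later in this series queries it from the congestion control init path, roughly as condensed below.)

	/* in the CA module's init: ask an attached sockops BPF program for a
	 * base RTT; a value <= 0 means no program is loaded or none was given
	 */
	base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
	if (base_rtt > 0)
		ca->nv_base_rtt = base_rtt;	/* RTTs above this indicate congestion */
	else
		ca->nv_base_rtt = 0;		/* no threshold available */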
Being provided a base RTT solves a basic problem in RTT based congestion avoidance algorithms (such as Vegas, NV and BBR). Although it is easy to get the base RTT when the network is not congested, it is very diffcult to do when it is very congested. Newer connections get an inflated value of the base RTT leading to unfariness (newer flows with a larger base RTT get more bandwidth). As a result, RTT based congestion avoidance algorithms tend to update their base RTTs to improve fairness. In very congested networks this can lead to base RTT inflation, reducing the ability of these RTT based congestion control algorithms to prevent congestion. Note that in my experiments with TCP-NV, the base RTT provided can be much larger than the actual hardware RTT. For example, experimenting with hosts within a rack where the hardware RTT is 16-20us, I've used base RTTs up to 150us. The effect of using a larger base RTT is that the congestion avoidance algorithm will allow more queueing. When there are only a few flows the main effect is larger measured RTTs and RPC latencies due to the increased queueing. When there are a lot of flows, a larger base RTT can lead to more congestion and more packet drops. For this case, where the hardware RTT is 20us, a base RTT of 80us produces good results. This patch only introduces BPF_SOCK_OPS_BASE_RTT, a later patch in this set adds support for using it in TCP-NV. Further study and testing is needed before support can be added to other delay based congestion avoidance algorithms. Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d83f95ea6a1b..1aca744c220f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -955,6 +955,13 @@ enum { BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control * needs ECN */ + BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is + * based on the path and may be + * dependent on the congestion control + * algorithm. In general it indicates + * a congestion threshold. RTTs above + * this indicate congestion + */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ From cd86d1fd21025fdd6daf23d1288da405e7ad0ec6 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 20 Oct 2017 11:05:40 -0700 Subject: [PATCH 1280/2177] bpf: Adding helper function bpf_getsockops Adding support for helper function bpf_getsockops to socket_ops BPF programs. This patch only supports TCP_CONGESTION. Signed-off-by: Vlad Vysotsky Acked-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 19 ++++++++-- net/core/filter.c | 46 ++++++++++++++++++++++- tools/testing/selftests/bpf/bpf_helpers.h | 3 ++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1aca744c220f..f650346aaa1a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -613,12 +613,22 @@ union bpf_attr { * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) * Calls setsockopt. Not all opts are available, only those with * integer optvals plus TCP_CONGESTION. 
- * Supported levels: SOL_SOCKET and IPROTO_TCP + * Supported levels: SOL_SOCKET and IPPROTO_TCP * @bpf_socket: pointer to bpf_socket - * @level: SOL_SOCKET or IPROTO_TCP + * @level: SOL_SOCKET or IPPROTO_TCP * @optname: option name * @optval: pointer to option value - * @optlen: length of optval in byes + * @optlen: length of optval in bytes + * Return: 0 or negative error + * + * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) + * Calls getsockopt. Not all opts are available. + * Supported levels: IPPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: IPPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in bytes * Return: 0 or negative error * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) @@ -721,7 +731,8 @@ union bpf_attr { FN(sock_map_update), \ FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ - FN(perf_prog_read_value), + FN(perf_prog_read_value), \ + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/filter.c b/net/core/filter.c index 09e011f20291..ccf62f44140a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3273,7 +3273,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, static const struct bpf_func_proto bpf_setsockopt_proto = { .func = bpf_setsockopt, - .gpl_only = true, + .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, @@ -3282,6 +3282,48 @@ static const struct bpf_func_proto bpf_setsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + struct sock *sk = bpf_sock->sk; + int ret = 0; + + if (!sk_fullsock(sk)) + goto err_clear; + +#ifdef CONFIG_INET + if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { + if (optname == TCP_CONGESTION) { + struct inet_connection_sock *icsk = inet_csk(sk); + + if (!icsk->icsk_ca_ops || optlen <= 1) + goto err_clear; + strncpy(optval, icsk->icsk_ca_ops->name, optlen); + optval[optlen - 1] = 0; + } else { + goto err_clear; + } + } else { + goto err_clear; + } + return ret; +#endif +err_clear: + memset(optval, 0, optlen); + return -EINVAL; +} + +static const struct bpf_func_proto bpf_getsockopt_proto = { + .func = bpf_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -3460,6 +3502,8 @@ static const struct bpf_func_proto * switch (func_id) { case BPF_FUNC_setsockopt: return &bpf_setsockopt_proto; + case BPF_FUNC_getsockopt: + return &bpf_getsockopt_proto; case BPF_FUNC_sock_map_update: return &bpf_sock_map_update_proto; default: diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index e25dbf6038cf..609514f74482 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -67,6 +67,9 @@ static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; +static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, + int optlen) = + (void *) BPF_FUNC_getsockopt; static int 
(*bpf_sk_redirect_map)(void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, From 85cce215781685bbc94b6af2bd1aa40b71965a70 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 20 Oct 2017 11:05:41 -0700 Subject: [PATCH 1281/2177] bpf: Add BPF_SOCKET_OPS_BASE_RTT support to tcp_nv TCP_NV will try to get the base RTT from a socket_ops BPF program if one is loaded. NV will then use the base RTT to bound its min RTT (its notion of the base RTT). It uses the base RTT as an upper bound and 80% of the base RTT as its lower bound. In other words, NV will consider filtered RTTs larger than base RTT as a sign of congestion. As a result, there is no minRTT inflation when there is a lot of congestion. For example, in a DC where the RTTs are less than 40us when there is no congestion, a base RTT value of 80us improves the performance of NV. The difference between the uncongested RTT and the base RTT provided represents how much queueing we are willing to have (in practice it can be higher). NV has been tunned to reduce congestion when there are many flows at the cost of one flow not achieving full bandwith utilization. When a reasonable base RTT is provided, one NV flow can now fully utilize the full bandwidth. In addition, the performance is also improved when there are many flows. In the following examples the NV results are using a kernel with this patch set (i.e. both NV results are using the new nv_loss_dec_factor). With one host sending to another host and only one flow the goodputs are: Cubic: 9.3 Gbps, NV: 5.5 Gbps, NV (baseRTT=80us): 9.2 Gbps With 2 hosts sending to one host (1 flow per host, the goodput per flow is: Cubic: 4.6 Gbps, NV: 4.5 Gbps, NV (baseRTT=80us)L 4.6 Gbps But the RTTs seen by a ping process in the sender is: Cubic: 3.3ms NV: 97us, NV (baseRTT=80us): 146us With a lot of flows things look even better for NV with baseRTT. Here we have 3 hosts sending to one host. Each sending host has 6 flows: 1 stream, 4x1MB RPC, 1x10KB RPC. Cubic, NV and NV with baseRTT all fully utilize the full available bandwidth. However, the distribution of bandwidth among the flows is very different. For the 10KB RPC flow: Cubic: 27Mbps, NV: 111Mbps, NV (baseRTT=80us): 222Mbps The 99% latencies for the 10KB flows are: Cubic: 26ms, NV: 1ms, NV (baseRTT=80us): 500us The RTT seen by a ping process at the senders: Cubic: 3.2ms NV: 720us, NV (baseRTT=80us): 330us Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- net/ipv4/tcp_nv.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 1ff73982e28c..a978e3fa71ec 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -39,7 +39,7 @@ * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected * nv_ssthresh_factor On congestion set ssthresh to this * / 8 * nv_rtt_factor RTT averaging factor - * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur + * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping @@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2; static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */ static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */ static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */ -static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */ +static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */ static int nv_cwnd_growth_rate_neg __read_mostly = 8; static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */ static int nv_dec_eval_min_calls __read_mostly = 60; @@ -101,6 +101,11 @@ struct tcpnv { u32 nv_last_rtt; /* last rtt */ u32 nv_min_rtt; /* active min rtt. Used to determine slope */ u32 nv_min_rtt_new; /* min rtt for future use */ + u32 nv_base_rtt; /* If non-zero it represents the threshold for + * congestion */ + u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is + * set to 80% of nv_base_rtt. It helps reduce + * unfairness between flows */ u32 nv_rtt_max_rate; /* max rate seen during current RTT */ u32 nv_rtt_start_seq; /* current RTT ends when packet arrives * acking beyond nv_rtt_start_seq */ @@ -132,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk) static void tcpnv_init(struct sock *sk) { struct tcpnv *ca = inet_csk_ca(sk); + int base_rtt; tcpnv_reset(ca, sk); + /* See if base_rtt is available from socket_ops bpf program. + * It is meant to be used in environments, such as communication + * within a datacenter, where we have reasonable estimates of + * RTTs + */ + base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT); + if (base_rtt > 0) { + ca->nv_base_rtt = base_rtt; + ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */ + } else { + ca->nv_base_rtt = 0; + ca->nv_lower_bound_rtt = 0; + } + ca->nv_allow_cwnd_growth = 1; ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ; ca->nv_min_rtt = NV_INIT_RTT; @@ -144,6 +164,19 @@ static void tcpnv_init(struct sock *sk) ca->cwnd_growth_factor = 0; } +/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt) + * bounds to RTT. + */ +inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val) +{ + if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt) + return ca->nv_lower_bound_rtt; + else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt) + return ca->nv_base_rtt; + else + return val; +} + static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -265,6 +298,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) if (ca->nv_eval_call_cnt < 255) ca->nv_eval_call_cnt++; + /* Apply bounds to rtt. 
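(avg_rtt is clamped to [nv_lower_bound_rtt, nv_base_rtt]; the lower bound set in tcpnv_init(), (base_rtt * 205) >> 8, is roughly 80% of base_rtt since 205/256 = 0.80.) 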
Only used to update min_rtt */ + avg_rtt = nv_get_bounded_rtt(ca, avg_rtt); + /* update min rtt if necessary */ if (avg_rtt < ca->nv_min_rtt) ca->nv_min_rtt = avg_rtt; From c890063e440456e75c2e70f6bcec3797f1771eb6 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 20 Oct 2017 11:05:42 -0700 Subject: [PATCH 1282/2177] bpf: sample BPF_SOCKET_OPS_BASE_RTT program Sample socket_ops BPF program to test the BPF helper function bpf_getsocketops and the new socket_ops op BPF_SOCKET_OPS_BASE_RTT. The program provides a base RTT of 80us when the calling flow is within a DC (as determined by the IPV6 prefix) and the congestion algorithm is "nv". Signed-off-by: Lawrence Brakmo Acked-by: Daniel Borkmann Acked_by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 1 + samples/bpf/tcp_basertt_kern.c | 78 ++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 samples/bpf/tcp_basertt_kern.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3534ccfb5920..ea2b9e6135f3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -129,6 +129,7 @@ always += tcp_bufs_kern.o always += tcp_cong_kern.o always += tcp_iw_kern.o always += tcp_clamp_kern.o +always += tcp_basertt_kern.o always += xdp_redirect_kern.o always += xdp_redirect_map_kern.o always += xdp_redirect_cpu_kern.o diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c new file mode 100644 index 000000000000..4bf4fc597db9 --- /dev/null +++ b/samples/bpf/tcp_basertt_kern.c @@ -0,0 +1,78 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set base_rtt to 80us when host is running TCP-NV and + * both hosts are in the same datacenter (as determined by IPv6 prefix). + * + * Use load_sock_ops to load this BPF program. + */ + +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_basertt(struct bpf_sock_ops *skops) +{ + char cong[20]; + char nv[] = "nv"; + int rv = 0, n; + int op; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check if both hosts are in the same datacenter. For this + * example they are if the 1st 5.5 bytes in the IPv6 address + * are the same. 
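+ * (local_ip6[0] covers the first 32 bits and the 0xfff00000 mask on + * word 1 adds another 12 bits, i.e. 44 bits, a /44 prefix.) 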
+ */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_BASE_RTT: + n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) { + /* Set base_rtt to 80us */ + rv = 80; + } else if (n) { + rv = n; + } else { + rv = -1; + } + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; From bfdf75693875fd53d6f08d5fec9506a864f07372 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 20 Oct 2017 11:05:43 -0700 Subject: [PATCH 1283/2177] bpf: create samples/bpf/tcp_bpf.readme Readme file explaining how to create a cgroupv2 and attach one of the tcp_*_kern.o socket_ops BPF program. Signed-off-by: Lawrence Brakmo Acked-by: Daniel Borkmann Acked_by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/tcp_bbf.readme | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 samples/bpf/tcp_bbf.readme diff --git a/samples/bpf/tcp_bbf.readme b/samples/bpf/tcp_bbf.readme new file mode 100644 index 000000000000..831fb601e3c9 --- /dev/null +++ b/samples/bpf/tcp_bbf.readme @@ -0,0 +1,26 @@ +This file describes how to run the tcp_*_kern.o tcp_bpf (or socket_ops) +programs. These programs attach to a cgroupv2. The following commands create +a cgroupv2 and attach a bash shell to the group. + + mkdir -p /tmp/cgroupv2 + mount -t cgroup2 none /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2/foo + bash + echo $$ >> /tmp/cgroupv2/foo/cgroup.procs + +Anything that runs under this shell belongs to the foo cgroupv2 To load +(attach) one of the tcp_*_kern.o programs: + + ./load_sock_ops -l /tmp/cgroupv2/foo tcp_basertt_kern.o + +If the "-l" flag is used, the load_sock_ops program will continue to run +printing the BPF log buffer. The tcp_*_kern.o programs use special print +functions to print logging information (if enabled by the ifdef). + +If using netperf/netserver to create traffic, you need to run them under the +cgroupv2 to which the BPF programs are attached (i.e. under bash shell +attached to the cgroupv2). + +To remove (unattach) a socket_ops BPF program from a cgroupv2: + + ./load_sock_ops -r /tmp/cgroupv2/foo From 7dbfb4ef77db5666f0f3a425e7db93ca30ff4285 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2017 11:29:55 -0700 Subject: [PATCH 1284/2177] tun: do not block BH again in tun_flow_cleanup() tun_flow_cleanup() being a timer callback, it is already running in BH context. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/tun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3b41c36eae90..f9541f7f9fb7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -454,7 +454,7 @@ static void tun_flow_cleanup(unsigned long data) tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n"); - spin_lock_bh(&tun->lock); + spin_lock(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; struct hlist_node *n; @@ -472,7 +472,7 @@ static void tun_flow_cleanup(unsigned long data) if (count) mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer)); - spin_unlock_bh(&tun->lock); + spin_unlock(&tun->lock); } static void tun_flow_update(struct tun_struct *tun, u32 rxhash, From 81d98fa4df3d1683b3ef21e8a7a0ccac7874f0de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2017 11:29:56 -0700 Subject: [PATCH 1285/2177] tun: avoid extra timer schedule in tun_flow_cleanup() If tun_flow_cleanup() deleted all flows, no need to arm the timer again. It will be armed next time tun_flow_update() is called. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f9541f7f9fb7..995887de5a98 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -461,11 +461,14 @@ static void tun_flow_cleanup(unsigned long data) hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { unsigned long this_timer; - count++; + this_timer = e->updated + delay; - if (time_before_eq(this_timer, jiffies)) + if (time_before_eq(this_timer, jiffies)) { tun_flow_delete(tun, e); - else if (time_before(this_timer, next_timer)) + continue; + } + count++; + if (time_before(this_timer, next_timer)) next_timer = this_timer; } } From ee74d9967b829232723939cb7c9b100b29f6ec98 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2017 11:29:57 -0700 Subject: [PATCH 1286/2177] tun: do not arm flow_gc_timer in tun_flow_init() Timer is properly armed on demand from tun_flow_update(), so there is no need to arm it at tun init. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 995887de5a98..2a2d058cdd40 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1197,8 +1197,6 @@ static void tun_flow_init(struct tun_struct *tun) tun->ageing_time = TUN_FLOW_EXPIRE; setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun); - mod_timer(&tun->flow_gc_timer, - round_jiffies_up(jiffies + tun->ageing_time)); } static void tun_flow_uninit(struct tun_struct *tun) From e27afb84b4680570b64c958dfcba9e0b3da92fc9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 22 Oct 2017 10:29:06 -0700 Subject: [PATCH 1287/2177] selftests/bpf: fix broken build of test_maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix multiple build errors and warnings 1. test_maps.c: In function ‘test_map_rdonly’: test_maps.c:1051:30: error: ‘BPF_F_RDONLY’ undeclared (first use in this function) MAP_SIZE, map_flags | BPF_F_RDONLY); 2. test_maps.c:1048:6: warning: unused variable ‘i’ [-Wunused-variable] int i, fd, key = 0, value = 0; 3. test_maps.c:1087:2: error: called object is not a function or function pointer assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM); 4. 
./bpf_helpers.h:72:11: error: use of undeclared identifier 'BPF_FUNC_getsockopt' (void *) BPF_FUNC_getsockopt; Fixes: e043325b3087 ("bpf: Add tests for eBPF file mode") Fixes: 6e71b04a8224 ("bpf: Add file mode configuration into bpf maps") Fixes: cd86d1fd2102 ("bpf: Adding helper function bpf_getsockops") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 62 +++++++++++++++++++++---- tools/testing/selftests/bpf/test_maps.c | 6 +-- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 850a5497dcc3..f650346aaa1a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -218,6 +218,10 @@ enum bpf_attach_type { #define BPF_OBJ_NAME_LEN 16U +/* Flags for accessing BPF object */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -260,6 +264,7 @@ union bpf_attr { struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; + __u32 file_flags; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ @@ -287,6 +292,7 @@ union bpf_attr { __u32 map_id; }; __u32 next_id; + __u32 open_flags; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ @@ -607,12 +613,22 @@ union bpf_attr { * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) * Calls setsockopt. Not all opts are available, only those with * integer optvals plus TCP_CONGESTION. - * Supported levels: SOL_SOCKET and IPROTO_TCP + * Supported levels: SOL_SOCKET and IPPROTO_TCP * @bpf_socket: pointer to bpf_socket - * @level: SOL_SOCKET or IPROTO_TCP + * @level: SOL_SOCKET or IPPROTO_TCP * @optname: option name * @optval: pointer to option value - * @optlen: length of optval in byes + * @optlen: length of optval in bytes + * Return: 0 or negative error + * + * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) + * Calls getsockopt. Not all opts are available. + * Supported levels: IPPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: IPPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in bytes * Return: 0 or negative error * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) @@ -623,10 +639,9 @@ union bpf_attr { * @flags: reserved for future use * Return: 0 on success or negative error code * - * int bpf_sk_redirect_map(skb, map, key, flags) + * int bpf_sk_redirect_map(map, key, flags) * Redirect skb to a sock in map using key as a lookup key for the * sock in map. 
- * @skb: pointer to skb * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use @@ -643,6 +658,21 @@ union bpf_attr { * @xdp_md: pointer to xdp_md * @delta: An positive/negative integer to be added to xdp_md.data_meta * Return: 0 on success or negative on error + * + * int bpf_perf_event_read_value(map, flags, buf, buf_size) + * read perf event counter value and perf event enabled/running time + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * @buf: buf to fill + * @buf_size: size of the buf + * Return: 0 on success or negative error code + * + * int bpf_perf_prog_read_value(ctx, buf, buf_size) + * read perf prog attached perf event counter and enabled/running time + * @ctx: pointer to ctx + * @buf: buf to fill + * @buf_size: size of the buf + * Return : 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -701,7 +731,8 @@ union bpf_attr { FN(sock_map_update), \ FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ - FN(perf_prog_read_value), + FN(perf_prog_read_value), \ + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -745,7 +776,9 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ +/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and + * BPF_FUNC_perf_event_read_value flags. + */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK /* BPF_FUNC_perf_event_output for sk_buff input context. */ @@ -873,7 +906,7 @@ struct bpf_prog_info { __u32 created_by_uid; __u32 nr_map_ids; __aligned_u64 map_ids; - char name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -933,9 +966,22 @@ enum { BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control * needs ECN */ + BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is + * based on the path and may be + * dependent on the congestion control + * algorithm. In general it indicates + * a congestion threshold. RTTs above + * this indicate congestion + */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ #define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +struct bpf_perf_event_value { + __u64 counter; + __u64 enabled; + __u64 running; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 057da0cba517..040356ecc862 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1045,7 +1045,7 @@ static void test_map_parallel(void) static void test_map_rdonly(void) { - int i, fd, key = 0, value = 0; + int fd, key = 0, value = 0; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), MAP_SIZE, map_flags | BPF_F_RDONLY); @@ -1068,7 +1068,7 @@ static void test_map_rdonly(void) static void test_map_wronly(void) { - int i, fd, key = 0, value = 0; + int fd, key = 0, value = 0; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), MAP_SIZE, map_flags | BPF_F_WRONLY); @@ -1081,7 +1081,7 @@ static void test_map_wronly(void) key = 1; value = 1234; /* Insert key=1 element. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0) + assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); /* Check that key=2 is not found. 
*/ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM); From 33ad61d0f799656e8987e9c80e6e15151bb857f3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 20 Oct 2017 13:47:08 -0700 Subject: [PATCH 1288/2177] isdn/gigaset: Provide cardstate context for bas timer callbacks While the work callback uses the urb to find cardstate from bas_cardstate, this may not be valid for timer callbacks. Instead, introduce a direct pointer back to the cardstate from bas_cardstate for use in timer callbacks. Reported-by: Paul Bolle Fixes: 4cfea08e6251 ("isdn/gigaset: Convert timers to use timer_setup()") Cc: Paul Bolle Cc: Karsten Keil Cc: "David S. Miller" Cc: Johan Hovold Cc: gigaset307x-common@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/isdn/gigaset/bas-gigaset.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index c990c6bbffc2..20d0a080a2b0 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -89,6 +89,7 @@ static int start_cbsend(struct cardstate *); struct bas_cardstate { struct usb_device *udev; /* USB device pointer */ + struct cardstate *cs; struct usb_interface *interface; /* interface for this device */ unsigned char minor; /* starting minor number */ @@ -436,8 +437,7 @@ static void check_pending(struct bas_cardstate *ucs) static void cmd_in_timeout(struct timer_list *t) { struct bas_cardstate *ucs = from_timer(ucs, t, timer_cmd_in); - struct urb *urb = ucs->urb_int_in; - struct cardstate *cs = urb->context; + struct cardstate *cs = ucs->cs; int rc; if (!ucs->rcvbuf_size) { @@ -643,8 +643,7 @@ static void int_in_work(struct work_struct *work) static void int_in_resubmit(struct timer_list *t) { struct bas_cardstate *ucs = from_timer(ucs, t, timer_int_in); - struct urb *urb = ucs->urb_int_in; - struct cardstate *cs = urb->context; + struct cardstate *cs = ucs->cs; int rc; if (ucs->retry_int_in++ >= BAS_RETRY) { @@ -1446,8 +1445,7 @@ error: static void req_timeout(struct timer_list *t) { struct bas_cardstate *ucs = from_timer(ucs, t, timer_ctrl); - struct urb *urb = ucs->urb_int_in; - struct cardstate *cs = urb->context; + struct cardstate *cs = ucs->cs; int pending; unsigned long flags; @@ -1843,8 +1841,7 @@ static void write_command_callback(struct urb *urb) static void atrdy_timeout(struct timer_list *t) { struct bas_cardstate *ucs = from_timer(ucs, t, timer_atrdy); - struct urb *urb = ucs->urb_int_in; - struct cardstate *cs = urb->context; + struct cardstate *cs = ucs->cs; dev_warn(cs->dev, "timeout waiting for HD_READY_SEND_ATDATA\n"); @@ -2217,6 +2214,7 @@ static int gigaset_initcshw(struct cardstate *cs) } spin_lock_init(&ucs->lock); + ucs->cs = cs; timer_setup(&ucs->timer_ctrl, req_timeout, 0); timer_setup(&ucs->timer_atrdy, atrdy_timeout, 0); timer_setup(&ucs->timer_cmd_in, cmd_in_timeout, 0); From 39cdd34989cf9fb62935041727bf885136f720a4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Oct 2017 14:39:43 -0700 Subject: [PATCH 1289/2177] net: dsa: bcm_sf2: Use existing shift/masks Instead of open coding the shift for the IP protocol, IP fragment bit etc. define and/or use existing constants to that end. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 7 ++++--- drivers/net/dsa/bcm_sf2_regs.h | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 94649e1481ec..cef4b3d4df36 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -189,7 +189,8 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, * Reserved [1] * UDF_Valid[8] [0] */ - core_writel(priv, v4_spec->tos << 16 | ip_proto << 8 | ip_frag << 7, + core_writel(priv, v4_spec->tos << IPTOS_SHIFT | + ip_proto << IPPROTO_SHIFT | ip_frag << IP_FRAG_SHIFT, CORE_CFP_DATA_PORT(6)); /* UDF_Valid[7:0] [31:24] @@ -425,8 +426,8 @@ static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, return -EINVAL; } - v4_spec->tos = (reg >> 16) & IPPROTO_MASK; - nfc->fs.m_ext.data[0] = cpu_to_be32((reg >> 7) & 1); + v4_spec->tos = (reg >> IPTOS_SHIFT) & IPTOS_MASK; + nfc->fs.m_ext.data[0] = cpu_to_be32((reg >> IP_FRAG_SHIFT) & 1); reg = core_readl(priv, CORE_CFP_DATA_PORT(3)); /* src port [15:8] */ diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index d1596dfca323..61bc9729383f 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -302,9 +302,12 @@ enum bcm_sf2_reg_offs { /* UDF_DATA7 */ #define L3_FRAMING_SHIFT 24 #define L3_FRAMING_MASK (0x3 << L3_FRAMING_SHIFT) +#define IPTOS_SHIFT 16 +#define IPTOS_MASK 0xff #define IPPROTO_SHIFT 8 #define IPPROTO_MASK (0xff << IPPROTO_SHIFT) -#define IP_FRAG (1 << 7) +#define IP_FRAG_SHIFT 7 +#define IP_FRAG (1 << IP_FRAG_SHIFT) /* UDF_DATA0 */ #define SLICE_VALID 3 From 3306145866b62ff9087b4fde489df4b3ee8755c1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Oct 2017 14:39:44 -0700 Subject: [PATCH 1290/2177] net: dsa: bcm_sf2: Move IPv4 CFP processing to specific functions Move the processing of IPv4 rules into specific functions, allowing us to clearly identify which parts are generic and which ones are not. Also create a specific function to insert a rule into the action and policer RAMs as those tend to be fairly generic. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 281 ++++++++++++++++++++-------------- 1 file changed, 164 insertions(+), 117 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index cef4b3d4df36..7ba5f92c5552 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -112,45 +112,61 @@ static inline unsigned int bcm_sf2_cfp_rule_size(struct bcm_sf2_priv *priv) return priv->num_cfp_rules - 1; } -static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, - struct ethtool_rx_flow_spec *fs) +static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, + unsigned int rule_index, + unsigned int port_num, + unsigned int queue_num) +{ + int ret; + u32 reg; + + /* Replace ARL derived destination with DST_MAP derived, define + * which port and queue this should be forwarded to. 
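+ * (The destination port is selected by BIT(port_num + DST_MAP_IB_SHIFT) + * and the egress queue by queue_num << NEW_TC_SHIFT in the word written + * just below.) 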
+ */ + reg = CHANGE_FWRD_MAP_IB_REP_ARL | BIT(port_num + DST_MAP_IB_SHIFT) | + CHANGE_TC | queue_num << NEW_TC_SHIFT; + + core_writel(priv, reg, CORE_ACT_POL_DATA0); + + /* Set classification ID that needs to be put in Broadcom tag */ + core_writel(priv, rule_index << CHAIN_ID_SHIFT, + CORE_ACT_POL_DATA1); + + core_writel(priv, 0, CORE_ACT_POL_DATA2); + + /* Configure policer RAM now */ + ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | ACT_POL_RAM); + if (ret) { + pr_err("Policer entry at %d failed\n", rule_index); + return ret; + } + + /* Disable the policer */ + core_writel(priv, POLICER_MODE_DISABLE, CORE_RATE_METER0); + + /* Now the rate meter */ + ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | RATE_METER_RAM); + if (ret) { + pr_err("Meter entry at %d failed\n", rule_index); + return ret; + } + + return 0; +} + +static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, + unsigned int port_num, + unsigned int queue_num, + struct ethtool_rx_flow_spec *fs) { - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_tcpip4_spec *v4_spec; const struct cfp_udf_layout *layout; + struct ethtool_tcpip4_spec *v4_spec; unsigned int slice_num, rule_index; - unsigned int queue_num, port_num; u8 ip_proto, ip_frag; u8 num_udf; u32 reg; int ret; - /* Check for unsupported extensions */ - if ((fs->flow_type & FLOW_EXT) && - (fs->m_ext.vlan_etype || fs->m_ext.data[1])) - return -EINVAL; - - if (fs->location != RX_CLS_LOC_ANY && - test_bit(fs->location, priv->cfp.used)) - return -EBUSY; - - if (fs->location != RX_CLS_LOC_ANY && - fs->location > bcm_sf2_cfp_rule_size(priv)) - return -EINVAL; - - ip_frag = be32_to_cpu(fs->m_ext.data[0]); - - /* We do not support discarding packets, check that the - * destination port is enabled and that we are within the - * number of ports supported by the switch - */ - port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES; - - if (fs->ring_cookie == RX_CLS_FLOW_DISC || - !(BIT(port_num) & ds->enabled_port_mask) || - port_num >= priv->hw_params.num_ports) - return -EINVAL; - switch (fs->flow_type & ~FLOW_EXT) { case TCP_V4_FLOW: ip_proto = IPPROTO_TCP; @@ -164,6 +180,15 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, return -EINVAL; } + ip_frag = be32_to_cpu(fs->m_ext.data[0]); + + /* Locate the first rule available */ + if (fs->location == RX_CLS_LOC_ANY) + rule_index = find_first_zero_bit(priv->cfp.used, + bcm_sf2_cfp_rule_size(priv)); + else + rule_index = fs->location; + /* We only use one UDF slice for now */ slice_num = 1; layout = &udf_tcpip4_layout; @@ -175,6 +200,9 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, /* Apply to all packets received through this port */ core_writel(priv, BIT(port), CORE_CFP_DATA_PORT(7)); + /* Source port map match */ + core_writel(priv, 0xff, CORE_CFP_MASK_PORT(7)); + /* S-Tag status [31:30] * C-Tag status [29:28] * L2 framing [27:26] @@ -241,9 +269,6 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, SLICE_NUM(slice_num) | SLICE_VALID; core_writel(priv, reg, CORE_CFP_DATA_PORT(0)); - /* Source port map match */ - core_writel(priv, 0xff, CORE_CFP_MASK_PORT(7)); - /* Mask with the specific layout for IPv4 packets */ core_writel(priv, layout->mask_value, CORE_CFP_MASK_PORT(6)); @@ -259,13 +284,6 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(1)); core_writel(priv, 0xffffff0f, CORE_CFP_MASK_PORT(0)); - /* Locate the first rule available */ - if (fs->location == RX_CLS_LOC_ANY) - rule_index = 
find_first_zero_bit(priv->cfp.used, - bcm_sf2_cfp_rule_size(priv)); - else - rule_index = fs->location; - /* Insert into TCAM now */ bcm_sf2_cfp_rule_addr_set(priv, rule_index); @@ -275,43 +293,10 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, return ret; } - /* Replace ARL derived destination with DST_MAP derived, define - * which port and queue this should be forwarded to. - * - * We have a small oddity where Port 6 just does not have a - * valid bit here (so we subtract by one). - */ - queue_num = fs->ring_cookie % SF2_NUM_EGRESS_QUEUES; - if (port_num >= 7) - port_num -= 1; - - reg = CHANGE_FWRD_MAP_IB_REP_ARL | BIT(port_num + DST_MAP_IB_SHIFT) | - CHANGE_TC | queue_num << NEW_TC_SHIFT; - - core_writel(priv, reg, CORE_ACT_POL_DATA0); - - /* Set classification ID that needs to be put in Broadcom tag */ - core_writel(priv, rule_index << CHAIN_ID_SHIFT, - CORE_ACT_POL_DATA1); - - core_writel(priv, 0, CORE_ACT_POL_DATA2); - - /* Configure policer RAM now */ - ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | ACT_POL_RAM); - if (ret) { - pr_err("Policer entry at %d failed\n", rule_index); + /* Insert into Action and policer RAMs now */ + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port_num, queue_num); + if (ret) return ret; - } - - /* Disable the policer */ - core_writel(priv, POLICER_MODE_DISABLE, CORE_RATE_METER0); - - /* Now the rate meter */ - ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | RATE_METER_RAM); - if (ret) { - pr_err("Meter entry at %d failed\n", rule_index); - return ret; - } /* Turn on CFP for this rule now */ reg = core_readl(priv, CORE_CFP_CTL_REG); @@ -325,6 +310,51 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, return 0; } +static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, + struct ethtool_rx_flow_spec *fs) +{ + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + unsigned int queue_num, port_num; + int ret; + + /* Check for unsupported extensions */ + if ((fs->flow_type & FLOW_EXT) && (fs->m_ext.vlan_etype || + fs->m_ext.data[1])) + return -EINVAL; + + if (fs->location != RX_CLS_LOC_ANY && + test_bit(fs->location, priv->cfp.used)) + return -EBUSY; + + if (fs->location != RX_CLS_LOC_ANY && + fs->location > bcm_sf2_cfp_rule_size(priv)) + return -EINVAL; + + /* We do not support discarding packets, check that the + * destination port is enabled and that we are within the + * number of ports supported by the switch + */ + port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES; + + if (fs->ring_cookie == RX_CLS_FLOW_DISC || + !(BIT(port_num) & ds->enabled_port_mask) || + port_num >= priv->hw_params.num_ports) + return -EINVAL; + /* + * We have a small oddity where Port 6 just does not have a + * valid bit here (so we substract by one). 
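+ * The ethtool ring_cookie encodes the destination as + * port * SF2_NUM_EGRESS_QUEUES + queue, hence the divide above and the + * modulo below; e.g. with ethtool -N, an 'action' value of 10 would + * select port 1, queue 2, assuming 8 egress queues per port. 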
+ */ + queue_num = fs->ring_cookie % SF2_NUM_EGRESS_QUEUES; + if (port_num >= 7) + port_num -= 1; + + ret = bcm_sf2_cfp_ipv4_rule_set(priv, port, port_num, queue_num, fs); + if (ret) + return ret; + + return 0; +} + static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, u32 loc) { @@ -370,13 +400,59 @@ static void bcm_sf2_invert_masks(struct ethtool_rx_flow_spec *flow) flow->m_ext.data[1] ^= cpu_to_be32(~0); } +static int bcm_sf2_cfp_ipv4_rule_get(struct bcm_sf2_priv *priv, int port, + struct ethtool_tcpip4_spec *v4_spec, + struct ethtool_tcpip4_spec *v4_m_spec) +{ + u16 src_dst_port; + u32 reg, ipv4; + + reg = core_readl(priv, CORE_CFP_DATA_PORT(3)); + /* src port [15:8] */ + src_dst_port = reg << 8; + + reg = core_readl(priv, CORE_CFP_DATA_PORT(2)); + /* src port [7:0] */ + src_dst_port |= (reg >> 24); + + v4_spec->pdst = cpu_to_be16(src_dst_port); + v4_m_spec->pdst = cpu_to_be16(~0); + v4_spec->psrc = cpu_to_be16((u16)(reg >> 8)); + v4_m_spec->psrc = cpu_to_be16(~0); + + /* IPv4 dst [15:8] */ + ipv4 = (reg & 0xff) << 8; + reg = core_readl(priv, CORE_CFP_DATA_PORT(1)); + /* IPv4 dst [31:16] */ + ipv4 |= ((reg >> 8) & 0xffff) << 16; + /* IPv4 dst [7:0] */ + ipv4 |= (reg >> 24) & 0xff; + v4_spec->ip4dst = cpu_to_be32(ipv4); + v4_m_spec->ip4dst = cpu_to_be32(~0); + + /* IPv4 src [15:8] */ + ipv4 = (reg & 0xff) << 8; + reg = core_readl(priv, CORE_CFP_DATA_PORT(0)); + + if (!(reg & SLICE_VALID)) + return -EINVAL; + + /* IPv4 src [7:0] */ + ipv4 |= (reg >> 24) & 0xff; + /* IPv4 src [31:16] */ + ipv4 |= ((reg >> 8) & 0xffff) << 16; + v4_spec->ip4src = cpu_to_be32(ipv4); + v4_m_spec->ip4src = cpu_to_be32(~0); + + return 0; +} + static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, struct ethtool_rxnfc *nfc, bool search) { - struct ethtool_tcpip4_spec *v4_spec; + struct ethtool_tcpip4_spec *v4_spec = NULL, *v4_m_spec; unsigned int queue_num; - u16 src_dst_port; - u32 reg, ipv4; + u32 reg; int ret; if (!search) { @@ -414,10 +490,12 @@ static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, case IPPROTO_TCP: nfc->fs.flow_type = TCP_V4_FLOW; v4_spec = &nfc->fs.h_u.tcp_ip4_spec; + v4_m_spec = &nfc->fs.m_u.tcp_ip4_spec; break; case IPPROTO_UDP: nfc->fs.flow_type = UDP_V4_FLOW; v4_spec = &nfc->fs.h_u.udp_ip4_spec; + v4_m_spec = &nfc->fs.m_u.udp_ip4_spec; break; default: /* Clear to exit the search process */ @@ -426,45 +504,14 @@ static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, return -EINVAL; } - v4_spec->tos = (reg >> IPTOS_SHIFT) & IPTOS_MASK; nfc->fs.m_ext.data[0] = cpu_to_be32((reg >> IP_FRAG_SHIFT) & 1); + if (v4_spec) { + v4_spec->tos = (reg >> IPTOS_SHIFT) & IPTOS_MASK; + ret = bcm_sf2_cfp_ipv4_rule_get(priv, port, v4_spec, v4_m_spec); + } - reg = core_readl(priv, CORE_CFP_DATA_PORT(3)); - /* src port [15:8] */ - src_dst_port = reg << 8; - - reg = core_readl(priv, CORE_CFP_DATA_PORT(2)); - /* src port [7:0] */ - src_dst_port |= (reg >> 24); - - v4_spec->pdst = cpu_to_be16(src_dst_port); - nfc->fs.m_u.tcp_ip4_spec.pdst = cpu_to_be16(~0); - v4_spec->psrc = cpu_to_be16((u16)(reg >> 8)); - nfc->fs.m_u.tcp_ip4_spec.psrc = cpu_to_be16(~0); - - /* IPv4 dst [15:8] */ - ipv4 = (reg & 0xff) << 8; - reg = core_readl(priv, CORE_CFP_DATA_PORT(1)); - /* IPv4 dst [31:16] */ - ipv4 |= ((reg >> 8) & 0xffff) << 16; - /* IPv4 dst [7:0] */ - ipv4 |= (reg >> 24) & 0xff; - v4_spec->ip4dst = cpu_to_be32(ipv4); - nfc->fs.m_u.tcp_ip4_spec.ip4dst = cpu_to_be32(~0); - - /* IPv4 src [15:8] */ - ipv4 = (reg & 0xff) << 8; - reg = core_readl(priv, 
CORE_CFP_DATA_PORT(0)); - - if (!(reg & SLICE_VALID)) - return -EINVAL; - - /* IPv4 src [7:0] */ - ipv4 |= (reg >> 24) & 0xff; - /* IPv4 src [31:16] */ - ipv4 |= ((reg >> 8) & 0xffff) << 16; - v4_spec->ip4src = cpu_to_be32(ipv4); - nfc->fs.m_u.tcp_ip4_spec.ip4src = cpu_to_be32(~0); + if (ret) + return ret; /* Read last to avoid next entry clobbering the results during search * operations From 5d80bcbb631ce035f37bce1924fe73ed0d77b546 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Oct 2017 14:39:45 -0700 Subject: [PATCH 1291/2177] net: dsa: bcm_sf2: Make UDF slices more configurable In preparation for introducing IPv6 rules support, make the cfp_udf_layout more flexible and match more accurately how the HW is designed: we have 3 + 1 slices per protocol, but we may not be using all of them and we are relative to a particular base offset (slice A for IPv4 for instance). Also populate the slice number that should be used (slice 1 for IPv4) based on the lookup function. Finally, we introduce two helper functions: udf_upper_bits() and udf_lower_bits() to help setting the UDF_n_* valid bits based on the number of UDFs valid within a slice. Update the IPv4 rule setting to make use of it to be more robust wrt. change in number of User Defined Fields being programmed. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 109 +++++++++++++++++++++++---------- drivers/net/dsa/bcm_sf2_regs.h | 3 +- 2 files changed, 78 insertions(+), 34 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 7ba5f92c5552..d033fc6440c4 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -20,37 +20,48 @@ #include "bcm_sf2.h" #include "bcm_sf2_regs.h" -struct cfp_udf_layout { - u8 slices[UDF_NUM_SLICES]; +struct cfp_udf_slice_layout { + u8 slices[UDFS_PER_SLICE]; u32 mask_value; - + u32 base_offset; }; +struct cfp_udf_layout { + struct cfp_udf_slice_layout udfs[UDF_NUM_SLICES]; +}; + +static const u8 zero_slice[UDFS_PER_SLICE] = { }; + /* UDF slices layout for a TCPv4/UDPv4 specification */ static const struct cfp_udf_layout udf_tcpip4_layout = { - .slices = { - /* End of L2, byte offset 12, src IP[0:15] */ - CFG_UDF_EOL2 | 6, - /* End of L2, byte offset 14, src IP[16:31] */ - CFG_UDF_EOL2 | 7, - /* End of L2, byte offset 16, dst IP[0:15] */ - CFG_UDF_EOL2 | 8, - /* End of L2, byte offset 18, dst IP[16:31] */ - CFG_UDF_EOL2 | 9, - /* End of L3, byte offset 0, src port */ - CFG_UDF_EOL3 | 0, - /* End of L3, byte offset 2, dst port */ - CFG_UDF_EOL3 | 1, - 0, 0, 0 + .udfs = { + [1] = { + .slices = { + /* End of L2, byte offset 12, src IP[0:15] */ + CFG_UDF_EOL2 | 6, + /* End of L2, byte offset 14, src IP[16:31] */ + CFG_UDF_EOL2 | 7, + /* End of L2, byte offset 16, dst IP[0:15] */ + CFG_UDF_EOL2 | 8, + /* End of L2, byte offset 18, dst IP[16:31] */ + CFG_UDF_EOL2 | 9, + /* End of L3, byte offset 0, src port */ + CFG_UDF_EOL3 | 0, + /* End of L3, byte offset 2, dst port */ + CFG_UDF_EOL3 | 1, + 0, 0, 0 + }, + .mask_value = L3_FRAMING_MASK | IPPROTO_MASK | IP_FRAG, + .base_offset = CORE_UDF_0_A_0_8_PORT_0 + UDF_SLICE_OFFSET, + }, }, - .mask_value = L3_FRAMING_MASK | IPPROTO_MASK | IP_FRAG, }; static inline unsigned int bcm_sf2_get_num_udf_slices(const u8 *layout) { unsigned int i, count = 0; - for (i = 0; i < UDF_NUM_SLICES; i++) { + for (i = 0; i < UDFS_PER_SLICE; i++) { if (layout[i] != 0) count++; } @@ -58,15 +69,42 @@ static inline unsigned int bcm_sf2_get_num_udf_slices(const u8 *layout) return 
count; } -static void bcm_sf2_cfp_udf_set(struct bcm_sf2_priv *priv, - unsigned int slice_num, - const u8 *layout) +static inline u32 udf_upper_bits(unsigned int num_udf) { - u32 offset = CORE_UDF_0_A_0_8_PORT_0 + slice_num * UDF_SLICE_OFFSET; + return GENMASK(num_udf - 1, 0) >> (UDFS_PER_SLICE - 1); +} + +static inline u32 udf_lower_bits(unsigned int num_udf) +{ + return (u8)GENMASK(num_udf - 1, 0); +} + +static unsigned int bcm_sf2_get_slice_number(const struct cfp_udf_layout *l, + unsigned int start) +{ + const struct cfp_udf_slice_layout *slice_layout; + unsigned int slice_idx; + + for (slice_idx = start; slice_idx < UDF_NUM_SLICES; slice_idx++) { + slice_layout = &l->udfs[slice_idx]; + if (memcmp(slice_layout->slices, zero_slice, + sizeof(zero_slice))) + break; + } + + return slice_idx; +} + +static void bcm_sf2_cfp_udf_set(struct bcm_sf2_priv *priv, + const struct cfp_udf_layout *layout, + unsigned int slice_num) +{ + u32 offset = layout->udfs[slice_num].base_offset; unsigned int i; - for (i = 0; i < UDF_NUM_SLICES; i++) - core_writel(priv, layout[i], offset + i * 4); + for (i = 0; i < UDFS_PER_SLICE; i++) + core_writel(priv, layout->udfs[slice_num].slices[i], + offset + i * 4); } static int bcm_sf2_cfp_op(struct bcm_sf2_priv *priv, unsigned int op) @@ -189,13 +227,16 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, else rule_index = fs->location; - /* We only use one UDF slice for now */ - slice_num = 1; layout = &udf_tcpip4_layout; - num_udf = bcm_sf2_get_num_udf_slices(layout->slices); + /* We only use one UDF slice for now */ + slice_num = bcm_sf2_get_slice_number(layout, 0); + if (slice_num == UDF_NUM_SLICES) + return -EINVAL; + + num_udf = bcm_sf2_get_num_udf_slices(layout->udfs[slice_num].slices); /* Apply the UDF layout for this filter */ - bcm_sf2_cfp_udf_set(priv, slice_num, layout->slices); + bcm_sf2_cfp_udf_set(priv, layout, slice_num); /* Apply to all packets received through this port */ core_writel(priv, BIT(port), CORE_CFP_DATA_PORT(7)); @@ -218,14 +259,15 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, * UDF_Valid[8] [0] */ core_writel(priv, v4_spec->tos << IPTOS_SHIFT | - ip_proto << IPPROTO_SHIFT | ip_frag << IP_FRAG_SHIFT, + ip_proto << IPPROTO_SHIFT | ip_frag << IP_FRAG_SHIFT | + udf_upper_bits(num_udf), CORE_CFP_DATA_PORT(6)); /* UDF_Valid[7:0] [31:24] * S-Tag [23:8] * C-Tag [7:0] */ - core_writel(priv, GENMASK(num_udf - 1, 0) << 24, CORE_CFP_DATA_PORT(5)); + core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_DATA_PORT(5)); /* C-Tag [31:24] * UDF_n_A8 [23:8] @@ -270,10 +312,11 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, core_writel(priv, reg, CORE_CFP_DATA_PORT(0)); /* Mask with the specific layout for IPv4 packets */ - core_writel(priv, layout->mask_value, CORE_CFP_MASK_PORT(6)); + core_writel(priv, layout->udfs[slice_num].mask_value | + udf_upper_bits(num_udf), CORE_CFP_MASK_PORT(6)); /* Mask all but valid UDFs */ - core_writel(priv, GENMASK(num_udf - 1, 0) << 24, CORE_CFP_MASK_PORT(5)); + core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_MASK_PORT(5)); /* Mask all */ core_writel(priv, 0, CORE_CFP_MASK_PORT(4)); diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 61bc9729383f..263e4c758795 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -409,7 +409,8 @@ enum bcm_sf2_reg_offs { #define CFG_UDF_EOL3 (3 << CFG_UDF_OFFSET_BASE_SHIFT) /* Number of slices for IPv4, IPv6 and non-IP */ -#define 
UDF_NUM_SLICES 9 +#define UDF_NUM_SLICES 4 +#define UDFS_PER_SLICE 9 /* Spacing between different slices */ #define UDF_SLICE_OFFSET 0x40 From 4daa70cfb69186a4fe70fa2fefc271a41ece5085 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Oct 2017 14:39:46 -0700 Subject: [PATCH 1292/2177] net: dsa: bcm_sf2: Simplify bcm_sf2_cfp_rule_get_all() There is no need to do a HW search of the TCAMs which is something slow and expensive. Since we already maintain a bitmask of active CFP rules, just iterate over those, starting from bit 1 (after the reserved entry) to get a count and index position to store the rule later on. As a result we can remove the code in bcm_sf2_cfp_rule_get() which acted on the "search" argument, and remove that argument. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 68 ++++++++--------------------------- 1 file changed, 14 insertions(+), 54 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index d033fc6440c4..9c8299580795 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -491,28 +491,24 @@ static int bcm_sf2_cfp_ipv4_rule_get(struct bcm_sf2_priv *priv, int port, } static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, - struct ethtool_rxnfc *nfc, bool search) + struct ethtool_rxnfc *nfc) { struct ethtool_tcpip4_spec *v4_spec = NULL, *v4_m_spec; unsigned int queue_num; u32 reg; int ret; - if (!search) { - bcm_sf2_cfp_rule_addr_set(priv, nfc->fs.location); + bcm_sf2_cfp_rule_addr_set(priv, nfc->fs.location); - ret = bcm_sf2_cfp_op(priv, OP_SEL_READ | ACT_POL_RAM); - if (ret) - return ret; + ret = bcm_sf2_cfp_op(priv, OP_SEL_READ | ACT_POL_RAM); + if (ret) + return ret; - reg = core_readl(priv, CORE_ACT_POL_DATA0); + reg = core_readl(priv, CORE_ACT_POL_DATA0); - ret = bcm_sf2_cfp_op(priv, OP_SEL_READ | TCAM_SEL); - if (ret) - return ret; - } else { - reg = core_readl(priv, CORE_ACT_POL_DATA0); - } + ret = bcm_sf2_cfp_op(priv, OP_SEL_READ | TCAM_SEL); + if (ret) + return ret; /* Extract the destination port */ nfc->fs.ring_cookie = fls((reg >> DST_MAP_IB_SHIFT) & @@ -541,9 +537,6 @@ static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, v4_m_spec = &nfc->fs.m_u.udp_ip4_spec; break; default: - /* Clear to exit the search process */ - if (search) - core_readl(priv, CORE_CFP_DATA_PORT(7)); return -EINVAL; } @@ -577,44 +570,11 @@ static int bcm_sf2_cfp_rule_get_all(struct bcm_sf2_priv *priv, u32 *rule_locs) { unsigned int index = 1, rules_cnt = 0; - int ret; - u32 reg; - /* Do not poll on OP_STR_DONE to be self-clearing for search - * operations, we cannot use bcm_sf2_cfp_op here because it completes - * on clearing OP_STR_DONE which won't clear until the entire search - * operation is over. 
- */ - reg = core_readl(priv, CORE_CFP_ACC); - reg &= ~(XCESS_ADDR_MASK << XCESS_ADDR_SHIFT); - reg |= index << XCESS_ADDR_SHIFT; - reg &= ~(OP_SEL_MASK | RAM_SEL_MASK); - reg |= OP_SEL_SEARCH | TCAM_SEL | OP_STR_DONE; - core_writel(priv, reg, CORE_CFP_ACC); - - do { - /* Wait for results to be ready */ - reg = core_readl(priv, CORE_CFP_ACC); - - /* Extract the address we are searching */ - index = reg >> XCESS_ADDR_SHIFT; - index &= XCESS_ADDR_MASK; - - /* We have a valid search result, so flag it accordingly */ - if (reg & SEARCH_STS) { - ret = bcm_sf2_cfp_rule_get(priv, port, nfc, true); - if (ret) - continue; - - rule_locs[rules_cnt] = index; - rules_cnt++; - } - - /* Search is over break out */ - if (!(reg & OP_STR_DONE)) - break; - - } while (index < priv->num_cfp_rules); + for_each_set_bit_from(index, priv->cfp.used, priv->num_cfp_rules) { + rule_locs[rules_cnt] = index; + rules_cnt++; + } /* Put the TCAM size here */ nfc->data = bcm_sf2_cfp_rule_size(priv); @@ -640,7 +600,7 @@ int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port, nfc->data |= RX_CLS_LOC_SPECIAL; break; case ETHTOOL_GRXCLSRULE: - ret = bcm_sf2_cfp_rule_get(priv, port, nfc, false); + ret = bcm_sf2_cfp_rule_get(priv, port, nfc); break; case ETHTOOL_GRXCLSRLALL: ret = bcm_sf2_cfp_rule_get_all(priv, port, nfc, rule_locs); From ba0696c22e7c5b7394283a0dbef1462252d1606f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Oct 2017 14:39:47 -0700 Subject: [PATCH 1293/2177] net: dsa: bcm_sf2: Add support for IPv6 CFP rules Inserting IPv6 CFP rules complicates the code a little bit in that we need to insert two rules side by side and chain them to match a full IPv6 tuple (src, dst IPv6 + port + protocol). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 1 + drivers/net/dsa/bcm_sf2.h | 1 + drivers/net/dsa/bcm_sf2_cfp.c | 565 ++++++++++++++++++++++++++++++--- drivers/net/dsa/bcm_sf2_regs.h | 7 + 4 files changed, 534 insertions(+), 40 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b43c063b9634..2574a52ee161 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1067,6 +1067,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) * permanently used */ set_bit(0, priv->cfp.used); + set_bit(0, priv->cfp.unique); bcm_sf2_identify_ports(priv, dn->child); diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 1922e027ff59..cc31e986e6e3 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -54,6 +54,7 @@ struct bcm_sf2_cfp_priv { /* Mutex protecting concurrent accesses to the CFP registers */ struct mutex lock; DECLARE_BITMAP(used, CFP_NUM_RULES); + DECLARE_BITMAP(unique, CFP_NUM_RULES); unsigned int rules_cnt; }; diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 9c8299580795..0861f66ef739 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -57,6 +57,60 @@ static const struct cfp_udf_layout udf_tcpip4_layout = { }, }; +/* UDF slices layout for a TCPv6/UDPv6 specification */ +static const struct cfp_udf_layout udf_tcpip6_layout = { + .udfs = { + [0] = { + .slices = { + /* End of L2, byte offset 8, src IP[0:15] */ + CFG_UDF_EOL2 | 4, + /* End of L2, byte offset 10, src IP[16:31] */ + CFG_UDF_EOL2 | 5, + /* End of L2, byte offset 12, src IP[32:47] */ + CFG_UDF_EOL2 | 6, + /* End of L2, byte offset 14, src IP[48:63] */ + CFG_UDF_EOL2 | 7, + /* End of L2, byte offset 16, src IP[64:79] */ + CFG_UDF_EOL2 | 8, + /* End of L2, 
byte offset 18, src IP[80:95] */ + CFG_UDF_EOL2 | 9, + /* End of L2, byte offset 20, src IP[96:111] */ + CFG_UDF_EOL2 | 10, + /* End of L2, byte offset 22, src IP[112:127] */ + CFG_UDF_EOL2 | 11, + /* End of L3, byte offset 0, src port */ + CFG_UDF_EOL3 | 0, + }, + .mask_value = L3_FRAMING_MASK | IPPROTO_MASK | IP_FRAG, + .base_offset = CORE_UDF_0_B_0_8_PORT_0, + }, + [3] = { + .slices = { + /* End of L2, byte offset 24, dst IP[0:15] */ + CFG_UDF_EOL2 | 12, + /* End of L2, byte offset 26, dst IP[16:31] */ + CFG_UDF_EOL2 | 13, + /* End of L2, byte offset 28, dst IP[32:47] */ + CFG_UDF_EOL2 | 14, + /* End of L2, byte offset 30, dst IP[48:63] */ + CFG_UDF_EOL2 | 15, + /* End of L2, byte offset 32, dst IP[64:79] */ + CFG_UDF_EOL2 | 16, + /* End of L2, byte offset 34, dst IP[80:95] */ + CFG_UDF_EOL2 | 17, + /* End of L2, byte offset 36, dst IP[96:111] */ + CFG_UDF_EOL2 | 18, + /* End of L2, byte offset 38, dst IP[112:127] */ + CFG_UDF_EOL2 | 19, + /* End of L3, byte offset 2, dst port */ + CFG_UDF_EOL3 | 1, + }, + .mask_value = L3_FRAMING_MASK | IPPROTO_MASK | IP_FRAG, + .base_offset = CORE_UDF_0_D_0_11_PORT_0, + }, + }, +}; + static inline unsigned int bcm_sf2_get_num_udf_slices(const u8 *layout) { unsigned int i, count = 0; @@ -153,7 +207,8 @@ static inline unsigned int bcm_sf2_cfp_rule_size(struct bcm_sf2_priv *priv) static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, unsigned int rule_index, unsigned int port_num, - unsigned int queue_num) + unsigned int queue_num, + bool fwd_map_change) { int ret; u32 reg; @@ -161,14 +216,17 @@ static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, /* Replace ARL derived destination with DST_MAP derived, define * which port and queue this should be forwarded to. */ - reg = CHANGE_FWRD_MAP_IB_REP_ARL | BIT(port_num + DST_MAP_IB_SHIFT) | - CHANGE_TC | queue_num << NEW_TC_SHIFT; + if (fwd_map_change) + reg = CHANGE_FWRD_MAP_IB_REP_ARL | + BIT(port_num + DST_MAP_IB_SHIFT) | + CHANGE_TC | queue_num << NEW_TC_SHIFT; + else + reg = 0; core_writel(priv, reg, CORE_ACT_POL_DATA0); /* Set classification ID that needs to be put in Broadcom tag */ - core_writel(priv, rule_index << CHAIN_ID_SHIFT, - CORE_ACT_POL_DATA1); + core_writel(priv, rule_index << CHAIN_ID_SHIFT, CORE_ACT_POL_DATA1); core_writel(priv, 0, CORE_ACT_POL_DATA2); @@ -337,7 +395,8 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, } /* Insert into Action and policer RAMs now */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port_num, queue_num); + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port_num, + queue_num, true); if (ret) return ret; @@ -348,17 +407,280 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, /* Flag the rule as being used and return it */ set_bit(rule_index, priv->cfp.used); + set_bit(rule_index, priv->cfp.unique); fs->location = rule_index; return 0; } +static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, + const __be32 *ip6_addr, const __be16 port, + unsigned int slice_num) +{ + u32 reg, tmp, val; + + /* C-Tag [31:24] + * UDF_n_B8 [23:8] (port) + * UDF_n_B7 (upper) [7:0] (addr[15:8]) + */ + reg = be32_to_cpu(ip6_addr[3]); + val = (u32)be16_to_cpu(port) << 8 | ((reg >> 8) & 0xff); + core_writel(priv, val, CORE_CFP_DATA_PORT(4)); + + /* UDF_n_B7 (lower) [31:24] (addr[7:0]) + * UDF_n_B6 [23:8] (addr[31:16]) + * UDF_n_B5 (upper) [7:0] (addr[47:40]) + */ + tmp = be32_to_cpu(ip6_addr[2]); + val = (u32)(reg & 0xff) << 24 | (u32)(reg >> 16) << 8 | + ((tmp >> 8) & 0xff); + core_writel(priv, val, 
CORE_CFP_DATA_PORT(3)); + + /* UDF_n_B5 (lower) [31:24] (addr[39:32]) + * UDF_n_B4 [23:8] (addr[63:48]) + * UDF_n_B3 (upper) [7:0] (addr[79:72]) + */ + reg = be32_to_cpu(ip6_addr[1]); + val = (u32)(tmp & 0xff) << 24 | (u32)(tmp >> 16) << 8 | + ((reg >> 8) & 0xff); + core_writel(priv, val, CORE_CFP_DATA_PORT(2)); + + /* UDF_n_B3 (lower) [31:24] (addr[71:64]) + * UDF_n_B2 [23:8] (addr[95:80]) + * UDF_n_B1 (upper) [7:0] (addr[111:104]) + */ + tmp = be32_to_cpu(ip6_addr[0]); + val = (u32)(reg & 0xff) << 24 | (u32)(reg >> 16) << 8 | + ((tmp >> 8) & 0xff); + core_writel(priv, val, CORE_CFP_DATA_PORT(1)); + + /* UDF_n_B1 (lower) [31:24] (addr[103:96]) + * UDF_n_B0 [23:8] (addr[127:112]) + * Reserved [7:4] + * Slice ID [3:2] + * Slice valid [1:0] + */ + reg = (u32)(tmp & 0xff) << 24 | (u32)(tmp >> 16) << 8 | + SLICE_NUM(slice_num) | SLICE_VALID; + core_writel(priv, reg, CORE_CFP_DATA_PORT(0)); + + /* All other UDFs should be matched with the filter */ + core_writel(priv, 0x00ffffff, CORE_CFP_MASK_PORT(4)); + core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(3)); + core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(2)); + core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(1)); + core_writel(priv, 0xffffff0f, CORE_CFP_MASK_PORT(0)); +} + +static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, + unsigned int port_num, + unsigned int queue_num, + struct ethtool_rx_flow_spec *fs) +{ + unsigned int slice_num, rule_index[2]; + struct ethtool_tcpip6_spec *v6_spec; + const struct cfp_udf_layout *layout; + u8 ip_proto, ip_frag; + int ret = 0; + u8 num_udf; + u32 reg; + + switch (fs->flow_type & ~FLOW_EXT) { + case TCP_V6_FLOW: + ip_proto = IPPROTO_TCP; + v6_spec = &fs->h_u.tcp_ip6_spec; + break; + case UDP_V6_FLOW: + ip_proto = IPPROTO_UDP; + v6_spec = &fs->h_u.udp_ip6_spec; + break; + default: + return -EINVAL; + } + + ip_frag = be32_to_cpu(fs->m_ext.data[0]); + + layout = &udf_tcpip6_layout; + slice_num = bcm_sf2_get_slice_number(layout, 0); + if (slice_num == UDF_NUM_SLICES) + return -EINVAL; + + num_udf = bcm_sf2_get_num_udf_slices(layout->udfs[slice_num].slices); + + /* Negotiate two indexes, one for the second half which we are chained + * from, which is what we will return to user-space, and a second one + * which is used to store its first half. That first half does not + * allow any choice of placement, so it just needs to find the next + * available bit. We return the second half as fs->location because + * that helps with the rule lookup later on since the second half is + * chained from its first half, we can easily identify IPv6 CFP rules + * by looking whether they carry a CHAIN_ID. + * + * We also want the second half to have a lower rule_index than its + * first half because the HW search is by incrementing addresses. + */ + if (fs->location == RX_CLS_LOC_ANY) + rule_index[0] = find_first_zero_bit(priv->cfp.used, + bcm_sf2_cfp_rule_size(priv)); + else + rule_index[0] = fs->location; + + /* Flag it as used (cleared on error path) such that we can immediately + * obtain a second one to chain from. 
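+ * Without this, the find_first_zero_bit() call just below would hand + * back the same index again. 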
+ */ + set_bit(rule_index[0], priv->cfp.used); + + rule_index[1] = find_first_zero_bit(priv->cfp.used, + bcm_sf2_cfp_rule_size(priv)); + if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) { + ret = -ENOSPC; + goto out_err; + } + + /* Apply the UDF layout for this filter */ + bcm_sf2_cfp_udf_set(priv, layout, slice_num); + + /* Apply to all packets received through this port */ + core_writel(priv, BIT(port), CORE_CFP_DATA_PORT(7)); + + /* Source port map match */ + core_writel(priv, 0xff, CORE_CFP_MASK_PORT(7)); + + /* S-Tag status [31:30] + * C-Tag status [29:28] + * L2 framing [27:26] + * L3 framing [25:24] + * IP ToS [23:16] + * IP proto [15:08] + * IP Fragm [7] + * Non 1st frag [6] + * IP Authen [5] + * TTL range [4:3] + * PPPoE session [2] + * Reserved [1] + * UDF_Valid[8] [0] + */ + reg = 1 << L3_FRAMING_SHIFT | ip_proto << IPPROTO_SHIFT | + ip_frag << IP_FRAG_SHIFT | udf_upper_bits(num_udf); + core_writel(priv, reg, CORE_CFP_DATA_PORT(6)); + + /* Mask with the specific layout for IPv6 packets including + * UDF_Valid[8] + */ + reg = layout->udfs[slice_num].mask_value | udf_upper_bits(num_udf); + core_writel(priv, reg, CORE_CFP_MASK_PORT(6)); + + /* UDF_Valid[7:0] [31:24] + * S-Tag [23:8] + * C-Tag [7:0] + */ + core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_DATA_PORT(5)); + + /* Mask all but valid UDFs */ + core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_MASK_PORT(5)); + + /* Slice the IPv6 source address and port */ + bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6src, v6_spec->psrc, slice_num); + + /* Insert into TCAM now because we need to insert a second rule */ + bcm_sf2_cfp_rule_addr_set(priv, rule_index[0]); + + ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | TCAM_SEL); + if (ret) { + pr_err("TCAM entry at addr %d failed\n", rule_index[0]); + goto out_err; + } + + /* Insert into Action and policer RAMs now */ + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num, + queue_num, false); + if (ret) + goto out_err; + + /* Now deal with the second slice to chain this rule */ + slice_num = bcm_sf2_get_slice_number(layout, slice_num + 1); + if (slice_num == UDF_NUM_SLICES) { + ret = -EINVAL; + goto out_err; + } + + num_udf = bcm_sf2_get_num_udf_slices(layout->udfs[slice_num].slices); + + /* Apply the UDF layout for this filter */ + bcm_sf2_cfp_udf_set(priv, layout, slice_num); + + /* Chained rule, source port match is coming from the rule we are + * chained from. 
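+ * This second slice is matched on the CHAIN_ID programmed just below + * rather than on the ingress port map, so both the port data and mask + * words are left cleared. 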
+ */ + core_writel(priv, 0, CORE_CFP_DATA_PORT(7)); + core_writel(priv, 0, CORE_CFP_MASK_PORT(7)); + + /* + * CHAIN ID [31:24] chain to previous slice + * Reserved [23:20] + * UDF_Valid[11:8] [19:16] + * UDF_Valid[7:0] [15:8] + * UDF_n_D11 [7:0] + */ + reg = rule_index[0] << 24 | udf_upper_bits(num_udf) << 16 | + udf_lower_bits(num_udf) << 8; + core_writel(priv, reg, CORE_CFP_DATA_PORT(6)); + + /* Mask all except chain ID, UDF Valid[8] and UDF Valid[7:0] */ + reg = XCESS_ADDR_MASK << 24 | udf_upper_bits(num_udf) << 16 | + udf_lower_bits(num_udf) << 8; + core_writel(priv, reg, CORE_CFP_MASK_PORT(6)); + + /* Don't care */ + core_writel(priv, 0, CORE_CFP_DATA_PORT(5)); + + /* Mask all */ + core_writel(priv, 0, CORE_CFP_MASK_PORT(5)); + + bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6dst, v6_spec->pdst, slice_num); + + /* Insert into TCAM now */ + bcm_sf2_cfp_rule_addr_set(priv, rule_index[1]); + + ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | TCAM_SEL); + if (ret) { + pr_err("TCAM entry at addr %d failed\n", rule_index[1]); + goto out_err; + } + + /* Insert into Action and policer RAMs now, set chain ID to + * the one we are chained to + */ + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num, + queue_num, true); + if (ret) + goto out_err; + + /* Turn on CFP for this rule now */ + reg = core_readl(priv, CORE_CFP_CTL_REG); + reg |= BIT(port); + core_writel(priv, reg, CORE_CFP_CTL_REG); + + /* Flag the second half rule as being used now, return it as the + * location, and flag it as unique while dumping rules + */ + set_bit(rule_index[1], priv->cfp.used); + set_bit(rule_index[1], priv->cfp.unique); + fs->location = rule_index[1]; + + return ret; + +out_err: + clear_bit(rule_index[0], priv->cfp.used); + return ret; +} + static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, struct ethtool_rx_flow_spec *fs) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int queue_num, port_num; - int ret; + int ret = -EINVAL; /* Check for unsupported extensions */ if ((fs->flow_type & FLOW_EXT) && (fs->m_ext.vlan_etype || @@ -391,15 +713,26 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, if (port_num >= 7) port_num -= 1; - ret = bcm_sf2_cfp_ipv4_rule_set(priv, port, port_num, queue_num, fs); - if (ret) - return ret; + switch (fs->flow_type & ~FLOW_EXT) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + ret = bcm_sf2_cfp_ipv4_rule_set(priv, port, port_num, + queue_num, fs); + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + ret = bcm_sf2_cfp_ipv6_rule_set(priv, port, port_num, + queue_num, fs); + break; + default: + break; + } - return 0; + return ret; } -static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, - u32 loc) +static int bcm_sf2_cfp_rule_del_one(struct bcm_sf2_priv *priv, int port, + u32 loc, u32 *next_loc) { int ret; u32 reg; @@ -415,6 +748,14 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, if (ret) return ret; + /* Check if this is possibly an IPv6 rule that would + * indicate we need to delete its companion rule + * as well + */ + reg = core_readl(priv, CORE_CFP_DATA_PORT(6)); + if (next_loc) + *next_loc = (reg >> 24) & CHAIN_ID_MASK; + /* Clear its valid bits */ reg = core_readl(priv, CORE_CFP_DATA_PORT(0)); reg &= ~SLICE_VALID; @@ -426,10 +767,28 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, return ret; clear_bit(loc, priv->cfp.used); + clear_bit(loc, priv->cfp.unique); return 0; } +static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, + u32 loc) +{ + u32 next_loc = 0; + int ret; + + 
ret = bcm_sf2_cfp_rule_del_one(priv, port, loc, &next_loc); + if (ret) + return ret; + + /* If this was an IPv6 rule, delete is companion rule too */ + if (next_loc) + ret = bcm_sf2_cfp_rule_del_one(priv, port, next_loc, NULL); + + return ret; +} + static void bcm_sf2_invert_masks(struct ethtool_rx_flow_spec *flow) { unsigned int i; @@ -444,12 +803,32 @@ static void bcm_sf2_invert_masks(struct ethtool_rx_flow_spec *flow) } static int bcm_sf2_cfp_ipv4_rule_get(struct bcm_sf2_priv *priv, int port, - struct ethtool_tcpip4_spec *v4_spec, - struct ethtool_tcpip4_spec *v4_m_spec) + struct ethtool_rx_flow_spec *fs) { + struct ethtool_tcpip4_spec *v4_spec = NULL, *v4_m_spec = NULL; u16 src_dst_port; u32 reg, ipv4; + reg = core_readl(priv, CORE_CFP_DATA_PORT(6)); + + switch ((reg & IPPROTO_MASK) >> IPPROTO_SHIFT) { + case IPPROTO_TCP: + fs->flow_type = TCP_V4_FLOW; + v4_spec = &fs->h_u.tcp_ip4_spec; + v4_m_spec = &fs->m_u.tcp_ip4_spec; + break; + case IPPROTO_UDP: + fs->flow_type = UDP_V4_FLOW; + v4_spec = &fs->h_u.udp_ip4_spec; + v4_m_spec = &fs->m_u.udp_ip4_spec; + break; + default: + return -EINVAL; + } + + fs->m_ext.data[0] = cpu_to_be32((reg >> IP_FRAG_SHIFT) & 1); + v4_spec->tos = (reg >> IPTOS_SHIFT) & IPTOS_MASK; + reg = core_readl(priv, CORE_CFP_DATA_PORT(3)); /* src port [15:8] */ src_dst_port = reg << 8; @@ -490,12 +869,128 @@ static int bcm_sf2_cfp_ipv4_rule_get(struct bcm_sf2_priv *priv, int port, return 0; } +static int bcm_sf2_cfp_unslice_ipv6(struct bcm_sf2_priv *priv, + __be32 *ip6_addr, __be16 *port, + __be32 *ip6_mask, __be16 *port_mask) +{ + u32 reg, tmp; + + /* C-Tag [31:24] + * UDF_n_B8 [23:8] (port) + * UDF_n_B7 (upper) [7:0] (addr[15:8]) + */ + reg = core_readl(priv, CORE_CFP_DATA_PORT(4)); + *port = cpu_to_be32(reg) >> 8; + *port_mask = cpu_to_be16(~0); + tmp = (u32)(reg & 0xff) << 8; + + /* UDF_n_B7 (lower) [31:24] (addr[7:0]) + * UDF_n_B6 [23:8] (addr[31:16]) + * UDF_n_B5 (upper) [7:0] (addr[47:40]) + */ + reg = core_readl(priv, CORE_CFP_DATA_PORT(3)); + tmp |= (reg >> 24) & 0xff; + tmp |= (u32)((reg >> 8) << 16); + ip6_mask[3] = cpu_to_be32(~0); + ip6_addr[3] = cpu_to_be32(tmp); + tmp = (u32)(reg & 0xff) << 8; + + /* UDF_n_B5 (lower) [31:24] (addr[39:32]) + * UDF_n_B4 [23:8] (addr[63:48]) + * UDF_n_B3 (upper) [7:0] (addr[79:72]) + */ + reg = core_readl(priv, CORE_CFP_DATA_PORT(2)); + tmp |= (reg >> 24) & 0xff; + tmp |= (u32)((reg >> 8) << 16); + ip6_mask[2] = cpu_to_be32(~0); + ip6_addr[2] = cpu_to_be32(tmp); + tmp = (u32)(reg & 0xff) << 8; + + /* UDF_n_B3 (lower) [31:24] (addr[71:64]) + * UDF_n_B2 [23:8] (addr[95:80]) + * UDF_n_B1 (upper) [7:0] (addr[111:104]) + */ + reg = core_readl(priv, CORE_CFP_DATA_PORT(1)); + tmp |= (reg >> 24) & 0xff; + tmp |= (u32)((reg >> 8) << 16); + ip6_mask[1] = cpu_to_be32(~0); + ip6_addr[1] = cpu_to_be32(tmp); + tmp = (u32)(reg & 0xff) << 8; + + /* UDF_n_B1 (lower) [31:24] (addr[103:96]) + * UDF_n_B0 [23:8] (addr[127:112]) + * Reserved [7:4] + * Slice ID [3:2] + * Slice valid [1:0] + */ + reg = core_readl(priv, CORE_CFP_DATA_PORT(0)); + tmp |= (reg >> 24) & 0xff; + tmp |= (u32)((reg >> 8) << 16); + ip6_mask[0] = cpu_to_be32(~0); + ip6_addr[0] = cpu_to_be32(tmp); + + if (!(reg & SLICE_VALID)) + return -EINVAL; + + return 0; +} + +static int bcm_sf2_cfp_ipv6_rule_get(struct bcm_sf2_priv *priv, int port, + struct ethtool_rx_flow_spec *fs, + u32 next_loc) +{ + struct ethtool_tcpip6_spec *v6_spec = NULL, *v6_m_spec = NULL; + u32 reg; + int ret; + + /* UDPv6 and TCPv6 both use ethtool_tcpip6_spec so we are fine + * assuming tcp_ip6_spec here 
being an union. + */ + v6_spec = &fs->h_u.tcp_ip6_spec; + v6_m_spec = &fs->m_u.tcp_ip6_spec; + + /* Read the second half first */ + ret = bcm_sf2_cfp_unslice_ipv6(priv, v6_spec->ip6dst, &v6_spec->pdst, + v6_m_spec->ip6dst, &v6_m_spec->pdst); + if (ret) + return ret; + + /* Read last to avoid next entry clobbering the results during search + * operations. We would not have the port enabled for this rule, so + * don't bother checking it. + */ + (void)core_readl(priv, CORE_CFP_DATA_PORT(7)); + + /* The slice number is valid, so read the rule we are chained from now + * which is our first half. + */ + bcm_sf2_cfp_rule_addr_set(priv, next_loc); + ret = bcm_sf2_cfp_op(priv, OP_SEL_READ | TCAM_SEL); + if (ret) + return ret; + + reg = core_readl(priv, CORE_CFP_DATA_PORT(6)); + + switch ((reg & IPPROTO_MASK) >> IPPROTO_SHIFT) { + case IPPROTO_TCP: + fs->flow_type = TCP_V6_FLOW; + break; + case IPPROTO_UDP: + fs->flow_type = UDP_V6_FLOW; + break; + default: + return -EINVAL; + } + + return bcm_sf2_cfp_unslice_ipv6(priv, v6_spec->ip6src, &v6_spec->psrc, + v6_m_spec->ip6src, &v6_m_spec->psrc); +} + static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, struct ethtool_rxnfc *nfc) { - struct ethtool_tcpip4_spec *v4_spec = NULL, *v4_m_spec; + u32 reg, ipv4_or_chain_id; unsigned int queue_num; - u32 reg; int ret; bcm_sf2_cfp_rule_addr_set(priv, nfc->fs.location); @@ -523,29 +1018,19 @@ static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, queue_num = (reg >> NEW_TC_SHIFT) & NEW_TC_MASK; nfc->fs.ring_cookie += queue_num; - /* Extract the IP protocol */ + /* Extract the L3_FRAMING or CHAIN_ID */ reg = core_readl(priv, CORE_CFP_DATA_PORT(6)); - switch ((reg & IPPROTO_MASK) >> IPPROTO_SHIFT) { - case IPPROTO_TCP: - nfc->fs.flow_type = TCP_V4_FLOW; - v4_spec = &nfc->fs.h_u.tcp_ip4_spec; - v4_m_spec = &nfc->fs.m_u.tcp_ip4_spec; - break; - case IPPROTO_UDP: - nfc->fs.flow_type = UDP_V4_FLOW; - v4_spec = &nfc->fs.h_u.udp_ip4_spec; - v4_m_spec = &nfc->fs.m_u.udp_ip4_spec; - break; - default: - return -EINVAL; - } - - nfc->fs.m_ext.data[0] = cpu_to_be32((reg >> IP_FRAG_SHIFT) & 1); - if (v4_spec) { - v4_spec->tos = (reg >> IPTOS_SHIFT) & IPTOS_MASK; - ret = bcm_sf2_cfp_ipv4_rule_get(priv, port, v4_spec, v4_m_spec); - } + /* With IPv6 rules this would contain a non-zero chain ID since + * we reserve entry 0 and it cannot be used. So if we read 0 here + * this means an IPv4 rule. 
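The read-back classification used further below boils down to a small dispatch on this chain-ID field. A stand-alone rendering follows, with invented register values and assuming L3_FRAMING_SHIFT is 24, which is consistent with the "L3 framing [25:24]" and "CHAIN ID [31:24]" layout comments in this file:

#include <stdio.h>
#include <stdint.h>

/* Assumed value for the sketch, see the register layout comments. */
#define L3_FRAMING_SHIFT	24

static const char *classify(uint32_t data_port_6)
{
	uint32_t ipv4_or_chain_id = (data_port_6 >> L3_FRAMING_SHIFT) & 0xff;

	/* Entry 0 is reserved, so a zero chain ID cannot belong to a
	 * chained IPv6 rule and the entry must be IPv4.
	 */
	return ipv4_or_chain_id ? "IPv6 (chained)" : "IPv4";
}

int main(void)
{
	/* Invented example values for DATA_PORT(6). */
	uint32_t samples[] = { 0x00000000, 0x05000000 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("0x%08x -> %s\n", samples[i], classify(samples[i]));
	return 0;
}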
+ */ + ipv4_or_chain_id = (reg >> L3_FRAMING_SHIFT) & 0xff; + if (ipv4_or_chain_id == 0) + ret = bcm_sf2_cfp_ipv4_rule_get(priv, port, &nfc->fs); + else + ret = bcm_sf2_cfp_ipv6_rule_get(priv, port, &nfc->fs, + ipv4_or_chain_id); if (ret) return ret; @@ -571,7 +1056,7 @@ static int bcm_sf2_cfp_rule_get_all(struct bcm_sf2_priv *priv, { unsigned int index = 1, rules_cnt = 0; - for_each_set_bit_from(index, priv->cfp.used, priv->num_cfp_rules) { + for_each_set_bit_from(index, priv->cfp.unique, priv->num_cfp_rules) { rule_locs[rules_cnt] = index; rules_cnt++; } @@ -594,7 +1079,7 @@ int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port, switch (nfc->cmd) { case ETHTOOL_GRXCLSRLCNT: /* Subtract the default, unusable rule */ - nfc->rule_cnt = bitmap_weight(priv->cfp.used, + nfc->rule_cnt = bitmap_weight(priv->cfp.unique, priv->num_cfp_rules) - 1; /* We support specifying rule locations */ nfc->data |= RX_CLS_LOC_SPECIAL; diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 263e4c758795..93c600d1f732 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -313,6 +313,7 @@ enum bcm_sf2_reg_offs { #define SLICE_VALID 3 #define SLICE_NUM_SHIFT 2 #define SLICE_NUM(x) ((x) << SLICE_NUM_SHIFT) +#define SLICE_NUM_MASK 0xff #define CORE_CFP_MASK_PORT_0 0x280c0 @@ -408,6 +409,12 @@ enum bcm_sf2_reg_offs { #define CFG_UDF_EOL2 (2 << CFG_UDF_OFFSET_BASE_SHIFT) #define CFG_UDF_EOL3 (3 << CFG_UDF_OFFSET_BASE_SHIFT) +/* IPv6 slices */ +#define CORE_UDF_0_B_0_8_PORT_0 0x28500 + +/* IPv6 chained slices */ +#define CORE_UDF_0_D_0_11_PORT_0 0x28680 + /* Number of slices for IPv4, IPv6 and non-IP */ #define UDF_NUM_SLICES 4 #define UDFS_PER_SLICE 9 From bc3fc44c12d6a4722b253f19423db1244175c18c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Oct 2017 14:39:48 -0700 Subject: [PATCH 1294/2177] net: dsa: bcm_sf2: Allow matching arbitrary IPv4 mask lengths There is no reason why we should limit ourselves to matching only full IPv4 addresses (/32), the same logic applies between the DATA and MASK ports, so just make it more configurable to accept both. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 249 ++++++++++++++++++++------------- drivers/net/dsa/bcm_sf2_regs.h | 2 +- 2 files changed, 156 insertions(+), 95 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 0861f66ef739..043fd39793cc 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -250,13 +250,84 @@ static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, return 0; } +static void bcm_sf2_cfp_slice_ipv4(struct bcm_sf2_priv *priv, + struct ethtool_tcpip4_spec *v4_spec, + unsigned int slice_num, + bool mask) +{ + u32 reg, offset; + + /* C-Tag [31:24] + * UDF_n_A8 [23:8] + * UDF_n_A7 [7:0] + */ + reg = 0; + if (mask) + offset = CORE_CFP_MASK_PORT(4); + else + offset = CORE_CFP_DATA_PORT(4); + core_writel(priv, reg, offset); + + /* UDF_n_A7 [31:24] + * UDF_n_A6 [23:8] + * UDF_n_A5 [7:0] + */ + reg = be16_to_cpu(v4_spec->pdst) >> 8; + if (mask) + offset = CORE_CFP_MASK_PORT(3); + else + offset = CORE_CFP_DATA_PORT(3); + core_writel(priv, reg, offset); + + /* UDF_n_A5 [31:24] + * UDF_n_A4 [23:8] + * UDF_n_A3 [7:0] + */ + reg = (be16_to_cpu(v4_spec->pdst) & 0xff) << 24 | + (u32)be16_to_cpu(v4_spec->psrc) << 8 | + (be32_to_cpu(v4_spec->ip4dst) & 0x0000ff00) >> 8; + if (mask) + offset = CORE_CFP_MASK_PORT(2); + else + offset = CORE_CFP_DATA_PORT(2); + core_writel(priv, reg, offset); + + /* UDF_n_A3 [31:24] + * UDF_n_A2 [23:8] + * UDF_n_A1 [7:0] + */ + reg = (u32)(be32_to_cpu(v4_spec->ip4dst) & 0xff) << 24 | + (u32)(be32_to_cpu(v4_spec->ip4dst) >> 16) << 8 | + (be32_to_cpu(v4_spec->ip4src) & 0x0000ff00) >> 8; + if (mask) + offset = CORE_CFP_MASK_PORT(1); + else + offset = CORE_CFP_DATA_PORT(1); + core_writel(priv, reg, offset); + + /* UDF_n_A1 [31:24] + * UDF_n_A0 [23:8] + * Reserved [7:4] + * Slice ID [3:2] + * Slice valid [1:0] + */ + reg = (u32)(be32_to_cpu(v4_spec->ip4src) & 0xff) << 24 | + (u32)(be32_to_cpu(v4_spec->ip4src) >> 16) << 8 | + SLICE_NUM(slice_num) | SLICE_VALID; + if (mask) + offset = CORE_CFP_MASK_PORT(0); + else + offset = CORE_CFP_DATA_PORT(0); + core_writel(priv, reg, offset); +} + static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, unsigned int port_num, unsigned int queue_num, struct ethtool_rx_flow_spec *fs) { + struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec; const struct cfp_udf_layout *layout; - struct ethtool_tcpip4_spec *v4_spec; unsigned int slice_num, rule_index; u8 ip_proto, ip_frag; u8 num_udf; @@ -267,10 +338,12 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, case TCP_V4_FLOW: ip_proto = IPPROTO_TCP; v4_spec = &fs->h_u.tcp_ip4_spec; + v4_m_spec = &fs->m_u.tcp_ip4_spec; break; case UDP_V4_FLOW: ip_proto = IPPROTO_UDP; v4_spec = &fs->h_u.udp_ip4_spec; + v4_m_spec = &fs->m_u.udp_ip4_spec; break; default: return -EINVAL; @@ -321,69 +394,22 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, udf_upper_bits(num_udf), CORE_CFP_DATA_PORT(6)); + /* Mask with the specific layout for IPv4 packets */ + core_writel(priv, layout->udfs[slice_num].mask_value | + udf_upper_bits(num_udf), CORE_CFP_MASK_PORT(6)); + /* UDF_Valid[7:0] [31:24] * S-Tag [23:8] * C-Tag [7:0] */ core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_DATA_PORT(5)); - /* C-Tag [31:24] - * UDF_n_A8 [23:8] - * UDF_n_A7 [7:0] - */ - core_writel(priv, 0, CORE_CFP_DATA_PORT(4)); - - /* UDF_n_A7 [31:24] - * UDF_n_A6 [23:8] - * UDF_n_A5 [7:0] - */ - core_writel(priv, be16_to_cpu(v4_spec->pdst) >> 8, - CORE_CFP_DATA_PORT(3)); - - /* UDF_n_A5 [31:24] - * 
UDF_n_A4 [23:8] - * UDF_n_A3 [7:0] - */ - reg = (be16_to_cpu(v4_spec->pdst) & 0xff) << 24 | - (u32)be16_to_cpu(v4_spec->psrc) << 8 | - (be32_to_cpu(v4_spec->ip4dst) & 0x0000ff00) >> 8; - core_writel(priv, reg, CORE_CFP_DATA_PORT(2)); - - /* UDF_n_A3 [31:24] - * UDF_n_A2 [23:8] - * UDF_n_A1 [7:0] - */ - reg = (u32)(be32_to_cpu(v4_spec->ip4dst) & 0xff) << 24 | - (u32)(be32_to_cpu(v4_spec->ip4dst) >> 16) << 8 | - (be32_to_cpu(v4_spec->ip4src) & 0x0000ff00) >> 8; - core_writel(priv, reg, CORE_CFP_DATA_PORT(1)); - - /* UDF_n_A1 [31:24] - * UDF_n_A0 [23:8] - * Reserved [7:4] - * Slice ID [3:2] - * Slice valid [1:0] - */ - reg = (u32)(be32_to_cpu(v4_spec->ip4src) & 0xff) << 24 | - (u32)(be32_to_cpu(v4_spec->ip4src) >> 16) << 8 | - SLICE_NUM(slice_num) | SLICE_VALID; - core_writel(priv, reg, CORE_CFP_DATA_PORT(0)); - - /* Mask with the specific layout for IPv4 packets */ - core_writel(priv, layout->udfs[slice_num].mask_value | - udf_upper_bits(num_udf), CORE_CFP_MASK_PORT(6)); - /* Mask all but valid UDFs */ core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_MASK_PORT(5)); - /* Mask all */ - core_writel(priv, 0, CORE_CFP_MASK_PORT(4)); - - /* All other UDFs should be matched with the filter */ - core_writel(priv, 0xff, CORE_CFP_MASK_PORT(3)); - core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(2)); - core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(1)); - core_writel(priv, 0xffffff0f, CORE_CFP_MASK_PORT(0)); + /* Program the match and the mask */ + bcm_sf2_cfp_slice_ipv4(priv, v4_spec, slice_num, false); + bcm_sf2_cfp_slice_ipv4(priv, v4_m_spec, SLICE_NUM_MASK, true); /* Insert into TCAM now */ bcm_sf2_cfp_rule_addr_set(priv, rule_index); @@ -802,12 +828,80 @@ static void bcm_sf2_invert_masks(struct ethtool_rx_flow_spec *flow) flow->m_ext.data[1] ^= cpu_to_be32(~0); } +static int bcm_sf2_cfp_unslice_ipv4(struct bcm_sf2_priv *priv, + struct ethtool_tcpip4_spec *v4_spec, + bool mask) +{ + u32 reg, offset, ipv4; + u16 src_dst_port; + + if (mask) + offset = CORE_CFP_MASK_PORT(3); + else + offset = CORE_CFP_DATA_PORT(3); + + reg = core_readl(priv, offset); + /* src port [15:8] */ + src_dst_port = reg << 8; + + if (mask) + offset = CORE_CFP_MASK_PORT(2); + else + offset = CORE_CFP_DATA_PORT(2); + + reg = core_readl(priv, offset); + /* src port [7:0] */ + src_dst_port |= (reg >> 24); + + v4_spec->pdst = cpu_to_be16(src_dst_port); + v4_spec->psrc = cpu_to_be16((u16)(reg >> 8)); + + /* IPv4 dst [15:8] */ + ipv4 = (reg & 0xff) << 8; + + if (mask) + offset = CORE_CFP_MASK_PORT(1); + else + offset = CORE_CFP_DATA_PORT(1); + + reg = core_readl(priv, offset); + /* IPv4 dst [31:16] */ + ipv4 |= ((reg >> 8) & 0xffff) << 16; + /* IPv4 dst [7:0] */ + ipv4 |= (reg >> 24) & 0xff; + v4_spec->ip4dst = cpu_to_be32(ipv4); + + /* IPv4 src [15:8] */ + ipv4 = (reg & 0xff) << 8; + + if (mask) + offset = CORE_CFP_MASK_PORT(0); + else + offset = CORE_CFP_DATA_PORT(0); + reg = core_readl(priv, offset); + + /* Once the TCAM is programmed, the mask reflects the slice number + * being matched, don't bother checking it when reading back the + * mask spec + */ + if (!mask && !(reg & SLICE_VALID)) + return -EINVAL; + + /* IPv4 src [7:0] */ + ipv4 |= (reg >> 24) & 0xff; + /* IPv4 src [31:16] */ + ipv4 |= ((reg >> 8) & 0xffff) << 16; + v4_spec->ip4src = cpu_to_be32(ipv4); + + return 0; +} + static int bcm_sf2_cfp_ipv4_rule_get(struct bcm_sf2_priv *priv, int port, struct ethtool_rx_flow_spec *fs) { struct ethtool_tcpip4_spec *v4_spec = NULL, *v4_m_spec = NULL; - u16 src_dst_port; - u32 reg, ipv4; + u32 reg; + int ret; reg = 
core_readl(priv, CORE_CFP_DATA_PORT(6)); @@ -829,44 +923,11 @@ static int bcm_sf2_cfp_ipv4_rule_get(struct bcm_sf2_priv *priv, int port, fs->m_ext.data[0] = cpu_to_be32((reg >> IP_FRAG_SHIFT) & 1); v4_spec->tos = (reg >> IPTOS_SHIFT) & IPTOS_MASK; - reg = core_readl(priv, CORE_CFP_DATA_PORT(3)); - /* src port [15:8] */ - src_dst_port = reg << 8; + ret = bcm_sf2_cfp_unslice_ipv4(priv, v4_spec, false); + if (ret) + return ret; - reg = core_readl(priv, CORE_CFP_DATA_PORT(2)); - /* src port [7:0] */ - src_dst_port |= (reg >> 24); - - v4_spec->pdst = cpu_to_be16(src_dst_port); - v4_m_spec->pdst = cpu_to_be16(~0); - v4_spec->psrc = cpu_to_be16((u16)(reg >> 8)); - v4_m_spec->psrc = cpu_to_be16(~0); - - /* IPv4 dst [15:8] */ - ipv4 = (reg & 0xff) << 8; - reg = core_readl(priv, CORE_CFP_DATA_PORT(1)); - /* IPv4 dst [31:16] */ - ipv4 |= ((reg >> 8) & 0xffff) << 16; - /* IPv4 dst [7:0] */ - ipv4 |= (reg >> 24) & 0xff; - v4_spec->ip4dst = cpu_to_be32(ipv4); - v4_m_spec->ip4dst = cpu_to_be32(~0); - - /* IPv4 src [15:8] */ - ipv4 = (reg & 0xff) << 8; - reg = core_readl(priv, CORE_CFP_DATA_PORT(0)); - - if (!(reg & SLICE_VALID)) - return -EINVAL; - - /* IPv4 src [7:0] */ - ipv4 |= (reg >> 24) & 0xff; - /* IPv4 src [31:16] */ - ipv4 |= ((reg >> 8) & 0xffff) << 16; - v4_spec->ip4src = cpu_to_be32(ipv4); - v4_m_spec->ip4src = cpu_to_be32(~0); - - return 0; + return bcm_sf2_cfp_unslice_ipv4(priv, v4_m_spec, true); } static int bcm_sf2_cfp_unslice_ipv6(struct bcm_sf2_priv *priv, diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 93c600d1f732..3ccd5a865dcb 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -313,7 +313,7 @@ enum bcm_sf2_reg_offs { #define SLICE_VALID 3 #define SLICE_NUM_SHIFT 2 #define SLICE_NUM(x) ((x) << SLICE_NUM_SHIFT) -#define SLICE_NUM_MASK 0xff +#define SLICE_NUM_MASK 0x3 #define CORE_CFP_MASK_PORT_0 0x280c0 From dd8eff68343d5ab6535fb05bc837a2f85434d506 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Oct 2017 14:39:49 -0700 Subject: [PATCH 1295/2177] net: dsa: bcm_sf2: Allow matching arbitrary IPv6 masks/lengths There is no reason why we should limit ourselves to matching only full IPv4 addresses (/32), the same logic applies between the DATA and MASK ports, so just make it more configurable to accept both. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 113 ++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 33 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 043fd39793cc..4feb507eeee0 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -441,9 +441,10 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, const __be32 *ip6_addr, const __be16 port, - unsigned int slice_num) + unsigned int slice_num, + bool mask) { - u32 reg, tmp, val; + u32 reg, tmp, val, offset; /* C-Tag [31:24] * UDF_n_B8 [23:8] (port) @@ -451,7 +452,11 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, */ reg = be32_to_cpu(ip6_addr[3]); val = (u32)be16_to_cpu(port) << 8 | ((reg >> 8) & 0xff); - core_writel(priv, val, CORE_CFP_DATA_PORT(4)); + if (mask) + offset = CORE_CFP_MASK_PORT(4); + else + offset = CORE_CFP_DATA_PORT(4); + core_writel(priv, val, offset); /* UDF_n_B7 (lower) [31:24] (addr[7:0]) * UDF_n_B6 [23:8] (addr[31:16]) @@ -460,7 +465,11 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, tmp = be32_to_cpu(ip6_addr[2]); val = (u32)(reg & 0xff) << 24 | (u32)(reg >> 16) << 8 | ((tmp >> 8) & 0xff); - core_writel(priv, val, CORE_CFP_DATA_PORT(3)); + if (mask) + offset = CORE_CFP_MASK_PORT(3); + else + offset = CORE_CFP_DATA_PORT(3); + core_writel(priv, val, offset); /* UDF_n_B5 (lower) [31:24] (addr[39:32]) * UDF_n_B4 [23:8] (addr[63:48]) @@ -469,7 +478,11 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, reg = be32_to_cpu(ip6_addr[1]); val = (u32)(tmp & 0xff) << 24 | (u32)(tmp >> 16) << 8 | ((reg >> 8) & 0xff); - core_writel(priv, val, CORE_CFP_DATA_PORT(2)); + if (mask) + offset = CORE_CFP_MASK_PORT(2); + else + offset = CORE_CFP_DATA_PORT(2); + core_writel(priv, val, offset); /* UDF_n_B3 (lower) [31:24] (addr[71:64]) * UDF_n_B2 [23:8] (addr[95:80]) @@ -478,7 +491,11 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, tmp = be32_to_cpu(ip6_addr[0]); val = (u32)(reg & 0xff) << 24 | (u32)(reg >> 16) << 8 | ((tmp >> 8) & 0xff); - core_writel(priv, val, CORE_CFP_DATA_PORT(1)); + if (mask) + offset = CORE_CFP_MASK_PORT(1); + else + offset = CORE_CFP_DATA_PORT(1); + core_writel(priv, val, offset); /* UDF_n_B1 (lower) [31:24] (addr[103:96]) * UDF_n_B0 [23:8] (addr[127:112]) @@ -488,14 +505,11 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, */ reg = (u32)(tmp & 0xff) << 24 | (u32)(tmp >> 16) << 8 | SLICE_NUM(slice_num) | SLICE_VALID; - core_writel(priv, reg, CORE_CFP_DATA_PORT(0)); - - /* All other UDFs should be matched with the filter */ - core_writel(priv, 0x00ffffff, CORE_CFP_MASK_PORT(4)); - core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(3)); - core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(2)); - core_writel(priv, 0xffffffff, CORE_CFP_MASK_PORT(1)); - core_writel(priv, 0xffffff0f, CORE_CFP_MASK_PORT(0)); + if (mask) + offset = CORE_CFP_MASK_PORT(0); + else + offset = CORE_CFP_DATA_PORT(0); + core_writel(priv, reg, offset); } static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, @@ -503,8 +517,8 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, unsigned int queue_num, struct ethtool_rx_flow_spec *fs) { + struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; unsigned int slice_num, rule_index[2]; - struct ethtool_tcpip6_spec *v6_spec; const struct cfp_udf_layout *layout; u8 ip_proto, ip_frag; int ret = 0; @@ -515,10 +529,12 @@ static 
int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, case TCP_V6_FLOW: ip_proto = IPPROTO_TCP; v6_spec = &fs->h_u.tcp_ip6_spec; + v6_m_spec = &fs->m_u.tcp_ip6_spec; break; case UDP_V6_FLOW: ip_proto = IPPROTO_UDP; v6_spec = &fs->h_u.udp_ip6_spec; + v6_m_spec = &fs->m_u.udp_ip6_spec; break; default: return -EINVAL; @@ -606,7 +622,10 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_MASK_PORT(5)); /* Slice the IPv6 source address and port */ - bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6src, v6_spec->psrc, slice_num); + bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6src, v6_spec->psrc, + slice_num, false); + bcm_sf2_cfp_slice_ipv6(priv, v6_m_spec->ip6src, v6_m_spec->psrc, + slice_num, true); /* Insert into TCAM now because we need to insert a second rule */ bcm_sf2_cfp_rule_addr_set(priv, rule_index[0]); @@ -663,7 +682,10 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, /* Mask all */ core_writel(priv, 0, CORE_CFP_MASK_PORT(5)); - bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6dst, v6_spec->pdst, slice_num); + bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6dst, v6_spec->pdst, slice_num, + false); + bcm_sf2_cfp_slice_ipv6(priv, v6_m_spec->ip6dst, v6_m_spec->pdst, + SLICE_NUM_MASK, true); /* Insert into TCAM now */ bcm_sf2_cfp_rule_addr_set(priv, rule_index[1]); @@ -932,27 +954,33 @@ static int bcm_sf2_cfp_ipv4_rule_get(struct bcm_sf2_priv *priv, int port, static int bcm_sf2_cfp_unslice_ipv6(struct bcm_sf2_priv *priv, __be32 *ip6_addr, __be16 *port, - __be32 *ip6_mask, __be16 *port_mask) + bool mask) { - u32 reg, tmp; + u32 reg, tmp, offset; /* C-Tag [31:24] * UDF_n_B8 [23:8] (port) * UDF_n_B7 (upper) [7:0] (addr[15:8]) */ - reg = core_readl(priv, CORE_CFP_DATA_PORT(4)); + if (mask) + offset = CORE_CFP_MASK_PORT(4); + else + offset = CORE_CFP_DATA_PORT(4); + reg = core_readl(priv, offset); *port = cpu_to_be32(reg) >> 8; - *port_mask = cpu_to_be16(~0); tmp = (u32)(reg & 0xff) << 8; /* UDF_n_B7 (lower) [31:24] (addr[7:0]) * UDF_n_B6 [23:8] (addr[31:16]) * UDF_n_B5 (upper) [7:0] (addr[47:40]) */ - reg = core_readl(priv, CORE_CFP_DATA_PORT(3)); + if (mask) + offset = CORE_CFP_MASK_PORT(3); + else + offset = CORE_CFP_DATA_PORT(3); + reg = core_readl(priv, offset); tmp |= (reg >> 24) & 0xff; tmp |= (u32)((reg >> 8) << 16); - ip6_mask[3] = cpu_to_be32(~0); ip6_addr[3] = cpu_to_be32(tmp); tmp = (u32)(reg & 0xff) << 8; @@ -960,10 +988,13 @@ static int bcm_sf2_cfp_unslice_ipv6(struct bcm_sf2_priv *priv, * UDF_n_B4 [23:8] (addr[63:48]) * UDF_n_B3 (upper) [7:0] (addr[79:72]) */ - reg = core_readl(priv, CORE_CFP_DATA_PORT(2)); + if (mask) + offset = CORE_CFP_MASK_PORT(2); + else + offset = CORE_CFP_DATA_PORT(2); + reg = core_readl(priv, offset); tmp |= (reg >> 24) & 0xff; tmp |= (u32)((reg >> 8) << 16); - ip6_mask[2] = cpu_to_be32(~0); ip6_addr[2] = cpu_to_be32(tmp); tmp = (u32)(reg & 0xff) << 8; @@ -971,10 +1002,13 @@ static int bcm_sf2_cfp_unslice_ipv6(struct bcm_sf2_priv *priv, * UDF_n_B2 [23:8] (addr[95:80]) * UDF_n_B1 (upper) [7:0] (addr[111:104]) */ - reg = core_readl(priv, CORE_CFP_DATA_PORT(1)); + if (mask) + offset = CORE_CFP_MASK_PORT(1); + else + offset = CORE_CFP_DATA_PORT(1); + reg = core_readl(priv, offset); tmp |= (reg >> 24) & 0xff; tmp |= (u32)((reg >> 8) << 16); - ip6_mask[1] = cpu_to_be32(~0); ip6_addr[1] = cpu_to_be32(tmp); tmp = (u32)(reg & 0xff) << 8; @@ -984,13 +1018,16 @@ static int bcm_sf2_cfp_unslice_ipv6(struct bcm_sf2_priv *priv, * Slice ID [3:2] * Slice valid [1:0] */ - reg 
= core_readl(priv, CORE_CFP_DATA_PORT(0)); + if (mask) + offset = CORE_CFP_MASK_PORT(0); + else + offset = CORE_CFP_DATA_PORT(0); + reg = core_readl(priv, offset); tmp |= (reg >> 24) & 0xff; tmp |= (u32)((reg >> 8) << 16); - ip6_mask[0] = cpu_to_be32(~0); ip6_addr[0] = cpu_to_be32(tmp); - if (!(reg & SLICE_VALID)) + if (!mask && !(reg & SLICE_VALID)) return -EINVAL; return 0; @@ -1012,7 +1049,12 @@ static int bcm_sf2_cfp_ipv6_rule_get(struct bcm_sf2_priv *priv, int port, /* Read the second half first */ ret = bcm_sf2_cfp_unslice_ipv6(priv, v6_spec->ip6dst, &v6_spec->pdst, - v6_m_spec->ip6dst, &v6_m_spec->pdst); + false); + if (ret) + return ret; + + ret = bcm_sf2_cfp_unslice_ipv6(priv, v6_m_spec->ip6dst, + &v6_m_spec->pdst, true); if (ret) return ret; @@ -1043,8 +1085,13 @@ static int bcm_sf2_cfp_ipv6_rule_get(struct bcm_sf2_priv *priv, int port, return -EINVAL; } - return bcm_sf2_cfp_unslice_ipv6(priv, v6_spec->ip6src, &v6_spec->psrc, - v6_m_spec->ip6src, &v6_m_spec->psrc); + ret = bcm_sf2_cfp_unslice_ipv6(priv, v6_spec->ip6src, &v6_spec->psrc, + false); + if (ret) + return ret; + + return bcm_sf2_cfp_unslice_ipv6(priv, v6_m_spec->ip6src, + &v6_m_spec->psrc, true); } static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, From e69cd9d75ee797a46e1d2703226f0478d05bca10 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 22 Oct 2017 23:11:43 +0200 Subject: [PATCH 1296/2177] mlxsw: spectrum_dpipe: Add adjacency group size The adjacency group size is part of the match on the adjacency group and should therefore be exposed using dpipe. When non-equal-cost multi-path support will be introduced, the group's size will help users understand the exact number of adjacency entries each nexthop occupies, as a nexthop will no longer correspond to a single entry. The output for a multi-path route with two nexthops, one with weight 255 and the second 1 will be: Example: $ devlink dpipe table dump pci/0000:01:00.0 name mlxsw_adj pci/0000:01:00.0: index 0 match_value: type field_exact header mlxsw_meta field adj_index value 65536 type field_exact header mlxsw_meta field adj_size value 512 type field_exact header mlxsw_meta field adj_hash_index value 0 action_value: type field_modify header ethernet field destination mac value e4:1d:2d:a5:f3:64 type field_modify header mlxsw_meta field erif_port mapping ifindex mapping_value 3 value 1 index 1 match_value: type field_exact header mlxsw_meta field adj_index value 65536 type field_exact header mlxsw_meta field adj_size value 512 type field_exact header mlxsw_meta field adj_hash_index value 510 action_value: type field_modify header ethernet field destination mac value e4:1d:2d:a5:f3:65 type field_modify header mlxsw_meta field erif_port mapping ifindex mapping_value 4 value 2 Thus, the first nexthop occupies 510 adjacency entries and the second 2, which leads to a ratio of 255 to 1. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 46 +++++++++++++++++-- .../ethernet/mellanox/mlxsw/spectrum_router.c | 3 +- .../ethernet/mellanox/mlxsw/spectrum_router.h | 2 +- 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index a056f23d3a0e..6ea6435279c0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -44,6 +44,7 @@ enum mlxsw_sp_field_metadata_id { MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE, MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, }; @@ -69,6 +70,11 @@ static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, .bitwidth = 32, }, + { + .name = "adj_size", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE, + .bitwidth = 32, + }, { .name = "adj_hash_index", .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, @@ -851,6 +857,14 @@ static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv, match.header = &mlxsw_sp_dpipe_header_metadata; match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE; + err = devlink_dpipe_match_put(skb, &match); if (err) return err; @@ -897,6 +911,7 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) enum mlxsw_sp_dpipe_table_adj_match { MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT, }; @@ -919,6 +934,11 @@ mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matche match->header = &mlxsw_sp_dpipe_header_metadata; match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE; + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; match->header = &mlxsw_sp_dpipe_header_metadata; @@ -955,6 +975,15 @@ mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry, match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + match_value->match = match; match_value->value_size = sizeof(u32); match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); @@ -993,8 +1022,8 @@ mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry, static void __mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry, - u32 adj_index, u32 adj_hash_index, - unsigned char *ha, + u32 adj_index, u32 adj_size, + u32 adj_hash_index, unsigned char *ha, struct mlxsw_sp_rif *rif) { struct devlink_dpipe_value *value; @@ -1005,6 +1034,10 @@ 
__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry, p_index = value->value; *p_index = adj_index; + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + p_index = value->value; + *p_index = adj_size; + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; p_index = value->value; *p_index = adj_hash_index; @@ -1027,10 +1060,11 @@ static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp, unsigned char *ha = mlxsw_sp_nexthop_ha(nh); u32 adj_hash_index = 0; u32 adj_index = 0; + u32 adj_size = 0; int err; - mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index); - __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, &adj_hash_index); + __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, adj_size, adj_hash_index, ha, rif); err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter); if (!err) @@ -1138,13 +1172,15 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) struct mlxsw_sp_nexthop *nh; u32 adj_hash_index = 0; u32 adj_index = 0; + u32 adj_size = 0; mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { if (!mlxsw_sp_nexthop_offload(nh) || mlxsw_sp_nexthop_group_has_ipip(nh)) continue; - mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index); + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, + &adj_hash_index); if (enable) mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); else diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 5f2d100e3718..cb0d25ede9c6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2299,7 +2299,7 @@ unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) } int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, - u32 *p_adj_hash_index) + u32 *p_adj_size, u32 *p_adj_hash_index) { struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; u32 adj_hash_index = 0; @@ -2309,6 +2309,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, return -EINVAL; *p_adj_index = nh_grp->adj_index; + *p_adj_size = nh_grp->ecmp_size; for (i = 0; i < nh_grp->count; i++) { struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 3f2d840cb285..39e5811ed263 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -115,7 +115,7 @@ struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh); int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, - u32 *p_adj_hash_index); + u32 *p_adj_size, u32 *p_adj_hash_index); struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh); bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); #define mlxsw_sp_nexthop_for_each(nh, router) \ From a875a2ee2db8970dd93b8d287e35b8eba72f0a89 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 22 Oct 2017 23:11:44 +0200 Subject: [PATCH 1297/2177] mlxsw: spectrum: Better represent KVDL partitions The KVD linear (KVDL) allocator currently consists of a very large bitmap that reflects the KVDL's usage. 
The boundaries of each partition as well as their allocation size are represented using defines. This representation requires us to patch all the functions that act on a partition whenever the partitioning scheme is changed. In addition, it does not enable the dynamic configuration of the KVDL using the up-coming resource manager. Add objects to represent these partitions as well as the accompanying code that acts on them to perform allocations and de-allocations. In the following patches, this will allow us to easily add another partition as well as new operations to act on these partitions. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 11 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 7 +- .../ethernet/mellanox/mlxsw/spectrum_kvdl.c | 259 +++++++++++++++--- 3 files changed, 239 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4d73a6f7759e..1bc3fc356084 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3726,10 +3726,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return err; } + err = mlxsw_sp_kvdl_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize KVDL\n"); + return err; + } + err = mlxsw_sp_fids_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize FIDs\n"); - return err; + goto err_fids_init; } err = mlxsw_sp_traps_init(mlxsw_sp); @@ -3834,6 +3840,8 @@ err_buffers_init: mlxsw_sp_traps_fini(mlxsw_sp); err_traps_init: mlxsw_sp_fids_fini(mlxsw_sp); +err_fids_init: + mlxsw_sp_kvdl_fini(mlxsw_sp); return err; } @@ -3854,6 +3862,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_fids_fini(mlxsw_sp); + mlxsw_sp_kvdl_fini(mlxsw_sp); } static const struct mlxsw_config_profile mlxsw_sp_config_profile = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 2a2472a09d8c..035c753585a0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -143,6 +143,7 @@ struct mlxsw_sp_mr; struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; +struct mlxsw_sp_kvdl; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -158,9 +159,7 @@ struct mlxsw_sp { struct mlxsw_afa *afa; struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; - struct { - DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); - } kvdl; + struct mlxsw_sp_kvdl *kvdl; struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; @@ -411,6 +410,8 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); /* spectrum_kvdl.c */ +int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, u32 *p_entry_index); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index 26c26cd30c3d..512537561483 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -39,55 +39,246 @@ #define 
MLXSW_SP_KVDL_SINGLE_BASE 0 #define MLXSW_SP_KVDL_SINGLE_SIZE 16384 +#define MLXSW_SP_KVDL_SINGLE_END \ + (MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1) #define MLXSW_SP_KVDL_CHUNKS_BASE \ (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE) #define MLXSW_SP_KVDL_CHUNKS_SIZE \ (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) +#define MLXSW_SP_KVDL_CHUNKS_END \ + (MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1) #define MLXSW_SP_CHUNK_MAX 32 +struct mlxsw_sp_kvdl_part_info { + unsigned int part_index; + unsigned int start_index; + unsigned int end_index; + unsigned int alloc_size; +}; + +struct mlxsw_sp_kvdl_part { + struct list_head list; + const struct mlxsw_sp_kvdl_part_info *info; + unsigned long usage[0]; /* Entries */ +}; + +struct mlxsw_sp_kvdl { + struct list_head parts_list; +}; + +static struct mlxsw_sp_kvdl_part * +mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl, + unsigned int alloc_size) +{ + struct mlxsw_sp_kvdl_part *part, *min_part = NULL; + + list_for_each_entry(part, &kvdl->parts_list, list) { + if (alloc_size <= part->info->alloc_size && + (!min_part || + part->info->alloc_size <= min_part->info->alloc_size)) + min_part = part; + } + + return min_part ?: ERR_PTR(-ENOBUFS); +} + +static struct mlxsw_sp_kvdl_part * +mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index) +{ + struct mlxsw_sp_kvdl_part *part; + + list_for_each_entry(part, &kvdl->parts_list, list) { + if (kvdl_index >= part->info->start_index && + kvdl_index <= part->info->end_index) + return part; + } + + return ERR_PTR(-EINVAL); +} + +static u32 +mlxsw_sp_entry_index_kvdl_index(const struct mlxsw_sp_kvdl_part_info *info, + unsigned int entry_index) +{ + return info->start_index + entry_index * info->alloc_size; +} + +static unsigned int +mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info, + u32 kvdl_index) +{ + return (kvdl_index - info->start_index) / info->alloc_size; +} + +static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part, + u32 *p_kvdl_index) +{ + const struct mlxsw_sp_kvdl_part_info *info = part->info; + unsigned int entry_index, nr_entries; + + nr_entries = (info->end_index - info->start_index + 1) / + info->alloc_size; + entry_index = find_first_zero_bit(part->usage, nr_entries); + if (entry_index == nr_entries) + return -ENOBUFS; + __set_bit(entry_index, part->usage); + + *p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(part->info, + entry_index); + + return 0; +} + +static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part, + u32 kvdl_index) +{ + unsigned int entry_index; + + entry_index = mlxsw_sp_kvdl_index_entry_index(part->info, + kvdl_index); + __clear_bit(entry_index, part->usage); +} + int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, u32 *p_entry_index) { - int entry_index; - int size; - int type_base; - int type_size; - int type_entries; + struct mlxsw_sp_kvdl_part *part; - if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) { - return -EINVAL; - } else if (entry_count == 1) { - type_base = MLXSW_SP_KVDL_SINGLE_BASE; - type_size = MLXSW_SP_KVDL_SINGLE_SIZE; - type_entries = 1; - } else { - type_base = MLXSW_SP_KVDL_CHUNKS_BASE; - type_size = MLXSW_SP_KVDL_CHUNKS_SIZE; - type_entries = MLXSW_SP_CHUNK_MAX; - } + /* Find partition with smallest allocation size satisfying the + * requested size. 
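A user-space sketch of that selection rule follows. Only the allocation sizes of the two partitions introduced by this patch (single entries and 32-entry chunks) are modelled; the start/end indexes and per-partition usage bitmaps are left out:

#include <stdio.h>

/* Only the allocation sizes matter for the selection step; the real
 * partitions also carry start/end indexes and a usage bitmap.
 */
static const unsigned int part_alloc_size[] = { 1, 32 };
#define NUM_PARTS (sizeof(part_alloc_size) / sizeof(part_alloc_size[0]))

/* Pick the partition with the smallest alloc_size that still covers
 * the request; return -1 when no partition is big enough (-ENOBUFS
 * in the driver).
 */
static int pick_part(unsigned int entry_count)
{
	unsigned int i;
	int best = -1;

	for (i = 0; i < NUM_PARTS; i++) {
		if (entry_count > part_alloc_size[i])
			continue;
		if (best < 0 || part_alloc_size[i] <= part_alloc_size[best])
			best = i;
	}
	return best;
}

int main(void)
{
	unsigned int requests[] = { 1, 2, 32, 33 };
	unsigned int i;

	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++)
		printf("%u entries -> partition %d\n", requests[i],
		       pick_part(requests[i]));
	return 0;
}

With only these two partitions, a single entry lands in the singles area, anything up to 32 entries gets a chunk, and larger requests fail (-ENOBUFS in the driver).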
+ */ + part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count); + if (IS_ERR(part)) + return PTR_ERR(part); - entry_index = type_base; - size = type_base + type_size; - for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) { - int i; - - for (i = 0; i < type_entries; i++) - set_bit(entry_index + i, mlxsw_sp->kvdl.usage); - *p_entry_index = entry_index; - return 0; - } - return -ENOBUFS; + return mlxsw_sp_kvdl_part_alloc(part, p_entry_index); } void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) { - int type_entries; + struct mlxsw_sp_kvdl_part *part; + + part = mlxsw_sp_kvdl_index_part(mlxsw_sp->kvdl, entry_index); + if (IS_ERR(part)) + return; + mlxsw_sp_kvdl_part_free(part, entry_index); +} + +static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = { + { + .part_index = 0, + .start_index = MLXSW_SP_KVDL_SINGLE_BASE, + .end_index = MLXSW_SP_KVDL_SINGLE_END, + .alloc_size = 1, + }, + { + .part_index = 1, + .start_index = MLXSW_SP_KVDL_CHUNKS_BASE, + .end_index = MLXSW_SP_KVDL_CHUNKS_END, + .alloc_size = MLXSW_SP_CHUNK_MAX, + }, +}; + +static struct mlxsw_sp_kvdl_part * +mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index) +{ + struct mlxsw_sp_kvdl_part *part; + + list_for_each_entry(part, &mlxsw_sp->kvdl->parts_list, list) { + if (part->info->part_index == part_index) + return part; + } + + return NULL; +} + +static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, + unsigned int part_index) +{ + const struct mlxsw_sp_kvdl_part_info *info; + struct mlxsw_sp_kvdl_part *part; + unsigned int nr_entries; + size_t usage_size; + + info = &kvdl_parts_info[part_index]; + + nr_entries = (info->end_index - info->start_index + 1) / + info->alloc_size; + usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long); + part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); + if (!part) + return -ENOMEM; + + part->info = info; + list_add(&part->list, &mlxsw_sp->kvdl->parts_list); + + return 0; +} + +static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp, + unsigned int part_index) +{ + struct mlxsw_sp_kvdl_part *part; + + part = mlxsw_sp_kvdl_part_find(mlxsw_sp, part_index); + if (!part) + return; + + list_del(&part->list); + kfree(part); +} + +static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp) +{ + int err, i; + + INIT_LIST_HEAD(&mlxsw_sp->kvdl->parts_list); + + for (i = 0; i < ARRAY_SIZE(kvdl_parts_info); i++) { + err = mlxsw_sp_kvdl_part_init(mlxsw_sp, i); + if (err) + goto err_kvdl_part_init; + } + + return 0; + +err_kvdl_part_init: + for (i--; i >= 0; i--) + mlxsw_sp_kvdl_part_fini(mlxsw_sp, i); + return err; +} + +static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp) +{ int i; - if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE) - type_entries = 1; - else - type_entries = MLXSW_SP_CHUNK_MAX; - for (i = 0; i < type_entries; i++) - clear_bit(entry_index + i, mlxsw_sp->kvdl.usage); + for (i = ARRAY_SIZE(kvdl_parts_info) - 1; i >= 0; i--) + mlxsw_sp_kvdl_part_fini(mlxsw_sp, i); +} + +int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_kvdl *kvdl; + int err; + + kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl), GFP_KERNEL); + if (!kvdl) + return -ENOMEM; + mlxsw_sp->kvdl = kvdl; + + err = mlxsw_sp_kvdl_parts_init(mlxsw_sp); + if (err) + goto err_kvdl_parts_init; + + return 0; + +err_kvdl_parts_init: + kfree(mlxsw_sp->kvdl); + return err; +} + +void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_kvdl_parts_fini(mlxsw_sp); + kfree(mlxsw_sp->kvdl); } From 
d672aec45fd4a1e060109fbce6739ef91c3bd135 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 22 Oct 2017 23:11:45 +0200 Subject: [PATCH 1298/2177] mlxsw: spectrum: Add ability to query KVDL allocation size The current KVDL allocation API allows the user to specify the requested number of entries, but the user has no way of knowing how many entries were actually allocated. This works because existing users (e.g., router) request the exact number they end up using. With the introduction of large adjacency groups, this will change, as the router will have the ability to choose from several allocation sizes, where larger allocations provide higher accuracy with respect to requested weights and better resilience against nexthop failures. One option is to have the router try several allocations of descending size until one succeeds, but a better way is to simply allow it to query the actual allocation size and then size its request accordingly. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 +++ .../net/ethernet/mellanox/mlxsw/spectrum_kvdl.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 035c753585a0..78ff20d86db1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -415,6 +415,9 @@ void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, u32 *p_entry_index); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); +int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, + unsigned int entry_count, + unsigned int *p_alloc_size); struct mlxsw_sp_acl_rule_info { unsigned int priority; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index 512537561483..266b3af6513c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -164,6 +164,21 @@ void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) mlxsw_sp_kvdl_part_free(part, entry_index); } +int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, + unsigned int entry_count, + unsigned int *p_alloc_size) +{ + struct mlxsw_sp_kvdl_part *part; + + part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count); + if (IS_ERR(part)) + return PTR_ERR(part); + + *p_alloc_size = part->info->alloc_size; + + return 0; +} + static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = { { .part_index = 0, From 408bd946bfee69ec99937bd0f9ed9dcd2d19705b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 22 Oct 2017 23:11:46 +0200 Subject: [PATCH 1299/2177] mlxsw: spectrum_router: Store weight in nexthop struct As the first step towards non-equal-cost multi-path support, store each nexthop's weight. For IPv6 nexthops always set the weight to 1, as it only supports ECMP. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index cb0d25ede9c6..8cd422d7640c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2203,6 +2203,7 @@ struct mlxsw_sp_nexthop { struct mlxsw_sp_nexthop_key key; unsigned char gw_addr[sizeof(struct in6_addr)]; int ifindex; + int nh_weight; struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. @@ -3045,6 +3046,11 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; nh->key.fib_nh = fib_nh; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight = fib_nh->nh_weight; +#else + nh->nh_weight = 1; +#endif memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) @@ -4304,6 +4310,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct net_device *dev = rt->dst.dev; nh->nh_grp = nh_grp; + nh->nh_weight = 1; memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); From 425a08c67317acee103b3ad58f57c762e8834faf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 22 Oct 2017 23:11:47 +0200 Subject: [PATCH 1300/2177] mlxsw: spectrum_router: Prepare for large adjacency groups The device has certain restrictions regarding the size of an adjacency group. Have the router determine the size of the adjacency group according to available KVDL allocation sizes and these restrictions. This was not needed until now since only allocations of up 32 entries were supported and these are all valid sizes for an adjacency group. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 8cd422d7640c..68ce5492aa0f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2708,6 +2708,59 @@ mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) } } +static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size) +{ + /* Valid sizes for an adjacency group are: + * 1-64, 512, 1024, 2048 and 4096. + */ + if (*p_adj_grp_size <= 64) + return; + else if (*p_adj_grp_size <= 512) + *p_adj_grp_size = 512; + else if (*p_adj_grp_size <= 1024) + *p_adj_grp_size = 1024; + else if (*p_adj_grp_size <= 2048) + *p_adj_grp_size = 2048; + else + *p_adj_grp_size = 4096; +} + +static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size, + unsigned int alloc_size) +{ + if (alloc_size >= 4096) + *p_adj_grp_size = 4096; + else if (alloc_size >= 2048) + *p_adj_grp_size = 2048; + else if (alloc_size >= 1024) + *p_adj_grp_size = 1024; + else if (alloc_size >= 512) + *p_adj_grp_size = 512; +} + +static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size) +{ + unsigned int alloc_size; + int err; + + /* Round up the requested group size to the next size supported + * by the device and make sure the request can be satisfied. 
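The size ladder can be exercised on its own. The sketch below copies the round-up/round-down logic from this patch and feeds it made-up allocation results in place of mlxsw_sp_kvdl_alloc_size_query():

#include <stdio.h>
#include <stdint.h>

/* Valid adjacency group sizes: 1-64, 512, 1024, 2048 and 4096. */
static void adj_grp_size_round_up(uint16_t *p_size)
{
	if (*p_size <= 64)
		return;
	else if (*p_size <= 512)
		*p_size = 512;
	else if (*p_size <= 1024)
		*p_size = 1024;
	else if (*p_size <= 2048)
		*p_size = 2048;
	else
		*p_size = 4096;
}

/* Use the largest valid group size the allocation can actually hold. */
static void adj_grp_size_round_down(uint16_t *p_size, unsigned int alloc_size)
{
	if (alloc_size >= 4096)
		*p_size = 4096;
	else if (alloc_size >= 2048)
		*p_size = 2048;
	else if (alloc_size >= 1024)
		*p_size = 1024;
	else if (alloc_size >= 512)
		*p_size = 512;
}

int main(void)
{
	/* { requested group size, pretend KVDL allocation } pairs */
	struct { uint16_t req; unsigned int alloc; } cases[] = {
		{ 3, 32 }, { 70, 512 }, { 70, 1024 }, { 600, 1024 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		uint16_t size = cases[i].req;

		adj_grp_size_round_up(&size);
		adj_grp_size_round_down(&size, cases[i].alloc);
		printf("requested %u, allocated %u -> group size %u\n",
		       cases[i].req, cases[i].alloc, size);
	}
	return 0;
}

Requests of up to 64 entries are already valid sizes and pass through unchanged; larger ones snap up to 512, 1024, 2048 or 4096, and when the allocator hands back more than was asked for, the whole allocation is put to use.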
+ */ + mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size); + err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size, + &alloc_size); + if (err) + return err; + /* It is possible the allocation results in more allocated + * entries than requested. Try to use as much of them as + * possible. + */ + mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size); + + return 0; +} + static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) @@ -2755,6 +2808,11 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, */ goto set_trap; + err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size); + if (err) + /* No valid allocation size available. */ + goto set_trap; + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index); if (err) { /* We ran out of KVD linear space, just set the From eb789980d0aa6cd2ebee3eb07792800bbe134bc0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 22 Oct 2017 23:11:48 +0200 Subject: [PATCH 1301/2177] mlxsw: spectrum_router: Populate adjacency entries according to weights Up until now the driver assumed all the nexthops have an equal weight and wrote each to a single adjacency entry. This patch takes the `weight` parameter into account and populates the adjacency group according to the relative weight of each nexthop. Specifically, the weights of all the nexthops that should be offloaded are first normalized and then used to calculate the upper adjacency index of each nexthop. This is done according to the hash-threshold algorithm used by the kernel for IPv4 multi-path routing. Adjacency groups are currently limited to 32 entries which limits the weights that can be used, but follow-up patches will introduce groups of 512 entries. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 111 ++++++++++++++++-- 1 file changed, 99 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 68ce5492aa0f..27b632cac991 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -2204,6 +2205,8 @@ struct mlxsw_sp_nexthop { unsigned char gw_addr[sizeof(struct in6_addr)]; int ifindex; int nh_weight; + int norm_nh_weight; + int num_adj_entries; struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. 
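To see how normalization and rebalancing interact, here is a stand-alone calculation for the two-nexthop case from the dpipe patch earlier in the series (weights 255 and 1, adjacency group size 512). The gcd helper and DIV_ROUND_CLOSEST are spelled out so this builds outside the kernel:

#include <stdio.h>

static int gcd_int(int a, int b)
{
	while (b) {
		int t = a % b;

		a = b;
		b = t;
	}
	return a;
}

#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))

int main(void)
{
	int weights[] = { 255, 1 };	/* nexthop weights from the example */
	int n = sizeof(weights) / sizeof(weights[0]);
	int ecmp_size = 512;		/* adjacency group size */
	int i, g = 0, total = 0, lower_bound = 0;

	/* Normalize the weights by their greatest common divisor. */
	for (i = 0; i < n; i++)
		g = g ? gcd_int(weights[i], g) : weights[i];
	for (i = 0; i < n; i++)
		total += weights[i] / g;

	/* Hash-threshold style split of the adjacency group. */
	for (i = 0; i < n; i++) {
		int weight_sum_so_far = 0, upper_bound;
		int j;

		for (j = 0; j <= i; j++)
			weight_sum_so_far += weights[j] / g;
		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight_sum_so_far,
						total);
		printf("nexthop %d: %d adjacency entries\n",
		       i, upper_bound - lower_bound);
		lower_bound = upper_bound;
	}
	return 0;
}

With 255:1 the gcd is 1, the normalized sum is 256, the group is sized up to 512, and the split comes out as 510 and 2 entries, matching the devlink dpipe dump quoted a few patches earlier.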
@@ -2233,6 +2236,7 @@ struct mlxsw_sp_nexthop_group { u32 adj_index; u16 ecmp_size; u16 count; + int sum_norm_weight; struct mlxsw_sp_nexthop nexthops[0]; #define nh_rif nexthops[0].rif }; @@ -2318,7 +2322,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, if (nh_iter == nh) break; if (nh_iter->offloaded) - adj_hash_index++; + adj_hash_index += nh_iter->num_adj_entries; } *p_adj_hash_index = adj_hash_index; @@ -2601,8 +2605,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, return 0; } -int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; char ratr_pl[MLXSW_REG_RATR_LEN]; @@ -2619,9 +2623,25 @@ int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } -static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, - u32 adj_index, - struct mlxsw_sp_nexthop *nh) +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + int i; + + for (i = 0; i < nh->num_adj_entries; i++) { + int err; + + err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh) { const struct mlxsw_sp_ipip_ops *ipip_ops; @@ -2629,6 +2649,24 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); } +static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + int i; + + for (i = 0; i < nh->num_adj_entries; i++) { + int err; + + err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i, + nh); + if (err) + return err; + } + + return 0; +} + static int mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, @@ -2663,7 +2701,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, nh->update = 0; nh->offloaded = 1; } - adj_index++; + adj_index += nh->num_adj_entries; } return 0; } @@ -2761,17 +2799,65 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, return 0; } +static void +mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp) +{ + int i, g = 0, sum_norm_weight = 0; + struct mlxsw_sp_nexthop *nh; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (!nh->should_offload) + continue; + if (g > 0) + g = gcd(nh->nh_weight, g); + else + g = nh->nh_weight; + } + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (!nh->should_offload) + continue; + nh->norm_nh_weight = nh->nh_weight / g; + sum_norm_weight += nh->norm_nh_weight; + } + + nh_grp->sum_norm_weight = sum_norm_weight; +} + +static void +mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp) +{ + int total = nh_grp->sum_norm_weight; + u16 ecmp_size = nh_grp->ecmp_size; + int i, weight = 0, lower_bound = 0; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + int upper_bound; + + if (!nh->should_offload) + continue; + weight += nh->norm_nh_weight; + upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total); + nh->num_adj_entries = upper_bound - lower_bound; + lower_bound = 
upper_bound; + } +} + static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + u16 ecmp_size, old_ecmp_size; struct mlxsw_sp_nexthop *nh; bool offload_change = false; u32 adj_index; - u16 ecmp_size = 0; bool old_adj_index_valid; u32 old_adj_index; - u16 old_ecmp_size; int i; int err; @@ -2788,8 +2874,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, if (nh->should_offload) nh->update = 1; } - if (nh->should_offload) - ecmp_size++; } if (!offload_change) { /* Nothing was added or removed, so no need to reallocate. Just @@ -2802,12 +2886,14 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, } return; } - if (!ecmp_size) + mlxsw_sp_nexthop_group_normalize(nh_grp); + if (!nh_grp->sum_norm_weight) /* No neigh of this group is connected so we just set * the trap and let everthing flow through kernel. */ goto set_trap; + ecmp_size = nh_grp->sum_norm_weight; err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size); if (err) /* No valid allocation size available. */ @@ -2827,6 +2913,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, nh_grp->adj_index_valid = 1; nh_grp->adj_index = adj_index; nh_grp->ecmp_size = ecmp_size; + mlxsw_sp_nexthop_group_rebalance(nh_grp); err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); From f11fbaf8b5a83608523b88cf62682914cf521546 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 22 Oct 2017 23:11:49 +0200 Subject: [PATCH 1302/2177] mlxsw: spectrum: Increase number of linear entries The memory region where adjacency entries (nexthops) are stored is called the KVD linear and is configured during initialization with a size of 64K. Extend this area with 32K more entries, that will be partitioned into 64 groups of 0.5K entries, thereby allowing us to support weighted nexthops with high accuracy. Change the ratio between both types of hash entries, so as to prevent reduction in the number of double hash entries, which are used for IPv6 neighbours and routes with a prefix length greater than 64. Note that the user will be able to control all these sizes once the devlink resource manager is introduced. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1bc3fc356084..12b6ac487d8d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3885,8 +3885,8 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_pkey = 0, .used_kvd_split_data = 1, .kvd_hash_granularity = MLXSW_SP_KVD_GRANULARITY, - .kvd_hash_single_parts = 2, - .kvd_hash_double_parts = 1, + .kvd_hash_single_parts = 59, + .kvd_hash_double_parts = 41, .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, .swid_config = { { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 78ff20d86db1..dc1b739c3ae1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -62,7 +62,7 @@ #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ -#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ +#define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */ #define MLXSW_SP_KVD_GRANULARITY 128 struct mlxsw_sp_port; From 330e2cc65d5f2c0545230e00f8f50b35a3e5995b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 22 Oct 2017 23:11:50 +0200 Subject: [PATCH 1303/2177] mlxsw: spectrum: Add another partition to KVD linear The KVD linear is currently partitioned into two partitions. One for single entries and another for groups of 32 entries. Add another partition consisting of groups of 512 entries which will allow us to more accurately represent the nexthop weights in non-equal cost multi-path routing. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_kvdl.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index 266b3af6513c..310c38247b5c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -41,13 +41,22 @@ #define MLXSW_SP_KVDL_SINGLE_SIZE 16384 #define MLXSW_SP_KVDL_SINGLE_END \ (MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1) + #define MLXSW_SP_KVDL_CHUNKS_BASE \ (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE) -#define MLXSW_SP_KVDL_CHUNKS_SIZE \ - (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) +#define MLXSW_SP_KVDL_CHUNKS_SIZE 49152 #define MLXSW_SP_KVDL_CHUNKS_END \ (MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1) + +#define MLXSW_SP_KVDL_LARGE_CHUNKS_BASE \ + (MLXSW_SP_KVDL_CHUNKS_BASE + MLXSW_SP_KVDL_CHUNKS_SIZE) +#define MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE \ + (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_LARGE_CHUNKS_BASE) +#define MLXSW_SP_KVDL_LARGE_CHUNKS_END \ + (MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1) + #define MLXSW_SP_CHUNK_MAX 32 +#define MLXSW_SP_LARGE_CHUNK_MAX 512 struct mlxsw_sp_kvdl_part_info { unsigned int part_index; @@ -192,6 +201,12 @@ static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = { .end_index = MLXSW_SP_KVDL_CHUNKS_END, .alloc_size = MLXSW_SP_CHUNK_MAX, }, + { + .part_index = 2, + .start_index = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE, + .end_index = MLXSW_SP_KVDL_LARGE_CHUNKS_END, + .alloc_size = MLXSW_SP_LARGE_CHUNK_MAX, + }, }; static struct mlxsw_sp_kvdl_part * From e83b171568e6a69cff5eb592907b71e480b535ac Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Oct 2017 15:59:30 -0700 Subject: [PATCH 1304/2177] net: systemport: Guard against unmapped TX ring Because SYSTEMPORT is a (semi) normal network device, the stack may attempt to queue packets on it oustide of the DSA slave transmit path. When that happens, the DSA layer has not had a chance to tag packets with the appropriate per-port and per-queue information, and if that happens and we don't have a port 0 queue 0 available (e.g: on boards where this does not exist), we will hit a NULL pointer de-reference in bcm_sysport_select_queue(). Guard against such cases by testing for the TX ring validity. Fixes: 84ff33eeb23d ("net: systemport: Establish DSA network device queue mapping") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index dafc26690555..1d9d5f986e14 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2040,6 +2040,9 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb, port = BRCM_TAG_GET_PORT(queue); tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues]; + if (unlikely(!tx_ring)) + return fallback(dev, skb); + return tx_ring->index; } From 058c8d59124193f46db4efa5abcd9d6d0f04c88e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Oct 2017 19:43:11 -0500 Subject: [PATCH 1305/2177] net: core: rtnetlink: use BUG_ON instead of if condition followed by BUG Use BUG_ON instead of if condition followed by BUG in do_setlink. This issue was detected with the help of Coccinelle. 
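For illustration only, the shape of that conversion looks roughly like the stand-alone sketch below; the names are placeholders rather than the real rtnetlink symbols, and BUG_ON() is modelled with assert(). The key point is that BUG_ON() evaluates its condition, so the assignment embedded in the condition still takes effect.

/* Hypothetical stand-in for the do_setlink() pattern; not kernel code. */
#include <assert.h>
#include <stddef.h>

#define BUG_ON(cond) assert(!(cond))	/* user-space model of BUG_ON() */

struct fake_af_ops { int family; };

static struct fake_af_ops inet_ops = { .family = 2 };

/* stands in for the lookup; returns NULL for unknown families */
static struct fake_af_ops *fake_af_lookup(int family)
{
	return family == 2 ? &inet_ops : NULL;
}

int main(void)
{
	struct fake_af_ops *af_ops;

	/* old style: assignment in an if condition followed by BUG() */
	if (!(af_ops = fake_af_lookup(2)))
		assert(0);

	/* new style: a single BUG_ON(); the condition, and thus the
	 * assignment inside it, is still evaluated exactly once.
	 */
	BUG_ON(!(af_ops = fake_af_lookup(2)));

	return af_ops->family == 2 ? 0 : 1;
}
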
Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 04680a53c8dd..df8dba998c48 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2274,8 +2274,7 @@ static int do_setlink(const struct sk_buff *skb, rcu_read_lock(); - if (!(af_ops = rtnl_af_lookup(nla_type(af)))) - BUG(); + BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); err = af_ops->set_link_af(dev, af); if (err < 0) { From d3cc547d9ccbe223b06e87256fdd571eb63762b7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 19 Oct 2017 15:09:47 +0200 Subject: [PATCH 1306/2177] esp6: remove redundant initialization of esph The pointer esph is being initialized with a value that is never read and then being updated. Remove the redundant initialization and move the declaration and initializtion of esph to the local code block. Cleans up clang warning: net/ipv6/esp6.c:562:21: warning: Value stored to 'esph' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 89910e2c10f4..1696401fed6c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -559,14 +559,14 @@ static void esp_input_restore_header(struct sk_buff *skb) static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) { struct xfrm_state *x = xfrm_input_state(skb); - struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data; /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after * decryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = skb_push(skb, 4); + struct ip_esp_hdr *esph = skb_push(skb, 4); + *seqhi = esph->spi; esph->spi = esph->seq_no; esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; From 40b16b9be5773a314948656c96adf7bf7cfdbd0b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 21 Oct 2017 11:45:46 +0200 Subject: [PATCH 1307/2177] batman-adv: use inline kernel-doc for uapi constants The enums of constants for netlink tends to become rather large over time. Documenting them is easier when the kernel-doc is actually next to constant and not in a different block above the enum. Also inline kernel-doc allows multi-paragraph description. This could be required to better document the netlink command types and the expected return values. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 369 +++++++++++++++++++++++++------- 1 file changed, 290 insertions(+), 79 deletions(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a83ddb7b63db..efd641c8a5d6 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -24,20 +24,6 @@ /** * enum batadv_tt_client_flags - TT client specific flags - * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table - * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new - * update telling its new real location has not been received/sent yet - * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface. - * This information is used by the "AP Isolation" feature - * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". 
This - * information is used by the Extended Isolation feature - * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table - * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has - * not been announced yet - * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept - * in the table for one more originator interval for consistency purposes - * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of - * the network but no nnode has already announced it * * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. * Bits from 8 to 15 are called _local flags_ because they are used for local @@ -48,160 +34,385 @@ * in the TT CRC computation. */ enum batadv_tt_client_flags { + /** + * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table + */ BATADV_TT_CLIENT_DEL = (1 << 0), + + /** + * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and + * the new update telling its new real location has not been + * received/sent yet + */ BATADV_TT_CLIENT_ROAM = (1 << 1), + + /** + * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi + * interface. This information is used by the "AP Isolation" feature + */ BATADV_TT_CLIENT_WIFI = (1 << 4), + + /** + * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This + * information is used by the Extended Isolation feature + */ BATADV_TT_CLIENT_ISOLA = (1 << 5), + + /** + * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from + * the table + */ BATADV_TT_CLIENT_NOPURGE = (1 << 8), + + /** + * @BATADV_TT_CLIENT_NEW: this client has been added to the local table + * but has not been announced yet + */ BATADV_TT_CLIENT_NEW = (1 << 9), + + /** + * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it + * is kept in the table for one more originator interval for consistency + * purposes + */ BATADV_TT_CLIENT_PENDING = (1 << 10), + + /** + * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be + * part of the network but no nnode has already announced it + */ BATADV_TT_CLIENT_TEMP = (1 << 11), }; /** * enum batadv_nl_attrs - batman-adv netlink attributes - * - * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors - * @BATADV_ATTR_VERSION: batman-adv version string - * @BATADV_ATTR_ALGO_NAME: name of routing algorithm - * @BATADV_ATTR_MESH_IFINDEX: index of the batman-adv interface - * @BATADV_ATTR_MESH_IFNAME: name of the batman-adv interface - * @BATADV_ATTR_MESH_ADDRESS: mac address of the batman-adv interface - * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface - * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface - * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv interface - * @BATADV_ATTR_ORIG_ADDRESS: originator mac address - * @BATADV_ATTR_TPMETER_RESULT: result of run (see batadv_tp_meter_status) - * @BATADV_ATTR_TPMETER_TEST_TIME: time (msec) the run took - * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run - * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session - * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment - * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active - * @BATADV_ATTR_TT_ADDRESS: Client MAC address - * @BATADV_ATTR_TT_TTVN: Translation table version - * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version - * @BATADV_ATTR_TT_CRC32: CRC32 over translation table - * @BATADV_ATTR_TT_VID: VLAN ID - * @BATADV_ATTR_TT_FLAGS: 
Translation table client flags - * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best - * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen - * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address - * @BATADV_ATTR_TQ: TQ to neighbour - * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour - * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth - * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth - * @BATADV_ATTR_ROUTER: Gateway router MAC address - * @BATADV_ATTR_BLA_OWN: Flag indicating own originator - * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address - * @BATADV_ATTR_BLA_VID: BLA VLAN ID - * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address - * @BATADV_ATTR_BLA_CRC: BLA CRC - * @__BATADV_ATTR_AFTER_LAST: internal use - * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available - * @BATADV_ATTR_MAX: highest attribute number currently defined */ enum batadv_nl_attrs { + /** + * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors + */ BATADV_ATTR_UNSPEC, + + /** + * @BATADV_ATTR_VERSION: batman-adv version string + */ BATADV_ATTR_VERSION, + + /** + * @BATADV_ATTR_ALGO_NAME: name of routing algorithm + */ BATADV_ATTR_ALGO_NAME, + + /** + * @BATADV_ATTR_MESH_IFINDEX: index of the batman-adv interface + */ BATADV_ATTR_MESH_IFINDEX, + + /** + * @BATADV_ATTR_MESH_IFNAME: name of the batman-adv interface + */ BATADV_ATTR_MESH_IFNAME, + + /** + * @BATADV_ATTR_MESH_ADDRESS: mac address of the batman-adv interface + */ BATADV_ATTR_MESH_ADDRESS, + + /** + * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface + */ BATADV_ATTR_HARD_IFINDEX, + + /** + * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface + */ BATADV_ATTR_HARD_IFNAME, + + /** + * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv + * interface + */ BATADV_ATTR_HARD_ADDRESS, + + /** + * @BATADV_ATTR_ORIG_ADDRESS: originator mac address + */ BATADV_ATTR_ORIG_ADDRESS, + + /** + * @BATADV_ATTR_TPMETER_RESULT: result of run (see + * batadv_tp_meter_status) + */ BATADV_ATTR_TPMETER_RESULT, + + /** + * @BATADV_ATTR_TPMETER_TEST_TIME: time (msec) the run took + */ BATADV_ATTR_TPMETER_TEST_TIME, + + /** + * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run + */ BATADV_ATTR_TPMETER_BYTES, + + /** + * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session + */ BATADV_ATTR_TPMETER_COOKIE, + + /** + * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment + */ BATADV_ATTR_PAD, + + /** + * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active + */ BATADV_ATTR_ACTIVE, + + /** + * @BATADV_ATTR_TT_ADDRESS: Client MAC address + */ BATADV_ATTR_TT_ADDRESS, + + /** + * @BATADV_ATTR_TT_TTVN: Translation table version + */ BATADV_ATTR_TT_TTVN, + + /** + * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version + */ BATADV_ATTR_TT_LAST_TTVN, + + /** + * @BATADV_ATTR_TT_CRC32: CRC32 over translation table + */ BATADV_ATTR_TT_CRC32, + + /** + * @BATADV_ATTR_TT_VID: VLAN ID + */ BATADV_ATTR_TT_VID, + + /** + * @BATADV_ATTR_TT_FLAGS: Translation table client flags + */ BATADV_ATTR_TT_FLAGS, + + /** + * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best + */ BATADV_ATTR_FLAG_BEST, + + /** + * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen + */ BATADV_ATTR_LAST_SEEN_MSECS, + + /** + * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address + */ BATADV_ATTR_NEIGH_ADDRESS, + + /** + * @BATADV_ATTR_TQ: TQ to neighbour + */ BATADV_ATTR_TQ, + + /** + * @BATADV_ATTR_THROUGHPUT: 
Estimated throughput to Neighbour + */ BATADV_ATTR_THROUGHPUT, + + /** + * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth + */ BATADV_ATTR_BANDWIDTH_UP, + + /** + * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth + */ BATADV_ATTR_BANDWIDTH_DOWN, + + /** + * @BATADV_ATTR_ROUTER: Gateway router MAC address + */ BATADV_ATTR_ROUTER, + + /** + * @BATADV_ATTR_BLA_OWN: Flag indicating own originator + */ BATADV_ATTR_BLA_OWN, + + /** + * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address + */ BATADV_ATTR_BLA_ADDRESS, + + /** + * @BATADV_ATTR_BLA_VID: BLA VLAN ID + */ BATADV_ATTR_BLA_VID, + + /** + * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address + */ BATADV_ATTR_BLA_BACKBONE, + + /** + * @BATADV_ATTR_BLA_CRC: BLA CRC + */ BATADV_ATTR_BLA_CRC, + /* add attributes above here, update the policy in netlink.c */ + + /** + * @__BATADV_ATTR_AFTER_LAST: internal use + */ __BATADV_ATTR_AFTER_LAST, + + /** + * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available + */ NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, + + /** + * @BATADV_ATTR_MAX: highest attribute number currently defined + */ BATADV_ATTR_MAX = __BATADV_ATTR_AFTER_LAST - 1 }; /** * enum batadv_nl_commands - supported batman-adv netlink commands - * - * @BATADV_CMD_UNSPEC: unspecified command to catch errors - * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device - * @BATADV_CMD_TP_METER: Start a tp meter session - * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session - * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms. - * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces - * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations - * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations - * @BATADV_CMD_GET_ORIGINATORS: Query list of originators - * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours - * @BATADV_CMD_GET_GATEWAYS: Query list of gateways - * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims - * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance backbones - * @__BATADV_CMD_AFTER_LAST: internal use - * @BATADV_CMD_MAX: highest used command number */ enum batadv_nl_commands { + /** + * @BATADV_CMD_UNSPEC: unspecified command to catch errors + */ BATADV_CMD_UNSPEC, + + /** + * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv + * device + */ BATADV_CMD_GET_MESH_INFO, + + /** + * @BATADV_CMD_TP_METER: Start a tp meter session + */ BATADV_CMD_TP_METER, + + /** + * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session + */ BATADV_CMD_TP_METER_CANCEL, + + /** + * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms. 
+ */ BATADV_CMD_GET_ROUTING_ALGOS, + + /** + * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces + */ BATADV_CMD_GET_HARDIFS, + + /** + * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations + */ BATADV_CMD_GET_TRANSTABLE_LOCAL, + + /** + * @BATADV_CMD_GET_TRANSTABLE_GLOBAL: Query list of global translations + */ BATADV_CMD_GET_TRANSTABLE_GLOBAL, + + /** + * @BATADV_CMD_GET_ORIGINATORS: Query list of originators + */ BATADV_CMD_GET_ORIGINATORS, + + /** + * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours + */ BATADV_CMD_GET_NEIGHBORS, + + /** + * @BATADV_CMD_GET_GATEWAYS: Query list of gateways + */ BATADV_CMD_GET_GATEWAYS, + + /** + * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims + */ BATADV_CMD_GET_BLA_CLAIM, + + /** + * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance + * backbones + */ BATADV_CMD_GET_BLA_BACKBONE, + /* add new commands above here */ + + /** + * @__BATADV_CMD_AFTER_LAST: internal use + */ __BATADV_CMD_AFTER_LAST, + + /** + * @BATADV_CMD_MAX: highest used command number + */ BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 }; /** * enum batadv_tp_meter_reason - reason of a tp meter test run stop - * @BATADV_TP_REASON_COMPLETE: sender finished tp run - * @BATADV_TP_REASON_CANCEL: sender was stopped during run - * @BATADV_TP_REASON_DST_UNREACHABLE: receiver could not be reached or didn't - * answer - * @BATADV_TP_REASON_RESEND_LIMIT: (unused) sender retry reached limit - * @BATADV_TP_REASON_ALREADY_ONGOING: test to or from the same node already - * ongoing - * @BATADV_TP_REASON_MEMORY_ERROR: test was stopped due to low memory - * @BATADV_TP_REASON_CANT_SEND: failed to send via outgoing interface - * @BATADV_TP_REASON_TOO_MANY: too many ongoing sessions */ enum batadv_tp_meter_reason { + /** + * @BATADV_TP_REASON_COMPLETE: sender finished tp run + */ BATADV_TP_REASON_COMPLETE = 3, + + /** + * @BATADV_TP_REASON_CANCEL: sender was stopped during run + */ BATADV_TP_REASON_CANCEL = 4, + /* error status >= 128 */ + + /** + * @BATADV_TP_REASON_DST_UNREACHABLE: receiver could not be reached or + * didn't answer + */ BATADV_TP_REASON_DST_UNREACHABLE = 128, + + /** + * @BATADV_TP_REASON_RESEND_LIMIT: (unused) sender retry reached limit + */ BATADV_TP_REASON_RESEND_LIMIT = 129, + + /** + * @BATADV_TP_REASON_ALREADY_ONGOING: test to or from the same node + * already ongoing + */ BATADV_TP_REASON_ALREADY_ONGOING = 130, + + /** + * @BATADV_TP_REASON_MEMORY_ERROR: test was stopped due to low memory + */ BATADV_TP_REASON_MEMORY_ERROR = 131, + + /** + * @BATADV_TP_REASON_CANT_SEND: failed to send via outgoing interface + */ BATADV_TP_REASON_CANT_SEND = 132, + + /** + * @BATADV_TP_REASON_TOO_MANY: too many ongoing sessions + */ BATADV_TP_REASON_TOO_MANY = 133, }; From b9077428ec5569aacb2952d8a2ffb51c8988d3c2 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Mon, 23 Oct 2017 19:51:01 +0800 Subject: [PATCH 1308/2177] net: hns3: fix a bug when alloc new buffer When alloce new buffer to HW, should unmap the old buffer first. This old code map the old buffer but not unmap the old buffer, this patch fixes it. Fixes: 76ad4f0 (net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC) Signed-off-by: Lipeng Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 8383d6726ae4..3ddcd47fa61c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1595,7 +1595,7 @@ out_buffer_fail: static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, struct hns3_desc_cb *res_cb) { - hns3_map_buffer(ring, &ring->desc_cb[i]); + hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); } From 564883bb4dc1a4f3cba6344e77743175694b0761 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Mon, 23 Oct 2017 19:51:02 +0800 Subject: [PATCH 1309/2177] net: hns3: fix the bug when map buffer fail If a buffer has been received to the stack, the driver allocates a new buffer, maps it to the device and replaces the old buffer. When the mapping fails, only the newly allocated buffer should be freed, not all the buffers in the ring. Fixes: 76ad4f0 (net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC) Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 3ddcd47fa61c..58aa2dd6ace0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1555,7 +1555,7 @@ static int hns3_reserve_buffer_map(struct hns3_enet_ring *ring, return 0; out_with_buf: - hns3_free_buffers(ring); + hns3_free_buffer(ring, cb); out: return ret; } From 7410343eab04088225267949477d1c7b5f9598fc Mon Sep 17 00:00:00 2001 From: Lipeng Date: Mon, 23 Oct 2017 19:51:03 +0800 Subject: [PATCH 1310/2177] net: hns3: fix the ops check in hns3_get_rxnfc 1# patch: 07d2995 net: hns3: add support for ETHTOOL_GRXFH. 2# patch: 5668abd net: hns3: add support for set_ringparam. Patch 1# adds ae_algo->ops->get_rss_tuple to hns3_get_rxnfc and patch 2# deletes ae_algo->ops->get_tc_size from hns3_get_rxnfc. This patch fixes the ops check in hns3_get_rxnfc. Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index 6c469e49a04f..5cd163bdbf14 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -717,7 +717,7 @@ static int hns3_get_rxnfc(struct net_device *netdev, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_tc_size) + if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss_tuple) return -EOPNOTSUPP; switch (cmd->cmd) { From 709eb41ad8cd56ee68f9ca5140cfd46839d35837 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Mon, 23 Oct 2017 19:51:04 +0800 Subject: [PATCH 1311/2177] net: hns3: get vf count by pci_sriov_get_totalvfs This patch gets the VF count using the standard function pci_sriov_get_totalvfs, instead of from NIC HW info. Signed-off-by: Lipeng Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 8508521c26e8..443124177f05 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -837,7 +837,6 @@ static int hclge_parse_func_status(struct hclge_dev *hdev, else hdev->flag &= ~HCLGE_FLAG_MAIN; - hdev->num_req_vfs = status->vf_num / status->pf_num; return 0; } @@ -4361,6 +4360,8 @@ static int hclge_pci_init(struct hclge_dev *hdev) goto err_clr_master; } + hdev->num_req_vfs = pci_sriov_get_totalvfs(pdev); + return 0; err_clr_master: pci_clear_master(pdev); From 66b447301ac710ee237dba8b653244018fbb6168 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Mon, 23 Oct 2017 19:51:05 +0800 Subject: [PATCH 1312/2177] net: hns3: fix the TX/RX ring.queue_index in hns3_ring_get_cfg The interface hns3_ring_get_cfg only updates the TX ring queue_index and does not update the RX ring queue_index. This patch fixes it. Fixes: 76ad4f0 (net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC) Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 58aa2dd6ace0..14de0f7581c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2506,16 +2506,16 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, if (ring_type == HNAE3_RING_TYPE_TX) { ring_data[q->tqp_index].ring = ring; + ring_data[q->tqp_index].queue_index = q->tqp_index; ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET; } else { ring_data[q->tqp_index + queue_num].ring = ring; + ring_data[q->tqp_index + queue_num].queue_index = q->tqp_index; ring->io_base = q->io_base; } hnae_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type); - ring_data[q->tqp_index].queue_index = q->tqp_index; - ring->tqp = q; ring->desc = NULL; ring->desc_cb = NULL; From 51145dae2748233315a7a411cd97f4bedf8cc22f Mon Sep 17 00:00:00 2001 From: Lipeng Date: Mon, 23 Oct 2017 19:51:06 +0800 Subject: [PATCH 1313/2177] net: hns3: remove redundant memset when alloc buffer The HW uses the packet length to write packets to the buffer or read packets from it, so the memset done when allocating a buffer is redundant and only adds overhead. This patch removes it. Fixes: 76ad4f0 (net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC) Signed-off-by: Lipeng Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 14de0f7581c8..06af3c86b60c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1444,8 +1444,6 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, cb->length = hnae_page_size(ring); cb->type = DESC_TYPE_PAGE; - memset(cb->buf, 0, cb->length); - return 0; } From 24e750c410ae046d1236af50014cbc697bb375d7 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Mon, 23 Oct 2017 19:51:07 +0800 Subject: [PATCH 1314/2177] net: hns3: fix a bug about hns3_clean_tx_ring The return value of hns3_clean_tx_ring indicates the TX ring clean result: true means the cleaning is complete and there are no more packets to clean, false means there are still packets to clean and NAPI needs to poll again. The last return of hns3_clean_tx_ring is "return !!budget" since budget decreases each time a buffer is cleaned. If there is no valid BD in the TX ring, returning 0 from hns3_clean_tx_ring causes NAPI to poll again and the poll never completes. This patch fixes the bug. Fixes: 76ad4f0 (net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC) Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 6 +++--- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 06af3c86b60c..537f6c3babb7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1629,7 +1629,7 @@ static int is_valid_clean_head(struct hns3_enet_ring *ring, int h) return u > c ?
(h > c && h <= u) : (h > c || h <= u); } -int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) +bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) { struct net_device *netdev = ring->tqp->handle->kinfo.netdev; struct netdev_queue *dev_queue; @@ -1640,7 +1640,7 @@ int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) rmb(); /* Make sure head is ready before touch any data */ if (is_ring_empty(ring) || head == ring->next_to_clean) - return 0; /* no data to poll */ + return true; /* no data to poll */ if (!is_valid_clean_head(ring, head)) { netdev_err(netdev, "wrong head (%d, %d-%d)\n", head, @@ -1649,7 +1649,7 @@ int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) u64_stats_update_begin(&ring->syncp); ring->stats.io_err_cnt++; u64_stats_update_end(&ring->syncp); - return -EIO; + return true; } bytes = 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h index 6228b2603d93..58dc30bf893c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h @@ -594,7 +594,7 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value) void hns3_ethtool_set_ops(struct net_device *netdev); -int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); +bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); int hns3_init_all_ring(struct hns3_nic_priv *priv); int hns3_uninit_all_ring(struct hns3_nic_priv *priv); netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev); From f6e37b25413cf636369668652e9752ee77c7d9f7 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 23 Oct 2017 09:20:22 -0700 Subject: [PATCH 1315/2177] tcp: add trace event class tcp_event_sk_skb Introduce event class tcp_event_sk_skb for tcp tracepoints that have arguments sk and skb. Existing tracepoint trace_tcp_retransmit_skb() falls into this class. This patch rewrites the definition of trace_tcp_retransmit_skb() with tcp_event_sk_skb. Signed-off-by: Song Liu Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index c3220d914475..14b0a7083f1d 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -9,7 +9,13 @@ #include #include -TRACE_EVENT(tcp_retransmit_skb, +/* + * tcp event with arguments sk and skb + * + * Note: this class requires a valid sk pointer; while skb pointer could + * be NULL. + */ +DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_PROTO(struct sock *sk, struct sk_buff *skb), @@ -64,6 +70,13 @@ TRACE_EVENT(tcp_retransmit_skb, __entry->saddr_v6, __entry->daddr_v6) ); +DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, + + TP_PROTO(struct sock *sk, struct sk_buff *skb), + + TP_ARGS(sk, skb) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ From 7344e29f285a94b965075599731811c352f3ab40 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 23 Oct 2017 09:20:23 -0700 Subject: [PATCH 1316/2177] tcp: mark trace event arguments sk and skb as const Some functions that we plan to add trace points require const sk and/or skb. So we mark these fields as const in the tracepoint. Signed-off-by: Song Liu Signed-off-by: David S. 
Miller --- include/trace/events/tcp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 14b0a7083f1d..2b6fe72c6781 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -17,13 +17,13 @@ */ DECLARE_EVENT_CLASS(tcp_event_sk_skb, - TP_PROTO(struct sock *sk, struct sk_buff *skb), + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( - __field(void *, skbaddr) - __field(void *, skaddr) + __field(const void *, skbaddr) + __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __array(__u8, saddr, 4) @@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, - TP_PROTO(struct sock *sk, struct sk_buff *skb), + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); From c24b14c46bb88d844275de5c4024c8745ae89d42 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 23 Oct 2017 09:20:24 -0700 Subject: [PATCH 1317/2177] tcp: add tracepoint trace_tcp_send_reset New tracepoint trace_tcp_send_reset is added and called from tcp_v4_send_reset(), tcp_v6_send_reset() and tcp_send_active_reset(). Signed-off-by: Song Liu Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 11 +++++++++++ net/core/net-traces.c | 2 ++ net/ipv4/tcp_ipv4.c | 6 +++++- net/ipv4/tcp_output.c | 5 +++++ net/ipv6/tcp_ipv6.c | 10 ++++++++-- 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 2b6fe72c6781..3e57e1ae1c6b 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -77,6 +77,17 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, TP_ARGS(sk, skb) ); +/* + * skb of trace_tcp_send_reset is the skb that caused RST. In case of + * active reset, skb should be NULL + */ +DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + + TP_ARGS(sk, skb) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/core/net-traces.c b/net/core/net-traces.c index f4e4fa2db505..8dcd9b0be04a 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -49,3 +49,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); + +EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e22439f05e46..eb3f3b8e1e4b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -85,6 +85,8 @@ #include #include +#include + #ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th); @@ -701,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) * routing might fail in this case. No choice here, if we choose to force * input interface, we will misroute in case of asymmetric route. */ - if (sk) + if (sk) { arg.bound_dev_if = sk->sk_bound_dev_if; + trace_tcp_send_reset(sk, skb); + } BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != offsetof(struct inet_timewait_sock, tw_bound_dev_if)); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 988733f289c8..1f01f4c9c738 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3084,6 +3084,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* Send it off. 
*/ if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); + + /* skb of trace_tcp_send_reset() keeps the skb that caused RST, + * skb here is different to the troublesome skb, so use NULL + */ + trace_tcp_send_reset(sk, NULL); } /* Send a crossed SYN-ACK during socket establishment. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ae83615b7f6d..0e2529958b52 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -69,6 +69,8 @@ #include #include +#include + static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) int genhash; struct sock *sk1 = NULL; #endif - int oif; + int oif = 0; if (th->rst) return; @@ -939,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - oif = sk ? sk->sk_bound_dev_if : 0; + if (sk) { + oif = sk->sk_bound_dev_if; + trace_tcp_send_reset(sk, skb); + } + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG From 5941521c05d69cf3f2b1293eefd21207e083b70f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 23 Oct 2017 09:20:25 -0700 Subject: [PATCH 1318/2177] tcp: add tracepoint trace_tcp_receive_reset New tracepoint trace_tcp_receive_reset is added and called from tcp_reset(). This tracepoint is define with a new class tcp_event_sk. Signed-off-by: Song Liu Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 66 ++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 3 ++ 2 files changed, 69 insertions(+) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 3e57e1ae1c6b..c83c71187719 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -88,6 +88,72 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset, TP_ARGS(sk, skb) ); +/* + * tcp event with arguments sk + * + * Note: this class requires a valid sk pointer. 
+ */ +DECLARE_EVENT_CLASS(tcp_event_sk, + + TP_PROTO(const struct sock *sk), + + TP_ARGS(sk), + + TP_STRUCT__entry( + __field(const void *, skaddr) + __field(__u16, sport) + __field(__u16, dport) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + __array(__u8, saddr_v6, 16) + __array(__u8, daddr_v6, 16) + ), + + TP_fast_assign( + struct inet_sock *inet = inet_sk(sk); + struct in6_addr *pin6; + __be32 *p32; + + __entry->skaddr = sk; + + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + + p32 = (__be32 *) __entry->saddr; + *p32 = inet->inet_saddr; + + p32 = (__be32 *) __entry->daddr; + *p32 = inet->inet_daddr; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + pin6 = (struct in6_addr *)__entry->saddr_v6; + *pin6 = sk->sk_v6_rcv_saddr; + pin6 = (struct in6_addr *)__entry->daddr_v6; + *pin6 = sk->sk_v6_daddr; + } else +#endif + { + pin6 = (struct in6_addr *)__entry->saddr_v6; + ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); + pin6 = (struct in6_addr *)__entry->daddr_v6; + ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); + } + ), + + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + __entry->sport, __entry->dport, + __entry->saddr, __entry->daddr, + __entry->saddr_v6, __entry->daddr_v6) +); + +DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, + + TP_PROTO(const struct sock *sk), + + TP_ARGS(sk) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ab3f12898245..c5e64d4b5839 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -75,6 +75,7 @@ #include #include #include +#include int sysctl_tcp_fack __read_mostly; int sysctl_tcp_max_reordering __read_mostly = 300; @@ -4010,6 +4011,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) /* When we get a reset we do this. */ void tcp_reset(struct sock *sk) { + trace_tcp_receive_reset(sk); + /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: From e1a4aa50f47303ebb3ca0cfd01687884551ce03d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 23 Oct 2017 09:20:26 -0700 Subject: [PATCH 1319/2177] tcp: add tracepoint trace_tcp_destroy_sock This patch adds trace event trace_tcp_destroy_sock. Signed-off-by: Song Liu Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 7 +++++++ net/ipv4/tcp_ipv4.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index c83c71187719..1724c12c25cf 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -154,6 +154,13 @@ DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, TP_ARGS(sk) ); +DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, + + TP_PROTO(const struct sock *sk), + + TP_ARGS(sk) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eb3f3b8e1e4b..23a8100af5ad 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1869,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + trace_tcp_destroy_sock(sk); + tcp_clear_xmit_timers(sk); tcp_cleanup_congestion_control(sk); From e8fce23946b7e7eadf25ad78d8207c22903dfe27 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 23 Oct 2017 09:20:27 -0700 Subject: [PATCH 1320/2177] tcp: add tracepoint trace_tcp_set_state() This patch adds tracepoint trace_tcp_set_state. 
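As a usage note rather than part of the commit: once the tracepoint is in place it can be consumed like any other ftrace event. The sketch below is a minimal user-space reader; it assumes tracefs is mounted at /sys/kernel/debug/tracing and that the event shows up under events/tcp/tcp_set_state.

/* Illustrative user-space helper, not part of the series: enable the
 * tcp_set_state tracepoint and stream the trace buffer to stdout.
 * Assumes tracefs is mounted at /sys/kernel/debug/tracing.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *enable =
		"/sys/kernel/debug/tracing/events/tcp/tcp_set_state/enable";
	char buf[4096];
	ssize_t n;
	int fd;

	fd = open(enable, O_WRONLY);
	if (fd < 0) {
		perror("open enable");
		return 1;
	}
	if (write(fd, "1", 1) != 1)
		perror("write enable");
	close(fd);

	/* Each line emitted by the kernel follows the TP_printk() format
	 * defined in the tracepoint (ports, addresses, old/new state).
	 */
	fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
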
Besides usual fields (s/d ports, IP addresses), old and new state of the socket is also printed with TP_printk, with __print_symbolic(). Signed-off-by: Song Liu Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 76 ++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 4 ++ 2 files changed, 80 insertions(+) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1724c12c25cf..03699ba71623 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -9,6 +9,22 @@ #include #include +#define tcp_state_name(state) { state, #state } +#define show_tcp_state_name(val) \ + __print_symbolic(val, \ + tcp_state_name(TCP_ESTABLISHED), \ + tcp_state_name(TCP_SYN_SENT), \ + tcp_state_name(TCP_SYN_RECV), \ + tcp_state_name(TCP_FIN_WAIT1), \ + tcp_state_name(TCP_FIN_WAIT2), \ + tcp_state_name(TCP_TIME_WAIT), \ + tcp_state_name(TCP_CLOSE), \ + tcp_state_name(TCP_CLOSE_WAIT), \ + tcp_state_name(TCP_LAST_ACK), \ + tcp_state_name(TCP_LISTEN), \ + tcp_state_name(TCP_CLOSING), \ + tcp_state_name(TCP_NEW_SYN_RECV)) + /* * tcp event with arguments sk and skb * @@ -161,6 +177,66 @@ DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, TP_ARGS(sk) ); +TRACE_EVENT(tcp_set_state, + + TP_PROTO(const struct sock *sk, const int oldstate, const int newstate), + + TP_ARGS(sk, oldstate, newstate), + + TP_STRUCT__entry( + __field(const void *, skaddr) + __field(int, oldstate) + __field(int, newstate) + __field(__u16, sport) + __field(__u16, dport) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + __array(__u8, saddr_v6, 16) + __array(__u8, daddr_v6, 16) + ), + + TP_fast_assign( + struct inet_sock *inet = inet_sk(sk); + struct in6_addr *pin6; + __be32 *p32; + + __entry->skaddr = sk; + __entry->oldstate = oldstate; + __entry->newstate = newstate; + + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + + p32 = (__be32 *) __entry->saddr; + *p32 = inet->inet_saddr; + + p32 = (__be32 *) __entry->daddr; + *p32 = inet->inet_daddr; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + pin6 = (struct in6_addr *)__entry->saddr_v6; + *pin6 = sk->sk_v6_rcv_saddr; + pin6 = (struct in6_addr *)__entry->daddr_v6; + *pin6 = sk->sk_v6_daddr; + } else +#endif + { + pin6 = (struct in6_addr *)__entry->saddr_v6; + ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); + pin6 = (struct in6_addr *)__entry->daddr_v6; + ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); + } + ), + + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", + __entry->sport, __entry->dport, + __entry->saddr, __entry->daddr, + __entry->saddr_v6, __entry->daddr_v6, + show_tcp_state_name(__entry->oldstate), + show_tcp_state_name(__entry->newstate)) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8b1fa4dd4538..be07e9b6dbdd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,6 +282,8 @@ #include #include +#include + int sysctl_tcp_min_tso_segs __read_mostly = 2; int sysctl_tcp_autocorking __read_mostly = 1; @@ -2040,6 +2042,8 @@ void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; + trace_tcp_set_state(sk, oldstate, state); + switch (state) { case TCP_ESTABLISHED: if (oldstate != TCP_ESTABLISHED) From b66e907cfee240a09a4b5aabf950a0d4c8da8d32 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:05 -0700 Subject: [PATCH 1321/2177] tools: bpftool: copy JSON writer from iproute2 repository In prevision of following 
commits, supposed to add JSON output to the tool, two files are copied from the iproute2 repository (taken at commit 268a9eee985f): lib/json_writer.c and include/json_writer.h. Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/json_writer.c | 348 ++++++++++++++++++++++++++++++++ tools/bpf/bpftool/json_writer.h | 70 +++++++ 2 files changed, 418 insertions(+) create mode 100644 tools/bpf/bpftool/json_writer.c create mode 100644 tools/bpf/bpftool/json_writer.h diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c new file mode 100644 index 000000000000..6b77d288cce2 --- /dev/null +++ b/tools/bpf/bpftool/json_writer.c @@ -0,0 +1,348 @@ +/* + * Simple streaming JSON writer + * + * This takes care of the annoying bits of JSON syntax like the commas + * after elements + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Stephen Hemminger + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "json_writer.h" + +struct json_writer { + FILE *out; /* output file */ + unsigned depth; /* nesting */ + bool pretty; /* optional whitepace */ + char sep; /* either nul or comma */ +}; + +/* indentation for pretty print */ +static void jsonw_indent(json_writer_t *self) +{ + unsigned i; + for (i = 0; i < self->depth; ++i) + fputs(" ", self->out); +} + +/* end current line and indent if pretty printing */ +static void jsonw_eol(json_writer_t *self) +{ + if (!self->pretty) + return; + + putc('\n', self->out); + jsonw_indent(self); +} + +/* If current object is not empty print a comma */ +static void jsonw_eor(json_writer_t *self) +{ + if (self->sep != '\0') + putc(self->sep, self->out); + self->sep = ','; +} + + +/* Output JSON encoded string */ +/* Handles C escapes, does not do Unicode */ +static void jsonw_puts(json_writer_t *self, const char *str) +{ + putc('"', self->out); + for (; *str; ++str) + switch (*str) { + case '\t': + fputs("\\t", self->out); + break; + case '\n': + fputs("\\n", self->out); + break; + case '\r': + fputs("\\r", self->out); + break; + case '\f': + fputs("\\f", self->out); + break; + case '\b': + fputs("\\b", self->out); + break; + case '\\': + fputs("\\n", self->out); + break; + case '"': + fputs("\\\"", self->out); + break; + case '\'': + fputs("\\\'", self->out); + break; + default: + putc(*str, self->out); + } + putc('"', self->out); +} + +/* Create a new JSON stream */ +json_writer_t *jsonw_new(FILE *f) +{ + json_writer_t *self = malloc(sizeof(*self)); + if (self) { + self->out = f; + self->depth = 0; + self->pretty = false; + self->sep = '\0'; + } + return self; +} + +/* End output to JSON stream */ +void jsonw_destroy(json_writer_t **self_p) +{ + json_writer_t *self = *self_p; + + assert(self->depth == 0); + fputs("\n", self->out); + fflush(self->out); + free(self); + *self_p = NULL; +} + +void jsonw_pretty(json_writer_t *self, bool on) +{ + self->pretty = on; +} + +/* Basic blocks */ +static void jsonw_begin(json_writer_t *self, int c) +{ + jsonw_eor(self); + putc(c, self->out); + ++self->depth; + self->sep = '\0'; +} + +static void jsonw_end(json_writer_t *self, int c) +{ + assert(self->depth > 0); + + --self->depth; + if (self->sep != '\0') + jsonw_eol(self); + putc(c, self->out); + self->sep = ','; +} + + +/* Add a JSON property name */ 
+void jsonw_name(json_writer_t *self, const char *name) +{ + jsonw_eor(self); + jsonw_eol(self); + self->sep = '\0'; + jsonw_puts(self, name); + putc(':', self->out); + if (self->pretty) + putc(' ', self->out); +} + +void jsonw_printf(json_writer_t *self, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + jsonw_eor(self); + vfprintf(self->out, fmt, ap); + va_end(ap); +} + +/* Collections */ +void jsonw_start_object(json_writer_t *self) +{ + jsonw_begin(self, '{'); +} + +void jsonw_end_object(json_writer_t *self) +{ + jsonw_end(self, '}'); +} + +void jsonw_start_array(json_writer_t *self) +{ + jsonw_begin(self, '['); +} + +void jsonw_end_array(json_writer_t *self) +{ + jsonw_end(self, ']'); +} + +/* JSON value types */ +void jsonw_string(json_writer_t *self, const char *value) +{ + jsonw_eor(self); + jsonw_puts(self, value); +} + +void jsonw_bool(json_writer_t *self, bool val) +{ + jsonw_printf(self, "%s", val ? "true" : "false"); +} + +void jsonw_null(json_writer_t *self) +{ + jsonw_printf(self, "null"); +} + +void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num) +{ + jsonw_printf(self, fmt, num); +} + +#ifdef notused +void jsonw_float(json_writer_t *self, double num) +{ + jsonw_printf(self, "%g", num); +} +#endif + +void jsonw_hu(json_writer_t *self, unsigned short num) +{ + jsonw_printf(self, "%hu", num); +} + +void jsonw_uint(json_writer_t *self, uint64_t num) +{ + jsonw_printf(self, "%"PRIu64, num); +} + +void jsonw_lluint(json_writer_t *self, unsigned long long int num) +{ + jsonw_printf(self, "%llu", num); +} + +void jsonw_int(json_writer_t *self, int64_t num) +{ + jsonw_printf(self, "%"PRId64, num); +} + +/* Basic name/value objects */ +void jsonw_string_field(json_writer_t *self, const char *prop, const char *val) +{ + jsonw_name(self, prop); + jsonw_string(self, val); +} + +void jsonw_bool_field(json_writer_t *self, const char *prop, bool val) +{ + jsonw_name(self, prop); + jsonw_bool(self, val); +} + +#ifdef notused +void jsonw_float_field(json_writer_t *self, const char *prop, double val) +{ + jsonw_name(self, prop); + jsonw_float(self, val); +} +#endif + +void jsonw_float_field_fmt(json_writer_t *self, + const char *prop, + const char *fmt, + double val) +{ + jsonw_name(self, prop); + jsonw_float_fmt(self, fmt, val); +} + +void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num) +{ + jsonw_name(self, prop); + jsonw_uint(self, num); +} + +void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num) +{ + jsonw_name(self, prop); + jsonw_hu(self, num); +} + +void jsonw_lluint_field(json_writer_t *self, + const char *prop, + unsigned long long int num) +{ + jsonw_name(self, prop); + jsonw_lluint(self, num); +} + +void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num) +{ + jsonw_name(self, prop); + jsonw_int(self, num); +} + +void jsonw_null_field(json_writer_t *self, const char *prop) +{ + jsonw_name(self, prop); + jsonw_null(self); +} + +#ifdef TEST +int main(int argc, char **argv) +{ + json_writer_t *wr = jsonw_new(stdout); + + jsonw_start_object(wr); + jsonw_pretty(wr, true); + jsonw_name(wr, "Vyatta"); + jsonw_start_object(wr); + jsonw_string_field(wr, "url", "http://vyatta.com"); + jsonw_uint_field(wr, "downloads", 2000000ul); + jsonw_float_field(wr, "stock", 8.16); + + jsonw_name(wr, "ARGV"); + jsonw_start_array(wr); + while (--argc) + jsonw_string(wr, *++argv); + jsonw_end_array(wr); + + jsonw_name(wr, "empty"); + jsonw_start_array(wr); + jsonw_end_array(wr); + + jsonw_name(wr, "NIL"); + 
jsonw_start_object(wr); + jsonw_end_object(wr); + + jsonw_null_field(wr, "my_null"); + + jsonw_name(wr, "special chars"); + jsonw_start_array(wr); + jsonw_string_field(wr, "slash", "/"); + jsonw_string_field(wr, "newline", "\n"); + jsonw_string_field(wr, "tab", "\t"); + jsonw_string_field(wr, "ff", "\f"); + jsonw_string_field(wr, "quote", "\""); + jsonw_string_field(wr, "tick", "\'"); + jsonw_string_field(wr, "backslash", "\\"); + jsonw_end_array(wr); + + jsonw_end_object(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +#endif diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h new file mode 100644 index 000000000000..1516aafba59d --- /dev/null +++ b/tools/bpf/bpftool/json_writer.h @@ -0,0 +1,70 @@ +/* + * Simple streaming JSON writer + * + * This takes care of the annoying bits of JSON syntax like the commas + * after elements + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Stephen Hemminger + */ + +#ifndef _JSON_WRITER_H_ +#define _JSON_WRITER_H_ + +#include +#include + +/* Opaque class structure */ +typedef struct json_writer json_writer_t; + +/* Create a new JSON stream */ +json_writer_t *jsonw_new(FILE *f); +/* End output to JSON stream */ +void jsonw_destroy(json_writer_t **self_p); + +/* Cause output to have pretty whitespace */ +void jsonw_pretty(json_writer_t *self, bool on); + +/* Add property name */ +void jsonw_name(json_writer_t *self, const char *name); + +/* Add value */ +void jsonw_printf(json_writer_t *self, const char *fmt, ...); +void jsonw_string(json_writer_t *self, const char *value); +void jsonw_bool(json_writer_t *self, bool value); +void jsonw_float(json_writer_t *self, double number); +void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num); +void jsonw_uint(json_writer_t *self, uint64_t number); +void jsonw_hu(json_writer_t *self, unsigned short number); +void jsonw_int(json_writer_t *self, int64_t number); +void jsonw_null(json_writer_t *self); +void jsonw_lluint(json_writer_t *self, unsigned long long int num); + +/* Useful Combinations of name and value */ +void jsonw_string_field(json_writer_t *self, const char *prop, const char *val); +void jsonw_bool_field(json_writer_t *self, const char *prop, bool value); +void jsonw_float_field(json_writer_t *self, const char *prop, double num); +void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num); +void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num); +void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num); +void jsonw_null_field(json_writer_t *self, const char *prop); +void jsonw_lluint_field(json_writer_t *self, const char *prop, + unsigned long long int num); +void jsonw_float_field_fmt(json_writer_t *self, const char *prop, + const char *fmt, double val); + +/* Collections */ +void jsonw_start_object(json_writer_t *self); +void jsonw_end_object(json_writer_t *self); + +void jsonw_start_array(json_writer_t *self); +void jsonw_end_array(json_writer_t *self); + +/* Override default exception handling */ +typedef void (jsonw_err_handler_fn)(const char *); + +#endif /* _JSON_WRITER_H_ */ From a2bc2e5c2c0604bf5366b5e56ef46335adaf7491 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:06 -0700 Subject: [PATCH 1322/2177] tools: bpftool: add option 
parsing to bpftool, --help and --version Add an option parsing facility to bpftool, in prevision of future options for demanding JSON output. Currently, two options are added: --help and --version, that act the same as the respective commands `help` and `version`. Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- .../bpf/bpftool/Documentation/bpftool-map.rst | 8 ++++++ .../bpftool/Documentation/bpftool-prog.rst | 8 ++++++ tools/bpf/bpftool/Documentation/bpftool.rst | 8 ++++++ tools/bpf/bpftool/main.c | 27 ++++++++++++++++++- 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index ff63e89e4b6c..5210c4fab356 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -68,6 +68,14 @@ DESCRIPTION **bpftool map help** Print short help message. +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + EXAMPLES ======== **# bpftool map show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 69b3770370c8..6620a81d9dc9 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -50,6 +50,14 @@ DESCRIPTION **bpftool prog help** Print short help message. +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + EXAMPLES ======== **# bpftool prog show** diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 45ad8baf1915..9c04cd6677bd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -31,6 +31,14 @@ DESCRIPTION Note that format of the output of all tools is not guaranteed to be stable and should not be depended upon. +OPTIONS +======= + -h, --help + Print short help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + SEE ALSO ======== **bpftool-map**\ (8), **bpftool-prog**\ (8) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 814d19e1b53f..613e3c75f78a 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -215,8 +216,32 @@ err_close: int main(int argc, char **argv) { + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { 0 } + }; + int opt; + + last_do_help = do_help; bin_name = argv[0]; - NEXT_ARG(); + + while ((opt = getopt_long(argc, argv, "Vh", + options, NULL)) >= 0) { + switch (opt) { + case 'V': + return do_version(argc, argv); + case 'h': + return do_help(argc, argv); + default: + usage(); + } + } + + argc -= optind; + argv += optind; + if (argc < 0) + usage(); bfd_init(); From d35efba99d9221d9fe1715a23247ad9b703544ec Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:07 -0700 Subject: [PATCH 1323/2177] tools: bpftool: introduce --json and --pretty options These two options can be used to ask for a JSON output (--j or -json), and to make this JSON human-readable (-p or --pretty). 
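As a rough illustration of the intended usage (assuming the flag semantics described here, combined with the `prog show` and `map show` subcommands documented earlier in this series):

    $ bpftool --json prog show     # compact JSON on stdout
    $ bpftool -p map show          # --pretty implies --json, indented output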
A json_writer object is created when JSON is required, and will be used in follow-up commits to produce JSON output. Note that --pretty implies --json. Update for the manual pages and interactive help messages comes in a later patch of the series. Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/main.c | 33 ++++++++++++++++++++++++++++++--- tools/bpf/bpftool/main.h | 5 +++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 613e3c75f78a..14bfc17cd4de 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -51,6 +51,9 @@ const char *bin_name; static int last_argc; static char **last_argv; static int (*last_do_help)(int argc, char **argv); +json_writer_t *json_wtr; +bool pretty_output; +bool json_output; void usage(void) { @@ -217,22 +220,32 @@ err_close: int main(int argc, char **argv) { static const struct option options[] = { + { "json", no_argument, NULL, 'j' }, { "help", no_argument, NULL, 'h' }, + { "pretty", no_argument, NULL, 'p' }, { "version", no_argument, NULL, 'V' }, { 0 } }; - int opt; + int opt, ret; last_do_help = do_help; + pretty_output = false; + json_output = false; bin_name = argv[0]; - while ((opt = getopt_long(argc, argv, "Vh", + while ((opt = getopt_long(argc, argv, "Vhpj", options, NULL)) >= 0) { switch (opt) { case 'V': return do_version(argc, argv); case 'h': return do_help(argc, argv); + case 'p': + pretty_output = true; + /* fall through */ + case 'j': + json_output = true; + break; default: usage(); } @@ -243,7 +256,21 @@ int main(int argc, char **argv) if (argc < 0) usage(); + if (json_output) { + json_wtr = jsonw_new(stdout); + if (!json_wtr) { + err("failed to create JSON writer\n"); + return -1; + } + jsonw_pretty(json_wtr, pretty_output); + } + bfd_init(); - return cmd_select(cmds, argc, argv, do_help); + ret = cmd_select(cmds, argc, argv, do_help); + + if (json_output) + jsonw_destroy(&json_wtr); + + return ret; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 41e6c7d3fcad..15927fc9fb31 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -43,6 +43,8 @@ #include #include +#include "json_writer.h" + #define err(msg...) fprintf(stderr, "Error: " msg) #define warn(msg...) fprintf(stderr, "Warning: " msg) #define info(msg...) fprintf(stderr, msg) @@ -66,6 +68,9 @@ enum bpf_obj_type { extern const char *bin_name; +extern json_writer_t *json_wtr; +extern bool json_output; + bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __attribute__((noreturn)); From 743cc665d5f62d2c75eceb59c461e653ad6ea58c Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:08 -0700 Subject: [PATCH 1324/2177] tools: bpftool: add JSON output for `bpftool prog show *` command Reuse the json_writer API introduced in an earlier commit to make bpftool able to generate JSON output on `bpftool prog show *` commands. For readability, the code from show_prog() has been split into two functions, one for plain output, one for JSON. Outputs from sample programs have been successfully tested against a JSON validator. Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/prog.c | 147 +++++++++++++++++++++++++++++---------- 1 file changed, 111 insertions(+), 36 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 7838206a455b..f373f2baef5a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -195,51 +195,100 @@ static void show_prog_maps(int fd, u32 num_maps) if (err || !info.nr_map_ids) return; - printf(" map_ids "); - for (i = 0; i < info.nr_map_ids; i++) - printf("%u%s", map_ids[i], - i == info.nr_map_ids - 1 ? "" : ","); + if (json_output) { + jsonw_name(json_wtr, "map_ids"); + jsonw_start_array(json_wtr); + for (i = 0; i < info.nr_map_ids; i++) + jsonw_uint(json_wtr, map_ids[i]); + jsonw_end_array(json_wtr); + } else { + printf(" map_ids "); + for (i = 0; i < info.nr_map_ids; i++) + printf("%u%s", map_ids[i], + i == info.nr_map_ids - 1 ? "" : ","); + } } -static int show_prog(int fd) +static void print_prog_json(struct bpf_prog_info *info, int fd) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); char *memlock; - int err; - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - err("can't get prog info: %s\n", strerror(errno)); - return -1; - } - - printf("%u: ", info.id); - if (info.type < ARRAY_SIZE(prog_type_name)) - printf("%s ", prog_type_name[info.type]); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "id", info->id); + if (info->type < ARRAY_SIZE(prog_type_name)) + jsonw_string_field(json_wtr, "type", + prog_type_name[info->type]); else - printf("type %u ", info.type); + jsonw_uint_field(json_wtr, "type", info->type); - if (*info.name) - printf("name %s ", info.name); + if (*info->name) + jsonw_string_field(json_wtr, "name", info->name); - printf("tag "); - fprint_hex(stdout, info.tag, BPF_TAG_SIZE, ""); - printf("\n"); + jsonw_name(json_wtr, "tag"); + jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"", + info->tag[0], info->tag[1], info->tag[2], info->tag[3], + info->tag[4], info->tag[5], info->tag[6], info->tag[7]); - if (info.load_time) { + if (info->load_time) { char buf[32]; - print_boot_time(info.load_time, buf, sizeof(buf)); + print_boot_time(info->load_time, buf, sizeof(buf)); /* Piggy back on load_time, since 0 uid is a valid one */ - printf("\tloaded_at %s uid %u\n", buf, info.created_by_uid); + jsonw_string_field(json_wtr, "loaded_at", buf); + jsonw_uint_field(json_wtr, "uid", info->created_by_uid); } - printf("\txlated %uB", info.xlated_prog_len); + jsonw_uint_field(json_wtr, "bytes_xlated", info->xlated_prog_len); - if (info.jited_prog_len) - printf(" jited %uB", info.jited_prog_len); + if (info->jited_prog_len) { + jsonw_bool_field(json_wtr, "jited", true); + jsonw_uint_field(json_wtr, "bytes_jited", info->jited_prog_len); + } else { + jsonw_bool_field(json_wtr, "jited", false); + } + + memlock = get_fdinfo(fd, "memlock"); + if (memlock) + jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock)); + free(memlock); + + if (info->nr_map_ids) + show_prog_maps(fd, info->nr_map_ids); + + jsonw_end_object(json_wtr); +} + +static void print_prog_plain(struct bpf_prog_info *info, int fd) +{ + char *memlock; + + printf("%u: ", info->id); + if (info->type < ARRAY_SIZE(prog_type_name)) + printf("%s ", prog_type_name[info->type]); + else + printf("type %u ", info->type); + + if (*info->name) + printf("name %s ", info->name); + + printf("tag "); + fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); + printf("\n"); + + if (info->load_time) { + char buf[32]; + + print_boot_time(info->load_time, buf, sizeof(buf)); + + /* Piggy back on 
load_time, since 0 uid is a valid one */ + printf("\tloaded_at %s uid %u\n", buf, info->created_by_uid); + } + + printf("\txlated %uB", info->xlated_prog_len); + + if (info->jited_prog_len) + printf(" jited %uB", info->jited_prog_len); else printf(" not jited"); @@ -248,16 +297,35 @@ static int show_prog(int fd) printf(" memlock %sB", memlock); free(memlock); - if (info.nr_map_ids) - show_prog_maps(fd, info.nr_map_ids); + if (info->nr_map_ids) + show_prog_maps(fd, info->nr_map_ids); printf("\n"); +} + +static int show_prog(int fd) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + int err; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err("can't get prog info: %s\n", strerror(errno)); + return -1; + } + + if (json_output) + print_prog_json(&info, fd); + else + print_prog_plain(&info, fd); return 0; } static int do_show(int argc, char **argv) -{ __u32 id = 0; +{ + __u32 id = 0; int err; int fd; @@ -272,6 +340,8 @@ static int do_show(int argc, char **argv) if (argc) return BAD_ARG(); + if (json_output) + jsonw_start_array(json_wtr); while (true) { err = bpf_prog_get_next_id(id, &id); if (err) { @@ -282,23 +352,28 @@ static int do_show(int argc, char **argv) err("can't get next program: %s\n", strerror(errno)); if (errno == EINVAL) err("kernel too old?\n"); - return -1; + err = -1; + break; } fd = bpf_prog_get_fd_by_id(id); if (fd < 0) { err("can't get prog by id (%u): %s\n", id, strerror(errno)); - return -1; + err = -1; + break; } err = show_prog(fd); close(fd); if (err) - return err; + break; } - return 0; + if (json_output) + jsonw_end_array(json_wtr); + + return err; } static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) From 107f041212c1dfd3bf72b01c0d2013e98b6f32c2 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:09 -0700 Subject: [PATCH 1325/2177] tools: bpftool: add JSON output for `bpftool prog dump jited *` command Reuse the json_writer API introduced in an earlier commit to make bpftool able to generate JSON output on `bpftool prog show *` commands. A new printing function is created to be passed as an argument to the disassembler. Similarly to plain output, opcodes are printed on request. Outputs from sample programs have been successfully tested against a JSON validator. Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/jit_disasm.c | 86 +++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 70e480b59e9d..5937e134e408 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -10,6 +10,7 @@ * Licensed under the GNU General Public License, version 2.0 (GPLv2) */ +#include #include #include #include @@ -21,6 +22,9 @@ #include #include +#include "json_writer.h" +#include "main.h" + static void get_exec_path(char *tpath, size_t size) { ssize_t len; @@ -39,6 +43,38 @@ static void get_exec_path(char *tpath, size_t size) free(path); } +static int oper_count; +static int fprintf_json(void *out, const char *fmt, ...) 
+{ + va_list ap; + char *s; + + va_start(ap, fmt); + if (!oper_count) { + int i; + + s = va_arg(ap, char *); + + /* Strip trailing spaces */ + i = strlen(s) - 1; + while (s[i] == ' ') + s[i--] = '\0'; + + jsonw_string_field(json_wtr, "operation", s); + jsonw_name(json_wtr, "operands"); + jsonw_start_array(json_wtr); + oper_count++; + } else if (!strcmp(fmt, ",")) { + /* Skip */ + } else { + s = va_arg(ap, char *); + jsonw_string(json_wtr, s); + oper_count++; + } + va_end(ap); + return 0; +} + void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) { disassembler_ftype disassemble; @@ -57,7 +93,12 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) assert(bfdf); assert(bfd_check_format(bfdf, bfd_object)); - init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + if (json_output) + init_disassemble_info(&info, stdout, + (fprintf_ftype) fprintf_json); + else + init_disassemble_info(&info, stdout, + (fprintf_ftype) fprintf); info.arch = bfd_get_arch(bfdf); info.mach = bfd_get_mach(bfdf); info.buffer = image; @@ -68,20 +109,53 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) disassemble = disassembler(bfdf); assert(disassemble); + if (json_output) + jsonw_start_array(json_wtr); do { - printf("%4x:\t", pc); + if (json_output) { + jsonw_start_object(json_wtr); + oper_count = 0; + jsonw_name(json_wtr, "pc"); + jsonw_printf(json_wtr, "\"0x%x\"", pc); + } else { + printf("%4x:\t", pc); + } count = disassemble(pc, &info); + if (json_output) { + /* Operand array, was started in fprintf_json. Before + * that, make sure we have a _null_ value if no operand + * other than operation code was present. + */ + if (oper_count == 1) + jsonw_null(json_wtr); + jsonw_end_array(json_wtr); + } if (opcodes) { - printf("\n\t"); - for (i = 0; i < count; ++i) - printf("%02x ", (uint8_t) image[pc + i]); + if (json_output) { + jsonw_name(json_wtr, "opcodes"); + jsonw_start_array(json_wtr); + for (i = 0; i < count; ++i) + jsonw_printf(json_wtr, "\"0x%02hhx\"", + (uint8_t)image[pc + i]); + jsonw_end_array(json_wtr); + } else { + printf("\n\t"); + for (i = 0; i < count; ++i) + printf("%02x ", + (uint8_t)image[pc + i]); + } } - printf("\n"); + if (json_output) + jsonw_end_object(json_wtr); + else + printf("\n"); pc += count; } while (count > 0 && pc < len); + if (json_output) + jsonw_end_array(json_wtr); bfd_close(bfdf); } From f05e2c32f715985f54265b1e237b5cce1b576c71 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:10 -0700 Subject: [PATCH 1326/2177] tools: bpftool: add JSON output for `bpftool prog dump xlated *` command Add a new printing function to dump translated eBPF instructions as JSON. As for plain output, opcodes are printed only on request (when `opcodes` is provided on the command line). The disassembled output is generated by the same code that is used by the kernel verifier. 
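Because the result is a plain JSON array of per-instruction objects, it lends itself to post-processing with standard JSON tools; for instance, assuming a loaded program with id 1, something like

    $ bpftool --json prog dump xlated id 1 | jq -r '.[].disasm'

would print only the disassembled instructions, one per line.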
Example output: $ bpftool --json --pretty prog dump xlated id 1 [{ "disasm": "(bf) r6 = r1" },{ "disasm": "(61) r7 = *(u32 *)(r6 +16)" },{ "disasm": "(95) exit" } ] $ bpftool --json --pretty prog dump xlated id 1 opcodes [{ "disasm": "(bf) r6 = r1", "opcodes": { "code": "0xbf", "src_reg": "0x1", "dst_reg": "0x6", "off": ["0x00","0x00" ], "imm": ["0x00","0x00","0x00","0x00" ] } },{ "disasm": "(61) r7 = *(u32 *)(r6 +16)", "opcodes": { "code": "0x61", "src_reg": "0x6", "dst_reg": "0x7", "off": ["0x10","0x00" ], "imm": ["0x00","0x00","0x00","0x00" ] } },{ "disasm": "(95) exit", "opcodes": { "code": "0x95", "src_reg": "0x0", "dst_reg": "0x0", "off": ["0x00","0x00" ], "imm": ["0x00","0x00","0x00","0x00" ] } } ] Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/common.c | 10 +++++ tools/bpf/bpftool/json_writer.c | 8 ++++ tools/bpf/bpftool/json_writer.h | 2 + tools/bpf/bpftool/main.h | 1 + tools/bpf/bpftool/prog.c | 70 ++++++++++++++++++++++++++++++++- 5 files changed, 89 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index df8396a0c400..e7a756b8ee21 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -214,3 +214,13 @@ char *get_fdinfo(int fd, const char *key) fclose(fdi); return NULL; } + +void print_hex_data_json(uint8_t *data, size_t len) +{ + unsigned int i; + + jsonw_start_array(json_wtr); + for (i = 0; i < len; i++) + jsonw_printf(json_wtr, "\"0x%02hhx\"", data[i]); + jsonw_end_array(json_wtr); +} diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index 6b77d288cce2..c6eef76322ae 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -156,6 +156,14 @@ void jsonw_name(json_writer_t *self, const char *name) putc(' ', self->out); } +void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) +{ + jsonw_eor(self); + putc('"', self->out); + vfprintf(self->out, fmt, ap); + putc('"', self->out); +} + void jsonw_printf(json_writer_t *self, const char *fmt, ...) { va_list ap; diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h index 1516aafba59d..0fa2fb1b6351 100644 --- a/tools/bpf/bpftool/json_writer.h +++ b/tools/bpf/bpftool/json_writer.h @@ -17,6 +17,7 @@ #include #include +#include /* Opaque class structure */ typedef struct json_writer json_writer_t; @@ -33,6 +34,7 @@ void jsonw_pretty(json_writer_t *self, bool on); void jsonw_name(json_writer_t *self, const char *name); /* Add value */ +void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap); void jsonw_printf(json_writer_t *self, const char *fmt, ...); void jsonw_string(json_writer_t *self, const char *value); void jsonw_bool(json_writer_t *self, bool value); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 15927fc9fb31..693fc9710be1 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -95,5 +95,6 @@ int do_map(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); +void print_hex_data_json(uint8_t *data, size_t len); #endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f373f2baef5a..43e49799a624 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -385,7 +385,7 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) 
va_end(args); } -static void dump_xlated(void *buf, unsigned int len, bool opcodes) +static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) { struct bpf_insn *insn = buf; bool double_insn = false; @@ -414,6 +414,69 @@ static void dump_xlated(void *buf, unsigned int len, bool opcodes) } } +static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...) +{ + unsigned int l = strlen(fmt); + char chomped_fmt[l]; + va_list args; + + va_start(args, fmt); + if (l > 0) { + strncpy(chomped_fmt, fmt, l - 1); + chomped_fmt[l - 1] = '\0'; + } + jsonw_vprintf_enquote(json_wtr, chomped_fmt, args); + va_end(args); +} + +static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) +{ + struct bpf_insn *insn = buf; + bool double_insn = false; + unsigned int i; + + jsonw_start_array(json_wtr); + for (i = 0; i < len / sizeof(*insn); i++) { + if (double_insn) { + double_insn = false; + continue; + } + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); + + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "disasm"); + print_bpf_insn(print_insn_json, NULL, insn + i, true); + + if (opcodes) { + jsonw_name(json_wtr, "opcodes"); + jsonw_start_object(json_wtr); + + jsonw_name(json_wtr, "code"); + jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code); + + jsonw_name(json_wtr, "src_reg"); + jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg); + + jsonw_name(json_wtr, "dst_reg"); + jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg); + + jsonw_name(json_wtr, "off"); + print_hex_data_json((uint8_t *)(&insn[i].off), 2); + + jsonw_name(json_wtr, "imm"); + if (double_insn && i < len - 1) + print_hex_data_json((uint8_t *)(&insn[i].imm), + 12); + else + print_hex_data_json((uint8_t *)(&insn[i].imm), + 4); + jsonw_end_object(json_wtr); + } + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); +} + static int do_dump(int argc, char **argv) { struct bpf_prog_info info = {}; @@ -523,7 +586,10 @@ static int do_dump(int argc, char **argv) if (member_len == &info.jited_prog_len) disasm_print_insn(buf, *member_len, opcodes); else - dump_xlated(buf, *member_len, opcodes); + if (json_output) + dump_xlated_json(buf, *member_len, opcodes); + else + dump_xlated_plain(buf, *member_len, opcodes); } free(buf); From 831a0aafe5c39405150d47994fe0a5bcd78fbadc Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:11 -0700 Subject: [PATCH 1327/2177] tools: bpftool: add JSON output for `bpftool map *` commands Reuse the json_writer API introduced in an earlier commit to make bpftool able to generate JSON output on `bpftool map { show | dump | lookup | getnext }` commands. Remaining commands produce no output. Some functions have been spit into plain-output and JSON versions in order to remain readable. Outputs for sample maps have been successfully tested against a JSON validator. Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/map.c | 149 ++++++++++++++++++++++++++++++++++------ 1 file changed, 129 insertions(+), 20 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e1004d825392..14d89bfabc66 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -205,8 +205,45 @@ map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } -static void print_entry(struct bpf_map_info *info, unsigned char *key, - unsigned char *value) +static void print_entry_json(struct bpf_map_info *info, unsigned char *key, + unsigned char *value) +{ + jsonw_start_object(json_wtr); + + if (!map_is_per_cpu(info->type)) { + jsonw_name(json_wtr, "key"); + print_hex_data_json(key, info->key_size); + jsonw_name(json_wtr, "value"); + print_hex_data_json(value, info->value_size); + } else { + unsigned int i, n; + + n = get_possible_cpus(); + + jsonw_name(json_wtr, "key"); + print_hex_data_json(key, info->key_size); + + jsonw_name(json_wtr, "values"); + jsonw_start_array(json_wtr); + for (i = 0; i < n; i++) { + jsonw_start_object(json_wtr); + + jsonw_int_field(json_wtr, "cpu", i); + + jsonw_name(json_wtr, "value"); + print_hex_data_json(value + i * info->value_size, + info->value_size); + + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); + } + + jsonw_end_object(json_wtr); +} + +static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, + unsigned char *value) { if (!map_is_per_cpu(info->type)) { bool single_line, break_names; @@ -370,7 +407,41 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; } -static int show_map_close(int fd, struct bpf_map_info *info) +static int show_map_close_json(int fd, struct bpf_map_info *info) +{ + char *memlock; + + memlock = get_fdinfo(fd, "memlock"); + close(fd); + + jsonw_start_object(json_wtr); + + jsonw_uint_field(json_wtr, "id", info->id); + if (info->type < ARRAY_SIZE(map_type_name)) + jsonw_string_field(json_wtr, "type", + map_type_name[info->type]); + else + jsonw_uint_field(json_wtr, "type", info->type); + + if (*info->name) + jsonw_string_field(json_wtr, "name", info->name); + + jsonw_name(json_wtr, "flags"); + jsonw_printf(json_wtr, "%#x", info->map_flags); + jsonw_uint_field(json_wtr, "bytes_key", info->key_size); + jsonw_uint_field(json_wtr, "bytes_value", info->value_size); + jsonw_uint_field(json_wtr, "max_entries", info->max_entries); + + if (memlock) + jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock)); + free(memlock); + + jsonw_end_object(json_wtr); + + return 0; +} + +static int show_map_close_plain(int fd, struct bpf_map_info *info) { char *memlock; @@ -412,12 +483,17 @@ static int do_show(int argc, char **argv) if (fd < 0) return -1; - return show_map_close(fd, &info); + if (json_output) + return show_map_close_json(fd, &info); + else + return show_map_close_plain(fd, &info); } if (argc) return BAD_ARG(); + if (json_output) + jsonw_start_array(json_wtr); while (true) { err = bpf_map_get_next_id(id, &id); if (err) { @@ -443,8 +519,13 @@ static int do_show(int argc, char **argv) return -1; } - show_map_close(fd, &info); + if (json_output) + show_map_close_json(fd, &info); + else + show_map_close_plain(fd, &info); } + if (json_output) + jsonw_end_array(json_wtr); return errno == ENOENT ? 
0 : -1; } @@ -480,6 +561,8 @@ static int do_dump(int argc, char **argv) } prev_key = NULL; + if (json_output) + jsonw_start_array(json_wtr); while (true) { err = bpf_map_get_next_key(fd, prev_key, key); if (err) { @@ -489,7 +572,10 @@ static int do_dump(int argc, char **argv) } if (!bpf_map_lookup_elem(fd, key, value)) { - print_entry(&info, key, value); + if (json_output) + print_entry_json(&info, key, value); + else + print_entry_plain(&info, key, value); } else { info("can't lookup element with key: "); fprint_hex(stderr, key, info.key_size, " "); @@ -500,7 +586,11 @@ static int do_dump(int argc, char **argv) num_elems++; } - printf("Found %u element%s\n", num_elems, num_elems != 1 ? "s" : ""); + if (json_output) + jsonw_end_array(json_wtr); + else + printf("Found %u element%s\n", num_elems, + num_elems != 1 ? "s" : ""); exit_free: free(key); @@ -584,11 +674,18 @@ static int do_lookup(int argc, char **argv) err = bpf_map_lookup_elem(fd, key, value); if (!err) { - print_entry(&info, key, value); + if (json_output) + print_entry_json(&info, key, value); + else + print_entry_plain(&info, key, value); } else if (errno == ENOENT) { - printf("key:\n"); - fprint_hex(stdout, key, info.key_size, " "); - printf("\n\nNot found\n"); + if (json_output) { + jsonw_null(json_wtr); + } else { + printf("key:\n"); + fprint_hex(stdout, key, info.key_size, " "); + printf("\n\nNot found\n"); + } } else { err("lookup failed: %s\n", strerror(errno)); } @@ -640,18 +737,30 @@ static int do_getnext(int argc, char **argv) goto exit_free; } - if (key) { - printf("key:\n"); - fprint_hex(stdout, key, info.key_size, " "); - printf("\n"); + if (json_output) { + jsonw_start_object(json_wtr); + if (key) { + jsonw_name(json_wtr, "key"); + print_hex_data_json(key, info.key_size); + } else { + jsonw_null_field(json_wtr, "key"); + } + jsonw_name(json_wtr, "next_key"); + print_hex_data_json(nextkey, info.key_size); + jsonw_end_object(json_wtr); } else { - printf("key: None\n"); + if (key) { + printf("key:\n"); + fprint_hex(stdout, key, info.key_size, " "); + printf("\n"); + } else { + printf("key: None\n"); + } + printf("next key:\n"); + fprint_hex(stdout, nextkey, info.key_size, " "); + printf("\n"); } - printf("next key:\n"); - fprint_hex(stdout, nextkey, info.key_size, " "); - printf("\n"); - exit_free: free(nextkey); free(key); From 3aaca6bf7a09150e4c87f2932dc8ebe82a586252 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:12 -0700 Subject: [PATCH 1328/2177] tools: bpftool: add JSON output for `bpftool batch file FILE` command `bpftool batch file FILE` takes FILE as an argument and executes all the bpftool commands it finds inside (or stops if an error occurs). To obtain a consistent JSON output, create a root JSON array, then for each command create a new object containing two fields: one with the command arguments, the other with the output (which is the JSON object that the command would have produced, if called on its own). Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 14bfc17cd4de..71b01bf73912 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -155,6 +155,7 @@ static int do_batch(int argc, char **argv) int n_argc; FILE *fp; int err; + int i; if (argc < 2) { err("too few parameters for batch\n"); @@ -174,6 +175,8 @@ static int do_batch(int argc, char **argv) return -1; } + if (json_output) + jsonw_start_array(json_wtr); while (fgets(buf, sizeof(buf), fp)) { if (strlen(buf) == sizeof(buf) - 1) { errno = E2BIG; @@ -197,7 +200,21 @@ static int do_batch(int argc, char **argv) if (!n_argc) continue; + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "command"); + jsonw_start_array(json_wtr); + for (i = 0; i < n_argc; i++) + jsonw_string(json_wtr, n_argv[i]); + jsonw_end_array(json_wtr); + jsonw_name(json_wtr, "output"); + } + err = cmd_select(cmds, n_argc, n_argv, do_help); + + if (json_output) + jsonw_end_object(json_wtr); + if (err) goto err_close; @@ -214,6 +231,9 @@ static int do_batch(int argc, char **argv) err_close: fclose(fp); + if (json_output) + jsonw_end_array(json_wtr); + return err; } From 9a5ab8bf1d6d16ef47fdf55dba1683ec00d751ad Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:13 -0700 Subject: [PATCH 1329/2177] tools: bpftool: turn err() and info() macros into functions Turn err() and info() macros into functions. In order to avoid naming conflicts with variables in the code, rename them as p_err() and p_info() respectively. The behavior of these functions is similar to the one of the macros for plain output. However, when JSON output is requested, these macros return a JSON-formatted "error" object instead of printing a message to stderr. To handle error messages correctly with JSON, a modification was brought to their behavior nonetheless: the functions now append a end-of-line character at the end of the message. This way, we can remove end-of-line characters at the end of the argument strings, and not have them in the JSON output. All error messages are formatted to hold in a single call to p_err(), in order to produce a single JSON field. Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/bpf/bpftool/common.c | 26 ++++++------ tools/bpf/bpftool/main.c | 16 +++---- tools/bpf/bpftool/main.h | 37 +++++++++++++--- tools/bpf/bpftool/map.c | 87 +++++++++++++++++++++----------------- tools/bpf/bpftool/prog.c | 51 +++++++++++----------- 5 files changed, 126 insertions(+), 91 deletions(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index e7a756b8ee21..b2533f1cae3e 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -66,8 +66,8 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) fd = bpf_obj_get(path); if (fd < 0) { - err("bpf obj get (%s): %s\n", path, - errno == EACCES && !is_bpffs(dirname(path)) ? + p_err("bpf obj get (%s): %s", path, + errno == EACCES && !is_bpffs(dirname(path)) ? 
"directory not in bpf file system (bpffs)" : strerror(errno)); return -1; @@ -79,7 +79,7 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return type; } if (type != exp_type) { - err("incorrect object type: %s\n", get_fd_type_name(type)); + p_err("incorrect object type: %s", get_fd_type_name(type)); close(fd); return -1; } @@ -95,14 +95,14 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) int fd; if (!is_prefix(*argv, "id")) { - err("expected 'id' got %s\n", *argv); + p_err("expected 'id' got %s", *argv); return -1; } NEXT_ARG(); id = strtoul(*argv, &endptr, 0); if (*endptr) { - err("can't parse %s as ID\n", *argv); + p_err("can't parse %s as ID", *argv); return -1; } NEXT_ARG(); @@ -112,15 +112,15 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) fd = get_fd_by_id(id); if (fd < 0) { - err("can't get prog by id (%u): %s\n", id, strerror(errno)); + p_err("can't get prog by id (%u): %s", id, strerror(errno)); return -1; } err = bpf_obj_pin(fd, *argv); close(fd); if (err) { - err("can't pin the object (%s): %s\n", *argv, - errno == EACCES && !is_bpffs(dirname(*argv)) ? + p_err("can't pin the object (%s): %s", *argv, + errno == EACCES && !is_bpffs(dirname(*argv)) ? "directory not in bpf file system (bpffs)" : strerror(errno)); return -1; @@ -153,11 +153,11 @@ int get_fd_type(int fd) n = readlink(path, buf, sizeof(buf)); if (n < 0) { - err("can't read link type: %s\n", strerror(errno)); + p_err("can't read link type: %s", strerror(errno)); return -1; } if (n == sizeof(path)) { - err("can't read link type: path too long!\n"); + p_err("can't read link type: path too long!"); return -1; } @@ -181,7 +181,7 @@ char *get_fdinfo(int fd, const char *key) fdi = fopen(path, "r"); if (!fdi) { - err("can't open fdinfo: %s\n", strerror(errno)); + p_err("can't open fdinfo: %s", strerror(errno)); return NULL; } @@ -196,7 +196,7 @@ char *get_fdinfo(int fd, const char *key) value = strchr(line, '\t'); if (!value || !value[1]) { - err("malformed fdinfo!?\n"); + p_err("malformed fdinfo!?"); free(line); return NULL; } @@ -209,7 +209,7 @@ char *get_fdinfo(int fd, const char *key) return line; } - err("key '%s' not found in fdinfo\n", key); + p_err("key '%s' not found in fdinfo", key); free(line); fclose(fdi); return NULL; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 71b01bf73912..9989a77fdc4a 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -158,20 +158,20 @@ static int do_batch(int argc, char **argv) int i; if (argc < 2) { - err("too few parameters for batch\n"); + p_err("too few parameters for batch"); return -1; } else if (!is_prefix(*argv, "file")) { - err("expected 'file', got: %s\n", *argv); + p_err("expected 'file', got: %s", *argv); return -1; } else if (argc > 2) { - err("too many parameters for batch\n"); + p_err("too many parameters for batch"); return -1; } NEXT_ARG(); fp = fopen(*argv, "r"); if (!fp) { - err("Can't open file (%s): %s\n", *argv, strerror(errno)); + p_err("Can't open file (%s): %s", *argv, strerror(errno)); return -1; } @@ -189,8 +189,8 @@ static int do_batch(int argc, char **argv) while (n_argv[n_argc]) { n_argc++; if (n_argc == ARRAY_SIZE(n_argv)) { - err("line %d has too many arguments, skip\n", - lines); + p_err("line %d has too many arguments, skip", + lines); n_argc = 0; break; } @@ -225,7 +225,7 @@ static int do_batch(int argc, char **argv) perror("reading batch file failed"); err = -1; } else { - info("processed %d lines\n", lines); + p_info("processed %d lines", lines); err = 
0; } err_close: @@ -279,7 +279,7 @@ int main(int argc, char **argv) if (json_output) { json_wtr = jsonw_new(stdout); if (!json_wtr) { - err("failed to create JSON writer\n"); + p_err("failed to create JSON writer"); return -1; } jsonw_pretty(json_wtr, pretty_output); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 693fc9710be1..04c88b55d8c7 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -45,15 +45,11 @@ #include "json_writer.h" -#define err(msg...) fprintf(stderr, "Error: " msg) -#define warn(msg...) fprintf(stderr, "Warning: " msg) -#define info(msg...) fprintf(stderr, msg) - #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) #define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) #define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) -#define BAD_ARG() ({ err("what is '%s'?\n", *argv); -1; }) +#define BAD_ARG() ({ p_err("what is '%s'?\n", *argv); -1; }) #define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" @@ -97,4 +93,35 @@ int prog_parse_fd(int *argc, char ***argv); void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); void print_hex_data_json(uint8_t *data, size_t len); +static inline void p_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "error"); + jsonw_vprintf_enquote(json_wtr, fmt, ap); + jsonw_end_object(json_wtr); + } else { + fprintf(stderr, "Error: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + } + va_end(ap); +} + +static inline void p_info(const char *fmt, ...) +{ + va_list ap; + + if (json_output) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 14d89bfabc66..86c128c433ba 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -81,19 +81,19 @@ static unsigned int get_possible_cpus(void) fd = open("/sys/devices/system/cpu/possible", O_RDONLY); if (fd < 0) { - err("can't open sysfs possible cpus\n"); + p_err("can't open sysfs possible cpus"); exit(-1); } n = read(fd, buf, sizeof(buf)); if (n < 2) { - err("can't read sysfs possible cpus\n"); + p_err("can't read sysfs possible cpus"); exit(-1); } close(fd); if (n == sizeof(buf)) { - err("read sysfs possible cpus overflow\n"); + p_err("read sysfs possible cpus overflow"); exit(-1); } @@ -161,14 +161,14 @@ static int map_parse_fd(int *argc, char ***argv) id = strtoul(**argv, &endptr, 0); if (*endptr) { - err("can't parse %s as ID\n", **argv); + p_err("can't parse %s as ID", **argv); return -1; } NEXT_ARGP(); fd = bpf_map_get_fd_by_id(id); if (fd < 0) - err("get map by id (%u): %s\n", id, strerror(errno)); + p_err("get map by id (%u): %s", id, strerror(errno)); return fd; } else if (is_prefix(**argv, "pinned")) { char *path; @@ -181,7 +181,7 @@ static int map_parse_fd(int *argc, char ***argv) return open_obj_pinned_any(path, BPF_OBJ_MAP); } - err("expected 'id' or 'pinned', got: '%s'?\n", **argv); + p_err("expected 'id' or 'pinned', got: '%s'?", **argv); return -1; } @@ -197,7 +197,7 @@ map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) err = bpf_obj_get_info_by_fd(fd, info, info_len); if (err) { - err("can't get map info: %s\n", strerror(errno)); + p_err("can't get map info: %s", strerror(errno)); close(fd); return err; } @@ -288,14 +288,14 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val, while (i < n && argv[i]) { 
val[i] = strtoul(argv[i], &endptr, 0); if (*endptr) { - err("error parsing byte: %s\n", argv[i]); + p_err("error parsing byte: %s", argv[i]); return NULL; } i++; } if (i != n) { - err("%s expected %d bytes got %d\n", name, n, i); + p_err("%s expected %d bytes got %d", name, n, i); return NULL; } @@ -309,16 +309,16 @@ static int parse_elem(char **argv, struct bpf_map_info *info, if (!*argv) { if (!key && !value) return 0; - err("did not find %s\n", key ? "key" : "value"); + p_err("did not find %s", key ? "key" : "value"); return -1; } if (is_prefix(*argv, "key")) { if (!key) { if (key_size) - err("duplicate key\n"); + p_err("duplicate key"); else - err("unnecessary key\n"); + p_err("unnecessary key"); return -1; } @@ -333,9 +333,9 @@ static int parse_elem(char **argv, struct bpf_map_info *info, if (!value) { if (value_size) - err("duplicate value\n"); + p_err("duplicate value"); else - err("unnecessary value\n"); + p_err("unnecessary value"); return -1; } @@ -345,11 +345,11 @@ static int parse_elem(char **argv, struct bpf_map_info *info, int argc = 2; if (value_size != 4) { - err("value smaller than 4B for map in map?\n"); + p_err("value smaller than 4B for map in map?"); return -1; } if (!argv[0] || !argv[1]) { - err("not enough value arguments for map in map\n"); + p_err("not enough value arguments for map in map"); return -1; } @@ -363,11 +363,11 @@ static int parse_elem(char **argv, struct bpf_map_info *info, int argc = 2; if (value_size != 4) { - err("value smaller than 4B for map of progs?\n"); + p_err("value smaller than 4B for map of progs?"); return -1; } if (!argv[0] || !argv[1]) { - err("not enough value arguments for map of progs\n"); + p_err("not enough value arguments for map of progs"); return -1; } @@ -388,7 +388,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, } else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") || is_prefix(*argv, "exist")) { if (!flags) { - err("flags specified multiple times: %s\n", *argv); + p_err("flags specified multiple times: %s", *argv); return -1; } @@ -403,7 +403,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, value_size, NULL, value_fd); } - err("expected key or value, got: %s\n", *argv); + p_err("expected key or value, got: %s", *argv); return -1; } @@ -499,22 +499,21 @@ static int do_show(int argc, char **argv) if (err) { if (errno == ENOENT) break; - err("can't get next map: %s\n", strerror(errno)); - if (errno == EINVAL) - err("kernel too old?\n"); + p_err("can't get next map: %s%s", strerror(errno), + errno == EINVAL ? " -- kernel too old?" 
: ""); return -1; } fd = bpf_map_get_fd_by_id(id); if (fd < 0) { - err("can't get map by id (%u): %s\n", - id, strerror(errno)); + p_err("can't get map by id (%u): %s", + id, strerror(errno)); return -1; } err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { - err("can't get map info: %s\n", strerror(errno)); + p_err("can't get map info: %s", strerror(errno)); close(fd); return -1; } @@ -547,7 +546,7 @@ static int do_dump(int argc, char **argv) return -1; if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) { - err("Dumping maps of maps and program maps not supported\n"); + p_err("Dumping maps of maps and program maps not supported"); close(fd); return -1; } @@ -555,7 +554,7 @@ static int do_dump(int argc, char **argv) key = malloc(info.key_size); value = alloc_value(&info); if (!key || !value) { - err("mem alloc failed\n"); + p_err("mem alloc failed"); err = -1; goto exit_free; } @@ -577,9 +576,19 @@ static int do_dump(int argc, char **argv) else print_entry_plain(&info, key, value); } else { - info("can't lookup element with key: "); - fprint_hex(stderr, key, info.key_size, " "); - fprintf(stderr, "\n"); + if (json_output) { + jsonw_name(json_wtr, "key"); + print_hex_data_json(key, info.key_size); + jsonw_name(json_wtr, "value"); + jsonw_start_object(json_wtr); + jsonw_string_field(json_wtr, "error", + "can't lookup element"); + jsonw_end_object(json_wtr); + } else { + p_info("can't lookup element with key: "); + fprint_hex(stderr, key, info.key_size, " "); + fprintf(stderr, "\n"); + } } prev_key = key; @@ -619,7 +628,7 @@ static int do_update(int argc, char **argv) key = malloc(info.key_size); value = alloc_value(&info); if (!key || !value) { - err("mem alloc failed"); + p_err("mem alloc failed"); err = -1; goto exit_free; } @@ -631,7 +640,7 @@ static int do_update(int argc, char **argv) err = bpf_map_update_elem(fd, key, value, flags); if (err) { - err("update failed: %s\n", strerror(errno)); + p_err("update failed: %s", strerror(errno)); goto exit_free; } @@ -663,7 +672,7 @@ static int do_lookup(int argc, char **argv) key = malloc(info.key_size); value = alloc_value(&info); if (!key || !value) { - err("mem alloc failed"); + p_err("mem alloc failed"); err = -1; goto exit_free; } @@ -687,7 +696,7 @@ static int do_lookup(int argc, char **argv) printf("\n\nNot found\n"); } } else { - err("lookup failed: %s\n", strerror(errno)); + p_err("lookup failed: %s", strerror(errno)); } exit_free: @@ -716,7 +725,7 @@ static int do_getnext(int argc, char **argv) key = malloc(info.key_size); nextkey = malloc(info.key_size); if (!key || !nextkey) { - err("mem alloc failed"); + p_err("mem alloc failed"); err = -1; goto exit_free; } @@ -733,7 +742,7 @@ static int do_getnext(int argc, char **argv) err = bpf_map_get_next_key(fd, key, nextkey); if (err) { - err("can't get next key: %s\n", strerror(errno)); + p_err("can't get next key: %s", strerror(errno)); goto exit_free; } @@ -786,7 +795,7 @@ static int do_delete(int argc, char **argv) key = malloc(info.key_size); if (!key) { - err("mem alloc failed"); + p_err("mem alloc failed"); err = -1; goto exit_free; } @@ -797,7 +806,7 @@ static int do_delete(int argc, char **argv) err = bpf_map_delete_elem(fd, key); if (err) - err("delete failed: %s\n", strerror(errno)); + p_err("delete failed: %s", strerror(errno)); exit_free: free(key); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 43e49799a624..41bd5390b4fc 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -104,21 +104,21 @@ static int 
prog_fd_by_tag(unsigned char *tag) while (true) { err = bpf_prog_get_next_id(id, &id); if (err) { - err("%s\n", strerror(errno)); + p_err("%s", strerror(errno)); return -1; } fd = bpf_prog_get_fd_by_id(id); if (fd < 0) { - err("can't get prog by id (%u): %s\n", - id, strerror(errno)); + p_err("can't get prog by id (%u): %s", + id, strerror(errno)); return -1; } err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { - err("can't get prog info (%u): %s\n", - id, strerror(errno)); + p_err("can't get prog info (%u): %s", + id, strerror(errno)); close(fd); return -1; } @@ -142,14 +142,14 @@ int prog_parse_fd(int *argc, char ***argv) id = strtoul(**argv, &endptr, 0); if (*endptr) { - err("can't parse %s as ID\n", **argv); + p_err("can't parse %s as ID", **argv); return -1; } NEXT_ARGP(); fd = bpf_prog_get_fd_by_id(id); if (fd < 0) - err("get by id (%u): %s\n", id, strerror(errno)); + p_err("get by id (%u): %s", id, strerror(errno)); return fd; } else if (is_prefix(**argv, "tag")) { unsigned char tag[BPF_TAG_SIZE]; @@ -159,7 +159,7 @@ int prog_parse_fd(int *argc, char ***argv) if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) != BPF_TAG_SIZE) { - err("can't parse tag\n"); + p_err("can't parse tag"); return -1; } NEXT_ARGP(); @@ -176,7 +176,7 @@ int prog_parse_fd(int *argc, char ***argv) return open_obj_pinned_any(path, BPF_OBJ_PROG); } - err("expected 'id', 'tag' or 'pinned', got: '%s'?\n", **argv); + p_err("expected 'id', 'tag' or 'pinned', got: '%s'?", **argv); return -1; } @@ -311,7 +311,7 @@ static int show_prog(int fd) err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { - err("can't get prog info: %s\n", strerror(errno)); + p_err("can't get prog info: %s", strerror(errno)); return -1; } @@ -349,17 +349,16 @@ static int do_show(int argc, char **argv) err = 0; break; } - err("can't get next program: %s\n", strerror(errno)); - if (errno == EINVAL) - err("kernel too old?\n"); + p_err("can't get next program: %s%s", strerror(errno), + errno == EINVAL ? " -- kernel too old?" 
: ""); err = -1; break; } fd = bpf_prog_get_fd_by_id(id); if (fd < 0) { - err("can't get prog by id (%u): %s\n", - id, strerror(errno)); + p_err("can't get prog by id (%u): %s", + id, strerror(errno)); err = -1; break; } @@ -498,7 +497,7 @@ static int do_dump(int argc, char **argv) member_len = &info.xlated_prog_len; member_ptr = &info.xlated_prog_insns; } else { - err("expected 'xlated' or 'jited', got: %s\n", *argv); + p_err("expected 'xlated' or 'jited', got: %s", *argv); return -1; } NEXT_ARG(); @@ -513,7 +512,7 @@ static int do_dump(int argc, char **argv) if (is_prefix(*argv, "file")) { NEXT_ARG(); if (!argc) { - err("expected file path\n"); + p_err("expected file path"); return -1; } @@ -531,12 +530,12 @@ static int do_dump(int argc, char **argv) err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { - err("can't get prog info: %s\n", strerror(errno)); + p_err("can't get prog info: %s", strerror(errno)); return -1; } if (!*member_len) { - info("no instructions returned\n"); + p_info("no instructions returned"); close(fd); return 0; } @@ -545,7 +544,7 @@ static int do_dump(int argc, char **argv) buf = malloc(buf_size); if (!buf) { - err("mem alloc failed\n"); + p_err("mem alloc failed"); close(fd); return -1; } @@ -558,28 +557,28 @@ static int do_dump(int argc, char **argv) err = bpf_obj_get_info_by_fd(fd, &info, &len); close(fd); if (err) { - err("can't get prog info: %s\n", strerror(errno)); + p_err("can't get prog info: %s", strerror(errno)); goto err_free; } if (*member_len > buf_size) { - err("too many instructions returned\n"); + p_err("too many instructions returned"); goto err_free; } if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { - err("can't open file %s: %s\n", filepath, - strerror(errno)); + p_err("can't open file %s: %s", filepath, + strerror(errno)); goto err_free; } n = write(fd, buf, *member_len); close(fd); if (n != *member_len) { - err("error writing output file: %s\n", - n < 0 ? strerror(errno) : "short write"); + p_err("error writing output file: %s", + n < 0 ? strerror(errno) : "short write"); goto err_free; } } else { From 004b45c0e51a8b6f20320181a946ba2d1bd3548b Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:14 -0700 Subject: [PATCH 1330/2177] tools: bpftool: provide JSON output for all possible commands As all commands can now return JSON output (possibly just a "null" value), output of `bpftool --json batch file FILE` should also be fully JSON compliant. Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/main.c | 25 +++++++++++++++++++++---- tools/bpf/bpftool/map.c | 16 +++++++++++++++- tools/bpf/bpftool/prog.c | 12 +++++++++++- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 9989a77fdc4a..55ba0a04c102 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -64,6 +64,11 @@ void usage(void) static int do_help(int argc, char **argv) { + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + fprintf(stderr, "Usage: %s OBJECT { COMMAND | help }\n" " %s batch file FILE\n" @@ -77,10 +82,22 @@ static int do_help(int argc, char **argv) static int do_version(int argc, char **argv) { - printf("%s v%d.%d.%d\n", bin_name, - LINUX_VERSION_CODE >> 16, - LINUX_VERSION_CODE >> 8 & 0xf, - LINUX_VERSION_CODE & 0xf); + unsigned int version[3]; + + version[0] = LINUX_VERSION_CODE >> 16; + version[1] = LINUX_VERSION_CODE >> 8 & 0xf; + version[2] = LINUX_VERSION_CODE & 0xf; + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "version"); + jsonw_printf(json_wtr, "\"%u.%u.%u\"", + version[0], version[1], version[2]); + jsonw_end_object(json_wtr); + } else { + printf("%s v%u.%u.%u\n", bin_name, + version[0], version[1], version[2]); + } return 0; } diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 86c128c433ba..a611f31f574f 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -651,6 +651,8 @@ exit_free: free(value); close(fd); + if (!err && json_output) + jsonw_null(json_wtr); return err; } @@ -812,16 +814,28 @@ exit_free: free(key); close(fd); + if (!err && json_output) + jsonw_null(json_wtr); return err; } static int do_pin(int argc, char **argv) { - return do_pin_any(argc, argv, bpf_map_get_fd_by_id); + int err; + + err = do_pin_any(argc, argv, bpf_map_get_fd_by_id); + if (!err && json_output) + jsonw_null(json_wtr); + return err; } static int do_help(int argc, char **argv) { + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + fprintf(stderr, "Usage: %s %s show [MAP]\n" " %s %s dump MAP\n" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 41bd5390b4fc..e07f35ff80d1 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -602,11 +602,21 @@ err_free: static int do_pin(int argc, char **argv) { - return do_pin_any(argc, argv, bpf_prog_get_fd_by_id); + int err; + + err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id); + if (!err && json_output) + jsonw_null(json_wtr); + return err; } static int do_help(int argc, char **argv) { + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + fprintf(stderr, "Usage: %s %s show [PROG]\n" " %s %s dump xlated PROG [{ file FILE | opcodes }]\n" From 47ff7ac6d706c6f08153294c87287aeec183b211 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:15 -0700 Subject: [PATCH 1331/2177] tools: bpftool: add cosmetic changes for the manual pages Make the look-and-feel of the manual pages somewhat closer to other manual pages, such as the ones from the utilities from iproute2, by highlighting more keywords. Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- .../bpf/bpftool/Documentation/bpftool-map.rst | 25 ++++++++++--------- .../bpftool/Documentation/bpftool-prog.rst | 12 ++++----- tools/bpf/bpftool/Documentation/bpftool.rst | 12 +++++---- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 5210c4fab356..3954b3ea4f26 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -13,23 +13,24 @@ SYNOPSIS **bpftool** **map** *COMMAND* *COMMANDS* := - { show | dump | update | lookup | getnext | delete | pin | help } + { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** + | **pin** | **help** } MAP COMMANDS ============= -| **bpftool** map show [*MAP*] -| **bpftool** map dump *MAP* -| **bpftool** map update *MAP* key *BYTES* value *VALUE* [*UPDATE_FLAGS*] -| **bpftool** map lookup *MAP* key *BYTES* -| **bpftool** map getnext *MAP* [key *BYTES*] -| **bpftool** map delete *MAP* key *BYTES* -| **bpftool** map pin *MAP* *FILE* -| **bpftool** map help +| **bpftool** **map show** [*MAP*] +| **bpftool** **map dump** *MAP* +| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] +| **bpftool** **map lookup** *MAP* **key** *BYTES* +| **bpftool** **map getnext** *MAP* [**key** *BYTES*] +| **bpftool** **map delete** *MAP* **key** *BYTES* +| **bpftool** **map pin** *MAP* *FILE* +| **bpftool** **map help** | -| *MAP* := { id MAP_ID | pinned FILE } -| *VALUE* := { BYTES | MAP | PROGRAM } -| *UPDATE_FLAGS* := { any | exist | noexist } +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } +| *VALUE* := { *BYTES* | *MAP* | *PROGRAM* } +| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 6620a81d9dc9..685a19e71fec 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -10,13 +10,13 @@ tool for inspection and simple manipulation of eBPF progs SYNOPSIS ======== -| **bpftool** prog show [*PROG*] -| **bpftool** prog dump xlated *PROG* [{file *FILE* | opcodes }] -| **bpftool** prog dump jited *PROG* [{file *FILE* | opcodes }] -| **bpftool** prog pin *PROG* *FILE* -| **bpftool** prog help +| **bpftool** **prog show** [*PROG*] +| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}] +| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] +| **bpftool** **prog pin** *PROG* *FILE* +| **bpftool** **prog help** | -| *PROG* := { id *PROG_ID* | pinned *FILE* | tag *PROG_TAG* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 9c04cd6677bd..44e07799d54d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -10,18 +10,20 @@ tool for inspection and simple manipulation of eBPF programs and maps SYNOPSIS ======== - **bpftool** *OBJECT* { *COMMAND* | help } + **bpftool** *OBJECT* { *COMMAND* | **help** } - **bpftool** batch file *FILE* + **bpftool** **batch file** *FILE* - **bpftool** version + **bpftool** **version** *OBJECT* := { **map** | **program** } *MAP-COMMANDS* := - { show | dump | update | lookup | getnext | delete | pin | help } + { **show** | **dump** | **update** | **lookup** | 
**getnext** | **delete** + | **pin** | **help** } - *PROG-COMMANDS* := { show | dump jited | dump xlated | pin | help } + *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin** + | **help** } DESCRIPTION =========== From 0641c3c890d480abeb237b92a5ee4b99a22319c6 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 23 Oct 2017 09:24:16 -0700 Subject: [PATCH 1332/2177] tools: bpftool: update documentation for --json and --pretty usage Update the documentation to provide help about JSON output generation, and add an example in bpftool-prog manual page. Also reintroduce an example that was left aside when the tool was moved from GitHub to the kernel sources, in order to show how to mount the bpffs file system (to pin programs) inside the bpftool-prog manual page. Signed-off-by: Quentin Monnet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- .../bpf/bpftool/Documentation/bpftool-map.rst | 11 +++- .../bpftool/Documentation/bpftool-prog.rst | 61 ++++++++++++++++++- tools/bpf/bpftool/Documentation/bpftool.rst | 12 +++- tools/bpf/bpftool/main.c | 6 +- tools/bpf/bpftool/main.h | 2 + tools/bpf/bpftool/map.c | 1 + tools/bpf/bpftool/prog.c | 1 + 7 files changed, 88 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 3954b3ea4f26..abb9ee940b15 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -10,7 +10,9 @@ tool for inspection and simple manipulation of eBPF maps SYNOPSIS ======== - **bpftool** **map** *COMMAND* + **bpftool** [*OPTIONS*] **map** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } *COMMANDS* := { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** @@ -77,6 +79,13 @@ OPTIONS -v, --version Print version number (similar to **bpftool version**). + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + EXAMPLES ======== **# bpftool map show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 685a19e71fec..0f25d3c39e05 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -10,6 +10,16 @@ tool for inspection and simple manipulation of eBPF progs SYNOPSIS ======== + **bpftool** [*OPTIONS*] **prog** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + + *COMMANDS* := + { **show** | **dump xlated** | **dump jited** | **pin** | **help** } + +MAP COMMANDS +============= + | **bpftool** **prog show** [*PROG*] | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] @@ -58,6 +68,13 @@ OPTIONS -v, --version Print version number (similar to **bpftool version**). + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. 
+ EXAMPLES ======== **# bpftool prog show** @@ -67,13 +84,33 @@ EXAMPLES loaded_at Sep 29/20:11 uid 0 xlated 528B jited 370B memlock 4096B map_ids 10 +**# bpftool --json --pretty prog show** + +:: + + { + "programs": [{ + "id": 10, + "type": "xdp", + "tag": "005a3d2123620c8b", + "loaded_at": "Sep 29/20:11", + "uid": 0, + "bytes_xlated": 528, + "jited": true, + "bytes_jited": 370, + "bytes_memlock": 4096, + "map_ids": [10 + ] + } + ] + } + | | **# bpftool prog dump xlated id 10 file /tmp/t** | **# ls -l /tmp/t** | -rw------- 1 root root 560 Jul 22 01:42 /tmp/t -| -| **# bpftool prog dum jited pinned /sys/fs/bpf/prog** +**# bpftool prog dum jited tag 005a3d2123620c8b** :: @@ -83,6 +120,26 @@ EXAMPLES sub $0x28,%rbp mov %rbx,0x0(%rbp) +| +| **# mount -t bpf none /sys/fs/bpf/** +| **# bpftool prog pin id 10 /sys/fs/bpf/prog** +| **# ls -l /sys/fs/bpf/** +| -rw------- 1 root root 0 Jul 22 01:43 prog + +**# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes** + +:: + + push %rbp + 55 + mov %rsp,%rbp + 48 89 e5 + sub $0x228,%rsp + 48 81 ec 28 02 00 00 + sub $0x28,%rbp + 48 83 ed 28 + mov %rbx,0x0(%rbp) + 48 89 5d 00 SEE ALSO diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 44e07799d54d..926c03d5a8da 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -10,7 +10,7 @@ tool for inspection and simple manipulation of eBPF programs and maps SYNOPSIS ======== - **bpftool** *OBJECT* { *COMMAND* | **help** } + **bpftool** [*OPTIONS*] *OBJECT* { *COMMAND* | **help** } **bpftool** **batch file** *FILE* @@ -18,6 +18,9 @@ SYNOPSIS *OBJECT* := { **map** | **program** } + *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } + | { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *MAP-COMMANDS* := { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** | **pin** | **help** } @@ -41,6 +44,13 @@ OPTIONS -v, --version Print version number (similar to **bpftool version**). + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. 
+ SEE ALSO ======== **bpftool-map**\ (8), **bpftool-prog**\ (8) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 55ba0a04c102..78d9afb74ef4 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -70,11 +70,13 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s OBJECT { COMMAND | help }\n" + "Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n" " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map }\n", + " OBJECT := { prog | map }\n" + " " HELP_SPEC_OPTIONS "\n" + "", bin_name, bin_name, bin_name); return 0; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 04c88b55d8c7..2f94bed03a8d 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -55,6 +55,8 @@ #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" +#define HELP_SPEC_OPTIONS \ + "OPTIONS := { {-j|--json} [{-p|--pretty}] }" enum bpf_obj_type { BPF_OBJ_UNKNOWN, diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a611f31f574f..e978ab23a77f 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -850,6 +850,7 @@ static int do_help(int argc, char **argv) " " HELP_SPEC_PROGRAM "\n" " VALUE := { BYTES | MAP | PROG }\n" " UPDATE_FLAGS := { any | exist | noexist }\n" + " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index e07f35ff80d1..250f80fd46aa 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -625,6 +625,7 @@ static int do_help(int argc, char **argv) " %s %s help\n" "\n" " " HELP_SPEC_PROGRAM "\n" + " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); From ff42bb9fe3091d996c763848afa3e57c2a780217 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:06 -0700 Subject: [PATCH 1333/2177] nfp: bpf: add helper for emitting nops The need to emitting a few nops will become more common soon as we add stack and map support. Add a helper. This allows for code to be shorter but also may be handy for marking the nops with a "reason" to ease applying optimizations. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 23fb11a41cc4..eb8c905936ac 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -494,6 +494,12 @@ static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) return tmp_reg; } +static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) +{ + while (count--) + emit_nop(nfp_prog); +} + static void wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_special special) @@ -1799,7 +1805,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog) static int nfp_translate(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta; - int i, err; + int err; nfp_intro(nfp_prog); if (nfp_prog->error) @@ -1831,8 +1837,7 @@ static int nfp_translate(struct nfp_prog *nfp_prog) if (nfp_prog->error) return nfp_prog->error; - for (i = 0; i < NFP_USTORE_PREFETCH_WINDOW; i++) - emit_nop(nfp_prog); + wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); if (nfp_prog->error) return nfp_prog->error; From 70c78fc138b6d0ef76d9920034e25082dd3a36ac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:07 -0700 Subject: [PATCH 1334/2177] nfp: bpf: refactor nfp_bpf_check_ptr() nfp_bpf_check_ptr() mostly looks at the pointer register. Add a temporary variable to shorten the code. While at it make sure we print error messages if translation fails to help users identify the problem (to be carried in ext_ack in due course). Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/bpf/verifier.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index e361c0e3b788..4d2ed84a82e0 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -113,17 +113,23 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, static int nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - const struct bpf_verifier_env *env, u8 reg) + const struct bpf_verifier_env *env, u8 reg_no) { - if (env->cur_state.regs[reg].type != PTR_TO_CTX && - env->cur_state.regs[reg].type != PTR_TO_PACKET) - return -EINVAL; + const struct bpf_reg_state *reg = &env->cur_state.regs[reg_no]; - if (meta->ptr.type != NOT_INIT && - meta->ptr.type != env->cur_state.regs[reg].type) + if (reg->type != PTR_TO_CTX && + reg->type != PTR_TO_PACKET) { + pr_info("unsupported ptr type: %d\n", reg->type); return -EINVAL; + } - meta->ptr = env->cur_state.regs[reg]; + if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { + pr_info("ptr type changed for instruction %d -> %d\n", + meta->ptr.type, reg->type); + return -EINVAL; + } + + meta->ptr = *reg; return 0; } From ee9133a845fe8ad15f989e29bf8e2c8abe7986b8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:08 -0700 Subject: [PATCH 1335/2177] nfp: bpf: add stack write support Stack is implemented by the LMEM register file. Unaligned accesses to LMEM are not allowed. Accesses also have to be 4B wide. To support stack we need to make sure offsets of pointers are known at translation time (for now) and perform correct load/mask/shift operations. 
Since we can access first 64B of LMEM without much effort support only stacks not bigger than 64B. Following commits will extend the possible sizes beyond that. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 105 ++++++++++++++++++ drivers/net/ethernet/netronome/nfp/bpf/main.h | 3 + .../net/ethernet/netronome/nfp/bpf/offload.c | 14 +++ .../net/ethernet/netronome/nfp/bpf/verifier.c | 30 ++++- 4 files changed, 147 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index eb8c905936ac..d2a3e9065dbe 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -642,6 +642,100 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, return 0; } +typedef int +(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, + unsigned int size); + +static int +wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, + unsigned int size) +{ + u32 idx, dst_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, reg_lm(0, idx), reg_b(src)); + return 0; + } + + dst_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. 
+ */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(0, idx); + } else { + reg = imm_a(nfp_prog); + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); + + if (idx > RE_REG_LM_IDX_MAX) + wrp_mov(nfp_prog, reg_lm(0, idx), reg); + + return 0; +} + +static int +mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, u8 gpr, lmem_step step) +{ + s32 off = nfp_prog->stack_depth + meta->insn.off; + u32 gpr_byte = 0; + int ret; + + while (size) { + u32 slice_end; + u8 slice_size; + + slice_size = min(size, 4 - gpr_byte); + slice_end = min(off + slice_size, round_up(off + 1, 4)); + slice_size = slice_end - off; + + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size); + if (ret) + return ret; + + gpr_byte += slice_size; + if (gpr_byte >= 4) { + gpr_byte -= 4; + gpr++; + } + + size -= slice_size; + off += slice_size; + } + + return 0; +} + static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { @@ -1298,6 +1392,14 @@ mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, meta->insn.src_reg * 2, size); } +static int +mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + return mem_op_stack(nfp_prog, meta, size, meta->insn.src_reg * 2, + wrp_lmem_store); +} + static int mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size) @@ -1305,6 +1407,9 @@ mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, if (meta->ptr.type == PTR_TO_PACKET) return mem_stx_data(nfp_prog, meta, size); + if (meta->ptr.type == PTR_TO_STACK) + return mem_stx_stack(nfp_prog, meta, size); + return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index d77e88a45409..a31632681e79 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -151,6 +151,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) * @tgt_done: jump target to get the next packet * @n_translated: number of successfully translated instructions (for errors) * @error: error code if something went wrong + * @stack_depth: max stack depth from the verifier * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) */ struct nfp_prog { @@ -171,6 +172,8 @@ struct nfp_prog { unsigned int n_translated; int error; + unsigned int stack_depth; + struct list_head insns; }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index a88bb5bc0082..f215abcbc18e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -146,6 +146,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, { unsigned int code_sz = max_instr * sizeof(u64); enum nfp_bpf_action_type act; + unsigned int stack_size; u16 start_off, done_off; unsigned int max_mtu; int ret; @@ -167,6 +168,19 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + if (cls_bpf->prog->aux->stack_depth > 64) { + nn_info(nn, "large stack not supported: program %dB > 64B\n", + cls_bpf->prog->aux->stack_depth); + return -EOPNOTSUPP; + } + + stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + if (cls_bpf->prog->aux->stack_depth > stack_size) { + nn_info(nn, "stack too large: program %dB > FW stack %dB\n", + cls_bpf->prog->aux->stack_depth, stack_size); + return 
-EOPNOTSUPP; + } + *code = dma_zalloc_coherent(nn->dp.dev, code_sz, dma_addr, GFP_KERNEL); if (!*code) return -ENOMEM; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 4d2ed84a82e0..376d9938b823 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -111,18 +111,41 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, return 0; } +static int nfp_bpf_check_stack_access(const struct bpf_reg_state *reg) +{ + if (!tnum_is_const(reg->var_off)) { + pr_info("variable ptr stack access\n"); + return -EINVAL; + } + + if (reg->var_off.value || reg->off) { + pr_info("stack access via modified register\n"); + return -EINVAL; + } + + return 0; +} + static int nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, const struct bpf_verifier_env *env, u8 reg_no) { const struct bpf_reg_state *reg = &env->cur_state.regs[reg_no]; + int err; if (reg->type != PTR_TO_CTX && + reg->type != PTR_TO_STACK && reg->type != PTR_TO_PACKET) { pr_info("unsupported ptr type: %d\n", reg->type); return -EINVAL; } + if (reg->type == PTR_TO_STACK) { + err = nfp_bpf_check_stack_access(reg); + if (err) + return err; + } + if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { pr_info("ptr type changed for instruction %d -> %d\n", meta->ptr.type, reg->type); @@ -143,11 +166,6 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); priv->meta = meta; - if (meta->insn.src_reg == BPF_REG_10 || - meta->insn.dst_reg == BPF_REG_10) { - pr_err("stack not yet supported\n"); - return -EINVAL; - } if (meta->insn.src_reg >= MAX_BPF_REG || meta->insn.dst_reg >= MAX_BPF_REG) { pr_err("program uses extended registers - jit hardening?\n"); @@ -176,6 +194,8 @@ int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) struct nfp_bpf_analyzer_priv *priv; int ret; + nfp_prog->stack_depth = prog->aux->stack_depth; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; From a82b23fb38eaaaad89332b90029fc4cd7c3f2545 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:09 -0700 Subject: [PATCH 1336/2177] nfp: bpf: add stack read support Add simple stack read support, similar to write in every aspect, but data flowing the other way. Note that unlike write which can be done in smaller than word quantities, if registers are loaded with less-than-word of stack contents - the values have to be zero extended. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 81 ++++++++++++++++++-- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index d2a3e9065dbe..094acea35326 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -644,11 +644,65 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, typedef int (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, - unsigned int size); + unsigned int size, bool new_gpr); + +static int +wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, + unsigned int size, bool new_gpr) +{ + u32 idx, src_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, reg_both(dst), reg_lm(0, idx)); + return 0; + } + + src_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(0, idx); + } else { + reg = imm_a(nfp_prog); + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); + + return 0; +} static int wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, - unsigned int size) + unsigned int size, bool new_gpr) { u32 idx, dst_byte; enum shf_sc sc; @@ -705,12 +759,16 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, static int mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size, u8 gpr, lmem_step step) + unsigned int size, u8 gpr, bool clr_gpr, lmem_step step) { s32 off = nfp_prog->stack_depth + meta->insn.off; + u8 prev_gpr = 255; u32 gpr_byte = 0; int ret; + if (clr_gpr && size < 8) + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + while (size) { u32 slice_end; u8 slice_size; @@ -719,10 +777,12 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, slice_end = min(off + slice_size, round_up(off + 1, 4)); slice_size = slice_end - off; - ret = step(nfp_prog, gpr, gpr_byte, off, slice_size); + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, + gpr != prev_gpr); if (ret) return ret; + prev_gpr = gpr; gpr_byte += slice_size; if (gpr_byte >= 4) { gpr_byte -= 4; @@ -1232,6 +1292,14 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) meta->insn.src_reg * 2, 4); } +static int +mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + return mem_op_stack(nfp_prog, meta, size, meta->insn.dst_reg * 2, true, + wrp_lmem_load); +} + static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 size) { @@ -1315,6 +1383,9 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, if (meta->ptr.type == PTR_TO_PACKET) return mem_ldx_data(nfp_prog, meta, size); + if (meta->ptr.type == PTR_TO_STACK) + return mem_ldx_stack(nfp_prog, meta, size); + return -EOPNOTSUPP; } @@ -1396,7 +1467,7 @@ static int 
mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size) { - return mem_op_stack(nfp_prog, meta, size, meta->insn.src_reg * 2, + return mem_op_stack(nfp_prog, meta, size, meta->insn.src_reg * 2, false, wrp_lmem_store); } From 9a90c83c09874a2fd03905ef0f73512c9de18799 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:10 -0700 Subject: [PATCH 1337/2177] nfp: bpf: optimize the RMW for stack accesses When we are performing unaligned stack accesses in the 32-64B window we have to do a read-modify-write cycle. E.g. for reading 8 bytes from address 17: 0: tmp = stack[16] 1: gprLo = tmp >> 8 2: tmp = stack[20] 3: gprLo |= tmp << 24 4: tmp = stack[20] 5: gprHi = tmp >> 8 6: tmp = stack[24] 7: gprHi |= tmp << 24 The load on line 4 is unnecessary, because tmp already contains data from stack[20]. For write we can optimize both loads and writebacks away. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 33 +++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 094acea35326..6730690cf9d8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -644,11 +644,11 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, typedef int (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, - unsigned int size, bool new_gpr); + unsigned int size, bool first, bool new_gpr, bool last); static int wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, - unsigned int size, bool new_gpr) + unsigned int size, bool first, bool new_gpr, bool last) { u32 idx, src_byte; enum shf_sc sc; @@ -692,7 +692,13 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, reg = reg_lm(0, idx); } else { reg = imm_a(nfp_prog); - wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + /* If it's not the first part of the load and we start a new GPR + * that means we are loading a second part of the LMEM word into + * a new GPR. IOW we've already looked that LMEM word and + * therefore it has been loaded into imm_a(). + */ + if (first || !new_gpr) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); } emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); @@ -702,7 +708,7 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, static int wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, - unsigned int size, bool new_gpr) + unsigned int size, bool first, bool new_gpr, bool last) { u32 idx, dst_byte; enum shf_sc sc; @@ -746,13 +752,19 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, reg = reg_lm(0, idx); } else { reg = imm_a(nfp_prog); - wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + /* Only first and last LMEM locations are going to need RMW, + * the middle location will be overwritten fully. 
+ */ + if (first || last) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); } emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); - if (idx > RE_REG_LM_IDX_MAX) - wrp_mov(nfp_prog, reg_lm(0, idx), reg); + if (new_gpr || last) { + if (idx > RE_REG_LM_IDX_MAX) + wrp_mov(nfp_prog, reg_lm(0, idx), reg); + } return 0; } @@ -762,6 +774,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size, u8 gpr, bool clr_gpr, lmem_step step) { s32 off = nfp_prog->stack_depth + meta->insn.off; + bool first = true, last; u8 prev_gpr = 255; u32 gpr_byte = 0; int ret; @@ -777,12 +790,16 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, slice_end = min(off + slice_size, round_up(off + 1, 4)); slice_size = slice_end - off; + last = slice_size == size; + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, - gpr != prev_gpr); + first, gpr != prev_gpr, last); if (ret) return ret; prev_gpr = gpr; + first = false; + gpr_byte += slice_size; if (gpr_byte >= 4) { gpr_byte -= 4; From d3488480635f453410fd27cea3fc370cedc7e28a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:11 -0700 Subject: [PATCH 1338/2177] nfp: bpf: allow stack accesses via modified stack registers As long as the verifier tells us the stack offset exactly we can render the LMEM reads quite easily. Simply make sure that the offset is constant for a given instruction and add it to the instruction's offset. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 23 ++++++++++-------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 3 +++ .../net/ethernet/netronome/nfp/bpf/verifier.c | 24 +++++++++++++------ 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 6730690cf9d8..073e382cba04 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -771,9 +771,10 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, static int mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size, u8 gpr, bool clr_gpr, lmem_step step) + unsigned int size, unsigned int ptr_off, u8 gpr, bool clr_gpr, + lmem_step step) { - s32 off = nfp_prog->stack_depth + meta->insn.off; + s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; bool first = true, last; u8 prev_gpr = 255; u32 gpr_byte = 0; @@ -1311,10 +1312,10 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size) + unsigned int size, unsigned int ptr_off) { - return mem_op_stack(nfp_prog, meta, size, meta->insn.dst_reg * 2, true, - wrp_lmem_load); + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.dst_reg * 2, true, wrp_lmem_load); } static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, @@ -1401,7 +1402,8 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return mem_ldx_data(nfp_prog, meta, size); if (meta->ptr.type == PTR_TO_STACK) - return mem_ldx_stack(nfp_prog, meta, size); + return mem_ldx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); return -EOPNOTSUPP; } @@ -1482,10 +1484,10 @@ mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size) 
+ unsigned int size, unsigned int ptr_off) { - return mem_op_stack(nfp_prog, meta, size, meta->insn.src_reg * 2, false, - wrp_lmem_store); + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.src_reg * 2, false, wrp_lmem_store); } static int @@ -1496,7 +1498,8 @@ mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return mem_stx_data(nfp_prog, meta, size); if (meta->ptr.type == PTR_TO_STACK) - return mem_stx_stack(nfp_prog, meta, size); + return mem_stx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index a31632681e79..d4f144a62f0f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -56,6 +56,7 @@ enum br_special { enum static_regs { STATIC_REG_IMM = 21, /* Bank AB */ + STATIC_REG_STACK = 22, /* Bank A */ STATIC_REG_PKT_LEN = 22, /* Bank B */ }; @@ -74,6 +75,8 @@ enum nfp_bpf_action_type { #define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN) #define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR) +#define stack_reg(np) reg_a(STATIC_REG_STACK) +#define stack_imm(np) imm_b(np) #define plen_reg(np) reg_b(STATIC_REG_PKT_LEN) #define pptr_reg(np) pv_ctm_ptr(np) #define imm_a(np) reg_a(STATIC_REG_IMM) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 376d9938b823..633db3e1a11e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -111,19 +111,29 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, return 0; } -static int nfp_bpf_check_stack_access(const struct bpf_reg_state *reg) +static int +nfp_bpf_check_stack_access(struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg) { + s32 old_off, new_off; + if (!tnum_is_const(reg->var_off)) { pr_info("variable ptr stack access\n"); return -EINVAL; } - if (reg->var_off.value || reg->off) { - pr_info("stack access via modified register\n"); - return -EINVAL; - } + if (meta->ptr.type == NOT_INIT) + return 0; - return 0; + old_off = meta->ptr.off + meta->ptr.var_off.value; + new_off = reg->off + reg->var_off.value; + + if (old_off == new_off) + return 0; + + pr_info("stack access changed location was:%d is:%d\n", + old_off, new_off); + return -EINVAL; } static int @@ -141,7 +151,7 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, } if (reg->type == PTR_TO_STACK) { - err = nfp_bpf_check_stack_access(reg); + err = nfp_bpf_check_stack_access(meta, reg); if (err) return err; } From 2df03a50f14ab6d888c212aa332536933ded040a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:12 -0700 Subject: [PATCH 1339/2177] nfp: bpf: support accessing the stack beyond 64 bytes To access beyond 64th byte of the stack we need to set a new stack pointer register (LMEM is accessed indirectly through those pointers). Add a function for encoding local CSR access instruction. Use stack pointer number 3. Note that stack pointer registers allow us to index into 32 bytes of LMEM (with shift operations i.e. when operands are restricted). This means if access is crossing 32 byte boundary we must not use offsetting, we have to set the pointer to the exact address and move it with post-increments. We depend on the datapath placing the stack base address in GPR A22 for our use. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 114 ++++++++++++++++-- .../net/ethernet/netronome/nfp/bpf/offload.c | 6 - drivers/net/ethernet/netronome/nfp/nfp_asm.h | 5 + 3 files changed, 111 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 073e382cba04..5105b9247839 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -427,6 +427,48 @@ emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); } +static void +__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_LCSR_BASE | + FIELD_PREP(OP_LCSR_A_SRC, areg) | + FIELD_PREP(OP_LCSR_B_SRC, breg) | + FIELD_PREP(OP_LCSR_WRITE, wr) | + FIELD_PREP(OP_LCSR_ADDR, addr) | + FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) +{ + struct nfp_insn_ur_regs reg; + int err; + + /* This instruction takes immeds instead of reg_none() for the ignored + * operand, but we can't encode 2 immeds in one instr with our normal + * swreg infra so if param is an immed, we encode as reg_none() and + * copy the immed to both operands. + */ + if (swreg_type(src) == NN_REG_IMM) { + err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®); + reg.breg = reg.areg; + } else { + err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®); + } + if (err) { + nfp_prog->error = err; + return; + } + + __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4, + false, reg.src_lmextn); +} + static void emit_nop(struct nfp_prog *nfp_prog) { __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); @@ -644,12 +686,15 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, typedef int (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, - unsigned int size, bool first, bool new_gpr, bool last); + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc); static int wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, - unsigned int size, bool first, bool new_gpr, bool last) + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) { + bool should_inc = needs_inc && new_gpr && !last; u32 idx, src_byte; enum shf_sc sc; swreg reg; @@ -663,10 +708,14 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, /* Move the entire word */ if (size == 4) { - wrp_mov(nfp_prog, reg_both(dst), reg_lm(0, idx)); + wrp_mov(nfp_prog, reg_both(dst), + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx)); return 0; } + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + src_byte = off % 4; mask = (1 << size) - 1; @@ -689,7 +738,7 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. */ if (idx <= RE_REG_LM_IDX_MAX) { - reg = reg_lm(0, idx); + reg = reg_lm(lm3 ? 
3 : 0, idx); } else { reg = imm_a(nfp_prog); /* If it's not the first part of the load and we start a new GPR @@ -703,13 +752,18 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); + return 0; } static int wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, - unsigned int size, bool first, bool new_gpr, bool last) + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) { + bool should_inc = needs_inc && new_gpr && !last; u32 idx, dst_byte; enum shf_sc sc; swreg reg; @@ -723,10 +777,15 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, /* Move the entire word */ if (size == 4) { - wrp_mov(nfp_prog, reg_lm(0, idx), reg_b(src)); + wrp_mov(nfp_prog, + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx), + reg_b(src)); return 0; } + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + dst_byte = off % 4; mask = (1 << size) - 1; @@ -749,7 +808,7 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. */ if (idx <= RE_REG_LM_IDX_MAX) { - reg = reg_lm(0, idx); + reg = reg_lm(lm3 ? 3 : 0, idx); } else { reg = imm_a(nfp_prog); /* Only first and last LMEM locations are going to need RMW, @@ -764,6 +823,8 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, if (new_gpr || last) { if (idx > RE_REG_LM_IDX_MAX) wrp_mov(nfp_prog, reg_lm(0, idx), reg); + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); } return 0; @@ -776,10 +837,44 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, { s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; bool first = true, last; + bool needs_inc = false; + swreg stack_off_reg; u8 prev_gpr = 255; u32 gpr_byte = 0; + bool lm3 = true; int ret; + if (off + size <= 64) { + /* We can reach bottom 64B with LMaddr0 */ + lm3 = false; + } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { + /* We have to set up a new pointer. If we know the offset + * and the entire access falls into a single 32 byte aligned + * window we won't have to increment the LM pointer. + * The 32 byte alignment is imporant because offset is ORed in + * not added when doing *l$indexN[off]. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32), + stack_imm(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + off %= 32; + } else { + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4), + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } + if (lm3) { + emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); + /* For size < 4 one slot will be filled by zeroing of upper. */ + wrp_nops(nfp_prog, clr_gpr && size < 8 ? 
2 : 3); + } + if (clr_gpr && size < 8) wrp_immed(nfp_prog, reg_both(gpr + 1), 0); @@ -793,8 +888,11 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, last = slice_size == size; + if (needs_inc) + off %= 4; + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, - first, gpr != prev_gpr, last); + first, gpr != prev_gpr, last, lm3, needs_inc); if (ret) return ret; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index f215abcbc18e..fbca1ca1f39b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -168,12 +168,6 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); - if (cls_bpf->prog->aux->stack_depth > 64) { - nn_info(nn, "large stack not supported: program %dB > 64B\n", - cls_bpf->prog->aux->stack_depth); - return -EOPNOTSUPP; - } - stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; if (cls_bpf->prog->aux->stack_depth > stack_size) { nn_info(nn, "stack too large: program %dB > FW stack %dB\n", diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 86e7daee6099..f4d1df3a1925 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -257,6 +257,11 @@ enum lcsr_wr_src { #define OP_CARB_BASE 0x0e000000000ULL #define OP_CARB_OR 0x00000010000ULL +#define NFP_CSR_ACT_LM_ADDR0 0x64 +#define NFP_CSR_ACT_LM_ADDR1 0x6c +#define NFP_CSR_ACT_LM_ADDR2 0x94 +#define NFP_CSR_ACT_LM_ADDR3 0x9c + /* Software register representation, independent of operand type */ #define NN_REG_TYPE GENMASK(31, 24) #define NN_REG_LM_IDX GENMASK(23, 22) From b14157eeed4eff2b293e0ca7738f6a3dbfff51cc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:13 -0700 Subject: [PATCH 1340/2177] nfp: bpf: support stack accesses via non-constant pointers If stack pointer has a different value on different paths but the alignment to words (4B) remains the same, we can set a new LMEM access pointer to the calculated value and access whichever word it's pointing to. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 41 +++++++++++++++---- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 + .../net/ethernet/netronome/nfp/bpf/verifier.c | 12 ++++-- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 5105b9247839..d84f00b80aac 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -832,8 +832,8 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, static int mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size, unsigned int ptr_off, u8 gpr, bool clr_gpr, - lmem_step step) + unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, + bool clr_gpr, lmem_step step) { s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; bool first = true, last; @@ -844,7 +844,19 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool lm3 = true; int ret; - if (off + size <= 64) { + if (meta->ptr_not_const) { + /* Use of the last encountered ptr_off is OK, they all have + * the same alignment. 
Depend on low bits of value being + * discarded when written to LMaddr register. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off, + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } else if (off + size <= 64) { /* We can reach bottom 64B with LMaddr0 */ lm3 = false; } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { @@ -1096,9 +1108,22 @@ static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + u8 src = insn->src_reg * 2; - wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); - wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1); + if (insn->src_reg == BPF_REG_10) { + swreg stack_depth_reg; + + stack_depth_reg = ur_load_imm_any(nfp_prog, + nfp_prog->stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), + stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else { + wrp_reg_mov(nfp_prog, dst, src); + wrp_reg_mov(nfp_prog, dst + 1, src + 1); + } return 0; } @@ -1413,7 +1438,8 @@ mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size, unsigned int ptr_off) { return mem_op_stack(nfp_prog, meta, size, ptr_off, - meta->insn.dst_reg * 2, true, wrp_lmem_load); + meta->insn.dst_reg * 2, meta->insn.src_reg * 2, + true, wrp_lmem_load); } static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, @@ -1585,7 +1611,8 @@ mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size, unsigned int ptr_off) { return mem_op_stack(nfp_prog, meta, size, ptr_off, - meta->insn.src_reg * 2, false, wrp_lmem_store); + meta->insn.src_reg * 2, meta->insn.dst_reg * 2, + false, wrp_lmem_store); } static int diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index d4f144a62f0f..86edc0691a5f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -101,6 +101,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); * struct nfp_insn_meta - BPF instruction wrapper * @insn: BPF instruction * @ptr: pointer type for memory operations + * @ptr_not_const: pointer is not always constant * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @skip: skip this instruction (optimized out) @@ -110,6 +111,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); struct nfp_insn_meta { struct bpf_insn insn; struct bpf_reg_state ptr; + bool ptr_not_const; unsigned int off; unsigned short n; bool skip; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 633db3e1a11e..3d3dcac1c942 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -112,7 +112,8 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, } static int -nfp_bpf_check_stack_access(struct nfp_insn_meta *meta, +nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, const struct bpf_reg_state *reg) { s32 old_off, new_off; @@ -128,7 +129,12 @@ nfp_bpf_check_stack_access(struct nfp_insn_meta *meta, old_off = meta->ptr.off + meta->ptr.var_off.value; new_off = reg->off + 
reg->var_off.value; - if (old_off == new_off) + meta->ptr_not_const |= old_off != new_off; + + if (!meta->ptr_not_const) + return 0; + + if (old_off % 4 == new_off % 4) return 0; pr_info("stack access changed location was:%d is:%d\n", @@ -151,7 +157,7 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, } if (reg->type == PTR_TO_STACK) { - err = nfp_bpf_check_stack_access(meta, reg); + err = nfp_bpf_check_stack_access(nfp_prog, meta, reg); if (err) return err; } From 9f16c8abcd79fc31a74d3af64f085a009c9d4b5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:14 -0700 Subject: [PATCH 1341/2177] nfp: bpf: optimize mov64 a little Loading 64bit constants require up to 4 load immediates, since we can only load 16 bits at a time. If the 32bit halves of the 64bit constant are the same, however, we can save a cycle by doing a register move instead of two loads of 16 bits. Note that we don't optimize the normal ALU64 load because even though it's a 64 bit load the upper half of the register is a coming from sign extension so we can load it in one cycle anyway. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 21 ++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index d84f00b80aac..e7eeb7a07f81 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1384,19 +1384,28 @@ static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), - meta->insn.imm); + struct nfp_insn_meta *prev = nfp_meta_prev(meta); + u32 imm_lo, imm_hi; + u8 dst; + + dst = prev->insn.dst_reg * 2; + imm_lo = prev->insn.imm; + imm_hi = meta->insn.imm; + + wrp_immed(nfp_prog, reg_both(dst), imm_lo); + + /* mov is always 1 insn, load imm may be two, so try to use mov */ + if (imm_hi == imm_lo) + wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst)); + else + wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi); return 0; } static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - const struct bpf_insn *insn = &meta->insn; - meta->double_cb = imm_ld8_part2; - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); - return 0; } From 56fc709b7a9fe191173dc772a881e180458db517 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Oct 2017 16:17:45 -0700 Subject: [PATCH 1342/2177] ipv6: addrconf: move ipv6_chk_same_addr() to avoid forward declaration ipv6_chk_same_addr() is only used by ipv6_add_addr_hash(), so moving it avoids a forward declaration. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 93f9c0a61911..9228030e3497 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -192,8 +192,6 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo); -static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev); static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, @@ -957,6 +955,23 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); } +static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev) +{ + unsigned int hash = inet6_addr_hash(addr); + struct inet6_ifaddr *ifp; + + hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { + if (!net_eq(dev_net(ifp->idev->dev), net)) + continue; + if (ipv6_addr_equal(&ifp->addr, addr)) { + if (!dev || ifp->idev->dev == dev) + return true; + } + } + return false; +} + static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { unsigned int hash; @@ -1856,22 +1871,6 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, } EXPORT_SYMBOL(ipv6_chk_addr_and_flags); -static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev) -{ - unsigned int hash = inet6_addr_hash(addr); - struct inet6_ifaddr *ifp; - - hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; - if (ipv6_addr_equal(&ifp->addr, addr)) { - if (!dev || ifp->idev->dev == dev) - return true; - } - } - return false; -} /* Compares an address/prefix_len with addresses on device @dev. * If one is found it returns true. From 752a92927e97e88096394dac3f10d12a58555254 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Oct 2017 16:17:46 -0700 Subject: [PATCH 1343/2177] ipv6: addrconf: factorize inet6_addr_hash() call ipv6_add_addr_hash() can compute the hash value outside of locked section and pass it to ipv6_chk_same_addr(). Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9228030e3497..c1a5028f394c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -956,9 +956,8 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) } static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev) + struct net_device *dev, unsigned int hash) { - unsigned int hash = inet6_addr_hash(addr); struct inet6_ifaddr *ifp; hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { @@ -974,23 +973,19 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { - unsigned int hash; + unsigned int hash = inet6_addr_hash(&ifa->addr); int err = 0; spin_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev)) { + if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) { ADBG("ipv6_add_addr: already assigned\n"); err = -EEXIST; - goto out; + } else { + hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); } - /* Add to big hash table */ - hash = inet6_addr_hash(&ifa->addr); - hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); - -out: spin_unlock(&addrconf_hash_lock); return err; From 3f27fb23219e75343b094366f2358bff34300493 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Oct 2017 16:17:47 -0700 Subject: [PATCH 1344/2177] ipv6: addrconf: add per netns perturbation in inet6_addr_hash() Bring IPv6 in par with IPv4 : - Use net_hash_mix() to spread addresses a bit more. - Use 256 slots hash table instead of 16 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 2 +- net/ipv6/addrconf.c | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b8b16437c6d5..15b5ffd7253d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -58,7 +58,7 @@ struct in6_validator_info { struct netlink_ext_ack *extack; }; -#define IN6_ADDR_HSIZE_SHIFT 4 +#define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) int addrconf_init(void); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c1a5028f394c..d70d98122053 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -950,9 +950,11 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) list_add_tail_rcu(&ifp->if_list, p); } -static u32 inet6_addr_hash(const struct in6_addr *addr) +static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr) { - return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); } static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, @@ -973,7 +975,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { - unsigned int hash = inet6_addr_hash(&ifa->addr); + unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr); int err = 0; spin_lock(&addrconf_hash_lock); @@ -1838,8 +1840,8 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict, u32 banned_flags) { + unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp; - unsigned int hash = inet6_addr_hash(addr); u32 ifp_flags; rcu_read_lock_bh(); @@ -1917,8 +1919,8 @@ EXPORT_SYMBOL(ipv6_chk_prefix); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { + unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp, *result = NULL; - unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { @@ -4242,9 +4244,9 @@ void if6_proc_exit(void) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { - int ret = 0; + unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp = NULL; - unsigned int hash = inet6_addr_hash(addr); + int ret = 0; rcu_read_lock_bh(); hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { From 480318a0a4d8b0d03ddedba1bd82e78050ede661 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Oct 2017 16:17:48 -0700 Subject: [PATCH 1345/2177] ipv6: addrconf: do not block BH in ipv6_chk_addr_and_flags() rcu_read_lock() is enough here, as inet6_ifa_finish_destroy() uses kfree_rcu() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d70d98122053..a6cf37b7e34c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1844,7 +1844,7 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, struct inet6_ifaddr *ifp; u32 ifp_flags; - rcu_read_lock_bh(); + rcu_read_lock(); hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; @@ -1858,12 +1858,12 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, !(ifp_flags&banned_flags) && (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { - rcu_read_unlock_bh(); + rcu_read_unlock(); return 1; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return 0; } EXPORT_SYMBOL(ipv6_chk_addr_and_flags); From 24f226da96278bf3956dac6fa293ef95bc22a90c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Oct 2017 16:17:49 -0700 Subject: [PATCH 1346/2177] ipv6: addrconf: do not block BH in ipv6_get_ifaddr() rcu_read_lock() is enough here, no need to block BH. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a6cf37b7e34c..6c1e7ffb62ff 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1922,8 +1922,8 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp, *result = NULL; - rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { + rcu_read_lock(); + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1935,7 +1935,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add } } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return result; } From a5c1d98f8ccf4359575772b36ed51f5857dd7165 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Oct 2017 16:17:50 -0700 Subject: [PATCH 1347/2177] ipv6: addrconf: do not block BH in /proc/net/if_inet6 handling Table is really RCU protected, no need to block BH Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6c1e7ffb62ff..9232e9537082 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4097,9 +4097,9 @@ struct if6_iter_state { static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) { - struct inet6_ifaddr *ifa = NULL; struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + struct inet6_ifaddr *ifa = NULL; int p = 0; /* initial bucket if pos is 0 */ @@ -4109,7 +4109,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -4135,7 +4135,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) { + hlist_for_each_entry_continue_rcu(ifa, addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; state->offset++; @@ -4144,7 +4144,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, while (++state->bucket < IN6_ADDR_HSIZE) { state->offset = 0; - hlist_for_each_entry_rcu_bh(ifa, + hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -4157,9 +4157,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(rcu_bh) + __acquires(rcu) { - rcu_read_lock_bh(); + rcu_read_lock(); return if6_get_first(seq, *pos); } @@ -4173,9 +4173,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) - __releases(rcu_bh) + __releases(rcu) { - rcu_read_unlock_bh(); + rcu_read_unlock(); } static int if6_seq_show(struct seq_file *seq, void *v) From 4e5f47ab97ce68d9f766dfedc5762940a90e4c11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Oct 2017 16:17:51 -0700 Subject: [PATCH 1348/2177] ipv6: addrconf: do not block BH in ipv6_chk_home_addr() rcu_read_lock() is enough here, no need to block BH. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9232e9537082..5a8a10229a07 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4248,8 +4248,8 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) struct inet6_ifaddr *ifp = NULL; int ret = 0; - rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { + rcu_read_lock(); + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -4258,7 +4258,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) break; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } #endif From e3cf39706b89001fd7f3eba0aa70aa40379eef30 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Thu, 19 Oct 2017 16:54:48 -0500 Subject: [PATCH 1349/2177] net: rxrpc: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 2 ++ net/rxrpc/input.c | 1 + net/rxrpc/sendmsg.c | 1 + 3 files changed, 4 insertions(+) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 054e32872808..344b2dcad52d 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -246,6 +246,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) ret = 0; break; } + /* Fall through */ default: ret = -EBUSY; break; @@ -560,6 +561,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) m->msg_name = &rx->connect_srx; m->msg_namelen = sizeof(rx->connect_srx); } + /* Fall through */ case RXRPC_SERVER_BOUND: case RXRPC_SERVER_LISTENING: ret = rxrpc_do_sendmsg(rx, m, len); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index e56e23ed2229..1e37eb1c0c66 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1125,6 +1125,7 @@ void rxrpc_data_ready(struct sock *udp_sk) case RXRPC_PACKET_TYPE_BUSY: if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) goto discard; + /* Fall through */ case RXRPC_PACKET_TYPE_DATA: if (sp->hdr.callNumber == 0) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 2d9edc656ca3..7d2595582c09 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -220,6 +220,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, ktime_get_real()); if (!last) break; + /* Fall through */ case RXRPC_CALL_SERVER_SEND_REPLY: call->state = RXRPC_CALL_SERVER_AWAIT_ACK; rxrpc_notify_end_tx(rx, call, notify_end_tx); From 7f6b437e9b82a6d702a7f8f75c83ffdec6e03c54 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 21 Oct 2017 20:35:30 -0500 Subject: [PATCH 1350/2177] net: smc_close: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case I placed the "fall through" comment on its own line, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/smc/smc_close.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index f0d16fb825f7..a6c65595f248 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -360,7 +360,8 @@ static void smc_close_passive_work(struct work_struct *work) case SMC_PEERCLOSEWAIT1: if (rxflags->peer_done_writing) sk->sk_state = SMC_PEERCLOSEWAIT2; - /* fall through to check for closing */ + /* fall through */ + /* to check for closing */ case SMC_PEERCLOSEWAIT2: case SMC_PEERFINCLOSEWAIT: if (!smc_cdc_rxed_any_close(&smc->conn)) From 31749468c3f9d77927ed3144259dc208e6625ede Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 23 Oct 2017 19:39:28 +0200 Subject: [PATCH 1351/2177] bpf: cpumap fix potential lost wake-up problem As pointed out by Michael, commit 1c601d829ab0 ("bpf: cpumap xdp_buff to skb conversion and allocation") contains a classical example of the potential lost wake-up problem. We need to recheck the condition __ptr_ring_empty() after changing current->state to TASK_INTERRUPTIBLE, this avoids a race between wake_up_process() and schedule(). 
After this, a race with wake_up_process() will simply change the state to TASK_RUNNING, and the schedule() call not really put us to sleep. Fixes: 1c601d829ab0 ("bpf: cpumap xdp_buff to skb conversion and allocation") Reported-by: "Michael S. Tsirkin" Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- kernel/bpf/cpumap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index b4358d84ddf1..86e29cbf7827 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -288,13 +288,17 @@ static int cpu_map_kthread_run(void *data) /* Release CPU reschedule checks */ if (__ptr_ring_empty(rcpu->queue)) { - __set_current_state(TASK_INTERRUPTIBLE); - schedule(); - sched = 1; + set_current_state(TASK_INTERRUPTIBLE); + /* Recheck to avoid lost wake-up */ + if (__ptr_ring_empty(rcpu->queue)) { + schedule(); + sched = 1; + } else { + __set_current_state(TASK_RUNNING); + } } else { sched = cond_resched(); } - __set_current_state(TASK_RUNNING); /* Process packets in rcpu->queue */ local_bh_disable(); From 152854025528b30c5ca5113a443ead98c3f1e7a5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Oct 2017 13:08:14 -0500 Subject: [PATCH 1352/2177] ipv4: icmp: use BUG_ON instead of if condition followed by BUG Use BUG_ON instead of if condition followed by BUG in icmp_timestamp. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3c1570d3e22f..1617604c9284 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -968,8 +968,9 @@ static bool icmp_timestamp(struct sk_buff *skb) */ icmp_param.data.times[1] = inet_current_timestamp(); icmp_param.data.times[2] = icmp_param.data.times[1]; - if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) - BUG(); + + BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)); + icmp_param.data.icmph = *icmp_hdr(skb); icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY; icmp_param.data.icmph.code = 0; From 49ca1943a7adb429b11b8e05d81bc821694b76c7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Oct 2017 13:10:56 -0500 Subject: [PATCH 1353/2177] ipv4: tcp_minisocks: use BUG_ON instead of if condition followed by BUG Use BUG_ON instead of if condition followed by BUG in tcp_time_wait. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2341b9f857b6..a952357054f4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -298,8 +298,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) key = tp->af_specific->md5_lookup(sk, sk); if (key) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) - BUG(); + BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()); } } while (0); #endif From b6f4f8484d88b69f700907200a9a9ec73806355f Mon Sep 17 00:00:00 2001 From: Tim Hansen Date: Mon, 23 Oct 2017 15:35:58 -0400 Subject: [PATCH 1354/2177] net/sock: Update sk rcu iterator macro. Mark hlist node in sk rcu iterator as protected by the rcu. hlist_next_rcu accomplishes this and silences the warnings sparse throws. Found with make C=1 net/ipv4/udp.o on linux-next tag next-20171009. 
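As a rough illustration (struct foo and foo_find() are hypothetical and not from this patch), the same accessors keep an open-coded RCU hlist walk sparse-clean:

#include <linux/rculist.h>

struct foo {
	int val;
	struct hlist_node node;
};

static bool foo_find(struct hlist_head *head, int val)
{
	struct hlist_node *pos;
	bool found = false;

	rcu_read_lock();
	/* hlist_first_rcu()/hlist_next_rcu() expose the pointers with their
	 * __rcu annotation, so rcu_dereference() passes sparse's
	 * address-space checks.
	 */
	for (pos = rcu_dereference(hlist_first_rcu(head));
	     pos;
	     pos = rcu_dereference(hlist_next_rcu(pos))) {
		if (container_of(pos, struct foo, node)->val == val) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}
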
Signed-off-by: Tim Hansen Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 4827094f1db4..6f1be9726e02 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -737,10 +737,10 @@ static inline void sk_add_bind_node(struct sock *sk, * */ #define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ - for (pos = rcu_dereference((head)->first); \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = rcu_dereference(pos->next)) + pos = rcu_dereference(hlist_next_rcu(pos))) static inline struct user_namespace *sk_user_ns(struct sock *sk) { From 71c02379c762cb616c00fd5c4ed253fbf6bbe11b Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 23 Oct 2017 13:22:23 -0700 Subject: [PATCH 1355/2177] tcp: Configure TFO without cookie per socket and/or per route We already allow to enable TFO without a cookie by using the fastopen-sysctl and setting it to TFO_SERVER_COOKIE_NOT_REQD (or TFO_CLIENT_NO_COOKIE). This is safe to do in certain environments where we know that there isn't a malicous host (aka., data-centers) or when the application-protocol already provides an authentication mechanism in the first flight of data. A server however might be providing multiple services or talking to both sides (public Internet and data-center). So, this server would want to enable cookie-less TFO for certain services and/or for connections that go to the data-center. This patch exposes a socket-option and a per-route attribute to enable such fine-grained configurations. Signed-off-by: Christoph Paasch Reviewed-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 3 ++- include/uapi/linux/rtnetlink.h | 2 ++ include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 12 ++++++++++++ net/ipv4/tcp_fastopen.c | 20 +++++++++++++++++--- net/ipv4/tcp_input.c | 2 +- 7 files changed, 37 insertions(+), 6 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1d2c44e09e31..173a7c2f9636 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -215,7 +215,8 @@ struct tcp_sock { u8 chrono_type:2, /* current chronograph type */ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ - unused:4; + fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ + unused:3; u8 nonagle : 4,/* Disable Nagle algorithm? 
*/ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, diff --git a/include/net/tcp.h b/include/net/tcp.h index 2c13484704cb..2392f74074e7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1567,7 +1567,8 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc); + struct tcp_fastopen_cookie *foc, + const struct dst_entry *dst); void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index dab7dad9e01a..fe6679268901 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -430,6 +430,8 @@ enum { #define RTAX_QUICKACK RTAX_QUICKACK RTAX_CC_ALGO, #define RTAX_CC_ALGO RTAX_CC_ALGO + RTAX_FASTOPEN_NO_COOKIE, +#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE __RTAX_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 69c7493e42f8..d67e1d40c6d6 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -120,6 +120,7 @@ enum { #define TCP_ULP 31 /* Attach a ULP to a TCP connection */ #define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ #define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */ +#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */ struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index be07e9b6dbdd..8f36277e82e9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2836,6 +2836,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EOPNOTSUPP; } break; + case TCP_FASTOPEN_NO_COOKIE: + if (val > 1 || val < 0) + err = -EINVAL; + else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + err = -EINVAL; + else + tp->fastopen_no_cookie = val; + break; case TCP_TIMESTAMP: if (!tp->repair) err = -EPERM; @@ -3256,6 +3264,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->fastopen_connect; break; + case TCP_FASTOPEN_NO_COOKIE: + val = tp->fastopen_no_cookie; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp_raw() + tp->tsoffset; break; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 21075ce19cb6..e0a4b56644aa 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -310,13 +310,23 @@ static bool tcp_fastopen_queue_check(struct sock *sk) return true; } +static bool tcp_fastopen_no_cookie(const struct sock *sk, + const struct dst_entry *dst, + int flag) +{ + return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || + tcp_sk(sk)->fastopen_no_cookie || + (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); +} + /* Returns true if we should perform Fast Open on the SYN. The cookie (foc) * may be updated and return the client in the SYN-ACK later. E.g., Fast Open * cookie request (foc->len == 0). 
*/ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + const struct dst_entry *dst) { bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; @@ -333,7 +343,8 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, return NULL; } - if (syn_data && (tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) + if (syn_data && + tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (foc->len >= 0 && /* Client presents or requests a cookie */ @@ -370,6 +381,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie) { unsigned long last_syn_loss = 0; + const struct dst_entry *dst; int syn_loss = 0; tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss); @@ -387,7 +399,9 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, return false; } - if (sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { + dst = __sk_dst_get(sk); + + if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) { cookie->len = -1; return true; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5e64d4b5839..893286db4623 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6332,7 +6332,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_openreq_init_rwin(req, sk, dst); if (!want_cookie) { tcp_reqsk_record_syn(sk, req, skb); - fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc); + fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); } if (fastopen_sk) { af_ops->send_synack(fastopen_sk, dst, &fl, req, From 87b1af8dcc084d3d509d55067185ca74aed70ce2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 23 Oct 2017 14:59:35 -0700 Subject: [PATCH 1356/2177] ipv6: add ip6_null_entry check in rt6_select() In rt6_select(), fn->leaf could be pointing to net->ipv6.ip6_null_entry. In this case, we should directly return instead of trying to carry on with the rest of the process. If not, we could crash at: spin_lock_bh(&leaf->rt6i_table->rt6_lock); because net->ipv6.ip6_null_entry does not have rt6i_table set. Syzkaller recently reported following issue on net-next: Use struct sctp_sack_info instead kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: sctp: [Deprecated]: syz-executor4 (pid 26496) Use of struct sctp_assoc_value in delayed_ack socket option. Use struct sctp_sack_info instead CPU: 1 PID: 26523 Comm: syz-executor6 Not tainted 4.14.0-rc4+ #85 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d147e3c0 task.stack: ffff8801a4328000 RIP: 0010:debug_spin_lock_before kernel/locking/spinlock_debug.c:83 [inline] RIP: 0010:do_raw_spin_lock+0x23/0x1e0 kernel/locking/spinlock_debug.c:112 RSP: 0018:ffff8801a432ed70 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: 0000000000000018 RCX: 0000000000000000 RDX: 0000000000000003 RSI: 0000000000000000 RDI: 000000000000001c RBP: ffff8801a432ed90 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: ffffffff8482b279 R12: ffff8801ce2ff3a0 sctp: [Deprecated]: syz-executor1 (pid 26546) Use of int in maxseg socket option. 
Use struct sctp_assoc_value instead R13: dffffc0000000000 R14: ffff8801d971e000 R15: ffff8801ce2ff0d8 FS: 00007f56e82f5700(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001ddbc22000 CR3: 00000001a4a04000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __raw_spin_lock_bh include/linux/spinlock_api_smp.h:136 [inline] _raw_spin_lock_bh+0x39/0x40 kernel/locking/spinlock.c:175 spin_lock_bh include/linux/spinlock.h:321 [inline] rt6_select net/ipv6/route.c:786 [inline] ip6_pol_route+0x1be3/0x3bd0 net/ipv6/route.c:1650 sctp: [Deprecated]: syz-executor1 (pid 26576) Use of int in maxseg socket option. Use struct sctp_assoc_value instead TCP: request_sock_TCPv6: Possible SYN flooding on port 20002. Sending cookies. Check SNMP counters. ip6_pol_route_output+0x4c/0x60 net/ipv6/route.c:1843 fib6_rule_lookup+0x9e/0x2a0 net/ipv6/ip6_fib.c:309 ip6_route_output_flags+0x1f1/0x2b0 net/ipv6/route.c:1871 ip6_route_output include/net/ip6_route.h:80 [inline] ip6_dst_lookup_tail+0x4ea/0x970 net/ipv6/ip6_output.c:953 ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1076 sctp_v6_get_dst+0x675/0x1c30 net/sctp/ipv6.c:274 sctp_transport_route+0xa8/0x430 net/sctp/transport.c:287 sctp_assoc_add_peer+0x4fe/0x1100 net/sctp/associola.c:656 __sctp_connect+0x251/0xc80 net/sctp/socket.c:1187 sctp_connect+0xb4/0xf0 net/sctp/socket.c:4209 inet_dgram_connect+0x16b/0x1f0 net/ipv4/af_inet.c:541 SYSC_connect+0x20a/0x480 net/socket.c:1642 SyS_connect+0x24/0x30 net/socket.c:1623 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 46c59a53c53f..605e5dc1c010 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -752,7 +752,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, bool do_rr = false; int key_plen; - if (!leaf) + if (!leaf || leaf == net->ipv6.ip6_null_entry) return net->ipv6.ip6_null_entry; rt0 = rcu_dereference(fn->rr_ptr); From 907aaa6babe1a606f3da4eb76e76e3ce6286f97f Mon Sep 17 00:00:00 2001 From: Veerasenareddy Burru Date: Mon, 23 Oct 2017 20:33:25 -0700 Subject: [PATCH 1357/2177] liquidio: pass date and time info to NIC firmware Pass date and time information to NIC at the time of loading firmware and periodically update the host time to NIC firmware. This is to make NIC firmware use the same time reference as Host, so that it is easy to correlate logs from firmware and host for debugging. Signed-off-by: Veerasenareddy Burru Signed-off-by: Felix Manlunas Signed-off-by: David S. 
Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 126 ++++++++++++++++++ .../cavium/liquidio/liquidio_common.h | 8 ++ .../ethernet/cavium/liquidio/octeon_console.c | 32 ++++- .../ethernet/cavium/liquidio/octeon_network.h | 3 + 4 files changed, 166 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index eafae3eb4fed..b4f753c56308 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -83,6 +83,11 @@ static int octeon_console_debug_enabled(u32 console) /* runtime link query interval */ #define LIQUIDIO_LINK_QUERY_INTERVAL_MS 1000 +/* update localtime to octeon firmware every 60 seconds. + * make firmware to use same time reference, so that it will be easy to + * correlate firmware logged events/errors with host events, for debugging. + */ +#define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000 struct liquidio_if_cfg_context { int octeon_id; @@ -901,6 +906,121 @@ static inline void update_link_status(struct net_device *netdev, } } +/** + * lio_sync_octeon_time_cb - callback that is invoked when soft command + * sent by lio_sync_octeon_time() has completed successfully or failed + * + * @oct - octeon device structure + * @status - indicates success or failure + * @buf - pointer to the command that was sent to firmware + **/ +static void lio_sync_octeon_time_cb(struct octeon_device *oct, + u32 status, void *buf) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; + + if (status) + dev_err(&oct->pci_dev->dev, + "Failed to sync time to octeon; error=%d\n", status); + + octeon_free_soft_command(oct, sc); +} + +/** + * lio_sync_octeon_time - send latest localtime to octeon firmware so that + * firmware will correct it's time, in case there is a time skew + * + * @work: work scheduled to send time update to octeon firmware + **/ +static void lio_sync_octeon_time(struct work_struct *work) +{ + struct cavium_wk *wk = (struct cavium_wk *)work; + struct lio *lio = (struct lio *)wk->ctxptr; + struct octeon_device *oct = lio->oct_dev; + struct octeon_soft_command *sc; + struct timespec64 ts; + struct lio_time *lt; + int ret; + + sc = octeon_alloc_soft_command(oct, sizeof(struct lio_time), 0, 0); + if (!sc) { + dev_err(&oct->pci_dev->dev, + "Failed to sync time to octeon: soft command allocation failed\n"); + return; + } + + lt = (struct lio_time *)sc->virtdptr; + + /* Get time of the day */ + getnstimeofday64(&ts); + lt->sec = ts.tv_sec; + lt->nsec = ts.tv_nsec; + octeon_swap_8B_data((u64 *)lt, (sizeof(struct lio_time)) / 8); + + sc->iq_no = lio->linfo.txpciq[0].s.q_no; + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, + OPCODE_NIC_SYNC_OCTEON_TIME, 0, 0, 0); + + sc->callback = lio_sync_octeon_time_cb; + sc->callback_arg = sc; + sc->wait_time = 1000; + + ret = octeon_send_soft_command(oct, sc); + if (ret == IQ_SEND_FAILED) { + dev_err(&oct->pci_dev->dev, + "Failed to sync time to octeon: failed to send soft command\n"); + octeon_free_soft_command(oct, sc); + } + + queue_delayed_work(lio->sync_octeon_time_wq.wq, + &lio->sync_octeon_time_wq.wk.work, + msecs_to_jiffies(LIO_SYNC_OCTEON_TIME_INTERVAL_MS)); +} + +/** + * setup_sync_octeon_time_wq - Sets up the work to periodically update + * local time to octeon firmware + * + * @netdev - network device which should send time update to firmware + **/ +static inline int setup_sync_octeon_time_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = 
lio->oct_dev; + + lio->sync_octeon_time_wq.wq = + alloc_workqueue("update-octeon-time", WQ_MEM_RECLAIM, 0); + if (!lio->sync_octeon_time_wq.wq) { + dev_err(&oct->pci_dev->dev, "Unable to create wq to update octeon time\n"); + return -1; + } + INIT_DELAYED_WORK(&lio->sync_octeon_time_wq.wk.work, + lio_sync_octeon_time); + lio->sync_octeon_time_wq.wk.ctxptr = lio; + queue_delayed_work(lio->sync_octeon_time_wq.wq, + &lio->sync_octeon_time_wq.wk.work, + msecs_to_jiffies(LIO_SYNC_OCTEON_TIME_INTERVAL_MS)); + + return 0; +} + +/** + * cleanup_sync_octeon_time_wq - stop scheduling and destroy the work created + * to periodically update local time to octeon firmware + * + * @netdev - network device which should send time update to firmware + **/ +static inline void cleanup_sync_octeon_time_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct cavium_wq *time_wq = &lio->sync_octeon_time_wq; + + if (time_wq->wq) { + cancel_delayed_work_sync(&time_wq->wk.work); + destroy_workqueue(time_wq->wq); + } +} + static struct octeon_device *get_other_octeon_device(struct octeon_device *oct) { struct octeon_device *other_oct; @@ -1455,6 +1575,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) unregister_netdev(netdev); + cleanup_sync_octeon_time_wq(netdev); cleanup_link_status_change_wq(netdev); cleanup_rx_oom_poll_fn(netdev); @@ -3611,6 +3732,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) if (setup_link_status_change_wq(netdev)) goto setup_nic_dev_fail; + if ((octeon_dev->fw_info.app_cap_flags & + LIQUIDIO_TIME_SYNC_CAP) && + setup_sync_octeon_time_wq(netdev)) + goto setup_nic_dev_fail; + if (setup_rx_oom_poll_fn(netdev)) goto setup_nic_dev_fail; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 3788c8cd082a..2033a65cd97a 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -84,6 +84,7 @@ enum octeon_tag_type { #define OPCODE_NIC_IF_CFG 0x09 #define OPCODE_NIC_VF_DRV_NOTICE 0x0A #define OPCODE_NIC_INTRMOD_PARAMS 0x0B +#define OPCODE_NIC_SYNC_OCTEON_TIME 0x14 #define VF_DRV_LOADED 1 #define VF_DRV_REMOVED -1 #define VF_DRV_MACADDR_CHANGED 2 @@ -108,6 +109,9 @@ enum octeon_tag_type { #define SCR2_BIT_FW_LOADED 63 +/* App specific capabilities from firmware to pf driver */ +#define LIQUIDIO_TIME_SYNC_CAP 0x1 + static inline u32 incr_index(u32 index, u32 count, u32 max) { if ((index + count) >= max) @@ -901,4 +905,8 @@ union oct_nic_if_cfg { } s; }; +struct lio_time { + s64 sec; /* seconds */ + s64 nsec; /* nanoseconds */ +}; #endif diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c index ec3dd69cd6b2..7f97ae48efed 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c @@ -803,15 +803,18 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num, } #define FBUF_SIZE (4 * 1024 * 1024) +#define MAX_BOOTTIME_SIZE 80 int octeon_download_firmware(struct octeon_device *oct, const u8 *data, size_t size) { - int ret = 0; + struct octeon_firmware_file_header *h; + char boottime[MAX_BOOTTIME_SIZE]; + struct timespec64 ts; u32 crc32_result; u64 load_addr; u32 image_len; - struct octeon_firmware_file_header *h; + int ret = 0; u32 i, rem; if (size < sizeof(struct 
octeon_firmware_file_header)) { @@ -890,11 +893,34 @@ int octeon_download_firmware(struct octeon_device *oct, const u8 *data, load_addr += size; } } + + /* Pass date and time information to NIC at the time of loading + * firmware and periodically update the host time to NIC firmware. + * This is to make NIC firmware use the same time reference as Host, + * so that it is easy to correlate logs from firmware and host for + * debugging. + * + * Octeon always uses UTC time. so timezone information is not sent. + */ + getnstimeofday64(&ts); + ret = snprintf(boottime, MAX_BOOTTIME_SIZE, + " time_sec=%lld time_nsec=%ld", + (s64)ts.tv_sec, ts.tv_nsec); + if ((sizeof(h->bootcmd) - strnlen(h->bootcmd, sizeof(h->bootcmd))) < + ret) { + dev_err(&oct->pci_dev->dev, "Boot command buffer too small\n"); + return -EINVAL; + } + strncat(h->bootcmd, boottime, + sizeof(h->bootcmd) - strnlen(h->bootcmd, sizeof(h->bootcmd))); + dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n", h->bootcmd); /* Invoke the bootcmd */ ret = octeon_console_send_cmd(oct, h->bootcmd, 50); + if (ret) + dev_info(&oct->pci_dev->dev, "Boot command send failed\n"); - return 0; + return ret; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index 9e36319cead6..433f3619de8f 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -136,6 +136,9 @@ struct lio { /* work queue for link status */ struct cavium_wq link_status_wq; + /* work queue to regularly send local time to octeon firmware */ + struct cavium_wq sync_octeon_time_wq; + int netdev_uc_count; }; From 9d452cebd7d69e9eb22b4c0482fdbb6fc762167f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 24 Oct 2017 08:58:02 +0300 Subject: [PATCH 1358/2177] net/sched: Fix actions list corruption when adding offloaded tc flows Prior to commit b3f55bdda8df, the networking core doesn't wire an in-place actions list the when the low level driver is called to offload the flow, but all low level drivers do that (call tcf_exts_to_list()) in their offloading "add" logic. Now, the in-place list is set in the core which goes over the list in a loop, but also by the hw driver when their offloading code is invoked indirectly: cls_xxx add flow -> tc_setup_cb_call -> tc_exts_setup_cb_egdev_call -> hw driver which messes up the core list instance upon driver return. Fix that by avoiding in-place list on the net core code that deals with adding flows. Fixes: b3f55bdda8df ('net: sched: introduce per-egress action device callbacks') Signed-off-by: Or Gerlitz Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/cls_api.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index cdfdc24b89cf..0e96cdae9995 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1184,14 +1184,13 @@ static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, #ifdef CONFIG_NET_CLS_ACT const struct tc_action *a; struct net_device *dev; - LIST_HEAD(actions); - int ret; + int i, ret; if (!tcf_exts_has_actions(exts)) return 0; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + for (i = 0; i < exts->nr_actions; i++) { + a = exts->actions[i]; if (!a->ops->get_dev) continue; dev = a->ops->get_dev(a); From 69715dd50d28deb52f84932794a2fce0b18c8b56 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Tue, 24 Oct 2017 10:11:42 +0200 Subject: [PATCH 1359/2177] mlxsw: spectrum_dpipe: Fix entries dump of the adjacency table During the dump the per netlink packet entry counter should be zeroed out when new packet is created. Fixes: 190d38a52a73 ("mlxsw: spectrum_dpipe: Add support for adjacency table dump") Signed-off-by: Arkadi Sharshevsky Reported-by: David Ahern Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 6ea6435279c0..96fdba78acab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -1093,6 +1093,7 @@ start_again: goto err_ctx_prepare; j = 0; nh_skip = nh_count; + nh_count = 0; mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { if (!mlxsw_sp_nexthop_offload(nh) || mlxsw_sp_nexthop_group_has_ipip(nh)) From de3872cd1863dcd077e966dd467f8e50876302aa Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 24 Oct 2017 11:17:15 +0200 Subject: [PATCH 1360/2177] mlxsw: spectrum: mr: Fix various endianness issues Fix various endianness issues in comparisons and assignments. The fix is entirely cosmetic as all the values fixed are endianness-agnostic. Cleans up sparse warnings: spectrum_mr.c:156:49: warning: restricted __be32 degrades to integer spectrum_mr.c:206:26: warning: restricted __be32 degrades to integer spectrum_mr.c:212:31: warning: incorrect type in assignment (different base types) spectrum_mr.c:212:31: expected restricted __be32 [usertype] addr4 spectrum_mr.c:212:31: got unsigned int spectrum_mr.c:214:32: warning: incorrect type in assignment (different base types) spectrum_mr.c:214:32: expected restricted __be32 [usertype] addr4 spectrum_mr.c:214:32: got unsigned int spectrum_mr.c:461:16: warning: restricted __be32 degrades to integer spectrum_mr.c:461:49: warning: restricted __be32 degrades to integer Fixes: c011ec1bbfd6 ("mlxsw: spectrum: Add the multicast routing offloading logic") Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 1f84bb8e9135..3f7d2d1282b2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -153,7 +153,7 @@ static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route) { switch (mr_route->mr_table->proto) { case MLXSW_SP_L3_PROTO_IPV4: - return mr_route->key.source_mask.addr4 == INADDR_ANY; + return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY); case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ default: @@ -203,15 +203,15 @@ static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table, struct mlxsw_sp_mr_route_key *key, const struct mfc_cache *mfc) { - bool starg = (mfc->mfc_origin == INADDR_ANY); + bool starg = (mfc->mfc_origin == htonl(INADDR_ANY)); memset(key, 0, sizeof(*key)); key->vrid = mr_table->vr_id; key->proto = mr_table->proto; key->group.addr4 = mfc->mfc_mcastgrp; - key->group_mask.addr4 = 0xffffffff; + key->group_mask.addr4 = htonl(0xffffffff); key->source.addr4 = mfc->mfc_origin; - key->source_mask.addr4 = starg ? 0 : 0xffffffff; + key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff); } static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route, @@ -458,7 +458,8 @@ int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, /* If the route is a (*,*) route, abort, as these kind of routes are * used for proxy routes. */ - if (mfc->mfc_origin == INADDR_ANY && mfc->mfc_mcastgrp == INADDR_ANY) { + if (mfc->mfc_origin == htonl(INADDR_ANY) && + mfc->mfc_mcastgrp == htonl(INADDR_ANY)) { dev_warn(mr_table->mlxsw_sp->bus_info->dev, "Offloading proxy routes is not supported.\n"); return -EINVAL; From 6a30dc29a450566e9c8e07dd16c05b11cb41be20 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 24 Oct 2017 11:17:16 +0200 Subject: [PATCH 1361/2177] mlxsw: spectrum: mr: Make the function mlxsw_sp_mr_dev_vif_lookup static The function is only used internally in spectrum_mr.c and is not declared in the header file, thus make it static. Cleans up sparse warning: symbol 'mlxsw_sp_mr_dev_vif_lookup' was not declared. Should it be static? Fixes: c011ec1bbfd6 ("mlxsw: spectrum: Add the multicast routing offloading logic") Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 3f7d2d1282b2..d20b143de3b4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -768,7 +768,7 @@ void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index) mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif); } -struct mlxsw_sp_mr_vif * +static struct mlxsw_sp_mr_vif * mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, const struct net_device *dev) { From ea00aa3a27c3a52c5d2a1444a90e690d55b6c221 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 24 Oct 2017 11:17:17 +0200 Subject: [PATCH 1362/2177] mlxsw: spectrum: mr_tcam: Include the mr_tcam header file Make the spectrum_mr_tcam.c include the spectrum_mr_tcam.h header file. Cleans up sparse warning: symbol 'mlxsw_sp_mr_tcam_ops' was not declared. Should it be static? 
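In general terms (example.h/example.c below are hypothetical, not files touched by this patch), the warning goes away once the defining file sees the declaration:

/* example.h */
int example_sum(int a, int b);

/* example.c */
#include "example.h"	/* declaration visible at the definition site */

int example_sum(int a, int b)
{
	return a + b;
}
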
Fixes: 0e14c7777acb6 ("mlxsw: spectrum: Add the multicast routing hardware logic") Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index 39c21c70ac32..34a0b632e5dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -37,6 +37,7 @@ #include #include +#include "spectrum_mr_tcam.h" #include "reg.h" #include "spectrum.h" #include "core_acl_flex_actions.h" From 2420770b3fe56ca97ecf34e230762cd9f3296dae Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Oct 2017 16:46:45 +0200 Subject: [PATCH 1363/2177] netfilter: nat: use test_and_clear_bit when deleting ct from bysource list We can use a single statement for this. While at it, fixup the comment -- we don't have pernet table/ops anymore, the function is only called from module exit path. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af8345fc4fbd..6c38421e31f9 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -542,17 +542,14 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) if (nf_nat_proto_remove(ct, data)) return 1; - if ((ct->status & IPS_SRC_NAT_DONE) == 0) - return 0; - - /* This netns is being destroyed, and conntrack has nat null binding. + /* This module is being removed and conntrack has nat null binding. * Remove it from bysource hash, as the table will be freed soon. * * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ - clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); - __nf_nat_cleanup_conntrack(ct); + if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status)) + __nf_nat_cleanup_conntrack(ct); /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. From c4f3db15958277c03d1c324894255ea3ecbf86e1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2017 10:47:40 +0200 Subject: [PATCH 1364/2177] netfilter: conntrack: add and use nf_l4proto_log_invalid We currently pass down the l4 protocol to the conntrack ->packet() function, but the only user of this is the debug info decision. Same information can be derived from struct nf_conn. As a first step, add and use a new log function for this, similar to nf_ct_helper_log(). Add __cold annotation -- invalid packets should be infrequent so gcc can consider all call paths that lead to such a function as unlikely. 
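For reference, a minimal sketch of such a helper; example_log_invalid() is hypothetical, and the patch's real function additionally takes the net namespace, address family and protocol number:

#include <linux/kernel.h>
#include <linux/printk.h>

static __printf(2, 3) __cold
void example_log_invalid(const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* %pV expands the caller's format string and arguments in place */
	pr_debug("%s: %pV\n", prefix, &vaf);
	va_end(args);
}

The __printf() attribute lets gcc type-check the caller's format string, while __cold keeps the rarely taken logging path out of the hot code layout.
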
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 10 +++++ net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 18 ++++----- .../netfilter/nf_conntrack_proto_icmpv6.c | 14 ++++--- net/netfilter/nf_conntrack_proto.c | 24 +++++++++++ net/netfilter/nf_conntrack_proto_dccp.c | 3 +- net/netfilter/nf_conntrack_proto_sctp.c | 3 +- net/netfilter/nf_conntrack_proto_tcp.c | 22 +++++----- net/netfilter/nf_conntrack_proto_udp.c | 40 +++++++++---------- 8 files changed, 82 insertions(+), 52 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 738a0307a96b..6d79a061d360 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -152,8 +152,18 @@ extern const struct nla_policy nf_ct_port_nla_policy[]; #define LOG_INVALID(net, proto) \ ((net)->ct.sysctl_log_invalid == (proto) || \ (net)->ct.sysctl_log_invalid == IPPROTO_RAW) + +__printf(5, 6) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, + struct net *net, + u16 pf, u8 protonum, + const char *fmt, ...); #else static inline int LOG_INVALID(struct net *net, int proto) { return 0; } + +static inline __printf(5, 6) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, + u16 pf, u8 protonum, const char *fmt, ...) {} #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index a046c298413a..7281a7b77a0e 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -165,6 +165,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return NF_ACCEPT; } +static void icmp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg); +} + /* Small and modified version of icmp_rcv */ static int icmp_error(struct net *net, struct nf_conn *tmpl, @@ -177,18 +183,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, - NULL, "nf_ct_icmp: short packet "); + icmp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: bad HW ICMP checksum "); + icmp_error_log(skb, net, pf, "bad hw icmp checksum"); return -NF_ACCEPT; } @@ -199,9 +201,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, * discarded. 
*/ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: invalid ICMP type "); + icmp_error_log(skb, net, pf, "invalid icmp type"); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index a9e1fd1a8536..0f227ca4a5a2 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -176,6 +176,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; } +static void icmpv6_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg); +} + static int icmpv6_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -187,17 +193,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { - if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, - "nf_ct_icmpv6: short packet "); + icmpv6_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { - if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, - "nf_ct_icmpv6: ICMPv6 checksum failed "); + icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed"); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b3e489c859ec..bcd3ee270d75 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -27,6 +27,7 @@ #include #include #include +#include static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; @@ -63,6 +64,29 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header, *header = NULL; *table = NULL; } + +__printf(5, 6) +void nf_l4proto_log_invalid(const struct sk_buff *skb, + struct net *net, + u16 pf, u8 protonum, + const char *fmt, ...) 
+{ + struct va_format vaf; + va_list args; + + if (net->ct.sysctl_log_invalid != protonum || + net->ct.sysctl_log_invalid != IPPROTO_RAW) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_proto_%d: %pV ", protonum, &vaf); + va_end(args); +} +EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid); #endif const struct nf_conntrack_l4proto * diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 0f5a4d79f6b8..ef501c7edb96 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -604,8 +604,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; out_invalid: - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg); + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6303a88af12b..aa630c561361 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -522,8 +522,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, } return NF_ACCEPT; out_invalid: - if (LOG_INVALID(net, IPPROTO_SCTP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg); + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index cba1c6ffe51a..14198b2a2e2c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -738,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| [TCPHDR_ACK|TCPHDR_URG] = 1, }; +static void tcp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg); +} + /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, @@ -753,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: short packet "); + tcp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: truncated/malformed packet "); + tcp_error_log(skb, net, pf, "truncated packet"); return -NF_ACCEPT; } @@ -774,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: bad TCP checksum "); + tcp_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } /* Check TCP flags. 
*/ tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid TCP flag combination "); + tcp_error_log(skb, net, pf, "invalid tcp flag combination"); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8af734cd1a94..fc20cf430251 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -99,6 +99,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, } #ifdef CONFIG_NF_CT_PROTO_UDPLITE +static void udplite_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg); +} + static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -112,9 +118,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (!hdr) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: short packet "); + udplite_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } @@ -122,17 +126,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, if (cscov == 0) { cscov = udplen; } else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: invalid checksum coverage "); + udplite_error_log(skb, net, pf, "invalid checksum coverage"); return -NF_ACCEPT; } /* UDPLITE mandates checksums */ if (!hdr->check) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: checksum missing "); + udplite_error_log(skb, net, pf, "checksum missing"); return -NF_ACCEPT; } @@ -140,9 +140,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: bad UDPLite checksum "); + udplite_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } @@ -150,6 +148,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, } #endif +static void udp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg); +} + static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int8_t pf, @@ -162,17 +166,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Header is too small? 
*/ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: short packet "); + udp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: truncated/malformed packet "); + udp_error_log(skb, net, pf, "truncated/malformed packet"); return -NF_ACCEPT; } @@ -186,9 +186,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, * FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: bad UDP checksum "); + udp_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } From 3d0b527bc9dc0e8c4428eb1a98d4cd27bd1114c7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2017 10:47:41 +0200 Subject: [PATCH 1365/2177] netfilter: conntrack: add and use nf_ct_l4proto_log_invalid We currently pass down the l4 protocol to the conntrack ->packet() function, but the only user of this is the debug info decision. Same information can be derived from struct nf_conn. Add a wrapper for the previous patch that extracs the information from nf_conn and passes it to nf_l4proto_log_invalid(). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 14 ++++++----- net/netfilter/nf_conntrack_proto.c | 23 ++++++++++++++++++ net/netfilter/nf_conntrack_proto_dccp.c | 17 ++++--------- net/netfilter/nf_conntrack_proto_tcp.c | 25 +++++++------------- 4 files changed, 45 insertions(+), 34 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 6d79a061d360..5d51255b5bfb 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -149,21 +149,23 @@ int nf_ct_port_nlattr_tuple_size(void); extern const struct nla_policy nf_ct_port_nla_policy[]; #ifdef CONFIG_SYSCTL -#define LOG_INVALID(net, proto) \ - ((net)->ct.sysctl_log_invalid == (proto) || \ - (net)->ct.sysctl_log_invalid == IPPROTO_RAW) - +__printf(3, 4) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...); __printf(5, 6) __cold void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, u16 pf, u8 protonum, const char *fmt, ...); #else -static inline int LOG_INVALID(struct net *net, int proto) { return 0; } - static inline __printf(5, 6) __cold void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, u16 pf, u8 protonum, const char *fmt, ...) {} +static inline __printf(3, 4) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...) 
{ } #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index bcd3ee270d75..83f739e9dc08 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -87,6 +87,29 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb, va_end(args); } EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid); + +__printf(3, 4) +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...) +{ + struct va_format vaf; + struct net *net; + va_list args; + + net = nf_ct_net(ct); + if (likely(net->ct.sysctl_log_invalid == 0)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct), + nf_ct_protonum(ct), "%pV", &vaf); + va_end(args); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid); #endif const struct nf_conntrack_l4proto * diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index ef501c7edb96..49e0abcdc6f4 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, default: dn = dccp_pernet(net); if (dn->dccp_loose == 0) { - msg = "nf_ct_dccp: not picking up existing connection "; + msg = "not picking up existing connection "; goto out_invalid; } case CT_DCCP_REQUEST: break; case CT_DCCP_INVALID: - msg = "nf_ct_dccp: invalid state transition "; + msg = "invalid state transition "; goto out_invalid; } @@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; out_invalid: - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, - NULL, "%s", msg); + nf_ct_l4proto_log_invalid(skb, ct, "%s", msg); return false; } @@ -472,7 +470,6 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, u_int8_t pf, unsigned int *timeouts) { - struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -534,15 +531,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_dccp: invalid packet ignored "); + nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet"); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_dccp: invalid state transition "); + nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition"); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 14198b2a2e2c..dced574f6006 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -702,9 +702,9 @@ static bool tcp_in_window(const struct nf_conn *ct, if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal) res = true; - if (!res && LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: %s ", + if (!res) { + nf_ct_l4proto_log_invalid(skb, ct, + "%s", before(seq, sender->td_maxend + 1) ? in_recv_win ? before(sack, receiver->td_end + 1) ? 
@@ -713,6 +713,7 @@ static bool tcp_in_window(const struct nf_conn *ct, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); + } } pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " @@ -937,10 +938,8 @@ static int tcp_packet(struct nf_conn *ct, IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid packet ignored in " - "state %s ", tcp_conntrack_names[old_state]); + nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " + "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Special case for SYN proxy: when the SYN to the server or @@ -962,9 +961,7 @@ static int tcp_packet(struct nf_conn *ct, pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid state "); + nf_ct_l4proto_log_invalid(skb, ct, "invalid state"); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" @@ -979,9 +976,7 @@ static int tcp_packet(struct nf_conn *ct, /* Detected RFC5961 challenge ACK */ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: challenge-ACK ignored "); + nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored"); return NF_ACCEPT; /* Don't change state */ } break; @@ -991,9 +986,7 @@ static int tcp_packet(struct nf_conn *ct, && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, - NULL, "nf_ct_tcp: invalid RST "); + nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); return -NF_ACCEPT; } if (index == TCP_RST_SET From eb6fad5a4a328b85d3faa8b301b522e3f316b49d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2017 10:47:42 +0200 Subject: [PATCH 1366/2177] netfilter: conntrack: remove pf argument from l4 packet functions not needed/used anymore. 
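Illustrative sketch (not part of the diff): with 'pf' gone, a ->packet() handler that still needs the family derives it from the conntrack entry, and invalid packets are reported through the wrapper added in the previous patch. example_packet() is a made-up tracker used only for illustration; nf_ct_l3num(), nf_ct_refresh_acct() and nf_ct_l4proto_log_invalid() are the existing helpers:

static int example_packet(struct nf_conn *ct, const struct sk_buff *skb,
                          unsigned int dataoff,
                          enum ip_conntrack_info ctinfo,
                          unsigned int *timeouts)
{
        /* the family that used to arrive as 'pf' is still available */
        u16 l3num = nf_ct_l3num(ct);

        if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6) {
                nf_ct_l4proto_log_invalid(skb, ct, "unexpected l3 family");
                return -NF_ACCEPT;
        }

        nf_ct_refresh_acct(ct, ctinfo, skb, *timeouts);
        return NF_ACCEPT;
}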
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 1 - net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 1 - net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 1 - net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_proto_dccp.c | 1 - net/netfilter/nf_conntrack_proto_generic.c | 1 - net/netfilter/nf_conntrack_proto_gre.c | 1 - net/netfilter/nf_conntrack_proto_sctp.c | 1 - net/netfilter/nf_conntrack_proto_tcp.c | 6 ++---- net/netfilter/nf_conntrack_proto_udp.c | 1 - 10 files changed, 3 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 5d51255b5bfb..e06518874144 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -42,7 +42,6 @@ struct nf_conntrack_l4proto { const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts); /* Called when a new connection for this protocol found; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7281a7b77a0e..8969420cecc3 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -81,7 +81,6 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeout) { /* Do not immediately delete the connection after the first diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 0f227ca4a5a2..dca921df28e1 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -94,7 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeout) { /* Do not immediately delete the connection after the first diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 01130392b7c0..28e675150853 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1419,7 +1419,7 @@ repeat: /* Decide what timeout policy we want to apply to this flow. 
*/ timeouts = nf_ct_timeout_lookup(net, ct, l4proto); - ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 49e0abcdc6f4..2a446f4a554c 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -467,7 +467,6 @@ static unsigned int *dccp_get_timeouts(struct net *net) static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 9cd40700842e..1f86ddf6649a 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -60,7 +60,6 @@ static int generic_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeout) { nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 09a90484c27d..a2503005d80b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -244,7 +244,6 @@ static int gre_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { /* If we've seen traffic both ways, this is a GRE connection. diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index aa630c561361..80faf04ddf15 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -306,7 +306,6 @@ static int sctp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { enum sctp_conntrack new_state, old_state; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index dced574f6006..8f283294d70f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -493,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, - const struct tcphdr *tcph, - u_int8_t pf) + const struct tcphdr *tcph) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = tcp_pernet(net); @@ -801,7 +800,6 @@ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); @@ -1013,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct, } if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, - skb, dataoff, th, pf)) { + skb, dataoff, th)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index fc20cf430251..3a5f727103af 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -73,7 +73,6 @@ static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { /* If we've seen traffic 
both ways, this is some kind of UDP From 80055dab5de0c8677bc148c4717ddfc753a9148e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 Oct 2017 01:13:50 +0200 Subject: [PATCH 1367/2177] netfilter: x_tables: make xt_replace_table wait until old rules are not used anymore xt_replace_table relies on table replacement counter retrieval (which uses xt_recseq to synchronize pcpu counters). This is fine, however with large rule set get_counters() can take a very long time -- it needs to synchronize all counters because it has to assume concurrent modifications can occur. Make xt_replace_table synchronize by itself by waiting until all cpus had an even seqcount. This allows a followup patch to copy the counters of the old ruleset without any synchonization after xt_replace_table has completed. Cc: Dan Williams Reviewed-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c83a3b5e1c6c..a164e5123d59 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; + unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1182,14 +1183,28 @@ xt_replace_table(struct xt_table *table, smp_wmb(); table->private = newinfo; + /* make sure all cpus see new ->private value */ + smp_wmb(); + /* * Even though table entries have now been swapped, other CPU's - * may still be using the old entries. This is okay, because - * resynchronization happens because of the locking done - * during the get_counters() routine. + * may still be using the old entries... */ local_bh_enable(); + /* ... so wait for even xt_recseq on all cpus */ + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + u32 seq = raw_read_seqcount(s); + + if (seq & 1) { + do { + cond_resched(); + cpu_relax(); + } while (seq == raw_read_seqcount(s)); + } + } + #ifdef CONFIG_AUDIT if (audit_enabled) { audit_log(current->audit_context, GFP_KERNEL, From d13e7b2e65f6dfbe97b845d75741a970181b9fec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 Oct 2017 01:13:51 +0200 Subject: [PATCH 1368/2177] netfilter: x_tables: don't use seqlock when fetching old counters after previous commit xt_replace_table will wait until all cpus had even seqcount (i.e., no cpu is accessing old ruleset). Add a 'old' counter retrival version that doesn't synchronize counters. Its not needed, the old counters are not in use anymore at this point. This speeds up table replacement on busy systems with large tables (and many cores). 
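To see why the even-seqcount wait added in the previous patch makes this safe, recall how the packet path brackets each rule traversal. The sketch below is illustrative only (example_do_table() is hypothetical and the actual rule walk is omitted); xt_write_recseq_begin()/xt_write_recseq_end() are the existing helpers from include/linux/netfilter/x_tables.h:

static unsigned int example_do_table(struct sk_buff *skb,
                                     struct xt_table *table)
{
        unsigned int addend;

        local_bh_disable();
        addend = xt_write_recseq_begin();  /* per-cpu xt_recseq becomes odd */
        /* ... walk table->private and bump this cpu's counters ... */
        xt_write_recseq_end(addend);       /* back to even: traversal finished */
        local_bh_enable();

        return NF_ACCEPT;
}

Once xt_replace_table() has published the new ->private and then observed every cpu with an even (or advanced) sequence, no cpu can still be inside this bracket on the old ruleset, so the old per-cpu counters are stable and can be summed without the read_seqcount retry loop that get_counters() uses.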
Cc: Dan Williams Cc: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 22 ++++++++++++++++++++-- net/ipv4/netfilter/ip_tables.c | 23 +++++++++++++++++++++-- net/ipv6/netfilter/ip6_tables.c | 22 ++++++++++++++++++++-- 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 9e2770fd00be..f88221aebc9d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -634,6 +634,25 @@ static void get_counters(const struct xt_table_info *t, } } +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct arpt_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; + } + cond_resched(); + } +} + static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; @@ -910,8 +929,7 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 39286e543ee6..4cbe5e80f3bf 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -781,6 +781,26 @@ get_counters(const struct xt_table_info *t, } } +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ipt_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + const struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; /* macro does multi eval of i */ + } + + cond_resched(); + } +} + static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; @@ -1070,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 01bd3ee5ebc6..f06e25065a34 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -800,6 +800,25 @@ get_counters(const struct xt_table_info *t, } } +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ip6t_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + const struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; + } + cond_resched(); + } +} + static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; @@ -1090,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned 
int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) From 28efb0046512e8a13ed9f9bdf0d68d10bbfbe9cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 Oct 2017 09:38:30 +0200 Subject: [PATCH 1369/2177] netfilter: conntrack: make l3proto trackers const previous patches removed all writes to them. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 2 +- include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 919e4e8af327..5534ecca7a5d 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -10,7 +10,7 @@ #define _NF_CONNTRACK_IPV4_H -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; +const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index eaea968f8657..30dc57980866 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -1,7 +1,7 @@ #ifndef _NF_CONNTRACK_IPV6_H #define _NF_CONNTRACK_IPV6_H -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; +extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fe374da4bc13..89af9d88ca21 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -344,7 +344,7 @@ static void ipv4_hooks_unregister(struct net *net) mutex_unlock(®ister_ipv4_hooks); } -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { +const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .l3proto = PF_INET, .pkt_to_tuple = ipv4_pkt_to_tuple, .invert_tuple = ipv4_invert_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index fe01dc953c56..3b80a38f62b8 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -339,7 +339,7 @@ static void ipv6_hooks_unregister(struct net *net) mutex_unlock(®ister_ipv6_hooks); } -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { +const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .l3proto = PF_INET6, .pkt_to_tuple = ipv6_pkt_to_tuple, .invert_tuple = ipv6_invert_tuple, From 67704c2a05860edf8529c8271550148348737a8f Mon Sep 17 00:00:00 2001 From: Harsha Sharma Date: Fri, 13 Oct 2017 04:23:57 +0530 Subject: [PATCH 1370/2177] netfilter: nf_conntrack_h323: Remove typedef struct Remove typedef from struct as linux-kernel coding style tends to avoid using typedefs. 
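The transformation has this shape (hypothetical two-field struct shown for brevity; the real conversion below covers struct bitstr and all of its users):

/* before: anonymous struct hidden behind a typedef */
typedef struct {
        unsigned char *cur;
        unsigned int bit;
} bitstr_t;

/* after: a plain named struct, referenced as 'struct bitstr' */
struct bitstr {
        unsigned char *cur;
        unsigned int bit;
};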
Done using following coccinelle semantic patch @r1@ type T; @@ typedef struct { ... } T; @script:python c1@ T2; T << r1.T; @@ if T[-2:] =="_t" or T[-2:] == "_T": coccinelle.T2 = T[:-2]; else: coccinelle.T2 = T; print T, coccinelle.T2 @r2@ type r1.T; identifier c1.T2; @@ -typedef struct + T2 { ... } -T ; @r3@ type r1.T; identifier c1.T2; @@ -T +struct T2 Signed-off-by: Harsha Sharma Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 80 +++++++++++++------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 89b2e46925c4..7831aa1effc9 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -91,41 +91,41 @@ typedef struct field_t { } field_t; /* Bit Stream */ -typedef struct { +struct bitstr { unsigned char *buf; unsigned char *beg; unsigned char *end; unsigned char *cur; unsigned int bit; -} bitstr_t; +}; /* Tool Functions */ #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} #define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND) -static unsigned int get_len(bitstr_t *bs); -static unsigned int get_bit(bitstr_t *bs); -static unsigned int get_bits(bitstr_t *bs, unsigned int b); -static unsigned int get_bitmap(bitstr_t *bs, unsigned int b); -static unsigned int get_uint(bitstr_t *bs, int b); +static unsigned int get_len(struct bitstr *bs); +static unsigned int get_bit(struct bitstr *bs); +static unsigned int get_bits(struct bitstr *bs, unsigned int b); +static unsigned int get_bitmap(struct bitstr *bs, unsigned int b); +static unsigned int get_uint(struct bitstr *bs, int b); /* Decoder Functions */ -static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level); +static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_numstr(struct bitstr *bs, const struct field_t *f, 
char *base, int level); +static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level); /* Decoder Functions Vector */ -typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int); +typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int); static const decoder_t Decoders[] = { decode_nul, decode_bool, @@ -150,7 +150,7 @@ static const decoder_t Decoders[] = { * Functions ****************************************************************************/ /* Assume bs is aligned && v < 16384 */ -static unsigned int get_len(bitstr_t *bs) +static unsigned int get_len(struct bitstr *bs) { unsigned int v; @@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs) } /****************************************************************************/ -static unsigned int get_bit(bitstr_t *bs) +static unsigned int get_bit(struct bitstr *bs) { unsigned int b = (*bs->cur) & (0x80 >> bs->bit); @@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs) /****************************************************************************/ /* Assume b <= 8 */ -static unsigned int get_bits(bitstr_t *bs, unsigned int b) +static unsigned int get_bits(struct bitstr *bs, unsigned int b) { unsigned int v, l; @@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b) /****************************************************************************/ /* Assume b <= 32 */ -static unsigned int get_bitmap(bitstr_t *bs, unsigned int b) +static unsigned int get_bitmap(struct bitstr *bs, unsigned int b) { unsigned int v, l, shift, bytes; @@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b) /**************************************************************************** * Assume bs is aligned and sizeof(unsigned int) == 4 ****************************************************************************/ -static unsigned int get_uint(bitstr_t *bs, int b) +static unsigned int get_uint(struct bitstr *bs, int b) { unsigned int v = 0; @@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b) } /****************************************************************************/ -static int decode_nul(bitstr_t *bs, const struct field_t *f, +static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bool(bitstr_t *bs, const struct field_t *f, +static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_oid(bitstr_t *bs, const struct field_t *f, +static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level) { int len; @@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f, } 
/****************************************************************************/ -static int decode_int(bitstr_t *bs, const struct field_t *f, +static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_enum(bitstr_t *bs, const struct field_t *f, +static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bitstr(bitstr_t *bs, const struct field_t *f, +static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_numstr(bitstr_t *bs, const struct field_t *f, +static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_octstr(bitstr_t *bs, const struct field_t *f, +static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, +static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_seq(bitstr_t *bs, const struct field_t *f, +static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len; @@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_seqof(bitstr_t *bs, const struct field_t *f, +static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int count, effective_count = 0, i, len = 0; @@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f, /****************************************************************************/ -static int decode_choice(bitstr_t *bs, const struct field_t *f, +static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int type, ext, len = 0; @@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras) FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT, 0, _RasMessage }; - bitstr_t bs; + struct bitstr bs; bs.buf = bs.beg = bs.cur = buf; bs.end = buf + sz; @@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg, FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT, 0, _H323_UserInformation }; - bitstr_t bs; + struct bitstr 
bs; bs.buf = buf; bs.beg = bs.cur = beg; @@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz, FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4, DECODE | EXT, 0, _MultimediaSystemControlMessage }; - bitstr_t bs; + struct bitstr bs; bs.buf = bs.beg = bs.cur = buf; bs.end = buf + sz; From ce49480dba8666cba0106e8e31a942c9ce4c438a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 15 Oct 2017 11:00:34 +0200 Subject: [PATCH 1371/2177] netfilter: xt_connlimit: don't store address in the conn nodes Only stored, never read. This is a leftover from commit 7d08487777c8 ("netfilter: connlimit: use rbtree for per-host conntrack obj storage"), which added the rbtree node struct that stores the address instead. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connlimit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index ffa8eec980e9..ce2870428631 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -46,7 +46,6 @@ struct xt_connlimit_conn { struct hlist_node node; struct nf_conntrack_tuple tuple; - union nf_inet_addr addr; }; struct xt_connlimit_rb { @@ -125,7 +124,6 @@ static bool add_hlist(struct hlist_head *head, if (conn == NULL) return false; conn->tuple = *tuple; - conn->addr = *addr; hlist_add_head(&conn->node, head); return true; } @@ -270,7 +268,6 @@ count_tree(struct net *net, struct rb_root *root, } conn->tuple = *tuple; - conn->addr = *addr; rbconn->addr = *addr; INIT_HLIST_HEAD(&rbconn->hhead); From e8daf27c2fea38e16a791780952aa5dff1c409fe Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 9 Oct 2017 07:01:14 +0200 Subject: [PATCH 1372/2177] netfilter: nf_ct_h323: Out Of Bound Read in Netfilter Conntrack Add missing counter decrement to prevent out of bounds memory read. Signed-off-by: Eric Sesterhenn Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 7831aa1effc9..cf1bf2605c10 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931) if (sz < 1) break; len = *p++; + sz--; if (sz < len) break; p += len; From 908d140a87a794bf89717ceae54aba5ce86c52e4 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Sat, 21 Oct 2017 00:25:15 +0300 Subject: [PATCH 1373/2177] ip6_tunnel: Allow rcv/xmit even if remote address is a local address Currently, ip6_tnl_xmit_ctl drops tunneled packets if the remote address (outer v6 destination) is one of host's locally configured addresses. Same applies to ip6_tnl_rcv_ctl: it drops packets if the remote address (outer v6 source) is a local address. This prevents using ipxip6 (and ip6_gre) tunnels whose local/remote endpoints are on same host; OTOH v4 tunnels (ipip or gre) allow such configurations. An example where this proves useful is a system where entities are identified by their unique v6 addresses, and use tunnels to encapsulate traffic between them. The limitation prevents placing several entities on same host. Introduce IP6_TNL_F_ALLOW_LOCAL_REMOTE which allows to bypass this restriction. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. 
Miller --- include/uapi/linux/ip6_tunnel.h | 2 ++ net/ipv6/ip6_tunnel.c | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h index 425926c467d7..ffebbe365478 100644 --- a/include/uapi/linux/ip6_tunnel.h +++ b/include/uapi/linux/ip6_tunnel.h @@ -20,6 +20,8 @@ #define IP6_TNL_F_RCV_DSCP_COPY 0x10 /* copy fwmark from inner packet */ #define IP6_TNL_F_USE_ORIG_FWMARK 0x20 +/* allow remote endpoint on the local node */ +#define IP6_TNL_F_ALLOW_LOCAL_REMOTE 0x40 struct ip6_tnl_parm { char name[IFNAMSIZ]; /* name of tunnel device */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4212879ff35e..439d65f7e094 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -770,7 +770,8 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, if ((ipv6_addr_is_multicast(laddr) || likely(ipv6_chk_addr(net, laddr, ldev, 0))) && - likely(!ipv6_chk_addr(net, raddr, NULL, 0))) + ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) || + likely(!ipv6_chk_addr(net, raddr, NULL, 0)))) ret = 1; } return ret; @@ -1000,7 +1001,8 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); - else if (!ipv6_addr_is_multicast(raddr) && + else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && + !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); From 0b4c6841fee03e096b735074a0c4aab3a8e92986 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Oct 2017 23:53:07 -0700 Subject: [PATCH 1374/2177] bpf: use the same condition in perf event set/free bpf handler This is a cleanup such that doing the same check in perf_event_free_bpf_prog as we already do in perf_event_set_bpf_prog step. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 31ee304a5844..9f78a6825bbe 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8191,10 +8191,10 @@ static void perf_event_free_bpf_prog(struct perf_event *event) { struct bpf_prog *prog; - perf_event_free_bpf_handler(event); - - if (!event->tp_event) + if (event->attr.type != PERF_TYPE_TRACEPOINT) { + perf_event_free_bpf_handler(event); return; + } prog = event->tp_event->prog; if (prog && event->tp_event->bpf_prog_owner == event) { From e87c6bc3852b981e71c757be20771546ce9f76f3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Oct 2017 23:53:08 -0700 Subject: [PATCH 1375/2177] bpf: permit multiple bpf attachments for a single perf event This patch enables multiple bpf attachments for a kprobe/uprobe/tracepoint single trace event. Each trace_event keeps a list of attached perf events. When an event happens, all attached bpf programs will be executed based on the order of attachment. A global bpf_event_mutex lock is introduced to protect prog_array attaching and detaching. An alternative will be introduce a mutex lock in every trace_event_call structure, but it takes a lot of extra memory. So a global bpf_event_mutex lock is a good compromise. The bpf prog detachment involves allocation of memory. If the allocation fails, a dummy do-nothing program will replace to-be-detached program in-place. 
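The run-time semantics can be summarized by the sketch below. It is illustrative only (run_attached_progs() is a made-up name) and restates what the __BPF_PROG_RUN_ARRAY macro in this patch does, minus the RCU handling and the NULL-array check: all attached programs run in attachment order and their return values are ANDed, so the event is not stored if any program returns 0, while the dummy program installed on a failed detach always returns 1 and therefore never filters anything.

static unsigned int run_attached_progs(struct bpf_prog_array *array,
                                       void *ctx)
{
        struct bpf_prog **prog = array->progs;  /* NULL-terminated */
        unsigned int ret = 1;

        while (*prog) {
                ret &= BPF_PROG_RUN(*prog, ctx);  /* 0 means: filter out */
                prog++;
        }
        return ret;
}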
Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/bpf.h | 30 ++++++++++--- include/linux/trace_events.h | 43 ++++++++++++++++-- include/trace/perf.h | 6 +-- kernel/bpf/core.c | 81 ++++++++++++++++++++++++++++++++++ kernel/events/core.c | 26 ++++------- kernel/trace/bpf_trace.c | 82 ++++++++++++++++++++++++++++++++--- kernel/trace/trace_kprobe.c | 6 +-- kernel/trace/trace_syscalls.c | 34 ++++++++------- kernel/trace/trace_uprobe.c | 3 +- 9 files changed, 255 insertions(+), 56 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1e334b248ff6..172be7faf7ba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, __u32 __user *prog_ids, u32 cnt); -#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog); +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array); + +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ - struct bpf_prog **_prog; \ + struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array *_array; \ u32 _ret = 1; \ rcu_read_lock(); \ - _prog = rcu_dereference(array)->progs; \ - for (; *_prog; _prog++) \ - _ret &= func(*_prog, ctx); \ + _array = rcu_dereference(array); \ + if (unlikely(check_non_null && !_array))\ + goto _out; \ + _prog = _array->progs; \ + while ((__prog = READ_ONCE(*_prog))) { \ + _ret &= func(__prog, ctx); \ + _prog++; \ + } \ +_out: \ rcu_read_unlock(); \ _ret; \ }) +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + +#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2e0f22298fe9..fc6aeca945db 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -271,14 +271,37 @@ struct trace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; - struct bpf_prog *prog; - struct perf_event *bpf_prog_owner; + struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; +#ifdef CONFIG_PERF_EVENTS +static inline bool bpf_prog_array_valid(struct trace_event_call *call) +{ + /* + * This inline function checks whether call->prog_array + * is valid or not. The function is called in various places, + * outside rcu_read_lock/unlock, as a heuristic to speed up execution. + * + * If this function returns true, and later call->prog_array + * becomes false inside rcu_read_lock/unlock region, + * we bail out then. If this function return false, + * there is a risk that we might miss a few events if the checking + * were delayed until inside rcu_read_lock/unlock region and + * call->prog_array happened to become non-NULL then. + * + * Here, READ_ONCE() is used instead of rcu_access_pointer(). + * rcu_access_pointer() requires the actual definition of + * "struct bpf_prog_array" while READ_ONCE() only needs + * a declaration of the same type. 
+ */ + return !!READ_ONCE(call->prog_array); +} +#endif + static inline const char * trace_event_name(struct trace_event_call *call) { @@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file) } #ifdef CONFIG_BPF_EVENTS -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); +int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); +void perf_event_detach_bpf_prog(struct perf_event *event); #else -static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } + +static inline int +perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } + #endif enum { @@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, { perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } + #endif #endif /* _LINUX_TRACE_EVENT_H */ diff --git a/include/trace/perf.h b/include/trace/perf.h index 04fe68bbe767..14f127b6acf5 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto) \ struct trace_event_call *event_call = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ struct trace_event_raw_##call *entry; \ - struct bpf_prog *prog = event_call->prog; \ struct pt_regs *__regs; \ u64 __count = 1; \ struct task_struct *__task = NULL; \ @@ -46,8 +45,9 @@ perf_trace_##call(void *__data, proto) \ __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \ \ head = this_cpu_ptr(event_call->perf_events); \ - if (!prog && __builtin_constant_p(!__task) && !__task && \ - hlist_empty(head)) \ + if (!bpf_prog_array_valid(event_call) && \ + __builtin_constant_p(!__task) && !__task && \ + hlist_empty(head)) \ return; \ \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8e7c8bf2b687..7fe448799d76 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1394,6 +1394,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +static unsigned int __bpf_prog_ret1(const void *ctx, + const struct bpf_insn *insn) +{ + return 1; +} + +static struct bpf_prog_dummy { + struct bpf_prog prog; +} dummy_bpf_prog = { + .prog = { + .bpf_func = __bpf_prog_ret1, + }, +}; + /* to avoid allocating empty bpf_prog_array for cgroups that * don't have bpf program attached use one global 'empty_prog_array' * It will not be modified the caller of bpf_prog_array_alloc() @@ -1463,6 +1477,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, return 0; } +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog) +{ + struct bpf_prog **prog = progs->progs; + + for (; *prog; prog++) + if (*prog == old_prog) { + WRITE_ONCE(*prog, &dummy_bpf_prog.prog); + break; + } +} + +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array) +{ + int new_prog_cnt, carry_prog_cnt = 0; + struct bpf_prog **existing_prog; + struct bpf_prog_array *array; + int new_prog_idx = 0; + + /* Figure out how many existing progs we need to carry over to + * the new array. 
+ */ + if (old_array) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) { + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + carry_prog_cnt++; + if (*existing_prog == include_prog) + return -EEXIST; + } + } + + /* How many progs (not NULL) will be in the new array? */ + new_prog_cnt = carry_prog_cnt; + if (include_prog) + new_prog_cnt += 1; + + /* Do we have any prog (not NULL) in the new array? */ + if (!new_prog_cnt) { + *new_array = NULL; + return 0; + } + + /* +1 as the end of prog_array is marked with NULL */ + array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL); + if (!array) + return -ENOMEM; + + /* Fill in the new prog array */ + if (carry_prog_cnt) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + array->progs[new_prog_idx++] = *existing_prog; + } + if (include_prog) + array->progs[new_prog_idx++] = include_prog; + array->progs[new_prog_idx] = NULL; + *new_array = array; + return 0; +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/events/core.c b/kernel/events/core.c index 9f78a6825bbe..9660ee65fbef 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7954,11 +7954,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct pt_regs *regs, struct hlist_head *head, struct task_struct *task) { - struct bpf_prog *prog = call->prog; - - if (prog) { + if (bpf_prog_array_valid(call)) { *(struct pt_regs **)raw_data = regs; - if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) { + if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; } @@ -8147,13 +8145,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) { bool is_kprobe, is_tracepoint, is_syscall_tp; struct bpf_prog *prog; + int ret; if (event->attr.type != PERF_TYPE_TRACEPOINT) return perf_event_set_bpf_handler(event, prog_fd); - if (event->tp_event->prog) - return -EEXIST; - is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; is_syscall_tp = is_syscall_trace_event(event->tp_event); @@ -8181,26 +8177,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EACCES; } } - event->tp_event->prog = prog; - event->tp_event->bpf_prog_owner = event; - return 0; + ret = perf_event_attach_bpf_prog(event, prog); + if (ret) + bpf_prog_put(prog); + return ret; } static void perf_event_free_bpf_prog(struct perf_event *event) { - struct bpf_prog *prog; - if (event->attr.type != PERF_TYPE_TRACEPOINT) { perf_event_free_bpf_handler(event); return; } - - prog = event->tp_event->prog; - if (prog && event->tp_event->bpf_prog_owner == event) { - event->tp_event->prog = NULL; - bpf_prog_put(prog); - } + perf_event_detach_bpf_prog(event); } #else diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3126da2f468a..b65011d320e3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -17,7 +17,7 @@ /** * trace_call_bpf - invoke BPF program - * @prog: BPF program + * @call: tracepoint event * @ctx: opaque context pointer * * kprobe handlers execute BPF programs via this helper. 
@@ -29,7 +29,7 @@ * 1 - store kprobe event into ring buffer * Other values are reserved and currently alias to 1 */ -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { unsigned int ret; @@ -49,9 +49,22 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) goto out; } - rcu_read_lock(); - ret = BPF_PROG_RUN(prog, ctx); - rcu_read_unlock(); + /* + * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock + * to all call sites, we did a bpf_prog_array_valid() there to check + * whether call->prog_array is empty or not, which is + * a heurisitc to speed up execution. + * + * If bpf_prog_array_valid() fetched prog_array was + * non-NULL, we go into trace_call_bpf() and do the actual + * proper rcu_dereference() under RCU lock. + * If it turns out that prog_array is NULL then, we bail out. + * For the opposite, if the bpf_prog_array_valid() fetched pointer + * was NULL, you'll skip the prog_array with the risk of missing + * out of events when it was updated in between this and the + * rcu_dereference() which is accepted risk. + */ + ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN); out: __this_cpu_dec(bpf_prog_active); @@ -741,3 +754,62 @@ const struct bpf_verifier_ops perf_event_verifier_ops = { const struct bpf_prog_ops perf_event_prog_ops = { }; + +static DEFINE_MUTEX(bpf_event_mutex); + +int perf_event_attach_bpf_prog(struct perf_event *event, + struct bpf_prog *prog) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + int ret = -EEXIST; + + mutex_lock(&bpf_event_mutex); + + if (event->prog) + goto out; + + old_array = rcu_dereference_protected(event->tp_event->prog_array, + lockdep_is_held(&bpf_event_mutex)); + ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); + if (ret < 0) + goto out; + + /* set the new array to event->tp_event and set event->prog */ + event->prog = prog; + rcu_assign_pointer(event->tp_event->prog_array, new_array); + bpf_prog_array_free(old_array); + +out: + mutex_unlock(&bpf_event_mutex); + return ret; +} + +void perf_event_detach_bpf_prog(struct perf_event *event) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + int ret; + + mutex_lock(&bpf_event_mutex); + + if (!event->prog) + goto out; + + old_array = rcu_dereference_protected(event->tp_event->prog_array, + lockdep_is_held(&bpf_event_mutex)); + + ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); + if (ret < 0) { + bpf_prog_array_delete_safe(old_array, event->prog); + } else { + rcu_assign_pointer(event->tp_event->prog_array, new_array); + bpf_prog_array_free(old_array); + } + + bpf_prog_put(event->prog); + event->prog = NULL; + +out: + mutex_unlock(&bpf_event_mutex); +} diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8a907e12b6b9..abf92e478cfb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1174,13 +1174,12 @@ static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; - struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; head = this_cpu_ptr(call->perf_events); @@ -1210,13 +1209,12 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { 
struct trace_event_call *call = &tk->tp.call; - struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; head = this_cpu_ptr(call->perf_events); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 696afe72d3b1..71a6af34d7a9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -559,9 +559,10 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); static int sys_perf_refcount_enter; static int sys_perf_refcount_exit; -static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, - struct syscall_metadata *sys_data, - struct syscall_trace_enter *rec) { +static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs, + struct syscall_metadata *sys_data, + struct syscall_trace_enter *rec) +{ struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; @@ -573,7 +574,7 @@ static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, param.syscall_nr = rec->nr; for (i = 0; i < sys_data->nb_args; i++) param.args[i] = rec->args[i]; - return trace_call_bpf(prog, ¶m); + return trace_call_bpf(call, ¶m); } static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) @@ -581,7 +582,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; - struct bpf_prog *prog; + bool valid_prog_array; int syscall_nr; int rctx; int size; @@ -596,9 +597,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (!sys_data) return; - prog = READ_ONCE(sys_data->enter_event->prog); head = this_cpu_ptr(sys_data->enter_event->perf_events); - if (!prog && hlist_empty(head)) + valid_prog_array = bpf_prog_array_valid(sys_data->enter_event); + if (!valid_prog_array && hlist_empty(head)) return; /* get the size after alignment with the u32 buffer size field */ @@ -614,7 +615,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) || + if ((valid_prog_array && + !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; @@ -659,8 +661,9 @@ static void perf_sysenter_disable(struct trace_event_call *call) mutex_unlock(&syscall_trace_lock); } -static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, - struct syscall_trace_exit *rec) { +static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs, + struct syscall_trace_exit *rec) +{ struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; @@ -670,7 +673,7 @@ static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, *(struct pt_regs **)¶m = regs; param.syscall_nr = rec->nr; param.ret = rec->ret; - return trace_call_bpf(prog, ¶m); + return trace_call_bpf(call, ¶m); } static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) @@ -678,7 +681,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; struct hlist_head *head; - struct bpf_prog *prog; + bool valid_prog_array; int 
syscall_nr; int rctx; int size; @@ -693,9 +696,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!sys_data) return; - prog = READ_ONCE(sys_data->exit_event->prog); head = this_cpu_ptr(sys_data->exit_event->perf_events); - if (!prog && hlist_empty(head)) + valid_prog_array = bpf_prog_array_valid(sys_data->exit_event); + if (!valid_prog_array && hlist_empty(head)) return; /* We can probably do that at build time */ @@ -709,7 +712,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - if ((prog && !perf_call_bpf_exit(prog, regs, rec)) || + if ((valid_prog_array && + !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 4525e0271a53..153c0e411461 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1113,13 +1113,12 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, { struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; - struct bpf_prog *prog = call->prog; struct hlist_head *head; void *data; int size, esize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); From a678be5cc747bafc8c66f2fe00a103422587a5eb Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Oct 2017 23:53:09 -0700 Subject: [PATCH 1376/2177] bpf: add a test case to test single tp multiple bpf attachment The bpf sample program syscall_tp is modified to show attachment of more than bpf programs for a particular kernel tracepoint. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- samples/bpf/syscall_tp_user.c | 66 ++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index a3cb91ebf4e7..9169d3207f18 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -23,6 +23,13 @@ * This requires kernel CONFIG_FTRACE_SYSCALLS to be set. 
*/ +static void usage(const char *cmd) +{ + printf("USAGE: %s [-i num_progs] [-h]\n", cmd); + printf(" -i num_progs # number of progs of the test\n"); + printf(" -h # help\n"); +} + static void verify_map(int map_id) { __u32 key = 0; @@ -32,22 +39,29 @@ static void verify_map(int map_id) fprintf(stderr, "map_lookup failed: %s\n", strerror(errno)); return; } - if (val == 0) + if (val == 0) { fprintf(stderr, "failed: map #%d returns value 0\n", map_id); + return; + } + val = 0; + if (bpf_map_update_elem(map_id, &key, &val, BPF_ANY) != 0) { + fprintf(stderr, "map_update failed: %s\n", strerror(errno)); + return; + } } -int main(int argc, char **argv) +static int test(char *filename, int num_progs) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; - char filename[256]; - int fd; + int i, fd, map0_fds[num_progs], map1_fds[num_progs]; - setrlimit(RLIMIT_MEMLOCK, &r); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - if (load_bpf_file(filename)) { - fprintf(stderr, "%s", bpf_log_buf); - return 1; + for (i = 0; i < num_progs; i++) { + if (load_bpf_file(filename)) { + fprintf(stderr, "%s", bpf_log_buf); + return 1; + } + printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]); + map0_fds[i] = map_fd[0]; + map1_fds[i] = map_fd[1]; } /* current load_bpf_file has perf_event_open default pid = -1 @@ -64,8 +78,34 @@ int main(int argc, char **argv) close(fd); /* verify the map */ - verify_map(map_fd[0]); - verify_map(map_fd[1]); + for (i = 0; i < num_progs; i++) { + verify_map(map0_fds[i]); + verify_map(map1_fds[i]); + } return 0; } + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int opt, num_progs = 1; + char filename[256]; + + while ((opt = getopt(argc, argv, "i:h")) != -1) { + switch (opt) { + case 'i': + num_progs = atoi(optarg); + break; + case 'h': + default: + usage(argv[0]); + return 0; + } + } + + setrlimit(RLIMIT_MEMLOCK, &r); + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + return test(filename, num_progs); +} From fbb85b3c0112e6aec82863358bdb1b25c7edaec1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Oct 2017 13:54:17 +0800 Subject: [PATCH 1377/2177] bridge: remove rtmsg_ifinfo called in add_del_if Since commit dc709f375743 ("rtnetlink: bring NETDEV_CHANGEUPPER event process back in rtnetlink_event"), rtnetlink_event would process the NETDEV_CHANGEUPPER event and send a notification to userspace. In add_del_if, it would generate NETDEV_CHANGEUPPER event by whether netdev_master_upper_dev_link or netdev_upper_dev_unlink. There's no need to call rtmsg_ifinfo to send the notification any more. So this patch is to remove it from add_del_if also to avoid redundant notifications. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/bridge/br_ioctl.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 8f29103935a3..16d01a3ff33f 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -102,9 +102,6 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) else ret = br_del_if(br, dev); - if (!ret) - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL); - return ret; } From 4597efe312567591e253248246ef58d77df471c6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Oct 2017 13:54:18 +0800 Subject: [PATCH 1378/2177] bonding: remove rtmsg_ifinfo called in bond_master_upper_dev_link Since commit 42e52bf9e3ae ("net: add netnotifier event for upper device change"), netdev_master_upper_dev_link has generated NETDEV_CHANGEUPPER event which would send a notification to userspace in rtnetlink_event. There's no need to call rtmsg_ifinfo to send the notification any more. So this patch is to remove it from bond_master_upper_dev_link as well as bond_upper_dev_unlink to avoid the redundant notifications. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 172eeeb68152..18b58e1376f1 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1221,22 +1221,17 @@ static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave, struct netlink_ext_ack *extack) { struct netdev_lag_upper_info lag_upper_info; - int err; lag_upper_info.tx_type = bond_lag_tx_type(bond); - err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave, - &lag_upper_info, extack); - if (err) - return err; - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL); - return 0; + + return netdev_master_upper_dev_link(slave->dev, bond->dev, slave, + &lag_upper_info, extack); } static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave) { netdev_upper_dev_unlink(slave->dev, bond->dev); slave->dev->flags &= ~IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL); } static struct slave *bond_alloc_slave(struct bonding *bond) From eeda3fb9e132bd5f9592c4c664ba944cf1ac5b9f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Oct 2017 13:54:19 +0800 Subject: [PATCH 1379/2177] rtnetlink: bring NETDEV_CHANGELOWERSTATE event process back to rtnetlink_event This patch is to bring NETDEV_CHANGELOWERSTATE event process back to rtnetlink_event so that bonding could use it instead of calling rtmsg_ifinfo to send a notification to userspace after netdev lower state is changed in the later patch. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index df8dba998c48..854a848842ea 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4385,6 +4385,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGEUPPER: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: + case NETDEV_CHANGELOWERSTATE: case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), GFP_KERNEL, NULL); From ef5201c83d1400570a3b6f004ad7a23d71934411 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Oct 2017 13:54:20 +0800 Subject: [PATCH 1380/2177] bonding: remove rtmsg_ifinfo called after bond_lower_state_changed After the patch 'rtnetlink: bring NETDEV_CHANGELOWERSTATE event process back to rtnetlink_event', bond_lower_state_changed would generate a NETDEV_CHANGELOWERSTATE event which would send a notification to userspace in rtnetlink_event. There's no need to call rtmsg_ifinfo to send the notification any more. So this patch is to remove it from these places after bond_lower_state_changed. Besides, after this, rtmsg_ifinfo no longer needs to be exported. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/bonding.h | 4 ---- net/core/rtnetlink.c | 1 - 2 files changed, 5 deletions(-) diff --git a/include/net/bonding.h b/include/net/bonding.h index 2860cc66c2bb..f801fc940b29 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -330,7 +330,6 @@ static inline void bond_set_active_slave(struct slave *slave) slave->backup = 0; bond_queue_slave_event(slave); bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -340,7 +339,6 @@ static inline void bond_set_backup_slave(struct slave *slave) slave->backup = 1; bond_queue_slave_event(slave); bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -353,7 +351,6 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { bond_lower_state_changed(slave); - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); bond_queue_slave_event(slave); slave->should_notify = 0; } else { @@ -385,7 +382,6 @@ static inline void bond_slave_state_notify(struct bonding *bond) bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify) { bond_lower_state_changed(tmp); - rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC); tmp->should_notify = 0; } } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 854a848842ea..de24d394c69e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2989,7 +2989,6 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL); } -EXPORT_SYMBOL(rtmsg_ifinfo); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid) From 9c3b57518363577d4e2ea1964ef4fa03e100beaa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:45:31 -0700 Subject: [PATCH 1381/2177] net: sctp: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Vlad Yasevich Cc: Neil Horman Cc: "David S. Miller" Cc: linux-sctp@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S.
Miller --- include/net/sctp/sm.h | 10 ++--- net/sctp/associola.c | 3 +- net/sctp/protocol.c | 7 ++-- net/sctp/sm_sideeffect.c | 85 ++++++++++++++++++++++++---------------- net/sctp/transport.c | 13 +++--- 5 files changed, 66 insertions(+), 52 deletions(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2db3d3a9ce1d..13cc4963e905 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -72,7 +72,7 @@ typedef enum sctp_disposition (sctp_state_fn_t) ( const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); -typedef void (sctp_timer_event_t) (unsigned long); +typedef void (sctp_timer_event_t) (struct timer_list *); struct sctp_sm_table_entry { sctp_state_fn_t *fn; const char *name; @@ -314,10 +314,10 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type, void *event_arg, gfp_t gfp); /* 2nd level prototypes */ -void sctp_generate_t3_rtx_event(unsigned long peer); -void sctp_generate_heartbeat_event(unsigned long peer); -void sctp_generate_reconf_event(unsigned long peer); -void sctp_generate_proto_unreach_event(unsigned long peer); +void sctp_generate_t3_rtx_event(struct timer_list *t); +void sctp_generate_heartbeat_event(struct timer_list *t); +void sctp_generate_reconf_event(struct timer_list *t); +void sctp_generate_proto_unreach_event(struct timer_list *t); void sctp_ootb_pkt_free(struct sctp_packet *packet); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index dfb9651e818b..69394f4d6091 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -149,8 +149,7 @@ static struct sctp_association *sctp_association_init( /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) - setup_timer(&asoc->timers[i], sctp_timer_events[i], - (unsigned long)asoc); + timer_setup(&asoc->timers[i], sctp_timer_events[i], 0); /* Pull default initialization values from the sock options. * Note: This assumes that the values have already been diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fcd80feb293f..f5172c21349b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -622,9 +622,9 @@ static void sctp_v4_ecn_capable(struct sock *sk) INET_ECN_xmit(sk); } -static void sctp_addr_wq_timeout_handler(unsigned long arg) +static void sctp_addr_wq_timeout_handler(struct timer_list *t) { - struct net *net = (struct net *)arg; + struct net *net = from_timer(net, t, sctp.addr_wq_timer); struct sctp_sockaddr_entry *addrw, *temp; struct sctp_sock *sp; @@ -1304,8 +1304,7 @@ static int __net_init sctp_defaults_init(struct net *net) INIT_LIST_HEAD(&net->sctp.auto_asconf_splist); spin_lock_init(&net->sctp.addr_wq_lock); net->sctp.addr_wq_timer.expires = 0; - setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, - (unsigned long)net); + timer_setup(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 0); return 0; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 402bfbb888cd..1c2699b424af 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -243,9 +243,10 @@ nomem: /* When the T3-RTX timer expires, it calls this function to create the * relevant state machine event. 
*/ -void sctp_generate_t3_rtx_event(unsigned long peer) +void sctp_generate_t3_rtx_event(struct timer_list *t) { - struct sctp_transport *transport = (struct sctp_transport *) peer; + struct sctp_transport *transport = + from_timer(transport, t, T3_rtx_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); @@ -319,50 +320,63 @@ out_unlock: sctp_association_put(asoc); } -static void sctp_generate_t1_cookie_event(unsigned long data) +static void sctp_generate_t1_cookie_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_COOKIE]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE); } -static void sctp_generate_t1_init_event(unsigned long data) +static void sctp_generate_t1_init_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_INIT]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT); } -static void sctp_generate_t2_shutdown_event(unsigned long data) +static void sctp_generate_t2_shutdown_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN); } -static void sctp_generate_t4_rto_event(unsigned long data) +static void sctp_generate_t4_rto_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T4_RTO]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO); } -static void sctp_generate_t5_shutdown_guard_event(unsigned long data) +static void sctp_generate_t5_shutdown_guard_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *)data; + struct sctp_association *asoc = + from_timer(asoc, t, + timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD); } /* sctp_generate_t5_shutdown_guard_event() */ -static void sctp_generate_autoclose_event(unsigned long data) +static void sctp_generate_autoclose_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE); } /* Generate a heart beat event. If the sock is busy, reschedule. Make * sure that the transport is still valid. */ -void sctp_generate_heartbeat_event(unsigned long data) +void sctp_generate_heartbeat_event(struct timer_list *t) { - struct sctp_transport *transport = (struct sctp_transport *) data; + struct sctp_transport *transport = from_timer(transport, t, hb_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); @@ -405,9 +419,10 @@ out_unlock: /* Handle the timeout of the ICMP protocol unreachable timer. Trigger * the correct state machine transition that will close the association. 
*/ -void sctp_generate_proto_unreach_event(unsigned long data) +void sctp_generate_proto_unreach_event(struct timer_list *t) { - struct sctp_transport *transport = (struct sctp_transport *)data; + struct sctp_transport *transport = + from_timer(transport, t, proto_unreach_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); @@ -439,9 +454,10 @@ out_unlock: } /* Handle the timeout of the RE-CONFIG timer. */ -void sctp_generate_reconf_event(unsigned long data) +void sctp_generate_reconf_event(struct timer_list *t) { - struct sctp_transport *transport = (struct sctp_transport *)data; + struct sctp_transport *transport = + from_timer(transport, t, reconf_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); @@ -471,24 +487,27 @@ out_unlock: } /* Inject a SACK Timeout event into the state machine. */ -static void sctp_generate_sack_event(unsigned long data) +static void sctp_generate_sack_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *)data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_SACK]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK); } sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { - NULL, - sctp_generate_t1_cookie_event, - sctp_generate_t1_init_event, - sctp_generate_t2_shutdown_event, - NULL, - sctp_generate_t4_rto_event, - sctp_generate_t5_shutdown_guard_event, - NULL, - NULL, - sctp_generate_sack_event, - sctp_generate_autoclose_event, + [SCTP_EVENT_TIMEOUT_NONE] = NULL, + [SCTP_EVENT_TIMEOUT_T1_COOKIE] = sctp_generate_t1_cookie_event, + [SCTP_EVENT_TIMEOUT_T1_INIT] = sctp_generate_t1_init_event, + [SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = sctp_generate_t2_shutdown_event, + [SCTP_EVENT_TIMEOUT_T3_RTX] = NULL, + [SCTP_EVENT_TIMEOUT_T4_RTO] = sctp_generate_t4_rto_event, + [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] = + sctp_generate_t5_shutdown_guard_event, + [SCTP_EVENT_TIMEOUT_HEARTBEAT] = NULL, + [SCTP_EVENT_TIMEOUT_RECONF] = NULL, + [SCTP_EVENT_TIMEOUT_SACK] = sctp_generate_sack_event, + [SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sctp_generate_autoclose_event, }; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2d9bd3776bc8..1e5a22430cf5 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -87,14 +87,11 @@ static struct sctp_transport *sctp_transport_init(struct net *net, INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); - setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, - (unsigned long)peer); - setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, - (unsigned long)peer); - setup_timer(&peer->reconf_timer, sctp_generate_reconf_event, - (unsigned long)peer); - setup_timer(&peer->proto_unreach_timer, - sctp_generate_proto_unreach_event, (unsigned long)peer); + timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0); + timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0); + timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0); + timer_setup(&peer->proto_unreach_timer, + sctp_generate_proto_unreach_event, 0); /* Initialize the 64-bit random nonce sent with heartbeat. 
*/ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); From 8dbd05ff5c4e50a3e5b1ed4089c2d0b4210379c6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:45:39 -0700 Subject: [PATCH 1382/2177] net: ax25: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Joerg Reuter Cc: Ralf Baechle Cc: "David S. Miller" Cc: linux-hams@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 7 +++---- net/ax25/ax25_ds_timer.c | 9 ++++----- net/ax25/ax25_timer.c | 41 ++++++++++++++++++++-------------------- 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index f3f9d18891de..06eac1f50c5e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -268,9 +268,9 @@ void ax25_destroy_socket(ax25_cb *); /* * Handler for deferred kills. */ -static void ax25_destroy_timer(unsigned long data) +static void ax25_destroy_timer(struct timer_list *t) { - ax25_cb *ax25=(ax25_cb *)data; + ax25_cb *ax25 = from_timer(ax25, t, dtimer); struct sock *sk; sk=ax25->sk; @@ -326,8 +326,7 @@ void ax25_destroy_socket(ax25_cb *ax25) if (ax25->sk != NULL) { if (sk_has_allocations(ax25->sk)) { /* Defer: outstanding buffers */ - setup_timer(&ax25->dtimer, ax25_destroy_timer, - (unsigned long)ax25); + timer_setup(&ax25->dtimer, ax25_destroy_timer, 0); ax25->dtimer.expires = jiffies + 2 * HZ; add_timer(&ax25->dtimer); } else { diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 5fb2104b7304..e9d11313d45b 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -29,7 +29,7 @@ #include #include -static void ax25_ds_timeout(unsigned long); +static void ax25_ds_timeout(struct timer_list *); /* * Add DAMA slave timeout timer to timer list. @@ -41,8 +41,7 @@ static void ax25_ds_timeout(unsigned long); void ax25_ds_setup_timer(ax25_dev *ax25_dev) { - setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout, - (unsigned long)ax25_dev); + timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0); } void ax25_ds_del_timer(ax25_dev *ax25_dev) @@ -66,9 +65,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev) * Silently discard all (slave) connections in case our master forgot us... 
*/ -static void ax25_ds_timeout(unsigned long arg) +static void ax25_ds_timeout(struct timer_list *t) { - ax25_dev *ax25_dev = (struct ax25_dev *) arg; + ax25_dev *ax25_dev = from_timer(ax25_dev, t, dama.slave_timer); ax25_cb *ax25; if (ax25_dev == NULL || !ax25_dev->dama.slave) diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c index 23a6f38a80bf..c47b7ee1e4da 100644 --- a/net/ax25/ax25_timer.c +++ b/net/ax25/ax25_timer.c @@ -33,20 +33,19 @@ #include #include -static void ax25_heartbeat_expiry(unsigned long); -static void ax25_t1timer_expiry(unsigned long); -static void ax25_t2timer_expiry(unsigned long); -static void ax25_t3timer_expiry(unsigned long); -static void ax25_idletimer_expiry(unsigned long); +static void ax25_heartbeat_expiry(struct timer_list *); +static void ax25_t1timer_expiry(struct timer_list *); +static void ax25_t2timer_expiry(struct timer_list *); +static void ax25_t3timer_expiry(struct timer_list *); +static void ax25_idletimer_expiry(struct timer_list *); void ax25_setup_timers(ax25_cb *ax25) { - setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25); - setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25); - setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25); - setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25); - setup_timer(&ax25->idletimer, ax25_idletimer_expiry, - (unsigned long)ax25); + timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0); + timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0); + timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0); + timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0); + timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0); } void ax25_start_heartbeat(ax25_cb *ax25) @@ -120,10 +119,10 @@ unsigned long ax25_display_timer(struct timer_list *timer) EXPORT_SYMBOL(ax25_display_timer); -static void ax25_heartbeat_expiry(unsigned long param) +static void ax25_heartbeat_expiry(struct timer_list *t) { int proto = AX25_PROTO_STD_SIMPLEX; - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, timer); if (ax25->ax25_dev) proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]; @@ -145,9 +144,9 @@ static void ax25_heartbeat_expiry(unsigned long param) } } -static void ax25_t1timer_expiry(unsigned long param) +static void ax25_t1timer_expiry(struct timer_list *t) { - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, t1timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: @@ -164,9 +163,9 @@ static void ax25_t1timer_expiry(unsigned long param) } } -static void ax25_t2timer_expiry(unsigned long param) +static void ax25_t2timer_expiry(struct timer_list *t) { - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, t2timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: @@ -183,9 +182,9 @@ static void ax25_t2timer_expiry(unsigned long param) } } -static void ax25_t3timer_expiry(unsigned long param) +static void ax25_t3timer_expiry(struct timer_list *t) { - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, t3timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: @@ -204,9 +203,9 @@ static void ax25_t3timer_expiry(unsigned long param) } } -static void ax25_idletimer_expiry(unsigned long param) +static void ax25_idletimer_expiry(struct timer_list *t) { - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, idletimer); switch 
(ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: From fc8bcaa05160528d56432e4612f522e3ceafc513 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:45:48 -0700 Subject: [PATCH 1383/2177] net: LLC: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Hans Liljestrand Cc: "Paul E. McKenney" Cc: "Reshetova, Elena" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/net/llc_c_ac.h | 8 ++++---- net/llc/llc_c_ac.c | 27 +++++++++++++++++---------- net/llc/llc_conn.c | 12 ++++-------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h index f3be818e73c1..e766300b3e99 100644 --- a/include/net/llc_c_ac.h +++ b/include/net/llc_c_ac.h @@ -171,10 +171,10 @@ int llc_conn_ac_rst_sendack_flag(struct sock *sk, struct sk_buff *skb); int llc_conn_ac_send_i_rsp_as_ack(struct sock *sk, struct sk_buff *skb); int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb); -void llc_conn_busy_tmr_cb(unsigned long timeout_data); -void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data); -void llc_conn_ack_tmr_cb(unsigned long timeout_data); -void llc_conn_rej_tmr_cb(unsigned long timeout_data); +void llc_conn_busy_tmr_cb(struct timer_list *t); +void llc_conn_pf_cycle_tmr_cb(struct timer_list *t); +void llc_conn_ack_tmr_cb(struct timer_list *t); +void llc_conn_rej_tmr_cb(struct timer_list *t); void llc_conn_set_p_flag(struct sock *sk, u8 value); #endif /* LLC_C_AC_H */ diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index ea225bd2672c..f59648018060 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1318,9 +1318,8 @@ static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) return 0; } -static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) +static void llc_conn_tmr_common_cb(struct sock *sk, u8 type) { - struct sock *sk = (struct sock *)timeout_data; struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); bh_lock_sock(sk); @@ -1334,24 +1333,32 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) bh_unlock_sock(sk); } -void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) +void llc_conn_pf_cycle_tmr_cb(struct timer_list *t) { - llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR); + struct llc_sock *llc = from_timer(llc, t, pf_cycle_timer.timer); + + llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_P_TMR); } -void llc_conn_busy_tmr_cb(unsigned long timeout_data) +void llc_conn_busy_tmr_cb(struct timer_list *t) { - llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR); + struct llc_sock *llc = from_timer(llc, t, busy_state_timer.timer); + + llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_BUSY_TMR); } -void llc_conn_ack_tmr_cb(unsigned long timeout_data) +void llc_conn_ack_tmr_cb(struct timer_list *t) { - llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR); + struct llc_sock *llc = from_timer(llc, t, ack_timer.timer); + + llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_ACK_TMR); } -void llc_conn_rej_tmr_cb(unsigned long timeout_data) +void llc_conn_rej_tmr_cb(struct timer_list *t) { - llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR); + struct llc_sock *llc = from_timer(llc, t, rej_sent_timer.timer); + + llc_conn_tmr_common_cb(&llc->sk, 
LLC_CONN_EV_TYPE_REJ_TMR); } int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 5e91b47f0d2a..9177dbb16dce 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -902,20 +902,16 @@ static void llc_sk_init(struct sock *sk) llc->inc_cntr = llc->dec_cntr = 2; llc->dec_step = llc->connect_step = 1; - setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, - (unsigned long)sk); + timer_setup(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 0); llc->ack_timer.expire = sysctl_llc2_ack_timeout; - setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, - (unsigned long)sk); + timer_setup(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 0); llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; - setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, - (unsigned long)sk); + timer_setup(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 0); llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; - setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, - (unsigned long)sk); + timer_setup(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 0); llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; llc->n2 = 2; /* max retransmit */ From 7aa1402e2eb4988b09bf1671e9f968e6e5689b1d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:45:59 -0700 Subject: [PATCH 1384/2177] net: ethernet/sfc: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Solarflare linux maintainers Cc: Edward Cree Cc: Bert Kenward Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jiri Pirko Cc: Jamal Hadi Salim Cc: Ingo Molnar Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Bert Kenward Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/efx.c | 6 ++---- drivers/net/ethernet/sfc/efx.h | 2 +- drivers/net/ethernet/sfc/falcon/efx.c | 6 ++---- drivers/net/ethernet/sfc/falcon/efx.h | 2 +- drivers/net/ethernet/sfc/falcon/falcon.c | 11 ++++++----- drivers/net/ethernet/sfc/falcon/nic.h | 2 ++ drivers/net/ethernet/sfc/falcon/rx.c | 4 ++-- drivers/net/ethernet/sfc/mcdi.c | 9 ++++----- drivers/net/ethernet/sfc/rx.c | 4 ++-- 9 files changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b9cb697b2818..8fdcf7aaf997 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -471,8 +471,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) rx_queue = &channel->rx_queue; rx_queue->efx = efx; - setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill, - (unsigned long)rx_queue); + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); return channel; } @@ -511,8 +510,7 @@ efx_copy_channel(const struct efx_channel *old_channel) rx_queue = &channel->rx_queue; rx_queue->buffer = NULL; memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); - setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill, - (unsigned long)rx_queue); + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); return channel; } diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index d407adf59610..52c84b782901 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -46,7 +46,7 @@ void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); void efx_init_rx_queue(struct efx_rx_queue *rx_queue); void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); -void efx_rx_slow_fill(unsigned long context); +void efx_rx_slow_fill(struct timer_list *t); void __efx_rx_packet(struct efx_channel *channel); void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, unsigned int n_frags, unsigned int len, u16 flags); diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 29614da91cbf..6685a66ee1a3 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -449,8 +449,7 @@ ef4_alloc_channel(struct ef4_nic *efx, int i, struct ef4_channel *old_channel) rx_queue = &channel->rx_queue; rx_queue->efx = efx; - setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill, - (unsigned long)rx_queue); + timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0); return channel; } @@ -489,8 +488,7 @@ ef4_copy_channel(const struct ef4_channel *old_channel) rx_queue = &channel->rx_queue; rx_queue->buffer = NULL; memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); - setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill, - (unsigned long)rx_queue); + timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0); return channel; } diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h index 4f3bb30661ea..a4e4d8ea4078 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.h +++ b/drivers/net/ethernet/sfc/falcon/efx.h @@ -45,7 +45,7 @@ void ef4_remove_rx_queue(struct ef4_rx_queue *rx_queue); void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue); void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue); void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic); -void ef4_rx_slow_fill(unsigned long context); +void ef4_rx_slow_fill(struct timer_list *t); void __ef4_rx_packet(struct ef4_channel *channel); void ef4_rx_packet(struct 
ef4_rx_queue *rx_queue, unsigned int index, unsigned int n_frags, unsigned int len, u16 flags); diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c index 93c713c1f627..ccda017b6794 100644 --- a/drivers/net/ethernet/sfc/falcon/falcon.c +++ b/drivers/net/ethernet/sfc/falcon/falcon.c @@ -1454,10 +1454,11 @@ static void falcon_stats_complete(struct ef4_nic *efx) } } -static void falcon_stats_timer_func(unsigned long context) +static void falcon_stats_timer_func(struct timer_list *t) { - struct ef4_nic *efx = (struct ef4_nic *)context; - struct falcon_nic_data *nic_data = efx->nic_data; + struct falcon_nic_data *nic_data = from_timer(nic_data, t, + stats_timer); + struct ef4_nic *efx = nic_data->efx; spin_lock(&efx->stats_lock); @@ -2295,6 +2296,7 @@ static int falcon_probe_nic(struct ef4_nic *efx) if (!nic_data) return -ENOMEM; efx->nic_data = nic_data; + nic_data->efx = efx; rc = -ENODEV; @@ -2402,8 +2404,7 @@ static int falcon_probe_nic(struct ef4_nic *efx) } nic_data->stats_disable_count = 1; - setup_timer(&nic_data->stats_timer, &falcon_stats_timer_func, - (unsigned long)efx); + timer_setup(&nic_data->stats_timer, falcon_stats_timer_func, 0); return 0; diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h index a4c4592f6023..e2e3c008d073 100644 --- a/drivers/net/ethernet/sfc/falcon/nic.h +++ b/drivers/net/ethernet/sfc/falcon/nic.h @@ -267,6 +267,7 @@ enum { /** * struct falcon_nic_data - Falcon NIC state * @pci_dev2: Secondary function of Falcon A + * @efx: ef4_nic pointer * @board: Board state and functions * @stats: Hardware statistics * @stats_disable_count: Nest count for disabling statistics fetches @@ -280,6 +281,7 @@ enum { */ struct falcon_nic_data { struct pci_dev *pci_dev2; + struct ef4_nic *efx; struct falcon_board board; u64 stats[FALCON_STAT_COUNT]; unsigned int stats_disable_count; diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 6a8406dc0c2b..382019b302db 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -376,9 +376,9 @@ void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic) ef4_nic_notify_rx_desc(rx_queue); } -void ef4_rx_slow_fill(unsigned long context) +void ef4_rx_slow_fill(struct timer_list *t) { - struct ef4_rx_queue *rx_queue = (struct ef4_rx_queue *)context; + struct ef4_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); /* Post an event to cause NAPI to run and refill the queue */ ef4_nic_generate_fill_event(rx_queue); diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 3df872f56289..9c2567b0d93e 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -48,7 +48,7 @@ struct efx_mcdi_async_param { /* followed by request/response buffer */ }; -static void efx_mcdi_timeout_async(unsigned long context); +static void efx_mcdi_timeout_async(struct timer_list *t); static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, bool *was_attached_out); static bool efx_mcdi_poll_once(struct efx_nic *efx); @@ -87,8 +87,7 @@ int efx_mcdi_init(struct efx_nic *efx) mcdi->mode = MCDI_MODE_POLL; spin_lock_init(&mcdi->async_lock); INIT_LIST_HEAD(&mcdi->async_list); - setup_timer(&mcdi->async_timer, efx_mcdi_timeout_async, - (unsigned long)mcdi); + timer_setup(&mcdi->async_timer, efx_mcdi_timeout_async, 0); (void) efx_mcdi_poll_reboot(efx); mcdi->new_epoch = true; @@ -608,9 +607,9 @@ static void 
efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno, } } -static void efx_mcdi_timeout_async(unsigned long context) +static void efx_mcdi_timeout_async(struct timer_list *t) { - struct efx_mcdi_iface *mcdi = (struct efx_mcdi_iface *)context; + struct efx_mcdi_iface *mcdi = from_timer(mcdi, t, async_timer); efx_mcdi_complete_async(mcdi, true); } diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 42443f434569..8cb60513dca2 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -376,9 +376,9 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) efx_nic_notify_rx_desc(rx_queue); } -void efx_rx_slow_fill(unsigned long context) +void efx_rx_slow_fill(struct timer_list *t) { - struct efx_rx_queue *rx_queue = (struct efx_rx_queue *)context; + struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); /* Post an event to cause NAPI to run and refill the queue */ efx_nic_generate_fill_event(rx_queue); From 839a6094140ade97ccc548fcd63179506c5d7fe4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:46:09 -0700 Subject: [PATCH 1385/2177] net: dccp: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Adds a pointer back to the sock. Cc: Gerrit Renker Cc: "David S. Miller" Cc: Soheil Hassas Yeganeh Cc: Hannes Frederic Sowa Cc: Eric Dumazet Cc: dccp@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 10 +++++----- net/dccp/ccids/ccid2.h | 1 + net/dccp/ccids/ccid3.c | 11 ++++++----- net/dccp/ccids/ccid3.h | 1 + net/dccp/timer.c | 12 +++++++----- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e1295d5f2c56..1c75cd1255f6 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -126,10 +126,10 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val) DCCPF_SEQ_WMAX)); } -static void ccid2_hc_tx_rto_expire(unsigned long data) +static void ccid2_hc_tx_rto_expire(struct timer_list *t) { - struct sock *sk = (struct sock *)data; - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer); + struct sock *sk = hc->sk; const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); bh_lock_sock(sk); @@ -733,8 +733,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rpdupack = -1; hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32; hc->tx_cwnd_used = 0; - setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, - (unsigned long)sk); + hc->sk = sk; + timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0); INIT_LIST_HEAD(&hc->tx_av_chunks); return 0; } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 6e50ef2898fb..1af0116dc6ce 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -85,6 +85,7 @@ struct ccid2_hc_tx_sock { tx_rto; u64 tx_rtt_seq:48; struct timer_list tx_rtotimer; + struct sock *sk; /* Congestion Window validation (optional, RFC 2861) */ u32 tx_cwnd_used, diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 119c04317d48..8b5ba6dffac7 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -195,10 +195,10 @@ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc, } } -static void 
ccid3_hc_tx_no_feedback_timer(unsigned long data) +static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer); + struct sock *sk = hc->sk; unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); @@ -505,8 +505,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_state = TFRC_SSTATE_NO_SENT; hc->tx_hist = NULL; - setup_timer(&hc->tx_no_feedback_timer, - ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); + hc->sk = sk; + timer_setup(&hc->tx_no_feedback_timer, + ccid3_hc_tx_no_feedback_timer, 0); return 0; } diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 1a9933c29672..813d91c6e1e2 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -106,6 +106,7 @@ struct ccid3_hc_tx_sock { u8 tx_last_win_count; ktime_t tx_t_last_win_count; struct timer_list tx_no_feedback_timer; + struct sock *sk; ktime_t tx_t_ld; ktime_t tx_t_nom; struct tfrc_tx_hist_entry *tx_hist; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1e35526bf436..b50a8732ff43 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -234,10 +234,13 @@ static void dccp_write_xmitlet(unsigned long data) bh_unlock_sock(sk); } -static void dccp_write_xmit_timer(unsigned long data) +static void dccp_write_xmit_timer(struct timer_list *t) { - dccp_write_xmitlet(data); - sock_put((struct sock *)data); + struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer); + struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk; + + dccp_write_xmitlet((unsigned long)sk); + sock_put(sk); } void dccp_init_xmit_timers(struct sock *sk) @@ -245,8 +248,7 @@ void dccp_init_xmit_timers(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); - setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, - (unsigned long)sk); + timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0); inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } From dda436b7accffd5313e8fd8338f724376c6fcda2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:46:16 -0700 Subject: [PATCH 1386/2177] net: hsr: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Arvid Brodin Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- net/hsr/hsr_device.c | 9 ++++----- net/hsr/hsr_framereg.c | 6 ++---- net/hsr/hsr_framereg.h | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 172d8309f89e..b8cd43c9ed5b 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -328,12 +328,12 @@ out: /* Announce (supervision frame) timer function */ -static void hsr_announce(unsigned long data) +static void hsr_announce(struct timer_list *t) { struct hsr_priv *hsr; struct hsr_port *master; - hsr = (struct hsr_priv *) data; + hsr = from_timer(hsr, t, announce_timer); rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); @@ -463,9 +463,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr->sequence_nr = HSR_SEQNR_START; hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; - setup_timer(&hsr->announce_timer, hsr_announce, (unsigned long)hsr); - - setup_timer(&hsr->prune_timer, hsr_prune_nodes, (unsigned long)hsr); + timer_setup(&hsr->announce_timer, hsr_announce, 0); + timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0); ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 284a9b820df8..286ceb41ac0c 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -365,16 +365,14 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr, /* Remove stale sequence_nr records. Called by timer every * HSR_LIFE_CHECK_INTERVAL (two seconds or so). */ -void hsr_prune_nodes(unsigned long data) +void hsr_prune_nodes(struct timer_list *t) { - struct hsr_priv *hsr; + struct hsr_priv *hsr = from_timer(hsr, t, prune_timer); struct hsr_node *node; struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; - hsr = (struct hsr_priv *) data; - rcu_read_lock(); list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { /* Shorthand */ diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 4e04f0e868e9..370b45998121 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -33,7 +33,7 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, u16 sequence_nr); -void hsr_prune_nodes(unsigned long data); +void hsr_prune_nodes(struct timer_list *t); int hsr_create_self_node(struct list_head *self_node_db, unsigned char addr_a[ETH_ALEN], From 17bfd8c89fb62219fa27a10b9475daea74d20a15 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:46:26 -0700 Subject: [PATCH 1387/2177] net: af_packet: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Willem de Bruijn Cc: Mike Maloney Cc: Jarno Rajahalme Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4f4fa323171d..9603f6ff17a4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -201,11 +201,8 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *, static int prb_queue_frozen(struct tpacket_kbdq_core *); static void prb_open_block(struct tpacket_kbdq_core *, struct tpacket_block_desc *); -static void prb_retire_rx_blk_timer_expired(unsigned long); +static void prb_retire_rx_blk_timer_expired(struct timer_list *); static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); -static void prb_init_blk_timer(struct packet_sock *, - struct tpacket_kbdq_core *, - void (*func) (unsigned long)); static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void prb_clear_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); @@ -540,20 +537,14 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, prb_del_retire_blk_timer(pkc); } -static void prb_init_blk_timer(struct packet_sock *po, - struct tpacket_kbdq_core *pkc, - void (*func) (unsigned long)) -{ - setup_timer(&pkc->retire_blk_timer, func, (long)po); - pkc->retire_blk_timer.expires = jiffies; -} - static void prb_setup_retire_blk_timer(struct packet_sock *po) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); - prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); + timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired, + 0); + pkc->retire_blk_timer.expires = jiffies; } static int prb_calc_retire_blk_tmo(struct packet_sock *po, @@ -671,9 +662,10 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) * prb_calc_retire_blk_tmo() calculates the tmo. * */ -static void prb_retire_rx_blk_timer_expired(unsigned long data) +static void prb_retire_rx_blk_timer_expired(struct timer_list *t) { - struct packet_sock *po = (struct packet_sock *)data; + struct packet_sock *po = + from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer); struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; From 5c658e19a7b53b6d712c3fb08bb88649d362db9f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:46:45 -0700 Subject: [PATCH 1388/2177] net: atm/mpc: Stop using open-coded timer .data field In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using an explicit static variable to hold additional expiration details. Cc: "David S. Miller" Cc: Bhumika Goyal Cc: Andrew Morton Cc: Alexey Dobriyan Cc: "Reshetova, Elena" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- net/atm/mpc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/atm/mpc.c b/net/atm/mpc.c index b43d99430eb6..883d25778fa4 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -95,7 +95,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned long event, void *dev); static void mpc_timer_refresh(void); -static void mpc_cache_check(unsigned long checking_time); +static void mpc_cache_check(struct timer_list *unused); static struct llc_snap_hdr llc_snap_mpoa_ctrl = { 0xaa, 0xaa, 0x03, @@ -1407,15 +1407,17 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action) msg_to_mpoad(msg, mpc); } +static unsigned long checking_time; + static void mpc_timer_refresh(void) { mpc_timer.expires = jiffies + (MPC_P2 * HZ); - mpc_timer.data = mpc_timer.expires; - mpc_timer.function = mpc_cache_check; + checking_time = mpc_timer.expires; + mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check; add_timer(&mpc_timer); } -static void mpc_cache_check(unsigned long checking_time) +static void mpc_cache_check(struct timer_list *unused) { struct mpoa_client *mpc = mpcs; static unsigned long previous_resolving_check_time; From fd71e13bc7c6ac2d34f08380707662cf07f8234c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:46:52 -0700 Subject: [PATCH 1389/2177] drivers/net: sis: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Francois Romieu Cc: Daniele Venzano Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Daniele Venzano Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sis/sis190.c | 10 ++++------ drivers/net/ethernet/sis/sis900.c | 10 +++++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 445109bd6910..c2c50522b96d 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1018,10 +1018,10 @@ out_unlock: rtnl_unlock(); } -static void sis190_phy_timer(unsigned long __opaque) +static void sis190_phy_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)__opaque; - struct sis190_private *tp = netdev_priv(dev); + struct sis190_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; if (likely(netif_running(dev))) schedule_work(&tp->phy_task); @@ -1039,10 +1039,8 @@ static inline void sis190_request_timer(struct net_device *dev) struct sis190_private *tp = netdev_priv(dev); struct timer_list *timer = &tp->timer; - init_timer(timer); + timer_setup(timer, sis190_phy_timer, 0); timer->expires = jiffies + SIS190_PHY_TIMEOUT; - timer->data = (unsigned long)dev; - timer->function = sis190_phy_timer; add_timer(timer); } diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index cb61247b0526..4bb89f74742c 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -218,7 +218,7 @@ static void sis900_init_rxfilter (struct net_device * net_dev); static u16 read_eeprom(void __iomem *ioaddr, int location); static int mdio_read(struct net_device *net_dev, int phy_id, int location); static void mdio_write(struct net_device *net_dev, int phy_id, int location, int val); -static void sis900_timer(unsigned long data); +static void sis900_timer(struct timer_list *t); static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy); static void sis900_tx_timeout(struct net_device *net_dev); static void sis900_init_tx_ring(struct net_device *net_dev); @@ -1065,7 +1065,7 @@ sis900_open(struct net_device *net_dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - setup_timer(&sis_priv->timer, sis900_timer, (unsigned long)net_dev); + timer_setup(&sis_priv->timer, sis900_timer, 0); sis_priv->timer.expires = jiffies + HZ; add_timer(&sis_priv->timer); @@ -1300,10 +1300,10 @@ static void sis630_set_eq(struct net_device *net_dev, u8 revision) * link status (ON/OFF) and link mode (10/100/Full/Half) */ -static void sis900_timer(unsigned long data) +static void sis900_timer(struct timer_list *t) { - struct net_device *net_dev = (struct net_device *)data; - struct sis900_private *sis_priv = netdev_priv(net_dev); + struct sis900_private *sis_priv = from_timer(sis_priv, t, timer); + struct net_device *net_dev = sis_priv->mii_info.dev; struct mii_phy *mii_phy = sis_priv->mii; static const int next_tick = 5*HZ; int speed = 0, duplex = 0; From 56546e3b9f2284a750c9ca24617544ff5cf56af4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 01:47:00 -0700 Subject: [PATCH 1390/2177] drivers/net: wan/sbni: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: David Howells Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/wan/sbni.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index bde8c0339831..8e8c4c0e1b64 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -71,6 +71,7 @@ struct net_local { struct timer_list watchdog; + struct net_device *watchdog_dev; spinlock_t lock; struct sk_buff *rx_buf_p; /* receive buffer ptr */ @@ -128,7 +129,7 @@ static void send_frame( struct net_device * ); static int upload_data( struct net_device *, unsigned, unsigned, unsigned, u32 ); static void download_data( struct net_device *, u32 * ); -static void sbni_watchdog( unsigned long ); +static void sbni_watchdog(struct timer_list *); static void interpret_ack( struct net_device *, unsigned ); static int append_frame_to_pkt( struct net_device *, unsigned, u32 ); static void indicate_pkt( struct net_device * ); @@ -1029,11 +1030,10 @@ indicate_pkt( struct net_device *dev ) */ static void -sbni_watchdog( unsigned long arg ) +sbni_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *) arg; - struct net_local *nl = netdev_priv(dev); - struct timer_list *w = &nl->watchdog; + struct net_local *nl = from_timer(nl, t, watchdog); + struct net_device *dev = nl->watchdog_dev; unsigned long flags; unsigned char csr0; @@ -1060,11 +1060,7 @@ sbni_watchdog( unsigned long arg ) outb( csr0 | RC_CHK, dev->base_addr + CSR0 ); - init_timer( w ); - w->expires = jiffies + SBNI_TIMEOUT; - w->data = arg; - w->function = sbni_watchdog; - add_timer( w ); + mod_timer(t, jiffies + SBNI_TIMEOUT); spin_unlock_irqrestore( &nl->lock, flags ); } @@ -1195,10 +1191,9 @@ handler_attached: netif_start_queue( dev ); /* set timer watchdog */ - init_timer( w ); + nl->watchdog_dev = dev; + timer_setup(w, sbni_watchdog, 0); w->expires = jiffies + SBNI_TIMEOUT; - w->data = (unsigned long) dev; - w->function = sbni_watchdog; add_timer( w ); spin_unlock( &nl->lock ); From f65163fed0e7dc3c79be4f96a8fe97fc89328b93 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 25 Oct 2017 16:19:52 +0200 Subject: [PATCH 1391/2177] tipc: eliminate KASAN warning The following warning was reported by syzbot on Oct 24. 2017: KASAN: slab-out-of-bounds Read in tipc_nametbl_lookup_dst_nodes This is a harmless bug, but we still want to get rid of the warning, so we swap the two conditions in question. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/name_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 2856e19e036e..b3829bcf63c7 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -697,7 +697,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, spin_lock_bh(&seq->lock); sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); stop = seq->sseqs + seq->first_free; - for (; sseq->lower <= upper && sseq != stop; sseq++) { + for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { if (tipc_in_scope(domain, publ->node)) From 1769af432a9451774edb4c1221c52437936a01af Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Wed, 25 Oct 2017 11:44:32 -0500 Subject: [PATCH 1392/2177] ethernet: cavium: octeon: Switch to using netdev_info(). Signed-off-by: Steven J. Hill Signed-off-by: David Daney Signed-off-by: David S. 
Miller --- .../net/ethernet/cavium/octeon/octeon_mgmt.c | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 2887bcaf6af5..3f6afb54a5eb 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -705,14 +705,15 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev, u64 clock_comp = (NSEC_PER_SEC << 32) / octeon_get_io_clock_rate(); if (!ptp.s.ptp_en) cvmx_write_csr(CVMX_MIO_PTP_CLOCK_COMP, clock_comp); - pr_info("PTP Clock: Using sclk reference at %lld Hz\n", - (NSEC_PER_SEC << 32) / clock_comp); + netdev_info(netdev, + "PTP Clock using sclk reference @ %lldHz\n", + (NSEC_PER_SEC << 32) / clock_comp); } else { /* The clock is already programmed to use a GPIO */ u64 clock_comp = cvmx_read_csr(CVMX_MIO_PTP_CLOCK_COMP); - pr_info("PTP Clock: Using GPIO %d at %lld Hz\n", - ptp.s.ext_clk_in, - (NSEC_PER_SEC << 32) / clock_comp); + netdev_info(netdev, + "PTP Clock using GPIO%d @ %lld Hz\n", + ptp.s.ext_clk_in, (NSEC_PER_SEC << 32) / clock_comp); } /* Enable the clock if it wasn't done already */ @@ -926,14 +927,11 @@ static void octeon_mgmt_adjust_link(struct net_device *netdev) spin_unlock_irqrestore(&p->lock, flags); if (link_changed != 0) { - if (link_changed > 0) { - pr_info("%s: Link is up - %d/%s\n", netdev->name, - phydev->speed, - phydev->duplex == DUPLEX_FULL ? - "Full" : "Half"); - } else { - pr_info("%s: Link is down\n", netdev->name); - } + if (link_changed > 0) + netdev_info(netdev, "Link is up - %d/%s\n", + phydev->speed, phydev->duplex == DUPLEX_FULL ? "Full" : "Half"); + else + netdev_info(netdev, "Link is down\n"); } } From 88ca59d1aaf28c25b47a9f933090e480ba6dc92a Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Wed, 25 Oct 2017 12:26:43 -0700 Subject: [PATCH 1393/2177] macvlan: remove unused fields in struct macvlan_dev commit 635b8c8ecdd2 ("tap: Renaming tap related APIs, data structures, macros") captured all the tap related fields into a new struct tap_dev. However, it failed to remove those fields from struct macvlan_dev. Those fields are currently unused and must be removed. While there I moved the comment for MAX_TAP_QUEUES to the right place. Fixes: 635b8c8ecdd27142 (tap: Renaming tap related APIs, data structures, macros) Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 15 --------------- include/linux/if_tap.h | 4 ++++ 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 10e319f41fb1..e13b369df02b 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -10,13 +10,6 @@ #include struct macvlan_port; -struct macvtap_queue; - -/* - * Maximum times a macvtap device can be opened. This can be used to - * configure the number of receive queue, e.g. for multiqueue virtio. - */ -#define MAX_TAP_QUEUES 256 #define MACVLAN_MC_FILTER_BITS 8 #define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) @@ -35,14 +28,6 @@ struct macvlan_dev { netdev_features_t set_features; enum macvlan_mode mode; u16 flags; - /* This array tracks active taps. 
*/ - struct tap_queue __rcu *taps[MAX_TAP_QUEUES]; - /* This list tracks all taps (both enabled and disabled) */ - struct list_head queue_list; - int numvtaps; - int numqueues; - netdev_features_t tap_features; - int minor; int nest_level; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 4837157da0dc..d1b5173ad8f0 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -22,6 +22,10 @@ static inline struct skb_array *tap_get_skb_array(struct file *f) #include #include +/* + * Maximum times a tap device can be opened. This can be used to + * configure the number of receive queue, e.g. for multiqueue virtio. + */ #define MAX_TAP_QUEUES 256 struct tap_queue; From 0d314502bbfbef7560e5a3e817722128d5c5fc5d Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:11 -0700 Subject: [PATCH 1394/2177] net: bcmgenet: correct bad merge As noted in the net-next submission for GENETv5 support [1], there were merge conflicts with an earlier net submission [2] that had not yet found its way to the net-next repository. Unfortunately, when the branches were merged the conflicts were not correctly resolved. This commit attempts to correct that. [1] https://lkml.org/lkml/2017/3/13/1145 [2] https://lkml.org/lkml/2017/3/9/890 Fixes: 101c431492d2 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net") Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 9cebca896913..f6e8e01be1c8 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2602,12 +2602,6 @@ static void bcmgenet_irq_task(struct work_struct *work) priv->irq0_stat = 0; spin_unlock_irqrestore(&priv->lock, flags); - if (status & UMAC_IRQ_MPD_R) { - netif_dbg(priv, wol, priv->dev, - "magic packet detected, waking up\n"); - bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); - } - /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->phydev, @@ -2698,23 +2692,13 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | - UMAC_IRQ_PHY_DET_F | - UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_HFB_SM | - UMAC_IRQ_HFB_MM)) { - /* all other interested interrupts handled in bottom half */ - schedule_work(&priv->bcmgenet_irq_work); - } - if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { wake_up(&priv->wq); } /* all other interested interrupts handled in bottom half */ - status &= (UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_MPD_R); + status &= UMAC_IRQ_LINK_EVENT; if (status) { /* Save irq status for bottom-half processing. */ spin_lock_irqsave(&priv->lock, flags); From 4fd6dc98c19369d24c4a4819b27c114948720d16 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:12 -0700 Subject: [PATCH 1395/2177] net: bcmgenet: prevent duplicate calls of bcmgenet_dma_teardown When bcmgenet_dma_teardown is called from bcmgenet_fini_dma it ends up getting called twice from the bcmgenet_close and bcmgenet_suspend functions (once directly and once inside the bcmgenet_fini_dma call). This commit removes the call from bcmgenet_fini_dma and ensures that bcmgenet_dma_teardown is called before bcmgenet_fini_dma in all paths of execution. 
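(To make the resulting ordering concrete, here is a small self-contained sketch in plain C. It is illustrative only: the struct and the two functions below are stand-ins for the driver's bcmgenet_dma_teardown()/bcmgenet_fini_dma() pair, not actual driver code; only the call order mirrors what this patch enforces.)

	#include <stdio.h>
	#include <stdbool.h>

	struct fake_priv {
		bool dma_running;
	};

	/* stand-in for bcmgenet_dma_teardown(): quiesce DMA, done exactly once */
	static void dma_teardown(struct fake_priv *p)
	{
		if (p->dma_running) {
			p->dma_running = false;
			puts("DMA quiesced");
		}
	}

	/* stand-in for bcmgenet_fini_dma(): frees rings; after this patch it no
	 * longer calls the teardown itself, so callers must do so first */
	static void fini_dma(struct fake_priv *p)
	{
		(void)p;
		puts("rings and buffers freed");
	}

	int main(void)
	{
		struct fake_priv priv = { .dma_running = true };

		dma_teardown(&priv);	/* every shutdown path does this explicitly... */
		fini_dma(&priv);	/* ...before the fini step */
		return 0;
	}
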
Fixes: 4a0c081eff43 ("net: bcmgenet: call bcmgenet_dma_teardown in bcmgenet_fini_dma") Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f6e8e01be1c8..78368466eb70 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2505,9 +2505,6 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) bcmgenet_fini_rx_napi(priv); bcmgenet_fini_tx_napi(priv); - /* disable DMA */ - bcmgenet_dma_teardown(priv); - for (i = 0; i < priv->num_tx_bds; i++) { cb = priv->tx_cbs + i; skb = bcmgenet_free_tx_cb(&priv->pdev->dev, cb); @@ -2930,6 +2927,7 @@ err_irq1: err_irq0: free_irq(priv->irq0, priv); err_fini_dma: + bcmgenet_dma_teardown(priv); bcmgenet_fini_dma(priv); err_clk_disable: if (priv->internal_phy) From 28c2d1a7a0bfdf3617800d2beae1c67983c03d15 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:13 -0700 Subject: [PATCH 1396/2177] net: bcmgenet: enable loopback during UniMAC sw_reset It is necessary for the UniMAC to be clocked at least 5 cycles while the sw_reset is asserted to ensure a clean reset. It was discovered that this condition was not being met when connected to an external RGMII PHY that disabled the Rx clock in the Power Save state. This commit modifies the reset_umac function to place the (RG)MII interface into a local loopback mode where the Rx clock comes from the GENET sourced Tx clk during the sw_reset to ensure the presence and stability of the clock. In addition, it turns out that the sw_reset of the UniMAC is not self clearing, but this was masked by a bug in the timeout code. The sw_reset is now explicitly cleared by zeroing the UMAC_CMD register before returning from reset_umac which makes it no longer necessary to do so in init_umac and makes the clearing of CMD_TX_EN and CMD_RX_EN by umac_enable_set redundant. The timeout code (and its associated bug) are removed so reset_umac no longer needs to return a result, and that means init_umac that calls reset_umac does not need to as well. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 55 ++++--------------- 1 file changed, 10 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 78368466eb70..3da177fa2659 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1935,12 +1935,8 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable) usleep_range(1000, 2000); } -static int reset_umac(struct bcmgenet_priv *priv) +static void reset_umac(struct bcmgenet_priv *priv) { - struct device *kdev = &priv->pdev->dev; - unsigned int timeout = 0; - u32 reg; - /* 7358a0/7552a0: bad default in RBUF_FLUSH_CTRL.umac_sw_rst */ bcmgenet_rbuf_ctrl_set(priv, 0); udelay(10); @@ -1948,23 +1944,10 @@ static int reset_umac(struct bcmgenet_priv *priv) /* disable MAC while updating its registers */ bcmgenet_umac_writel(priv, 0, UMAC_CMD); - /* issue soft reset, wait for it to complete */ - bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD); - while (timeout++ < 1000) { - reg = bcmgenet_umac_readl(priv, UMAC_CMD); - if (!(reg & CMD_SW_RESET)) - return 0; - - udelay(1); - } - - if (timeout == 1000) { - dev_err(kdev, - "timeout waiting for MAC to come out of reset\n"); - return -ETIMEDOUT; - } - - return 0; + /* issue soft reset with (rg)mii loopback to ensure a stable rxclk */ + bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD); + udelay(2); + bcmgenet_umac_writel(priv, 0, UMAC_CMD); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) @@ -1994,20 +1977,16 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv) bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); } -static int init_umac(struct bcmgenet_priv *priv) +static void init_umac(struct bcmgenet_priv *priv) { struct device *kdev = &priv->pdev->dev; - int ret; u32 reg; u32 int0_enable = 0; dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n"); - ret = reset_umac(priv); - if (ret) - return ret; + reset_umac(priv); - bcmgenet_umac_writel(priv, 0, UMAC_CMD); /* clear tx/rx counter */ bcmgenet_umac_writel(priv, MIB_RESET_RX | MIB_RESET_TX | MIB_RESET_RUNT, @@ -2046,8 +2025,6 @@ static int init_umac(struct bcmgenet_priv *priv) bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); dev_dbg(kdev, "done init umac\n"); - - return 0; } /* Initialize a Tx ring along with corresponding hardware registers */ @@ -2863,12 +2840,7 @@ static int bcmgenet_open(struct net_device *dev) /* take MAC out of reset */ bcmgenet_umac_reset(priv); - ret = init_umac(priv); - if (ret) - goto err_clk_disable; - - /* disable ethernet MAC while updating its registers */ - umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false); + init_umac(priv); /* Make sure we reflect the value of CRC_CMD_FWD */ reg = bcmgenet_umac_readl(priv, UMAC_CMD); @@ -3546,9 +3518,7 @@ static int bcmgenet_probe(struct platform_device *pdev) !strcasecmp(phy_mode_str, "internal")) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); - err = reset_umac(priv); - if (err) - goto err_clk_disable; + reset_umac(priv); err = bcmgenet_mii_init(dev); if (err) @@ -3660,9 +3630,7 @@ static int bcmgenet_resume(struct device *d) bcmgenet_umac_reset(priv); - ret = init_umac(priv); - if (ret) - goto out_clk_disable; + init_umac(priv); /* From WOL-enabled suspend, switch to regular clock */ if (priv->wolopts) @@ -3672,9 +3640,6 @@ static int bcmgenet_resume(struct device *d) /* Speed settings must be restored */ 
bcmgenet_mii_config(priv->dev, false); - /* disable ethernet MAC while updating its registers */ - umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false); - bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { From 7587935cfa119e122215e37e002a481d7374198b Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:14 -0700 Subject: [PATCH 1397/2177] net: bcmgenet: move NAPI initialization to ring initialization Since each ring has its own NAPI instance it might as well be initialized along with the other ring context. Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 42 ++++--------------- 1 file changed, 8 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 3da177fa2659..9ce6671e8916 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2081,6 +2081,10 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, TDMA_WRITE_PTR); bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1, DMA_END_ADDR); + + /* Initialize Tx NAPI */ + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, + NAPI_POLL_WEIGHT); } /* Initialize a RDMA ring */ @@ -2112,6 +2116,10 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, if (ret) return ret; + /* Initialize Rx NAPI */ + netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, + NAPI_POLL_WEIGHT); + bcmgenet_rdma_ring_writel(priv, index, 0, RDMA_PROD_INDEX); bcmgenet_rdma_ring_writel(priv, index, 0, RDMA_CONS_INDEX); bcmgenet_rdma_ring_writel(priv, index, 1, DMA_MBUF_DONE_THRESH); @@ -2136,20 +2144,6 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, return ret; } -static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv) -{ - unsigned int i; - struct bcmgenet_tx_ring *ring; - - for (i = 0; i < priv->hw_params->tx_queues; ++i) { - ring = &priv->tx_rings[i]; - netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); - } - - ring = &priv->tx_rings[DESC_INDEX]; - netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); -} - static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) { unsigned int i; @@ -2263,9 +2257,6 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1); bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2); - /* Initialize Tx NAPI */ - bcmgenet_init_tx_napi(priv); - /* Enable Tx queues */ bcmgenet_tdma_writel(priv, ring_cfg, DMA_RING_CFG); @@ -2275,20 +2266,6 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL); } -static void bcmgenet_init_rx_napi(struct bcmgenet_priv *priv) -{ - unsigned int i; - struct bcmgenet_rx_ring *ring; - - for (i = 0; i < priv->hw_params->rx_queues; ++i) { - ring = &priv->rx_rings[i]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64); - } - - ring = &priv->rx_rings[DESC_INDEX]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64); -} - static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv) { unsigned int i; @@ -2391,9 +2368,6 @@ static int bcmgenet_init_rx_queues(struct net_device *dev) ring_cfg |= (1 << DESC_INDEX); dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT)); - /* Initialize Rx NAPI */ - bcmgenet_init_rx_napi(priv); - /* Enable rings */ bcmgenet_rdma_writel(priv, ring_cfg, DMA_RING_CFG); From fbf557d9d1bf93892db70121061c81aaded41607 Mon 
Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:15 -0700 Subject: [PATCH 1398/2177] net: bcmgenet: cleanup ring interrupt masking and unmasking Since the NAPI interrupts are basically ignored when NAPI is disabled we don't need to mask them within the functions bcmgenet_disable_tx_napi() and bcmgenet_disable_rx_napi(). So wait until all NAPI instances are disabled and mask all of the bcmgenet driver interrupts together in bcmgenet_netif_stop(). The interrupts can still be enabled in the functions bcmgenet_enable_tx_napi() and bcmgenet_enable_rx_napi(), but use the ring context int_enable() method to keep the functionality consistent and the code cleaner. Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 28 ++++--------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 9ce6671e8916..88aacf3bf44f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2147,33 +2147,24 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) { unsigned int i; - u32 int0_enable = UMAC_IRQ_TXDMA_DONE; - u32 int1_enable = 0; struct bcmgenet_tx_ring *ring; for (i = 0; i < priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; napi_enable(&ring->napi); - int1_enable |= (1 << i); + ring->int_enable(ring); } ring = &priv->tx_rings[DESC_INDEX]; napi_enable(&ring->napi); - - bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); - bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR); + ring->int_enable(ring); } static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv) { unsigned int i; - u32 int0_disable = UMAC_IRQ_TXDMA_DONE; - u32 int1_disable = 0xffff; struct bcmgenet_tx_ring *ring; - bcmgenet_intrl2_0_writel(priv, int0_disable, INTRL2_CPU_MASK_SET); - bcmgenet_intrl2_1_writel(priv, int1_disable, INTRL2_CPU_MASK_SET); - for (i = 0; i < priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; napi_disable(&ring->napi); @@ -2269,33 +2260,24 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv) { unsigned int i; - u32 int0_enable = UMAC_IRQ_RXDMA_DONE; - u32 int1_enable = 0; struct bcmgenet_rx_ring *ring; for (i = 0; i < priv->hw_params->rx_queues; ++i) { ring = &priv->rx_rings[i]; napi_enable(&ring->napi); - int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i)); + ring->int_enable(ring); } ring = &priv->rx_rings[DESC_INDEX]; napi_enable(&ring->napi); - - bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); - bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR); + ring->int_enable(ring); } static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv) { unsigned int i; - u32 int0_disable = UMAC_IRQ_RXDMA_DONE; - u32 int1_disable = 0xffff << UMAC_IRQ1_RX_INTR_SHIFT; struct bcmgenet_rx_ring *ring; - bcmgenet_intrl2_0_writel(priv, int0_disable, INTRL2_CPU_MASK_SET); - bcmgenet_intrl2_1_writel(priv, int1_disable, INTRL2_CPU_MASK_SET); - for (i = 0; i < priv->hw_params->rx_queues; ++i) { ring = &priv->rx_rings[i]; napi_disable(&ring->napi); @@ -2888,9 +2870,9 @@ static void bcmgenet_netif_stop(struct net_device *dev) netif_tx_stop_all_queues(dev); phy_stop(priv->phydev); - bcmgenet_intr_disable(priv); bcmgenet_disable_rx_napi(priv); 
bcmgenet_disable_tx_napi(priv); + bcmgenet_intr_disable(priv); /* Wait for pending work items to complete. Since interrupts are * disabled no new work will be scheduled. From d215dbac48ab9e77c680fcd28863ccc227a5657e Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:16 -0700 Subject: [PATCH 1399/2177] net: bcmgenet: rework bcmgenet_netif_start and bcmgenet_netif_stop This commit consolidates more common functionality from bcmgenet_close and bcmgenet_suspend into bcmgenet_netif_stop and modifies the start and stop sequences to better suit the design of the GENET hardware. Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 49 +++++++------------ 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 88aacf3bf44f..747224714394 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2763,11 +2763,11 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Start the network engine */ bcmgenet_enable_rx_napi(priv); - bcmgenet_enable_tx_napi(priv); umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); netif_tx_start_all_queues(dev); + bcmgenet_enable_tx_napi(priv); /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); @@ -2868,10 +2868,19 @@ static void bcmgenet_netif_stop(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); + bcmgenet_disable_tx_napi(priv); netif_tx_stop_all_queues(dev); + + /* Disable MAC receive */ + umac_enable_set(priv, CMD_RX_EN, false); + + bcmgenet_dma_teardown(priv); + + /* Disable MAC transmit. TX DMA disabled must be done before this */ + umac_enable_set(priv, CMD_TX_EN, false); + phy_stop(priv->phydev); bcmgenet_disable_rx_napi(priv); - bcmgenet_disable_tx_napi(priv); bcmgenet_intr_disable(priv); /* Wait for pending work items to complete. Since interrupts are @@ -2883,12 +2892,16 @@ static void bcmgenet_netif_stop(struct net_device *dev) priv->old_speed = -1; priv->old_duplex = -1; priv->old_pause = -1; + + /* tx reclaim */ + bcmgenet_tx_reclaim_all(dev); + bcmgenet_fini_dma(priv); } static int bcmgenet_close(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - int ret; + int ret = 0; netif_dbg(priv, ifdown, dev, "bcmgenet_close\n"); @@ -2897,20 +2910,6 @@ static int bcmgenet_close(struct net_device *dev) /* Really kill the PHY state machine and disconnect from it */ phy_disconnect(priv->phydev); - /* Disable MAC receive */ - umac_enable_set(priv, CMD_RX_EN, false); - - ret = bcmgenet_dma_teardown(priv); - if (ret) - return ret; - - /* Disable MAC transmit. TX DMA disabled must be done before this */ - umac_enable_set(priv, CMD_TX_EN, false); - - /* tx reclaim */ - bcmgenet_tx_reclaim_all(dev); - bcmgenet_fini_dma(priv); - free_irq(priv->irq0, priv); free_irq(priv->irq1, priv); @@ -3522,7 +3521,7 @@ static int bcmgenet_suspend(struct device *d) { struct net_device *dev = dev_get_drvdata(d); struct bcmgenet_priv *priv = netdev_priv(dev); - int ret; + int ret = 0; if (!netif_running(dev)) return 0; @@ -3534,20 +3533,6 @@ static int bcmgenet_suspend(struct device *d) netif_device_detach(dev); - /* Disable MAC receive */ - umac_enable_set(priv, CMD_RX_EN, false); - - ret = bcmgenet_dma_teardown(priv); - if (ret) - return ret; - - /* Disable MAC transmit. 
TX DMA disabled must be done before this */ - umac_enable_set(priv, CMD_TX_EN, false); - - /* tx reclaim */ - bcmgenet_tx_reclaim_all(dev); - bcmgenet_fini_dma(priv); - /* Prepare the device for Wake-on-LAN and switch to the slow clock */ if (device_may_wakeup(d) && priv->wolopts) { ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC); From b0447ecb533270cf857ebee1133cb8ff67115423 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:17 -0700 Subject: [PATCH 1400/2177] net: bcmgenet: relax lock constraints to reduce IRQ latency Since the ring locks are not used in a hard IRQ context it is often not necessary to disable global IRQs while waiting on a lock. Using less restrictive lock and unlock calls improves the real-time responsiveness of the system. Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 747224714394..91f52c1b5108 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1405,11 +1405,10 @@ static unsigned int bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { unsigned int released; - unsigned long flags; - spin_lock_irqsave(&ring->lock, flags); + spin_lock_bh(&ring->lock); released = __bcmgenet_tx_reclaim(dev, ring); - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock_bh(&ring->lock); return released; } @@ -1420,15 +1419,14 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) container_of(napi, struct bcmgenet_tx_ring, napi); unsigned int work_done = 0; struct netdev_queue *txq; - unsigned long flags; - spin_lock_irqsave(&ring->lock, flags); + spin_lock(&ring->lock); work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring); if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { txq = netdev_get_tx_queue(ring->priv->dev, ring->queue); netif_tx_wake_queue(txq); } - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock(&ring->lock); if (work_done == 0) { napi_complete(napi); @@ -1523,7 +1521,6 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) struct bcmgenet_tx_ring *ring = NULL; struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; - unsigned long flags = 0; int nr_frags, index; dma_addr_t mapping; unsigned int size; @@ -1550,7 +1547,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) nr_frags = skb_shinfo(skb)->nr_frags; - spin_lock_irqsave(&ring->lock, flags); + spin_lock(&ring->lock); if (ring->free_bds <= (nr_frags + 1)) { if (!netif_tx_queue_stopped(txq)) { netif_tx_stop_queue(txq); @@ -1645,7 +1642,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) bcmgenet_tdma_ring_writel(priv, ring->index, ring->prod_index, TDMA_PROD_INDEX); out: - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock(&ring->lock); return ret; @@ -2520,17 +2517,16 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) /* Interrupt bottom half */ static void bcmgenet_irq_task(struct work_struct *work) { - unsigned long flags; unsigned int status; struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); netif_dbg(priv, intr, priv->dev, "%s\n", __func__); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); status = priv->irq0_stat; priv->irq0_stat = 0; - spin_unlock_irqrestore(&priv->lock, 
flags); + spin_unlock_irq(&priv->lock); /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) @@ -2927,7 +2923,6 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) u32 p_index, c_index, intsts, intmsk; struct netdev_queue *txq; unsigned int free_bds; - unsigned long flags; bool txq_stopped; if (!netif_msg_tx_err(priv)) @@ -2935,7 +2930,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) txq = netdev_get_tx_queue(priv->dev, ring->queue); - spin_lock_irqsave(&ring->lock, flags); + spin_lock(&ring->lock); if (ring->index == DESC_INDEX) { intsts = ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); intmsk = UMAC_IRQ_TXDMA_DONE | UMAC_IRQ_TXDMA_MBDONE; @@ -2947,7 +2942,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) p_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_PROD_INDEX); txq_stopped = netif_tx_queue_stopped(txq); free_bds = ring->free_bds; - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock(&ring->lock); netif_err(priv, tx_err, priv->dev, "Ring %d queue %d status summary\n" "TX queue status: %s, interrupts: %s\n" From 484bfa1507bf71cecc7833ae7f7272d8af49badc Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:18 -0700 Subject: [PATCH 1401/2177] Revert "net: bcmgenet: Software reset EPHY after power on" With commit f7d72996e222 ("net: bcmgenet: enable loopback during UniMAC sw_reset") it is no longer necessary to force the software reset of the internal EPHY before resetting the UniMAC to ensure a clean reset. Therefore this commit reverts commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on"). Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 1 - drivers/net/ethernet/broadcom/genet/bcmgenet.h | 1 - drivers/net/ethernet/broadcom/genet/bcmmii.c | 16 ---------------- 3 files changed, 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 91f52c1b5108..54b09a01cb2c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1172,7 +1172,6 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, } bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); bcmgenet_phy_power_set(priv->dev, true); - bcmgenet_mii_reset(priv->dev); break; case GENET_POWER_CABLE_SENSE: diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 4c49d0b97748..35f18a8d1ce6 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -711,7 +711,6 @@ int bcmgenet_mii_init(struct net_device *dev); int bcmgenet_mii_config(struct net_device *dev, bool init); int bcmgenet_mii_probe(struct net_device *dev); void bcmgenet_mii_exit(struct net_device *dev); -void bcmgenet_mii_reset(struct net_device *dev); void bcmgenet_phy_power_set(struct net_device *dev, bool enable); void bcmgenet_mii_setup(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 18f5723be2c9..a5ae9b78389c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -121,22 +121,6 @@ static int bcmgenet_fixed_phy_link_update(struct net_device *dev, return 0; } -/* Perform a voluntary PHY software reset, since the EPHY is very finicky about - * not doing it and will start corrupting packets - */ -void 
bcmgenet_mii_reset(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - - if (GENET_IS_V4(priv)) - return; - - if (priv->phydev) { - phy_init_hw(priv->phydev); - phy_start_aneg(priv->phydev); - } -} - void bcmgenet_phy_power_set(struct net_device *dev, bool enable) { struct bcmgenet_priv *priv = netdev_priv(dev); From 6c97f010cee28e3f262c547215fb0e8702bdb654 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 25 Oct 2017 15:04:19 -0700 Subject: [PATCH 1402/2177] net: bcmgenet: use dev->phydev instead of priv->phydev Now that the software reset of the PHY has been removed it is no longer necessary to retain a private pointer to the phydev for use when the PHY is detached (which isn't generally safe anyway). The driver now uses the phydev member attached to the net_device. For ethtool commands that have a PHY component, an explicit check is made to prevent accessing an invalid phydev pointer when one is not attached (e.g. interface is down). Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 47 ++++++++++--------- .../net/ethernet/broadcom/genet/bcmgenet.h | 1 - drivers/net/ethernet/broadcom/genet/bcmmii.c | 17 +++---- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 54b09a01cb2c..9713374ebf14 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -488,15 +488,13 @@ static void bcmgenet_complete(struct net_device *dev) static int bcmgenet_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { - struct bcmgenet_priv *priv = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - phy_ethtool_ksettings_get(priv->phydev, cmd); + phy_ethtool_ksettings_get(dev->phydev, cmd); return 0; } @@ -504,15 +502,13 @@ static int bcmgenet_get_link_ksettings(struct net_device *dev, static int bcmgenet_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { - struct bcmgenet_priv *priv = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_ksettings_set(priv->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -1042,11 +1038,14 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) if (GENET_IS_V1(priv)) return -EOPNOTSUPP; + if (!dev->phydev) + return -ENODEV; + e->eee_enabled = p->eee_enabled; e->eee_active = p->eee_active; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(priv->phydev, e); + return phy_ethtool_get_eee(dev->phydev, e); } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) @@ -1058,12 +1057,15 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (GENET_IS_V1(priv)) return -EOPNOTSUPP; + if (!dev->phydev) + return -ENODEV; + p->eee_enabled = e->eee_enabled; if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(priv->phydev, 0); + ret = phy_init_eee(dev->phydev, 0); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; @@ -1073,7 +1075,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) bcmgenet_eee_enable_set(dev, true); } - return 
phy_ethtool_set_eee(priv->phydev, e); + return phy_ethtool_set_eee(dev->phydev, e); } /* standard ethtool support functions. */ @@ -1107,7 +1109,7 @@ static int bcmgenet_power_down(struct bcmgenet_priv *priv, switch (mode) { case GENET_POWER_CABLE_SENSE: - phy_detach(priv->phydev); + phy_detach(priv->dev->phydev); break; case GENET_POWER_WOL_MAGIC: @@ -1192,15 +1194,13 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, /* ioctl handle special commands that are not present in ethtool. */ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct bcmgenet_priv *priv = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } static struct enet_cb *bcmgenet_get_txcb(struct bcmgenet_priv *priv, @@ -2529,7 +2529,7 @@ static void bcmgenet_irq_task(struct work_struct *work) /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) - phy_mac_interrupt(priv->phydev, + phy_mac_interrupt(priv->dev->phydev, !!(status & UMAC_IRQ_LINK_UP)); } @@ -2767,7 +2767,7 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); - phy_start(priv->phydev); + phy_start(dev->phydev); } static int bcmgenet_open(struct net_device *dev) @@ -2874,7 +2874,7 @@ static void bcmgenet_netif_stop(struct net_device *dev) /* Disable MAC transmit. TX DMA disabled must be done before this */ umac_enable_set(priv, CMD_TX_EN, false); - phy_stop(priv->phydev); + phy_stop(dev->phydev); bcmgenet_disable_rx_napi(priv); bcmgenet_intr_disable(priv); @@ -2903,7 +2903,7 @@ static int bcmgenet_close(struct net_device *dev) bcmgenet_netif_stop(dev); /* Really kill the PHY state machine and disconnect from it */ - phy_disconnect(priv->phydev); + phy_disconnect(dev->phydev); free_irq(priv->irq0, priv); free_irq(priv->irq1, priv); @@ -3523,7 +3523,7 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); if (!device_may_wakeup(d)) - phy_suspend(priv->phydev); + phy_suspend(dev->phydev); netif_device_detach(dev); @@ -3571,7 +3571,8 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(priv->phydev); + phy_init_hw(dev->phydev); + /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev, false); @@ -3602,7 +3603,7 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); if (!device_may_wakeup(d)) - phy_resume(priv->phydev); + phy_resume(dev->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 35f18a8d1ce6..3c50431ccd2a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -617,7 +617,6 @@ struct bcmgenet_priv { /* MDIO bus variables */ wait_queue_head_t wq; - struct phy_device *phydev; bool internal_phy; struct device_node *phy_dn; struct device_node *mdio_dn; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index a5ae9b78389c..ba3fcfdaa0bc 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -34,7 +34,7 @@ void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev 
= dev->phydev; u32 reg, cmd_bits = 0; bool status_changed = false; @@ -166,14 +166,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) } if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - fixed_phy_set_link_update(priv->phydev, + fixed_phy_set_link_update(priv->dev->phydev, bcmgenet_fixed_phy_link_update); } int bcmgenet_mii_config(struct net_device *dev, bool init) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = dev->phydev; struct device *kdev = &priv->pdev->dev; const char *phy_name = NULL; u32 id_mode_dis = 0; @@ -220,7 +220,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. */ - if ((priv->phydev->supported & PHY_BASIC_FEATURES) == + if ((dev->phydev->supported & PHY_BASIC_FEATURES) == PHY_BASIC_FEATURES) port_ctrl = PORT_MODE_EXT_RVMII_25; else @@ -290,7 +290,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return -ENODEV; } } else { - phydev = priv->phydev; + phydev = dev->phydev; phydev->dev_flags = phy_flags; ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, @@ -301,8 +301,6 @@ int bcmgenet_mii_probe(struct net_device *dev) } } - priv->phydev = phydev; - /* Configure port multiplexer based on what the probed PHY device since * reading the 'max-speed' property determines the maximum supported * PHY speed which is needed for bcmgenet_mii_config() to configure @@ -310,7 +308,7 @@ int bcmgenet_mii_probe(struct net_device *dev) */ ret = bcmgenet_mii_config(dev, true); if (ret) { - phy_disconnect(priv->phydev); + phy_disconnect(dev->phydev); return ret; } @@ -320,7 +318,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - priv->phydev->irq = PHY_IGNORE_INTERRUPT; + dev->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; } @@ -529,7 +527,6 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } - priv->phydev = phydev; priv->phy_interface = pd->phy_interface; return 0; From 8e8ef50bb4246d6fee9971994a6d846d9bc7ff4c Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 23 Oct 2017 14:17:30 -0400 Subject: [PATCH 1403/2177] net: dsa: legacy: don't unmask port bitmaps The legacy code does not unmask the cpu_port_mask and dsa_port_mask as stated. But this is done on the error path and those masks won't be used after that. So instead of fixing the bit operation, simply remove it. Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/legacy.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index b6c88fd33d4f..93c1c43bcc58 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -272,10 +272,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; dsa_cpu_dsa_destroy(&ds->ports[port]); - - /* Clearing a bit which is not set does no harm */ - ds->cpu_port_mask |= ~(1 << port); - ds->dsa_port_mask |= ~(1 << port); } if (ds->slave_mii_bus && ds->ops->phy_read) From eaac97466ee4e0ab6ed67afbf9fc400862e64986 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 23 Oct 2017 14:17:31 -0400 Subject: [PATCH 1404/2177] net: dsa: don't unmask port bitmaps The unapply functions are called on the error path. 
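(Side note on the bit operation that the legacy patch above chose not to fix; a standalone illustration, not kernel code. Clearing a port bit requires ANDing with the complement, whereas the legacy code ORed with the complement ('|= ~(1 << port)'), which never clears the target bit and sets every other bit instead -- one more reason that simply deleting it on an error path where the masks are never read again is the right call.)

	#include <stdio.h>
	#include <stdint.h>

	#define BIT(n) (1U << (n))

	int main(void)
	{
		uint32_t mask = BIT(0) | BIT(1) | BIT(2);	/* ports 0..2 set */
		int port = 1;

		uint32_t cleared = mask & ~BIT(port);	/* proper unmask -> 0x00000005 */
		uint32_t bogus   = mask | ~BIT(port);	/* '|= ~(1 << port)' -> 0xffffffff */

		printf("cleared=0x%08x bogus=0x%08x\n",
		       (unsigned int)cleared, (unsigned int)bogus);
		return 0;
	}
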
As for dsa_port_mask, enabled_port_mask and cpu_port_mask won't be used afterwards, so there's no need to unmask the corresponding port bit from them. This makes dsa_cpu_port_unapply() and dsa_dsa_port_unapply() identical, which can be factorized later. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9e8b8aab049d..62485a57dbfc 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -260,8 +260,6 @@ static void dsa_cpu_port_unapply(struct dsa_port *port) { devlink_port_unregister(&port->devlink_port); dsa_cpu_dsa_destroy(port); - port->ds->cpu_port_mask &= ~BIT(port->index); - } static int dsa_user_port_apply(struct dsa_port *port) @@ -300,7 +298,6 @@ static void dsa_user_port_unapply(struct dsa_port *port) if (port->slave) { dsa_slave_destroy(port->slave); port->slave = NULL; - port->ds->enabled_port_mask &= ~(1 << port->index); } } @@ -512,7 +509,6 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); - ds->cpu_port_mask &= ~BIT(index); return PTR_ERR(tag_ops); } From 7036d26f328f12a323069eb16d965055b4cb3795 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 24 Oct 2017 21:02:09 +0800 Subject: [PATCH 1405/2177] net: hns3: fix the bug of hns3_set_txbd_baseinfo The SC bits of the TX BD mean switch control. For this field, value 0 indicates no switch control: the packet is routed according to the forwarding table. Value 1 indicates that the packet is transmitted to the network bypassing the forwarding table. As the HNS3 driver needs to support VFs later, and a VF communicating with its own PF needs the forwarding table, this patch sets the SC bits of the TX BD to 0 so the forwarding table is used. Fixes: 76ad4f0 (net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC) Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 537f6c3babb7..c6c5b2a96aaa 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -716,7 +716,7 @@ static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end) HNS3_TXD_BDTYPE_M, 0); hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end); hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1); - hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 1); + hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0); } static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, From 3a46f34d20d453f09defb76b11a567647939c0aa Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 24 Oct 2017 21:02:10 +0800 Subject: [PATCH 1406/2177] net: hns3: add nic_client check when initialize roce base information The Roce driver works on top of the HNS3 driver. If the Roce driver is insmod'ed before the NIC driver, there is an error because nic_client is not checked. This patch adds a nic_client check when initializing the roce base information. Fixes: 46a3df9 (net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support) Signed-off-by: Lipeng Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 443124177f05..2c22d3cf6d1e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4285,7 +4285,7 @@ static int hclge_init_client_instance(struct hnae3_client *client, vport->roce.client = client; } - if (hdev->roce_client) { + if (hdev->roce_client && hdev->nic_client) { ret = hclge_init_roce_base_info(vport); if (ret) goto err; From a17dcf3f0124698d1120da71574bf4c339e5a368 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 24 Oct 2017 21:02:11 +0800 Subject: [PATCH 1407/2177] net: hns3: fix a bug in hclge_uninit_client_instance HNS3 driver initialize hdev->roce_client and vport->roce.client in hclge_init_client_instance, and need set hdev->roce_client and vport->roce.client NULL. If do not set them NULL when uninit, it will fail in the scene: insmod hns3.ko, hns-roce.ko, hns-roce-hw-v3.ko successfully, but rmmod hns3.ko after rmmod hns-roce-hw-v2.ko and hns-roce.ko. This patch fixes the issue. Fixes: 46a3df9 (net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support) Signed-off-by: Lipeng Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2c22d3cf6d1e..d11a9a56c7d8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4311,13 +4311,19 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { vport = &hdev->vport[i]; - if (hdev->roce_client) + if (hdev->roce_client) { hdev->roce_client->ops->uninit_instance(&vport->roce, 0); + hdev->roce_client = NULL; + vport->roce.client = NULL; + } if (client->type == HNAE3_CLIENT_ROCE) return; - if (client->ops->uninit_instance) + if (client->ops->uninit_instance) { client->ops->uninit_instance(&vport->nic, 0); + hdev->nic_client = NULL; + vport->nic.client = NULL; + } } } From c3b6f755fdcd2c0d8342c01e630741928a7c62ab Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 24 Oct 2017 21:02:12 +0800 Subject: [PATCH 1408/2177] net: hns3: fix the bug when reuse command description in hclge_add_mac_vlan_tbl When reusing a command description read from HW, driver should set IN_VLD bit, WR bit and NO_INTR bit. If IN_VLD bit and NO_INTR bit are not set, the command fails and driver prints error message: [ 135.261284] hns3 0000:7d:00.0: cmdq execute failed for get_mac_vlan_cmd_status,status=2. [ 135.270983] hns3 0000:7d:00.0: add mac addr failed for cmd_send, ret =-5. This patch fixes the bug. Fixes: 46a3df9 (net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support) Signed-off-by: Lipeng Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d11a9a56c7d8..0b95fbe63ac1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3600,11 +3600,11 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, resp_code, HCLGE_MAC_VLAN_ADD); } else { - mc_desc[0].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); + hclge_cmd_reuse_desc(&mc_desc[0], false); mc_desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - mc_desc[1].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); + hclge_cmd_reuse_desc(&mc_desc[1], false); mc_desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); + hclge_cmd_reuse_desc(&mc_desc[2], false); mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_NEXT); memcpy(mc_desc[0].data, req, sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); From acfdf7eabea4186a386ba5e656f0c739563cb1a5 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 24 Oct 2017 19:28:26 +0530 Subject: [PATCH 1409/2177] cxgb4: fix overflow in collecting IBQ and OBQ dump Destination buffer already has offset added. So, don't add offset again. Fetch actual size of configured OBQ from hardware, instead of using hardcoded value. Fixes: 7c075ce221cf ("cxgb4: collect IBQ and OBQ dumps") Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 20 +++++++++++++------ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 1 + .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 16 ++++++++++++++- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index c451b2e42a6c..19da54f83e52 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -146,8 +146,7 @@ static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init, /* t4_read_cim_ibq will return no. of read words or error */ no_of_read_words = t4_read_cim_ibq(padap, qid, - (u32 *)((u32 *)temp_buff.data + - temp_buff.offset), qsize); + (u32 *)temp_buff.data, qsize); /* no_of_read_words is less than or equal to 0 means error */ if (no_of_read_words <= 0) { if (!no_of_read_words) @@ -204,6 +203,17 @@ int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init, return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 5); } +u32 cudbg_cim_obq_size(struct adapter *padap, int qid) +{ + u32 value; + + t4_write_reg(padap, CIM_QUEUE_CONFIG_REF_A, OBQSELECT_F | + QUENUMSELECT_V(qid)); + value = t4_read_reg(padap, CIM_QUEUE_CONFIG_CTRL_A); + value = CIMQSIZE_G(value) * 64; /* size in number of words */ + return value * sizeof(u32); +} + static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err, int qid) @@ -214,15 +224,14 @@ static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init, u32 qsize; /* collect CIM OBQ */ - qsize = 6 * CIM_OBQ_SIZE * 4 * sizeof(u32); + qsize = cudbg_cim_obq_size(padap, qid); rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff); if (rc) return rc; /* t4_read_cim_obq will return no. 
of read words or error */ no_of_read_words = t4_read_cim_obq(padap, qid, - (u32 *)((u32 *)temp_buff.data + - temp_buff.offset), qsize); + (u32 *)temp_buff.data, qsize); /* no_of_read_words is less than or equal to 0 means error */ if (no_of_read_words <= 0) { if (!no_of_read_words) @@ -233,7 +242,6 @@ static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init, cudbg_put_buff(&temp_buff, dbg_buff); return rc; } - temp_buff.size = no_of_read_words * 4; cudbg_write_and_release_buff(&temp_buff, dbg_buff); return rc; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index c4440c1d0142..df24c409c82f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -100,4 +100,5 @@ int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init, struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, struct cudbg_entity_hdr *entity_hdr); +u32 cudbg_cim_obq_size(struct adapter *padap, int qid); #endif /* __CUDBG_LIB_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 9d97080a9d17..59740ac7e46e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -82,14 +82,28 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) len = CIM_IBQ_SIZE * 4 * sizeof(u32); break; case CUDBG_CIM_OBQ_ULP0: + len = cudbg_cim_obq_size(adap, 0); + break; case CUDBG_CIM_OBQ_ULP1: + len = cudbg_cim_obq_size(adap, 1); + break; case CUDBG_CIM_OBQ_ULP2: + len = cudbg_cim_obq_size(adap, 2); + break; case CUDBG_CIM_OBQ_ULP3: + len = cudbg_cim_obq_size(adap, 3); + break; case CUDBG_CIM_OBQ_SGE: + len = cudbg_cim_obq_size(adap, 4); + break; case CUDBG_CIM_OBQ_NCSI: + len = cudbg_cim_obq_size(adap, 5); + break; case CUDBG_CIM_OBQ_RXQ0: + len = cudbg_cim_obq_size(adap, 6); + break; case CUDBG_CIM_OBQ_RXQ1: - len = 6 * CIM_OBQ_SIZE * 4 * sizeof(u32); + len = cudbg_cim_obq_size(adap, 7); break; case CUDBG_EDC0: value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); From 3c91b0c1de8d013490bbc41ce9ee8810ea5baddd Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 24 Oct 2017 17:14:10 +0200 Subject: [PATCH 1410/2177] net: dsa: lan9303: Do not disable switch fabric port 0 at .probe Make the LAN9303 work when lan9303_probe() is called twice. For some unknown reason the LAN9303 switch fail to forward data when switch fabric port 0 TX is disabled during probe. (Write of LAN9303_MAC_TX_CFG_0 in lan9303_disable_processing_port().) In that situation the switch fabric seem to receive frames, because the ALR is learning addresses. But no frames are transmitted on any of the ports. In our system lan9303_probe() is called twice, first time dsa_register_switch() return -EPROBE_DEFER. As an experiment, modified the code to skip writing LAN9303_MAC_TX_CFG_0, port 0 during the first probe. Then the switch works as expected. Resolve the problem by not calling lan9303_disable_processing_port() on port 0 during probe. Ports 1 and 2 are still disabled. Although unsatisfying that the exact failure mechanism is not known, the patch should not cause any harm. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 87f919f0e641..4c412bd52319 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -818,7 +818,7 @@ static int lan9303_disable_processing(struct lan9303 *chip) { int p; - for (p = 0; p < LAN9303_NUM_PORTS; p++) { + for (p = 1; p < LAN9303_NUM_PORTS; p++) { int ret = lan9303_disable_processing_port(chip, p); if (ret) From c1eef220c1760762753b602c382127bfccee226d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 24 Oct 2017 15:30:37 -0700 Subject: [PATCH 1411/2177] vsock: always call vsock_init_tables() Although CONFIG_VSOCKETS_DIAG depends on CONFIG_VSOCKETS, vsock_init_tables() is not always called, it is called only if other modules call its caller. Therefore if we only enable CONFIG_VSOCKETS_DIAG, it would crash kernel on uninitialized vsock_bind_table. This patch fixes it by moving vsock_init_tables() to its own module_init(). Fixes: 413a4317aca7 ("VSOCK: add sock_diag interface") Reported-by: syzkaller bot Cc: Stefan Hajnoczi Cc: Jorgen Hansen Signed-off-by: Cong Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 98359c19522f..5d28abf87fbf 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -195,7 +195,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk) return __vsock_bind(sk, &local_addr); } -static void vsock_init_tables(void) +static int __init vsock_init_tables(void) { int i; @@ -204,6 +204,7 @@ static void vsock_init_tables(void) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) INIT_LIST_HEAD(&vsock_connected_table[i]); + return 0; } static void __vsock_insert_bound(struct list_head *list, @@ -1957,8 +1958,6 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner) vsock_proto.owner = owner; transport = t; - vsock_init_tables(); - vsock_device.minor = MISC_DYNAMIC_MINOR; err = misc_register(&vsock_device); if (err) { @@ -2019,6 +2018,8 @@ const struct vsock_transport *vsock_core_get_transport(void) } EXPORT_SYMBOL_GPL(vsock_core_get_transport); +module_init(vsock_init_tables); + MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); MODULE_VERSION("1.0.2.0-k"); From e233df01576bba9f5bafacccd571353b72152bd5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 24 Oct 2017 15:44:49 -0700 Subject: [PATCH 1412/2177] tipc: fix a dangling pointer tsk->group is set to grp earlier, but we forget to unset it after grp is freed. Fixes: 75da2163dbb6 ("tipc: introduce communication groups") Reported-by: syzkaller bot Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b3b72d8e9543..ea61c32f6b80 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2756,8 +2756,10 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) seq.upper = seq.lower; tipc_nametbl_build_group(net, grp, mreq->type, domain); rc = tipc_sk_publish(tsk, mreq->scope, &seq); - if (rc) + if (rc) { tipc_group_delete(net, grp); + tsk->group = NULL; + } /* Eliminate any risk that a broadcast overtakes the sent JOIN */ tsk->mc_method.rcast = true; From 3fc27b71b894f5e2ad611297d5817b6092c96626 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 24 Oct 2017 20:11:28 -0700 Subject: [PATCH 1413/2177] tools: bpftool: try to mount bpffs if required for pinning objects One possible cause of failure for `bpftool {prog|map} pin * file FILE` is the FILE not being in an eBPF virtual file system (bpffs). In this case, make bpftool attempt to mount bpffs on the parent directory of the FILE. Then, if this operation is successful, try again to pin the object. The code for mnt_bpffs() is a copy of function bpf_mnt_fs() from iproute2 package (under lib/bpf.c, taken at commit 4b73d52f8a81), with modifications regarding handling of error messages. Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/bpf/bpftool/common.c | 71 +++++++++++++++++++++++++++++++++----- tools/bpf/bpftool/main.h | 2 ++ 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b2533f1cae3e..f0288269dae8 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,37 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } +static int mnt_bpffs(const char *target, char *buff, size_t bufflen) +{ + bool bind_done = false; + + while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) { + if (errno != EINVAL || bind_done) { + snprintf(buff, bufflen, + "mount --make-private %s failed: %s", + target, strerror(errno)); + return -1; + } + + if (mount(target, target, "none", MS_BIND, NULL)) { + snprintf(buff, bufflen, + "mount --bind %s %s failed: %s", + target, target, strerror(errno)); + return -1; + } + + bind_done = true; + } + + if (mount("bpf", target, "bpf", 0, "mode=0700")) { + snprintf(buff, bufflen, "mount -t bpf bpf %s failed: %s", + target, strerror(errno)); + return -1; + } + + return 0; +} + int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) { enum bpf_obj_type type; @@ -89,8 +121,11 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) { + char err_str[ERR_MAX_LEN]; unsigned int id; char *endptr; + char *file; + char *dir; int err; int fd; @@ -117,16 +152,36 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) } err = bpf_obj_pin(fd, *argv); - close(fd); - if (err) { - p_err("can't pin the object (%s): %s", *argv, - errno == EACCES && !is_bpffs(dirname(*argv)) ? 
- "directory not in bpf file system (bpffs)" : - strerror(errno)); - return -1; + if (!err) + goto out_close; + + file = malloc(strlen(*argv) + 1); + strcpy(file, *argv); + dir = dirname(file); + + if (errno != EPERM || is_bpffs(dir)) { + p_err("can't pin the object (%s): %s", *argv, strerror(errno)); + goto out_free; } - return 0; + /* Attempt to mount bpffs, then retry pinning. */ + err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); + if (!err) { + err = bpf_obj_pin(fd, *argv); + if (err) + p_err("can't pin the object (%s): %s", *argv, + strerror(errno)); + } else { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount BPF file system to pin the object (%s): %s", + *argv, err_str); + } + +out_free: + free(file); +out_close: + close(fd); + return err; } const char *get_fd_type_name(enum bpf_obj_type type) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 2f94bed03a8d..d315d01be645 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -51,6 +51,8 @@ #define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) #define BAD_ARG() ({ p_err("what is '%s'?\n", *argv); -1; }) +#define ERR_MAX_LEN 1024 + #define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" #define HELP_SPEC_PROGRAM \ From 145686baab68e9c7594fe9269f47da479c25ad79 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Oct 2017 11:01:44 +0200 Subject: [PATCH 1414/2177] smc: fix mutex unlocks during link group creation Link group creation is synchronized with the smc_create_lgr_pending lock. In smc_listen_work() this mutex is sometimes unlocked, even though it has not been locked before. This issue will surface in presence of the SMC rendezvous code. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 745f145d4c4d..70f7640f5090 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -808,7 +808,7 @@ static void smc_listen_work(struct work_struct *work) rc = local_contact; if (rc == -ENOMEM) reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ - goto decline_rdma; + goto decline_rdma_unlock; } link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; @@ -816,7 +816,7 @@ static void smc_listen_work(struct work_struct *work) rc = smc_buf_create(new_smc); if (rc) { reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma; + goto decline_rdma_unlock; } smc_close_init(new_smc); @@ -831,7 +831,7 @@ static void smc_listen_work(struct work_struct *work) buf_desc->mr_rx[SMC_SINGLE_LINK]); if (rc) { reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma; + goto decline_rdma_unlock; } } } @@ -839,15 +839,15 @@ static void smc_listen_work(struct work_struct *work) rc = smc_clc_send_accept(new_smc, local_contact); if (rc) - goto out_err; + goto out_err_unlock; /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM); if (reason_code < 0) - goto out_err; + goto out_err_unlock; if (reason_code > 0) - goto decline_rdma; + goto decline_rdma_unlock; smc_conn_save_peer_info(new_smc, &cclc); if (local_contact == SMC_FIRST_CONTACT) smc_link_save_peer_info(link, &cclc); @@ -855,34 +855,34 @@ static void smc_listen_work(struct work_struct *work) rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc); if (rc) { reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma; + goto decline_rdma_unlock; } if (local_contact == SMC_FIRST_CONTACT) { rc = smc_ib_ready_link(link); if (rc) { reason_code 
= SMC_CLC_DECL_INTERR; - goto decline_rdma; + goto decline_rdma_unlock; } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); if (reason_code < 0) { /* peer is not aware of a problem */ rc = reason_code; - goto out_err; + goto out_err_unlock; } if (reason_code > 0) - goto decline_rdma; + goto decline_rdma_unlock; } smc_tx_init(new_smc); + mutex_unlock(&smc_create_lgr_pending); out_connected: sk_refcnt_debug_inc(newsmcsk); if (newsmcsk->sk_state == SMC_INIT) newsmcsk->sk_state = SMC_ACTIVE; enqueue: - mutex_unlock(&smc_create_lgr_pending); lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { smc_accept_enqueue(&lsmc->sk, newsmcsk); @@ -896,6 +896,8 @@ enqueue: sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ return; +decline_rdma_unlock: + mutex_unlock(&smc_create_lgr_pending); decline_rdma: /* RDMA setup failed, switch back to TCP */ smc_conn_free(&new_smc->conn); @@ -907,6 +909,8 @@ decline_rdma: } goto out_connected; +out_err_unlock: + mutex_unlock(&smc_create_lgr_pending); out_err: newsmcsk->sk_state = SMC_CLOSED; smc_conn_free(&new_smc->conn); From 60e2a7780793bae0debc275a9ccd57f7da0cf195 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Oct 2017 11:01:45 +0200 Subject: [PATCH 1415/2177] tcp: TCP experimental option for SMC The SMC protocol [1] relies on the use of a new TCP experimental option [2, 3]. With this option, SMC capabilities are exchanged between peers during the TCP three way handshake. This patch adds support for this experimental option to TCP. References: [1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609 [2] Shared Use of TCP Experimental Options RFC 6994: https://tools.ietf.org/rfc/rfc6994.txt [3] IANA ExID SMCR: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-exids Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 ++++-- include/net/inet_sock.h | 3 +- include/net/tcp.h | 7 +++++ net/ipv4/tcp.c | 6 ++++ net/ipv4/tcp_input.c | 35 ++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 19 ++++++++++++ net/ipv4/tcp_output.c | 63 ++++++++++++++++++++++++++++++++++++++-- 7 files changed, 136 insertions(+), 6 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 173a7c2f9636..8c431385b272 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -98,7 +98,8 @@ struct tcp_options_received { tstamp_ok : 1, /* TIMESTAMP seen on SYN packet */ dsack : 1, /* D-SACK is scheduled */ wscale_ok : 1, /* Wscale seen on SYN packet */ - sack_ok : 4, /* SACK seen on SYN packet */ + sack_ok : 3, /* SACK seen on SYN packet */ + smc_ok : 1, /* SMC seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ u8 num_sacks; /* Number of SACK blocks */ @@ -110,6 +111,9 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; +#if IS_ENABLED(CONFIG_SMC) + rx_opt->smc_ok = 0; +#endif } /* This is the max number of SACKS that we'll generate and process. It's safe @@ -229,7 +233,8 @@ struct tcp_sock { syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ save_syn:1, /* Save headers of SYN packet */ - is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? 
*/ + syn_smc:1; /* SYN includes SMC */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 425752f768d2..c49938d1481a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -92,7 +92,8 @@ struct inet_request_sock { wscale_ok : 1, ecn_ok : 1, acked : 1, - no_srccheck: 1; + no_srccheck: 1, + smc_ok : 1; kmemcheck_bitfield_end(flags); u32 ir_mark; union { diff --git a/include/net/tcp.h b/include/net/tcp.h index 2392f74074e7..285bc82dea41 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -191,6 +191,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); * experimental options. See draft-ietf-tcpm-experimental-options-00.txt */ #define TCPOPT_FASTOPEN_MAGIC 0xF989 +#define TCPOPT_SMC_MAGIC 0xE2D4C3D9 /* * TCP option lengths @@ -203,6 +204,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_MD5SIG 18 #define TCPOLEN_FASTOPEN_BASE 2 #define TCPOLEN_EXP_FASTOPEN_BASE 4 +#define TCPOLEN_EXP_SMC_BASE 6 /* But this is what stacks really send out. */ #define TCPOLEN_TSTAMP_ALIGNED 12 @@ -213,6 +215,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_PERBLOCK 8 #define TCPOLEN_MD5SIG_ALIGNED 20 #define TCPOLEN_MSS_ALIGNED 4 +#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8 /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -2108,4 +2111,8 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) { return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); } + +#if IS_ENABLED(CONFIG_SMC) +extern struct static_key_false tcp_have_smc; +#endif #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8f36277e82e9..f6e1c00e300e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -270,6 +270,7 @@ #include #include #include +#include #include #include @@ -302,6 +303,11 @@ EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); +#if IS_ENABLED(CONFIG_SMC) +DEFINE_STATIC_KEY_FALSE(tcp_have_smc); +EXPORT_SYMBOL(tcp_have_smc); +#endif + /* * Current number of TCP sockets. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 893286db4623..337f6011528a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -76,6 +76,8 @@ #include #include #include +#include +#include int sysctl_tcp_fack __read_mostly; int sysctl_tcp_max_reordering __read_mostly = 300; @@ -3737,6 +3739,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, foc->exp = exp_opt; } +static void smc_parse_options(const struct tcphdr *th, + struct tcp_options_received *opt_rx, + const unsigned char *ptr, + int opsize) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (th->syn && !(opsize & 1) && + opsize >= TCPOLEN_EXP_SMC_BASE && + get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) + opt_rx->smc_ok = 1; + } +#endif +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. 
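/*
 * Editor's note - illustrative sketch, not part of the patch above.
 * The SMC capability is signalled with a shared TCP experimental option
 * (RFC 6994): kind 254, length 6, followed by the 32-bit SMC ExID that
 * smc_parse_options() compares against TCPOPT_SMC_MAGIC. On transmit the
 * kernel pads the option with two NOPs to the 8-byte aligned size
 * (TCPOLEN_EXP_SMC_BASE_ALIGNED). A minimal user-space sketch of the
 * 6-byte option body; the macro names below are local to this example
 * but carry the same values as the kernel definitions.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define EX_TCPOPT_EXP        254         /* shared experimental option kind */
#define EX_TCPOLEN_EXP_SMC   6           /* kind + len + 4-byte ExID */
#define EX_TCPOPT_SMC_MAGIC  0xE2D4C3D9  /* IANA ExID assigned to SMC-R */

/* Fill buf (at least 6 bytes) with the SMC experimental option of a SYN. */
static inline void build_smc_exp_option(uint8_t *buf)
{
	uint32_t exid = htonl(EX_TCPOPT_SMC_MAGIC);

	buf[0] = EX_TCPOPT_EXP;
	buf[1] = EX_TCPOLEN_EXP_SMC;
	memcpy(&buf[2], &exid, sizeof(exid));
}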
@@ -3844,6 +3861,9 @@ void tcp_parse_options(const struct net *net, tcp_parse_fastopen_option(opsize - TCPOLEN_EXP_FASTOPEN_BASE, ptr + 2, th->syn, foc, true); + else + smc_parse_options(th, opt_rx, ptr, + opsize); break; } @@ -5598,6 +5618,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, return false; } +static void smc_check_reset_syn(struct tcp_sock *tp) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (tp->syn_smc && !tp->rx_opt.smc_ok) + tp->syn_smc = 0; + } +#endif +} + static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) { @@ -5704,6 +5734,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * is initialized. */ tp->copied_seq = tp->rcv_nxt; + smc_check_reset_syn(tp); + smp_mb(); tcp_finish_connect(sk, skb); @@ -6157,6 +6189,9 @@ static void tcp_openreq_init(struct request_sock *req, ireq->ir_rmt_port = tcp_hdr(skb)->source; ireq->ir_num = ntohs(tcp_hdr(skb)->dest); ireq->ir_mark = inet_request_mark(sk, skb); +#if IS_ENABLED(CONFIG_SMC) + ireq->smc_ok = rx_opt->smc_ok; +#endif } struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a952357054f4..056009f1c14f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -416,6 +417,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) } EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); +static void smc_check_reset_syn_req(struct tcp_sock *oldtp, + struct request_sock *req, + struct tcp_sock *newtp) +{ +#if IS_ENABLED(CONFIG_SMC) + struct inet_request_sock *ireq; + + if (static_branch_unlikely(&tcp_have_smc)) { + ireq = inet_rsk(req); + if (oldtp->syn_smc && !ireq->smc_ok) + newtp->syn_smc = 0; + } +#endif +} + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. 
-DaveM * @@ -433,6 +449,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp = tcp_sk(newsk); + struct tcp_sock *oldtp = tcp_sk(sk); + + smc_check_reset_syn_req(oldtp, req, newtp); /* Now setup tcp_sock */ newtp->pred_flags = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1f01f4c9c738..c8fc512e0bbb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -422,6 +423,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) #define OPTION_FAST_OPEN_COOKIE (1 << 8) +#define OPTION_SMC (1 << 9) + +static void smc_options_write(__be32 *ptr, u16 *options) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (unlikely(OPTION_SMC & *options)) { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_EXP << 8) | + (TCPOLEN_EXP_SMC_BASE)); + *ptr++ = htonl(TCPOPT_SMC_MAGIC); + } + } +#endif +} struct tcp_out_options { u16 options; /* bit field of OPTION_* */ @@ -540,6 +557,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, } ptr += (len + 3) >> 2; } + + smc_options_write(ptr, &options); +} + +static void smc_set_option(const struct tcp_sock *tp, + struct tcp_out_options *opts, + unsigned int *remaining) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (tp->syn_smc) { + if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; + } + } + } +#endif +} + +static void smc_set_option_cond(const struct tcp_sock *tp, + const struct inet_request_sock *ireq, + struct tcp_out_options *opts, + unsigned int *remaining) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (tp->syn_smc && ireq->smc_ok) { + if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; + } + } + } +#endif } /* Compute TCP options for SYN packets. This is not the final @@ -607,11 +659,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, } } + smc_set_option(tp, opts, &remaining); + return MAX_TCP_OPTION_SPACE - remaining; } /* Set up TCP options for SYN-ACKs. 
*/ -static unsigned int tcp_synack_options(struct request_sock *req, +static unsigned int tcp_synack_options(const struct sock *sk, + struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, const struct tcp_md5sig_key *md5, @@ -667,6 +722,8 @@ static unsigned int tcp_synack_options(struct request_sock *req, } } + smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining); + return MAX_TCP_OPTION_SPACE - remaining; } @@ -3195,8 +3252,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); - tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + - sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, + foc) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); From c5c1cc9c522fc337601213afeb39c3df2eb92d04 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Oct 2017 11:01:46 +0200 Subject: [PATCH 1416/2177] smc: add SMC rendezvous protocol The SMC protocol [1] uses a rendezvous protocol to negotiate SMC capability between peers. The current Linux implementation does not yet use this rendezvous protocol and, thus, is not compliant to RFC7609 and incompatible with other SMC implementations like in zOS. This patch adds support for the SMC rendezvous protocol. It uses a new TCP experimental option. With this option, SMC capabilities are exchanged between the peers during the TCP three way handshake. [1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609 Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 70f7640f5090..6451c5013e06 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -390,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc) int rc = 0; u8 ibport; + if (!tcp_sk(smc->clcsock->sk)->syn_smc) { + /* peer has not signalled SMC-capability */ + smc->use_fallback = true; + goto out_connected; + } + /* IPSec connections opt out of SMC-R optimizations */ if (using_ipsec(smc)) { reason_code = SMC_CLC_DECL_IPSEC; @@ -555,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, } smc_copy_sock_settings_to_clc(smc); + tcp_sk(smc->clcsock->sk)->syn_smc = 1; rc = kernel_connect(smc->clcsock, addr, alen, flags); if (rc) goto out; @@ -759,6 +766,12 @@ static void smc_listen_work(struct work_struct *work) u8 prefix_len; u8 ibport; + /* check if peer is smc capable */ + if (!tcp_sk(newclcsock->sk)->syn_smc) { + new_smc->use_fallback = true; + goto out_connected; + } + /* do inband token exchange - *wait for and receive SMC Proposal CLC message */ @@ -967,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog) * them to the clc socket -- copy smc socket options to clc socket */ smc_copy_sock_settings_to_clc(smc); + tcp_sk(smc->clcsock->sk)->syn_smc = 1; rc = kernel_listen(smc->clcsock, backlog); if (rc) @@ -1409,6 +1423,7 @@ static int __init smc_init(void) goto out_sock; } + static_branch_enable(&tcp_have_smc); return 0; out_sock: @@ -1433,6 +1448,7 @@ static void __exit smc_exit(void) list_del_init(&lgr->list); smc_lgr_free(lgr); /* free link group */ } + static_branch_disable(&tcp_have_smc); smc_ib_unregister_client(); sock_unregister(PF_SMC); proto_unregister(&smc_proto); From 2fc5f83b92ba93f8f7119461d998bcdb1ac519ac Mon Sep 17 
00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 26 Oct 2017 06:31:35 -0500 Subject: [PATCH 1417/2177] net: xfrm_user: use BUG_ON instead of if condition followed by BUG Use BUG_ON instead of if condition followed by BUG. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 45 ++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f7a12aa15086..bbc390ec6d29 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1146,13 +1146,14 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, u32 *flags = nlmsg_data(nlh); u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; + int err; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) - BUG(); + err = build_spdinfo(r_skb, net, sportid, seq, *flags); + BUG_ON(err < 0); return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } @@ -1204,13 +1205,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, u32 *flags = nlmsg_data(nlh); u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; + int err; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) - BUG(); + err = build_sadinfo(r_skb, net, sportid, seq, *flags); + BUG_ON(err < 0); return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } @@ -1957,8 +1959,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; - if (build_aevent(r_skb, x, &c) < 0) - BUG(); + err = build_aevent(r_skb, x, &c); + BUG_ON(err < 0); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); @@ -2385,6 +2388,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, { struct net *net = &init_net; struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap), GFP_ATOMIC); @@ -2392,8 +2396,8 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, k, sel, encap, dir, type) < 0) - BUG(); + err = build_migrate(skb, m, num_migrate, k, sel, encap, dir, type); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); } @@ -2617,13 +2621,14 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event { struct net *net = xs_net(x); struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_aevent(skb, x, c) < 0) - BUG(); + err = build_aevent(skb, x, c); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS); } @@ -2830,13 +2835,14 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, { struct net *net = xs_net(x); struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_acquire(skb, x, xt, xp) < 0) - BUG(); + err = build_acquire(skb, x, xt, xp); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE); } @@ -2945,13 +2951,14 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct { struct net *net = xp_net(xp); struct sk_buff 
*skb; + int err; skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_polexpire(skb, xp, dir, c) < 0) - BUG(); + err = build_polexpire(skb, xp, dir, c); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } @@ -3106,13 +3113,14 @@ static int xfrm_send_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_report(skb, proto, sel, addr) < 0) - BUG(); + err = build_report(skb, proto, sel, addr); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); } @@ -3153,6 +3161,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, { struct net *net = xs_net(x); struct sk_buff *skb; + int err; if (x->id.proto != IPPROTO_ESP) return -EINVAL; @@ -3164,8 +3173,8 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, if (skb == NULL) return -ENOMEM; - if (build_mapping(skb, x, ipaddr, sport) < 0) - BUG(); + err = build_mapping(skb, x, ipaddr, sport); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); } From 32d18ab1d44166cbb1dcaf8b359183636734ddb1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 24 Oct 2017 12:41:01 +0200 Subject: [PATCH 1418/2177] net: updating dst lastusage is an unlikely event. Since commit 0da4af00b2ed ("ipv6: only update __use and lastusetime once per jiffy at most"), updating the dst lastuse field is an unlikely action: it happens at most once per jiffy, out of potentially millions of calls per second. Mark explicitly the code as such, and let the compiler generate better code. Note: gcc 7.2 and several older versions do actually generate different - better - code when the unlikely() hint is in place, avoid jump in the fast path and keeping better code locality. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/dst.h b/include/net/dst.h index 5047e8053d6c..2f53ecc2c296 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -257,7 +257,7 @@ static inline void dst_hold(struct dst_entry *dst) static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) { - if (time != dst->lastuse) { + if (unlikely(time != dst->lastuse)) { dst->__use++; dst->lastuse = time; } From b5beecb580376cd8d959eb990abece6a748a3ce3 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 24 Oct 2017 19:57:12 +0200 Subject: [PATCH 1419/2177] net: stmmac: snps, dwmac-mdio MDIOs are automatically registered stmmac bindings docs said that its mdio node must have compatible = "snps,dwmac-mdio"; Since dwmac-sun8i does not have any good reasons to not doing it, all their MDIO node must have it. Since these compatible is automatically registered, dwmac-sun8i compatible does not need to be in need_mdio_ids. Signed-off-by: Corentin Labbe Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 8a280b48e3a9..9e616da0745d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -311,10 +311,6 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, bool mdio = true; static const struct of_device_id need_mdio_ids[] = { { .compatible = "snps,dwc-qos-ethernet-4.10" }, - { .compatible = "allwinner,sun8i-a83t-emac" }, - { .compatible = "allwinner,sun8i-h3-emac" }, - { .compatible = "allwinner,sun8i-v3s-emac" }, - { .compatible = "allwinner,sun50i-a64-emac" }, {}, }; From 634db83b82658f4641d8026e340c6027cf74a6bb Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 24 Oct 2017 19:57:13 +0200 Subject: [PATCH 1420/2177] net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs The Allwinner H3 SoC have two distinct MDIO bus, only one could be active at the same time. The selection of the active MDIO bus are done via some bits in the EMAC register of the system controller. This patch implement this MDIO switch via a custom MDIO-mux. Signed-off-by: Corentin Labbe Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 1 + .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 353 +++++++++++------- 2 files changed, 224 insertions(+), 130 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 97035766c291..e28c0d2c58e9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -159,6 +159,7 @@ config DWMAC_SUN8I tristate "Allwinner sun8i GMAC support" default ARCH_SUNXI depends on OF && (ARCH_SUNXI || COMPILE_TEST) + select MDIO_BUS_MUX ---help--- Support for Allwinner H3 A83T A64 EMAC ethernet controllers. diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 39c2122a4f26..b3eb344bb158 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -41,14 +42,14 @@ * This value is used for disabling properly EMAC * and used as a good starting value in case of the * boot process(uboot) leave some stuff. 
- * @internal_phy: Does the MAC embed an internal PHY + * @soc_has_internal_phy: Does the MAC embed an internal PHY * @support_mii: Does the MAC handle MII * @support_rmii: Does the MAC handle RMII * @support_rgmii: Does the MAC handle RGMII */ struct emac_variant { u32 default_syscon_value; - int internal_phy; + bool soc_has_internal_phy; bool support_mii; bool support_rmii; bool support_rgmii; @@ -61,7 +62,8 @@ struct emac_variant { * @rst_ephy: reference to the optional EPHY reset for the internal PHY * @variant: reference to the current board variant * @regmap: regmap for using the syscon - * @use_internal_phy: Does the current PHY choice imply using the internal PHY + * @internal_phy_powered: Does the internal PHY is enabled + * @mux_handle: Internal pointer used by mdio-mux lib */ struct sunxi_priv_data { struct clk *tx_clk; @@ -70,12 +72,13 @@ struct sunxi_priv_data { struct reset_control *rst_ephy; const struct emac_variant *variant; struct regmap *regmap; - bool use_internal_phy; + bool internal_phy_powered; + void *mux_handle; }; static const struct emac_variant emac_variant_h3 = { .default_syscon_value = 0x58000, - .internal_phy = PHY_INTERFACE_MODE_MII, + .soc_has_internal_phy = true, .support_mii = true, .support_rmii = true, .support_rgmii = true @@ -83,20 +86,20 @@ static const struct emac_variant emac_variant_h3 = { static const struct emac_variant emac_variant_v3s = { .default_syscon_value = 0x38000, - .internal_phy = PHY_INTERFACE_MODE_MII, + .soc_has_internal_phy = true, .support_mii = true }; static const struct emac_variant emac_variant_a83t = { .default_syscon_value = 0, - .internal_phy = 0, + .soc_has_internal_phy = false, .support_mii = true, .support_rgmii = true }; static const struct emac_variant emac_variant_a64 = { .default_syscon_value = 0, - .internal_phy = 0, + .soc_has_internal_phy = false, .support_mii = true, .support_rmii = true, .support_rgmii = true @@ -195,6 +198,9 @@ static const struct emac_variant emac_variant_a64 = { #define H3_EPHY_LED_POL BIT(17) /* 1: active low, 0: active high */ #define H3_EPHY_SHUTDOWN BIT(16) /* 1: shutdown, 0: power up */ #define H3_EPHY_SELECT BIT(15) /* 1: internal PHY, 0: external PHY */ +#define H3_EPHY_MUX_MASK (H3_EPHY_SHUTDOWN | H3_EPHY_SELECT) +#define DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID 1 +#define DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID 2 /* H3/A64 specific bits */ #define SYSCON_RMII_EN BIT(13) /* 1: enable RMII (overrides EPIT) */ @@ -634,6 +640,159 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv) return 0; } +/* Search in mdio-mux node for internal PHY node and get its clk/reset */ +static int get_ephy_nodes(struct stmmac_priv *priv) +{ + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + struct device_node *mdio_mux, *iphynode; + struct device_node *mdio_internal; + int ret; + + mdio_mux = of_get_child_by_name(priv->device->of_node, "mdio-mux"); + if (!mdio_mux) { + dev_err(priv->device, "Cannot get mdio-mux node\n"); + return -ENODEV; + } + + mdio_internal = of_find_compatible_node(mdio_mux, NULL, + "allwinner,sun8i-h3-mdio-internal"); + if (!mdio_internal) { + dev_err(priv->device, "Cannot get internal_mdio node\n"); + return -ENODEV; + } + + /* Seek for internal PHY */ + for_each_child_of_node(mdio_internal, iphynode) { + gmac->ephy_clk = of_clk_get(iphynode, 0); + if (IS_ERR(gmac->ephy_clk)) + continue; + gmac->rst_ephy = of_reset_control_get_exclusive(iphynode, NULL); + if (IS_ERR(gmac->rst_ephy)) { + ret = PTR_ERR(gmac->rst_ephy); + if (ret == -EPROBE_DEFER) + return ret; + continue; + } + 
dev_info(priv->device, "Found internal PHY node\n"); + return 0; + } + return -ENODEV; +} + +static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv) +{ + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + int ret; + + if (gmac->internal_phy_powered) { + dev_warn(priv->device, "Internal PHY already powered\n"); + return 0; + } + + dev_info(priv->device, "Powering internal PHY\n"); + ret = clk_prepare_enable(gmac->ephy_clk); + if (ret) { + dev_err(priv->device, "Cannot enable internal PHY\n"); + return ret; + } + + /* Make sure the EPHY is properly reseted, as U-Boot may leave + * it at deasserted state, and thus it may fail to reset EMAC. + */ + reset_control_assert(gmac->rst_ephy); + + ret = reset_control_deassert(gmac->rst_ephy); + if (ret) { + dev_err(priv->device, "Cannot deassert internal phy\n"); + clk_disable_unprepare(gmac->ephy_clk); + return ret; + } + + gmac->internal_phy_powered = true; + + return 0; +} + +static int sun8i_dwmac_unpower_internal_phy(struct sunxi_priv_data *gmac) +{ + if (!gmac->internal_phy_powered) + return 0; + + clk_disable_unprepare(gmac->ephy_clk); + reset_control_assert(gmac->rst_ephy); + gmac->internal_phy_powered = false; + return 0; +} + +/* MDIO multiplexing switch function + * This function is called by the mdio-mux layer when it thinks the mdio bus + * multiplexer needs to switch. + * 'current_child' is the current value of the mux register + * 'desired_child' is the value of the 'reg' property of the target child MDIO + * node. + * The first time this function is called, current_child == -1. + * If current_child == desired_child, then the mux is already set to the + * correct bus. + */ +static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, + void *data) +{ + struct stmmac_priv *priv = data; + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + u32 reg, val; + int ret = 0; + bool need_power_ephy = false; + + if (current_child ^ desired_child) { + regmap_read(gmac->regmap, SYSCON_EMAC_REG, ®); + switch (desired_child) { + case DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID: + dev_info(priv->device, "Switch mux to internal PHY"); + val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SELECT; + + need_power_ephy = true; + break; + case DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID: + dev_info(priv->device, "Switch mux to external PHY"); + val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SHUTDOWN; + need_power_ephy = false; + break; + default: + dev_err(priv->device, "Invalid child ID %x\n", + desired_child); + return -EINVAL; + } + regmap_write(gmac->regmap, SYSCON_EMAC_REG, val); + if (need_power_ephy) { + ret = sun8i_dwmac_power_internal_phy(priv); + if (ret) + return ret; + } else { + sun8i_dwmac_unpower_internal_phy(gmac); + } + /* After changing syscon value, the MAC need reset or it will + * use the last value (and so the last PHY set). 
+ */ + ret = sun8i_dwmac_reset(priv); + } + return ret; +} + +static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv) +{ + int ret; + struct device_node *mdio_mux; + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + + mdio_mux = of_get_child_by_name(priv->device->of_node, "mdio-mux"); + if (!mdio_mux) + return -ENODEV; + + ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn, + &gmac->mux_handle, priv, priv->mii); + return ret; +} + static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) { struct sunxi_priv_data *gmac = priv->plat->bsp_priv; @@ -648,35 +807,25 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) "Current syscon value is not the default %x (expect %x)\n", val, reg); - if (gmac->variant->internal_phy) { - if (!gmac->use_internal_phy) { - /* switch to external PHY interface */ - reg &= ~H3_EPHY_SELECT; - } else { - reg |= H3_EPHY_SELECT; - reg &= ~H3_EPHY_SHUTDOWN; - dev_dbg(priv->device, "Select internal_phy %x\n", reg); + if (gmac->variant->soc_has_internal_phy) { + if (of_property_read_bool(priv->plat->phy_node, + "allwinner,leds-active-low")) + reg |= H3_EPHY_LED_POL; + else + reg &= ~H3_EPHY_LED_POL; - if (of_property_read_bool(priv->plat->phy_node, - "allwinner,leds-active-low")) - reg |= H3_EPHY_LED_POL; - else - reg &= ~H3_EPHY_LED_POL; + /* Force EPHY xtal frequency to 24MHz. */ + reg |= H3_EPHY_CLK_SEL; - /* Force EPHY xtal frequency to 24MHz. */ - reg |= H3_EPHY_CLK_SEL; - - ret = of_mdio_parse_addr(priv->device, - priv->plat->phy_node); - if (ret < 0) { - dev_err(priv->device, "Could not parse MDIO addr\n"); - return ret; - } - /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY - * address. No need to mask it again. - */ - reg |= ret << H3_EPHY_ADDR_SHIFT; + ret = of_mdio_parse_addr(priv->device, priv->plat->phy_node); + if (ret < 0) { + dev_err(priv->device, "Could not parse MDIO addr\n"); + return ret; } + /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY + * address. No need to mask it again. + */ + reg |= 1 << H3_EPHY_ADDR_SHIFT; } if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { @@ -746,81 +895,21 @@ static void sun8i_dwmac_unset_syscon(struct sunxi_priv_data *gmac) regmap_write(gmac->regmap, SYSCON_EMAC_REG, reg); } -static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv) -{ - struct sunxi_priv_data *gmac = priv->plat->bsp_priv; - int ret; - - if (!gmac->use_internal_phy) - return 0; - - ret = clk_prepare_enable(gmac->ephy_clk); - if (ret) { - dev_err(priv->device, "Cannot enable ephy\n"); - return ret; - } - - /* Make sure the EPHY is properly reseted, as U-Boot may leave - * it at deasserted state, and thus it may fail to reset EMAC. 
- */ - reset_control_assert(gmac->rst_ephy); - - ret = reset_control_deassert(gmac->rst_ephy); - if (ret) { - dev_err(priv->device, "Cannot deassert ephy\n"); - clk_disable_unprepare(gmac->ephy_clk); - return ret; - } - - return 0; -} - -static int sun8i_dwmac_unpower_internal_phy(struct sunxi_priv_data *gmac) -{ - if (!gmac->use_internal_phy) - return 0; - - clk_disable_unprepare(gmac->ephy_clk); - reset_control_assert(gmac->rst_ephy); - return 0; -} - -/* sun8i_power_phy() - Activate the PHY: - * In case of error, no need to call sun8i_unpower_phy(), - * it will be called anyway by sun8i_dwmac_exit() - */ -static int sun8i_power_phy(struct stmmac_priv *priv) -{ - int ret; - - ret = sun8i_dwmac_power_internal_phy(priv); - if (ret) - return ret; - - ret = sun8i_dwmac_set_syscon(priv); - if (ret) - return ret; - - /* After changing syscon value, the MAC need reset or it will use - * the last value (and so the last PHY set. - */ - ret = sun8i_dwmac_reset(priv); - if (ret) - return ret; - return 0; -} - -static void sun8i_unpower_phy(struct sunxi_priv_data *gmac) -{ - sun8i_dwmac_unset_syscon(gmac); - sun8i_dwmac_unpower_internal_phy(gmac); -} - static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) { struct sunxi_priv_data *gmac = priv; - sun8i_unpower_phy(gmac); + if (gmac->variant->soc_has_internal_phy) { + /* sun8i_dwmac_exit could be called with mdiomux uninit */ + if (gmac->mux_handle) + mdio_mux_uninit(gmac->mux_handle); + if (gmac->internal_phy_powered) + sun8i_dwmac_unpower_internal_phy(gmac); + } + + sun8i_dwmac_unset_syscon(gmac); + + reset_control_put(gmac->rst_ephy); clk_disable_unprepare(gmac->tx_clk); @@ -849,7 +938,7 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) if (!mac) return NULL; - ret = sun8i_power_phy(priv); + ret = sun8i_dwmac_set_syscon(priv); if (ret) return NULL; @@ -889,6 +978,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) struct sunxi_priv_data *gmac; struct device *dev = &pdev->dev; int ret; + struct stmmac_priv *priv; + struct net_device *ndev; ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) @@ -932,29 +1023,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) } plat_dat->interface = of_get_phy_mode(dev->of_node); - if (plat_dat->interface == gmac->variant->internal_phy) { - dev_info(&pdev->dev, "Will use internal PHY\n"); - gmac->use_internal_phy = true; - gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0); - if (IS_ERR(gmac->ephy_clk)) { - ret = PTR_ERR(gmac->ephy_clk); - dev_err(&pdev->dev, "Cannot get EPHY clock: %d\n", ret); - return -EINVAL; - } - - gmac->rst_ephy = of_reset_control_get(plat_dat->phy_node, NULL); - if (IS_ERR(gmac->rst_ephy)) { - ret = PTR_ERR(gmac->rst_ephy); - if (ret == -EPROBE_DEFER) - return ret; - dev_err(&pdev->dev, "No EPHY reset control found %d\n", - ret); - return -EINVAL; - } - } else { - dev_info(&pdev->dev, "Will use external PHY\n"); - gmac->use_internal_phy = false; - } /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. 
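/*
 * Editor's note - illustrative sketch only, not part of the patch above.
 * mdio_mux_init(), as used by sun8i_dwmac_register_mdio_mux() in this
 * patch, hands every bus selection to a switch_fn callback. The callback
 * receives the currently selected child (-1 on the very first call), the
 * child that should become active (the 'reg' value of the target MDIO
 * node), and the opaque pointer passed at registration. A minimal
 * implementation only touches the hardware when the selection actually
 * changes; everything named "example_*" below is hypothetical.
 */
#include <linux/errno.h>

struct example_mux {
	int selected;			/* last child routed to the bus */
};

static int example_mdio_mux_switch_fn(int current_child, int desired_child,
				      void *data)
{
	struct example_mux *mux = data;

	if (current_child == desired_child)
		return 0;		/* bus already routed as requested */

	switch (desired_child) {
	case 1:				/* e.g. internal PHY bus */
	case 2:				/* e.g. external MDIO bus */
		/* device-specific select register write would go here */
		mux->selected = desired_child;
		return 0;
	default:
		return -EINVAL;		/* unknown 'reg' value in DT child */
	}
}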
@@ -973,9 +1041,34 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - sun8i_dwmac_exit(pdev, plat_dat->bsp_priv); + goto dwmac_exit; + + ndev = dev_get_drvdata(&pdev->dev); + priv = netdev_priv(ndev); + /* The mux must be registered after parent MDIO + * so after stmmac_dvr_probe() + */ + if (gmac->variant->soc_has_internal_phy) { + ret = get_ephy_nodes(priv); + if (ret) + goto dwmac_exit; + ret = sun8i_dwmac_register_mdio_mux(priv); + if (ret) { + dev_err(&pdev->dev, "Failed to register mux\n"); + goto dwmac_mux; + } + } else { + ret = sun8i_dwmac_reset(priv); + if (ret) + goto dwmac_exit; + } return ret; +dwmac_mux: + sun8i_dwmac_unset_syscon(gmac); +dwmac_exit: + sun8i_dwmac_exit(pdev, plat_dat->bsp_priv); +return ret; } static const struct of_device_id sun8i_dwmac_match[] = { From a8ff8ccb45d37efa64476958fc5e9a8d9716b23b Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 24 Oct 2017 19:57:14 +0200 Subject: [PATCH 1421/2177] net: stmmac: sun8i: Restore the compatibles The original dwmac-sun8i DT bindings have some issue on how to handle integrated PHY and was reverted in last RC of 4.13. But now we have a solution so we need to get back that was reverted. This patch restore compatibles about dwmac-sun8i This reverts commit ad4540cc5aa3 ("net: stmmac: sun8i: Remove the compatibles") Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index b3eb344bb158..e5ff734d4f9b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1072,6 +1072,14 @@ return ret; } static const struct of_device_id sun8i_dwmac_match[] = { + { .compatible = "allwinner,sun8i-h3-emac", + .data = &emac_variant_h3 }, + { .compatible = "allwinner,sun8i-v3s-emac", + .data = &emac_variant_v3s }, + { .compatible = "allwinner,sun8i-a83t-emac", + .data = &emac_variant_a83t }, + { .compatible = "allwinner,sun50i-a64-emac", + .data = &emac_variant_a64 }, { } }; MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); From a78ec0d4f0eada03378413d51c92d0c1253133fa Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 25 Oct 2017 12:27:34 +0300 Subject: [PATCH 1422/2177] thunderbolt: Drop sequence number check from tb_xdomain_match() Commit 9a03c3d398c1 ("thunderbolt: Fix a couple right shifting to zero bugs") revealed an issue that was previously hidden because we never actually compared received XDomain message sequence numbers properly. The idea with these sequence numbers is that the responding host uses the same sequence number that was in the request packet which we can then check at the requesting host. However, testing against macOS it looks like it does not follow this but instead uses some other logic. Windows driver on the other hand handles it the same way than Linux. In order to be able to talk to macOS again, fix this so that we drop the whole sequence number check. This effectively works exactly the same than it worked before the aforementioned commit. This also follows the logic the original P2P networking code used. Signed-off-by: Mika Westerberg Signed-off-by: David S. 
Miller --- drivers/thunderbolt/xdomain.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index ff8d91189e99..f25d88d4552b 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -56,7 +56,6 @@ static bool tb_xdomain_match(const struct tb_cfg_request *req, case TB_CFG_PKG_XDOMAIN_RESP: { const struct tb_xdp_header *res_hdr = pkg->buffer; const struct tb_xdp_header *req_hdr = req->request; - u32 req_seq, res_seq; if (pkg->frame.size < req->response_size / 4) return false; @@ -68,14 +67,6 @@ static bool tb_xdomain_match(const struct tb_cfg_request *req, if ((res_hdr->xd_hdr.route_lo) != req_hdr->xd_hdr.route_lo) return false; - /* Then check that the sequence number matches */ - res_seq = res_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK; - res_seq >>= TB_XDOMAIN_SN_SHIFT; - req_seq = req_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK; - req_seq >>= TB_XDOMAIN_SN_SHIFT; - if (res_seq != req_seq) - return false; - /* Check that the XDomain protocol matches */ if (!uuid_equal(&res_hdr->uuid, &req_hdr->uuid)) return false; From 0ff624fbfefbb96db62d100bda84e4fbdabaf628 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:51:03 -0700 Subject: [PATCH 1423/2177] drivers/net: 3com/3c515: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Stephen Hemminger Cc: Johannes Berg Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c515.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index c5987f518cb2..b648e3f95c01 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -367,7 +367,7 @@ static struct net_device *corkscrew_scan(int unit); static int corkscrew_setup(struct net_device *dev, int ioaddr, struct pnp_dev *idev, int card_number); static int corkscrew_open(struct net_device *dev); -static void corkscrew_timer(unsigned long arg); +static void corkscrew_timer(struct timer_list *t); static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb, struct net_device *dev); static int corkscrew_rx(struct net_device *dev); @@ -627,7 +627,7 @@ static int corkscrew_setup(struct net_device *dev, int ioaddr, spin_lock_init(&vp->lock); - setup_timer(&vp->timer, corkscrew_timer, (unsigned long) dev); + timer_setup(&vp->timer, corkscrew_timer, 0); /* Read the station address from the EEPROM. 
*/ EL3WINDOW(0); @@ -869,11 +869,11 @@ static int corkscrew_open(struct net_device *dev) return 0; } -static void corkscrew_timer(unsigned long data) +static void corkscrew_timer(struct timer_list *t) { #ifdef AUTOMEDIA - struct net_device *dev = (struct net_device *) data; - struct corkscrew_private *vp = netdev_priv(dev); + struct corkscrew_private *vp = from_timer(vp, t, timer); + struct net_device *dev = vp->our_dev; int ioaddr = dev->base_addr; unsigned long flags; int ok = 0; From 550acfb37ffeee9ff00f7f13f087ad1c33a74b29 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:51:14 -0700 Subject: [PATCH 1424/2177] drivers/net: can: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: "David S. Miller" Cc: Allen Pais Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/can/grcan.c | 19 ++++++++----------- drivers/net/can/sja1000/peak_pcmcia.c | 6 +++--- drivers/net/can/usb/peak_usb/pcan_usb.c | 10 ++++++---- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 8570cfdaea75..897c6b113d3f 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -807,10 +807,10 @@ static irqreturn_t grcan_interrupt(int irq, void *dev_id) * is not ONGOING (TX might be stuck in ONGOING due to a harwrware bug * for single shot) */ -static void grcan_running_reset(unsigned long data) +static void grcan_running_reset(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct grcan_priv *priv = netdev_priv(dev); + struct grcan_priv *priv = from_timer(priv, t, rr_timer); + struct net_device *dev = priv->dev; struct grcan_registers __iomem *regs = priv->regs; unsigned long flags; @@ -898,10 +898,10 @@ static inline void grcan_reset_timer(struct timer_list *timer, __u32 bitrate) } /* Disable channels and schedule a running reset */ -static void grcan_initiate_running_reset(unsigned long data) +static void grcan_initiate_running_reset(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct grcan_priv *priv = netdev_priv(dev); + struct grcan_priv *priv = from_timer(priv, t, hang_timer); + struct net_device *dev = priv->dev; struct grcan_registers __iomem *regs = priv->regs; unsigned long flags; @@ -1626,11 +1626,8 @@ static int grcan_setup_netdev(struct platform_device *ofdev, spin_lock_init(&priv->lock); if (priv->need_txbug_workaround) { - setup_timer(&priv->rr_timer, grcan_running_reset, - (unsigned long)dev); - - setup_timer(&priv->hang_timer, grcan_initiate_running_reset, - (unsigned long)dev); + timer_setup(&priv->rr_timer, grcan_running_reset, 0); + timer_setup(&priv->hang_timer, grcan_initiate_running_reset, 0); } netif_napi_add(dev, &priv->napi, grcan_poll, GRCAN_NAPI_WEIGHT); diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c index 4b8758e10bd4..485b19c9ae47 100644 --- a/drivers/net/can/sja1000/peak_pcmcia.c +++ b/drivers/net/can/sja1000/peak_pcmcia.c @@ -381,9 +381,9 @@ static inline void pcan_set_can_power(struct pcan_pccard *card, int onoff) /* * set leds state according to channel activity */ -static void pcan_led_timer(unsigned long arg) +static void pcan_led_timer(struct timer_list *t) { - struct pcan_pccard *card = 
(struct pcan_pccard *)arg; + struct pcan_pccard *card = from_timer(card, t, led_timer); struct net_device *netdev; int i, up_count = 0; u8 ccr; @@ -692,7 +692,7 @@ static int pcan_probe(struct pcmcia_device *pdev) } /* init the timer which controls the leds */ - setup_timer(&card->led_timer, pcan_led_timer, (unsigned long)card); + timer_setup(&card->led_timer, pcan_led_timer, 0); /* request the given irq */ err = request_irq(pdev->irq, &pcan_isr, IRQF_SHARED, PCC_NAME, card); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 7e10dbdded28..25a9b79cc42d 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -259,10 +259,13 @@ static int pcan_usb_write_mode(struct peak_usb_device *dev, u8 onoff) /* * handle end of waiting for the device to reset */ -static void pcan_usb_restart(unsigned long arg) +static void pcan_usb_restart(struct timer_list *t) { + struct pcan_usb *pdev = from_timer(pdev, t, restart_timer); + struct peak_usb_device *dev = &pdev->dev; + /* notify candev and netdev */ - peak_usb_restart_complete((struct peak_usb_device *)arg); + peak_usb_restart_complete(dev); } /* @@ -798,8 +801,7 @@ static int pcan_usb_init(struct peak_usb_device *dev) int err; /* initialize a timer needed to wait for hardware restart */ - setup_timer(&pdev->restart_timer, pcan_usb_restart, - (unsigned long)dev); + timer_setup(&pdev->restart_timer, pcan_usb_restart, 0); /* * explicit use of dev_xxx() instead of netdev_xxx() here: From f6fd8918f04191a3e24a0717e97520b65a7d6c4e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:51:20 -0700 Subject: [PATCH 1425/2177] drivers/net: hamradio/yam: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Initialization was entirely missing. Cc: Jean-Paul Roubelat Cc: linux-hams@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/hamradio/yam.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 7a7c5224a336..b88c5cc00a63 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -647,7 +647,7 @@ static void yam_arbitrate(struct net_device *dev) yam_start_tx(dev, yp); } -static void yam_dotimer(unsigned long dummy) +static void yam_dotimer(struct timer_list *unused) { int i; @@ -1164,7 +1164,7 @@ static int __init yam_init_driver(void) } - yam_timer.function = yam_dotimer; + timer_setup(&yam_timer, yam_dotimer, 0); yam_timer.expires = jiffies + HZ / 100; add_timer(&yam_timer); From 0eba23bbcece8f0fe925f302facbae27f086b887 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:51:29 -0700 Subject: [PATCH 1426/2177] drivers/net: hippi: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Jes Sorensen Cc: linux-hippi@sunsite.dk Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/hippi/rrunner.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 76cc140774a2..8483f03d5a41 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1146,10 +1146,10 @@ static inline void rr_raz_rx(struct rr_private *rrpriv, } } -static void rr_timer(unsigned long data) +static void rr_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct rr_private *rrpriv = netdev_priv(dev); + struct rr_private *rrpriv = from_timer(rrpriv, t, timer); + struct net_device *dev = pci_get_drvdata(rrpriv->pci_dev); struct rr_regs __iomem *regs = rrpriv->regs; unsigned long flags; @@ -1229,7 +1229,7 @@ static int rr_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - setup_timer(&rrpriv->timer, rr_timer, (unsigned long)dev); + timer_setup(&rrpriv->timer, rr_timer, 0); rrpriv->timer.expires = RUN_AT(5*HZ); /* 5 sec. watchdog */ add_timer(&rrpriv->timer); From 3248f77fa3eec6014653166a9cd0d429e8d30890 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:51:38 -0700 Subject: [PATCH 1427/2177] drivers/net: netronome: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Jakub Kicinski Cc: "David S. Miller" Cc: Jiri Pirko Cc: Jamal Hadi Salim Cc: Simon Horman Cc: oss-drivers@netronome.com Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 5 +++-- drivers/net/ethernet/netronome/nfp/bpf/main.h | 3 ++- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 9 ++++----- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 7 +++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index fa0ac90ed956..f15a186f6c87 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -97,8 +97,9 @@ nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) nn->app_priv = priv; spin_lock_init(&priv->rx_filter_lock); - setup_timer(&priv->rx_filter_stats_timer, - nfp_net_filter_stats_timer, (unsigned long)nn); + priv->nn = nn; + timer_setup(&priv->rx_filter_stats_timer, + nfp_net_filter_stats_timer, 0); ret = nfp_app_nic_vnic_alloc(app, nn, id); if (ret) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 86edc0691a5f..bc604030ff6c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -209,10 +209,11 @@ struct nfp_net_bpf_priv { struct nfp_stat_pair rx_filter, rx_filter_prev; unsigned long rx_filter_change; struct timer_list rx_filter_stats_timer; + struct nfp_net *nn; spinlock_t rx_filter_lock; }; int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf); -void nfp_net_filter_stats_timer(unsigned long data); +void nfp_net_filter_stats_timer(struct timer_list *t); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index fbca1ca1f39b..63c8f7847054 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ 
b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -51,14 +51,13 @@ #include "../nfp_net_ctrl.h" #include "../nfp_net.h" -void nfp_net_filter_stats_timer(unsigned long data) +void nfp_net_filter_stats_timer(struct timer_list *t) { - struct nfp_net *nn = (void *)data; - struct nfp_net_bpf_priv *priv; + struct nfp_net_bpf_priv *priv = from_timer(priv, t, + rx_filter_stats_timer); + struct nfp_net *nn = priv->nn; struct nfp_stat_pair latest; - priv = nn->app_priv; - spin_lock_bh(&priv->rx_filter_lock); if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 2c9109b09faf..eddf850a6a7f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -177,9 +177,9 @@ static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline) return timed_out ? -EIO : 0; } -static void nfp_net_reconfig_timer(unsigned long data) +static void nfp_net_reconfig_timer(struct timer_list *t) { - struct nfp_net *nn = (void *)data; + struct nfp_net *nn = from_timer(nn, t, reconfig_timer); spin_lock_bh(&nn->reconfig_lock); @@ -3537,8 +3537,7 @@ struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev, spin_lock_init(&nn->reconfig_lock); spin_lock_init(&nn->link_status_lock); - setup_timer(&nn->reconfig_timer, - nfp_net_reconfig_timer, (unsigned long)nn); + timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0); return nn; } From 97815186d4f1ea0a14b0683ddc63f288809a94e9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:51:58 -0700 Subject: [PATCH 1428/2177] drivers/net: nuvoton: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Wan ZongShun Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/ethernet/nuvoton/w90p910_ether.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 4a67c55aa9f1..052b3d2c07a1 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -253,10 +253,10 @@ static void update_linkspeed(struct net_device *dev) netif_carrier_on(dev); } -static void w90p910_check_link(unsigned long dev_id) +static void w90p910_check_link(struct timer_list *t) { - struct net_device *dev = (struct net_device *) dev_id; - struct w90p910_ether *ether = netdev_priv(dev); + struct w90p910_ether *ether = from_timer(ether, t, check_timer); + struct net_device *dev = ether->mii.dev; update_linkspeed(dev); mod_timer(&ether->check_timer, jiffies + msecs_to_jiffies(1000)); @@ -957,8 +957,7 @@ static int w90p910_ether_setup(struct net_device *dev) ether->mii.mdio_read = w90p910_mdio_read; ether->mii.mdio_write = w90p910_mdio_write; - setup_timer(&ether->check_timer, w90p910_check_link, - (unsigned long)dev); + timer_setup(&ether->check_timer, w90p910_check_link, 0); return 0; } From 9de36ccf0891fbdfcc347a34bda009977d8dc2a8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:53:12 -0700 Subject: [PATCH 1429/2177] drivers/net: realtek: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Realtek linux nic maintainers Cc: "David S. Miller" Cc: David Howells Cc: Jay Vosburgh Cc: Allen Pais Cc: Eric Dumazet Cc: Tobias Klauser Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/atp.c | 12 +++++++----- drivers/net/ethernet/realtek/r8169.c | 7 +++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index bdc3833fab7e..7e011c1c1e6e 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -170,6 +170,7 @@ struct net_local { spinlock_t lock; struct net_device *next_module; struct timer_list timer; /* Media selection timer. */ + struct net_device *dev; /* Timer dev. */ unsigned long last_rx_time; /* Last Rx, in jiffies, to handle Rx hang. */ int saved_tx_size; unsigned int tx_unit_busy:1; @@ -184,7 +185,7 @@ struct net_local { #define TIMED_CHECKER (HZ/4) #ifdef TIMED_CHECKER #include -static void atp_timed_checker(unsigned long ignored); +static void atp_timed_checker(struct timer_list *t); #endif /* Index to functions, as function prototypes. */ @@ -438,7 +439,8 @@ static int net_open(struct net_device *dev) hardware_init(dev); - setup_timer(&lp->timer, atp_timed_checker, (unsigned long)dev); + lp->dev = dev; + timer_setup(&lp->timer, atp_timed_checker, 0); lp->timer.expires = jiffies + TIMED_CHECKER; add_timer(&lp->timer); @@ -708,11 +710,11 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance) #ifdef TIMED_CHECKER /* This following code fixes a rare (and very difficult to track down) problem where the adapter forgets its ethernet address.
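For reference, a minimal sketch of the conversion pattern these timer patches apply when the old callback argument was the net_device rather than the structure holding the timer: a back-pointer is added to the private struct, from_timer() recovers the private struct, and the device is reached through that pointer. All names here (foo_priv, foo_poll, foo_open) are made up for illustration; this is not taken from any of the patches above and assumes a kernel build environment.

#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/netdevice.h>

struct foo_priv {
	struct timer_list poll_timer;
	struct net_device *dev;	/* back-pointer; the callback no longer receives it */
};

/* Old style: foo_poll(unsigned long data) casting 'data' to a net_device.
 * New style: the timer pointer comes in and from_timer() recovers the
 * enclosing private struct by the timer's field name.
 */
static void foo_poll(struct timer_list *t)
{
	struct foo_priv *priv = from_timer(priv, t, poll_timer);
	struct net_device *dev = priv->dev;

	/* poll hardware, update link state on 'dev', etc. */
	mod_timer(&priv->poll_timer, jiffies + HZ);
}

static void foo_open(struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);

	priv->dev = dev;				/* was passed as (unsigned long)dev */
	timer_setup(&priv->poll_timer, foo_poll, 0);	/* was setup_timer(..., (unsigned long)dev) */
	mod_timer(&priv->poll_timer, jiffies + HZ);
}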
*/ -static void atp_timed_checker(unsigned long data) +static void atp_timed_checker(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; + struct net_local *lp = from_timer(lp, t, timer); + struct net_device *dev = lp->dev; long ioaddr = dev->base_addr; - struct net_local *lp = netdev_priv(dev); int tickssofar = jiffies - lp->last_rx_time; int i; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index a3c949ea7d1a..7dc4b6de31e6 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4401,10 +4401,9 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) schedule_work(&tp->wk.work); } -static void rtl8169_phy_timer(unsigned long __opaque) +static void rtl8169_phy_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)__opaque; - struct rtl8169_private *tp = netdev_priv(dev); + struct rtl8169_private *tp = from_timer(tp, t, timer); rtl_schedule_task(tp, RTL_FLAG_TASK_PHY_PENDING); } @@ -8454,7 +8453,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ? ~(RxBOVF | RxFOVF) : ~0; - setup_timer(&tp->timer, rtl8169_phy_timer, (unsigned long)dev); + timer_setup(&tp->timer, rtl8169_phy_timer, 0); tp->rtl_fw = RTL_FIRMWARE_UNKNOWN; From c37631c7f686518157de4aa5fb456a54f27607b6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:53:20 -0700 Subject: [PATCH 1430/2177] drivers/net: sxgbe: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Byungho An Cc: Girish K S Cc: Vipul Pandya Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 89831adb8eb7..fd35d8004a78 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -105,9 +105,9 @@ void sxgbe_disable_eee_mode(struct sxgbe_priv_data * const priv) * If there is no data transfer and if we are not in LPI state, * then MAC Transmitter can be moved to LPI state. */ -static void sxgbe_eee_ctrl_timer(unsigned long arg) +static void sxgbe_eee_ctrl_timer(struct timer_list *t) { - struct sxgbe_priv_data *priv = (struct sxgbe_priv_data *)arg; + struct sxgbe_priv_data *priv = from_timer(priv, t, eee_ctrl_timer); sxgbe_enable_eee_mode(priv); mod_timer(&priv->eee_ctrl_timer, SXGBE_LPI_TIMER(eee_timer)); @@ -134,8 +134,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv) return false; priv->eee_active = 1; - setup_timer(&priv->eee_ctrl_timer, sxgbe_eee_ctrl_timer, - (unsigned long)priv); + timer_setup(&priv->eee_ctrl_timer, sxgbe_eee_ctrl_timer, 0); priv->eee_ctrl_timer.expires = SXGBE_LPI_TIMER(eee_timer); add_timer(&priv->eee_ctrl_timer); @@ -1002,13 +1001,13 @@ static void sxgbe_disable_mtl_engine(struct sxgbe_priv_data *priv) /** * sxgbe_tx_timer: mitigation sw timer for tx. - * @data: data pointer + * @t: timer pointer * Description: * This is the timer handler to directly invoke the sxgbe_tx_clean. 
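As a contrast to the back-pointer case, a short sketch of the simpler sub-pattern used when the old callback argument was already the structure that contains the timer (as in the per-queue tx timer below): from_timer() alone yields the object and no extra field is needed. Names (foo_tx_queue, foo_tx_timer, foo_tx_init) are invented for illustration and do not come from the patches.

#include <linux/timer.h>
#include <linux/jiffies.h>

struct foo_tx_queue {
	struct timer_list txtimer;
	unsigned int coal_timer_ms;
};

static void foo_tx_timer(struct timer_list *t)
{
	struct foo_tx_queue *q = from_timer(q, t, txtimer);

	/* clean the queue, then re-arm */
	mod_timer(&q->txtimer, jiffies + msecs_to_jiffies(q->coal_timer_ms));
}

static void foo_tx_init(struct foo_tx_queue *q)
{
	q->coal_timer_ms = 1000;
	timer_setup(&q->txtimer, foo_tx_timer, 0);
	mod_timer(&q->txtimer, jiffies + msecs_to_jiffies(q->coal_timer_ms));
}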
*/ -static void sxgbe_tx_timer(unsigned long data) +static void sxgbe_tx_timer(struct timer_list *t) { - struct sxgbe_tx_queue *p = (struct sxgbe_tx_queue *)data; + struct sxgbe_tx_queue *p = from_timer(p, t, txtimer); sxgbe_tx_queue_clean(p); } @@ -1028,8 +1027,7 @@ static void sxgbe_tx_init_coalesce(struct sxgbe_priv_data *priv) struct sxgbe_tx_queue *p = priv->txq[queue_num]; p->tx_coal_frames = SXGBE_TX_FRAMES; p->tx_coal_timer = SXGBE_COAL_TX_TIMER; - setup_timer(&p->txtimer, sxgbe_tx_timer, - (unsigned long)&priv->txq[queue_num]); + timer_setup(&p->txtimer, sxgbe_tx_timer, 0); p->txtimer.expires = SXGBE_COAL_TIMER(p->tx_coal_timer); add_timer(&p->txtimer); } From e2009be038143b4a8c7d4b51b74da63220739a74 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:53:42 -0700 Subject: [PATCH 1431/2177] drivers/net: wan/dscc4: Remove unused timer This removes an entirely unused timer, which avoids needing to convert it to timer_setup(). Cc: Francois Romieu Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/wan/dscc4.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 64f176496da4..c0b0f525c87c 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -223,8 +223,6 @@ struct dscc4_dev_priv { u32 scc_regs[SCC_REGISTERS_MAX]; /* Cf errata DS5 p.4 */ - struct timer_list timer; - struct dscc4_pci_priv *pci_priv; spinlock_t lock; @@ -369,7 +367,6 @@ static int dscc4_close(struct net_device *); static int dscc4_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int dscc4_init_ring(struct net_device *); static void dscc4_release_ring(struct dscc4_dev_priv *); -static void dscc4_timer(unsigned long); static void dscc4_tx_timeout(struct net_device *); static irqreturn_t dscc4_irq(int irq, void *dev_id); static int dscc4_hdlc_attach(struct net_device *, unsigned short, unsigned short); @@ -983,19 +980,6 @@ err_out: return ret; }; -/* FIXME: get rid of the unneeded code */ -static void dscc4_timer(unsigned long data) -{ - struct net_device *dev = (struct net_device *)data; - struct dscc4_dev_priv *dpriv = dscc4_priv(dev); -// struct dscc4_pci_priv *ppriv; - - goto done; -done: - dpriv->timer.expires = jiffies + TX_TIMEOUT; - add_timer(&dpriv->timer); -} - static void dscc4_tx_timeout(struct net_device *dev) { /* FIXME: something is missing there */ @@ -1127,9 +1111,6 @@ static int dscc4_open(struct net_device *dev) done: netif_start_queue(dev); - setup_timer(&dpriv->timer, dscc4_timer, (unsigned long)dev); - dpriv->timer.expires = jiffies + 10*HZ; - add_timer(&dpriv->timer); netif_carrier_on(dev); return 0; @@ -1197,7 +1178,6 @@ static int dscc4_close(struct net_device *dev) { struct dscc4_dev_priv *dpriv = dscc4_priv(dev); - del_timer_sync(&dpriv->timer); netif_stop_queue(dev); scc_patchl(PowerUp | Vis, 0, dpriv, dev, CCR0); From 605ea2f9356f315c9c03d69a54dc910fde32fd71 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:53:53 -0700 Subject: [PATCH 1432/2177] drivers/net: wan/lmc: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Allen Pais Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/wan/lmc/lmc_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index ae69d65158e6..37b1e0d03e31 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -99,7 +99,7 @@ static void lmc_initcsrs(lmc_softc_t * const sc, lmc_csrptr_t csr_base, size_t c static void lmc_softreset(lmc_softc_t * const); static void lmc_running_reset(struct net_device *dev); static int lmc_ifdown(struct net_device * const); -static void lmc_watchdog(unsigned long data); +static void lmc_watchdog(struct timer_list *t); static void lmc_reset(lmc_softc_t * const sc); static void lmc_dec_reset(lmc_softc_t * const sc); static void lmc_driver_timeout(struct net_device *dev); @@ -636,10 +636,10 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ /* the watchdog process that cruises around */ -static void lmc_watchdog (unsigned long data) /*fold00*/ +static void lmc_watchdog(struct timer_list *t) /*fold00*/ { - struct net_device *dev = (struct net_device *)data; - lmc_softc_t *sc = dev_to_sc(dev); + lmc_softc_t *sc = from_timer(sc, t, timer); + struct net_device *dev = sc->lmc_device; int link_status; u32 ticks; unsigned long flags; @@ -1084,7 +1084,7 @@ static int lmc_open(struct net_device *dev) * Setup a timer for the watchdog on probe, and start it running. * Since lmc_ok == 0, it will be a NOP for now. */ - setup_timer(&sc->timer, lmc_watchdog, (unsigned long)dev); + timer_setup(&sc->timer, lmc_watchdog, 0); sc->timer.expires = jiffies + HZ; add_timer (&sc->timer); From 032cfd66afcc2dd2c7be89c71b020fcb15bcc37d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:53:59 -0700 Subject: [PATCH 1433/2177] drivers/net: wan/sdla: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Allen Pais Cc: "David S. Miller" Cc: Tobias Klauser Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller --- drivers/net/wan/sdla.c | 12 +++++------- include/linux/if_frad.h | 1 + 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 0cc48902dbb9..57ed259c8208 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -927,13 +927,10 @@ static irqreturn_t sdla_isr(int dummy, void *dev_id) return IRQ_HANDLED; } -static void sdla_poll(unsigned long device) +static void sdla_poll(struct timer_list *t) { - struct net_device *dev; - struct frad_local *flp; - - dev = (struct net_device *) device; - flp = netdev_priv(dev); + struct frad_local *flp = from_timer(flp, t, timer); + struct net_device *dev = flp->dev; if (sdla_byte(dev, SDLA_502_RCV_BUF)) sdla_receive(dev); @@ -1616,8 +1613,9 @@ static void setup_sdla(struct net_device *dev) flp->assoc = sdla_assoc; flp->deassoc = sdla_deassoc; flp->dlci_conf = sdla_dlci_conf; + flp->dev = dev; - setup_timer(&flp->timer, sdla_poll, (unsigned long)dev); + timer_setup(&flp->timer, sdla_poll, 0); flp->timer.expires = 1; } diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 46df7e565d6f..82a1b4e93570 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -83,6 +83,7 @@ struct frad_local /* fields that are used by the Sangoma SDLA cards */ struct timer_list timer; + struct net_device *dev; int type; /* adapter type */ int state; /* state of the S502/8 control latch */ int buffer; /* current buffer for S508 firmware */ From c58320de5194b57858b829b8f204ca6bc1e38e10 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:54:06 -0700 Subject: [PATCH 1434/2177] drivers/net: arcnet: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Michael Grzeschik Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/arcnet/arcnet.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 13236b2cdf13..8459115d9d4e 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -382,9 +382,10 @@ static void arcdev_setup(struct net_device *dev) dev->flags = IFF_BROADCAST; } -static void arcnet_timer(unsigned long data) +static void arcnet_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; + struct arcnet_local *lp = from_timer(lp, t, timer); + struct net_device *dev = lp->dev; if (!netif_carrier_ok(dev)) { netif_carrier_on(dev); @@ -450,7 +451,7 @@ struct net_device *alloc_arcdev(const char *name) lp->dev = dev; spin_lock_init(&lp->lock); - setup_timer(&lp->timer, arcnet_timer, (unsigned long)dev); + timer_setup(&lp->timer, arcnet_timer, 0); } return dev; From 4dc12ffeaeac939097a3f55c881d3dc3523dff0c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 25 Oct 2017 15:57:55 +0200 Subject: [PATCH 1435/2177] l2tp: cleanup l2tp_tunnel_delete calls l2tp_tunnel_delete does not return anything since commit 62b982eeb458 ("l2tp: fix race condition in l2tp_tunnel_delete"). But call sites of l2tp_tunnel_delete still do casts to void to avoid unused return value warnings. Kill these now useless casts. Signed-off-by: Jiri Slaby Cc: Sabrina Dubroca Cc: Guillaume Nault Cc: David S. Miller Cc: netdev@vger.kernel.org Acked-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 2 +- net/l2tp/l2tp_netlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 02d61101b108..af22aa8ae35b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1891,7 +1891,7 @@ static __net_exit void l2tp_exit_net(struct net *net) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - (void)l2tp_tunnel_delete(tunnel); + l2tp_tunnel_delete(tunnel); } rcu_read_unlock_bh(); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f5179424eaf1..f04fb347d251 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -282,7 +282,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, L2TP_CMD_TUNNEL_DELETE); - (void) l2tp_tunnel_delete(tunnel); + l2tp_tunnel_delete(tunnel); l2tp_tunnel_dec_refcount(tunnel); From eee12df5a0bd5769af5efb72fa95dd1f633a266c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 26 Oct 2017 07:51:06 -0500 Subject: [PATCH 1436/2177] ipv6: esp6: use BUG_ON instead of if condition followed by BUG Use BUG_ON instead of if condition followed by BUG in esp_remove_trailer. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1696401fed6c..4000b71bfdc5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -483,8 +483,8 @@ static inline int esp_remove_trailer(struct sk_buff *skb) goto out; } - if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) - BUG(); + ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2); + BUG_ON(ret); ret = -EINVAL; padlen = nexthdr[0]; From 2ae21cf527da0e5cf9d7ee14bd5b0909bb9d1a75 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:54:56 -0700 Subject: [PATCH 1437/2177] tcp: Namespace-ify sysctl_tcp_early_retrans Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv4/tcp_input.c | 1 - net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 4 +++- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2c4222a5d102..a7f39e3ea666 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -128,6 +128,7 @@ struct netns_ipv4 { int sysctl_tcp_sack; int sysctl_tcp_window_scaling; int sysctl_tcp_timestamps; + int sysctl_tcp_early_retrans; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 285bc82dea41..a12b71d4118b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -265,7 +265,6 @@ extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_recovery; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 81d218346cf7..f0f650f020af 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -634,15 +634,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_early_retrans", - .data = &sysctl_tcp_early_retrans, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &four, - }, { .procname = "tcp_min_tso_segs", .data = &sysctl_tcp_min_tso_segs, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_early_retrans", + .data = &init_net.ipv4.sysctl_tcp_early_retrans, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &four, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 337f6011528a..7656b1e6d504 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -95,7 +95,6 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 3; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 23a8100af5ad..7ab313f6768e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2484,6 +2484,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_sack = 1; net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; + net->ipv4.sysctl_tcp_early_retrans = 3; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c8fc512e0bbb..21713836d46a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2435,6 +2435,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 timeout, rto_delta_us; + int early_retrans; /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. 
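For orientation, a schematic sketch of the "Namespace-ify" pattern that this and the following tcp sysctl patches repeat. The fragments below come from several files and are not a compilable unit; "tcp_example" is a made-up knob used only to show the shape of the change, not an actual sysctl.

/* 1) include/net/netns/ipv4.h: the knob becomes per-netns state (struct abridged) */
struct netns_ipv4 {
	/* ... */
	int sysctl_tcp_example;
	/* ... */
};

/* 2) net/ipv4/tcp_ipv4.c: each namespace gets a default in tcp_sk_init() */
static int __net_init tcp_sk_init(struct net *net)
{
	net->ipv4.sysctl_tcp_example = 1;
	return 0;
}

/* 3) readers reach the value through the socket's namespace instead of a global */
static bool tcp_example_enabled(const struct sock *sk)
{
	return sock_net(sk)->ipv4.sysctl_tcp_example != 0;
}

/* 4) net/ipv4/sysctl_net_ipv4.c: the ctl_table entry moves from ipv4_table[]
 * to ipv4_net_table[]; .data nominally points into init_net and is remapped
 * per namespace when the table is registered.
 */
static struct ctl_table ipv4_net_table[] = {
	{
		.procname	= "tcp_example",
		.data		= &init_net.ipv4.sysctl_tcp_example,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};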
@@ -2442,10 +2443,11 @@ bool tcp_schedule_loss_probe(struct sock *sk) if (tp->fastopen_rsk) return false; + early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || + if ((early_retrans != 3 && early_retrans != 4) || !tp->packets_out || !tcp_is_sack(tp) || icsk->icsk_ca_state != TCP_CA_Open) return false; From e20223f1962831d1b1c416d59d259879d0639d68 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:54:57 -0700 Subject: [PATCH 1438/2177] tcp: Namespace-ify sysctl_tcp_recovery Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 2 +- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_recovery.c | 2 -- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a7f39e3ea666..d6ed718075d4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -129,6 +129,7 @@ struct netns_ipv4 { int sysctl_tcp_window_scaling; int sysctl_tcp_timestamps; int sysctl_tcp_early_retrans; + int sysctl_tcp_recovery; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index a12b71d4118b..c7f51534fc44 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -265,7 +265,7 @@ extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern int sysctl_tcp_recovery; + #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ extern int sysctl_tcp_limit_output_bytes; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f0f650f020af..78019adcae87 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -449,13 +449,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_recovery", - .data = &sysctl_tcp_recovery, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_max_reordering", .data = &sysctl_tcp_max_reordering, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &zero, .extra2 = &four, }, + { + .procname = "tcp_recovery", + .data = &init_net.ipv4.sysctl_tcp_recovery, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7656b1e6d504..5b2272dbf6a9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2788,7 +2788,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) struct tcp_sock *tp = tcp_sk(sk); /* Use RACK to detect loss */ - if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { + if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { u32 prior_retrans = tp->retrans_out; tcp_rack_mark_lost(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7ab313f6768e..517ff1948a71 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2485,6 +2485,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; net->ipv4.sysctl_tcp_early_retrans = 3; + net->ipv4.sysctl_tcp_recovery = 
TCP_RACK_LOSS_DETECTION; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index cda6074a429a..d3603a9e24ea 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -1,8 +1,6 @@ #include #include -int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION; - static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); From 2c04ac8ae0b61e0780a30b7069a11bb202b21f26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:54:58 -0700 Subject: [PATCH 1439/2177] tcp: Namespace-ify sysctl_tcp_thin_linear_timeouts Note that sysctl_tcp_thin_dupack was not used, I deleted it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 2 -- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_timer.c | 4 +--- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d6ed718075d4..2a9f37b39c45 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -130,6 +130,7 @@ struct netns_ipv4 { int sysctl_tcp_timestamps; int sysctl_tcp_early_retrans; int sysctl_tcp_recovery; + int sysctl_tcp_thin_linear_timeouts; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index c7f51534fc44..063a7a48b7fe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -263,8 +263,6 @@ extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; -extern int sysctl_tcp_thin_linear_timeouts; -extern int sysctl_tcp_thin_dupack; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 78019adcae87..12003214f4d8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -620,13 +620,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_allowed_congestion_control, }, - { - .procname = "tcp_thin_linear_timeouts", - .data = &sysctl_tcp_thin_linear_timeouts, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_min_tso_segs", .data = &sysctl_tcp_min_tso_segs, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_thin_linear_timeouts", + .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 804a8d34ce86..035a1ef1f2d8 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,8 +22,6 @@ #include #include -int sysctl_tcp_thin_linear_timeouts __read_mostly; - /** * tcp_write_err() - close socket and save error info * @sk: The socket the error has appeared on. 
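A small illustrative aside on the read side of these conversions: different callers obtain the struct net through different accessors before indexing into ->ipv4, as the hunks below and in the later tcp_minisocks.c change do. The helper name example_reads is invented; the snippet assumes the per-netns fields introduced by this series are present.

#include <net/sock.h>
#include <net/inet_timewait_sock.h>

static int example_reads(const struct sock *sk, const struct inet_timewait_sock *tw)
{
	struct net *net = sock_net(sk);	/* from a full socket */
	int thin = net->ipv4.sysctl_tcp_thin_linear_timeouts;
	int rfc1337 = twsk_net(tw)->ipv4.sysctl_tcp_rfc1337;	/* from a timewait socket */

	return thin + rfc1337;
}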
@@ -522,7 +520,7 @@ out_reset_timer: * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; From b510f0d23a47c3d1f074fe583e7867dc4918fe02 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:54:59 -0700 Subject: [PATCH 1440/2177] tcp: Namespace-ify sysctl_tcp_slow_start_after_idle Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 3 +-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 5 +---- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2a9f37b39c45..8662692686b3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -131,6 +131,7 @@ struct netns_ipv4 { int sysctl_tcp_early_retrans; int sysctl_tcp_recovery; int sysctl_tcp_thin_linear_timeouts; + int sysctl_tcp_slow_start_after_idle; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 063a7a48b7fe..cc2ab522eb5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -262,7 +262,6 @@ extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; -extern int sysctl_tcp_slow_start_after_idle; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ @@ -1308,7 +1307,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || + if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 12003214f4d8..40d69af8b363 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -571,13 +571,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_slow_start_after_idle", - .data = &sysctl_tcp_slow_start_after_idle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, #ifdef CONFIG_NETLABEL { .procname = "cipso_cache_enable", @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_slow_start_after_idle", + .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 517ff1948a71..cea63a4b5965 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2486,6 +2486,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_timestamps = 1; net->ipv4.sysctl_tcp_early_retrans = 3; net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; + net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. 
*/ net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 21713836d46a..bdc288a06f94 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -62,9 +62,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 262144; */ int sysctl_tcp_tso_win_divisor __read_mostly = 3; -/* By default, RFC2861 behavior. */ -int sysctl_tcp_slow_start_after_idle __read_mostly = 1; - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -1690,7 +1687,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sysctl_tcp_slow_start_after_idle && + if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); From e0a1e5b519236dc1662ff25e42560dd1be9e3776 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:00 -0700 Subject: [PATCH 1441/2177] tcp: Namespace-ify sysctl_tcp_retrans_collapse Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 5 +---- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8662692686b3..b28c172b10e4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { int sysctl_tcp_recovery; int sysctl_tcp_thin_linear_timeouts; int sysctl_tcp_slow_start_after_idle; + int sysctl_tcp_retrans_collapse; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index cc2ab522eb5c..33cc86355b8f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 40d69af8b363..533b92ad39dd 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -386,13 +386,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, } static struct ctl_table ipv4_table[] = { - { - .procname = "tcp_retrans_collapse", - .data = &sysctl_tcp_retrans_collapse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_orphans", .data = &sysctl_tcp_max_orphans, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_retrans_collapse", + .data = &init_net.ipv4.sysctl_tcp_retrans_collapse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cea63a4b5965..2bc6ba2059d3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2487,7 +2487,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_early_retrans = 3; net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. 
*/ - + net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdc288a06f94..55a0aa4b96df 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -45,9 +45,6 @@ #include -/* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse __read_mostly = 1; - /* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ @@ -2804,7 +2801,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sysctl_tcp_retrans_collapse) + if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; From 3f4c7c6f6a9053493ce7dd8a0f17ed8eafc53893 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:01 -0700 Subject: [PATCH 1442/2177] tcp: Namespace-ify sysctl_tcp_stdurg Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 3 +-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b28c172b10e4..ffa2cf3dc747 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,7 @@ struct netns_ipv4 { int sysctl_tcp_thin_linear_timeouts; int sysctl_tcp_slow_start_after_idle; int sysctl_tcp_retrans_collapse; + int sysctl_tcp_stdurg; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 33cc86355b8f..cf3fac7008d7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 533b92ad39dd..a34bb75815c1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_stdurg", - .data = &sysctl_tcp_stdurg, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_rfc1337", .data = &sysctl_tcp_rfc1337, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_stdurg", + .data = &init_net.ipv4.sysctl_tcp_stdurg, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5b2272dbf6a9..14b06963c102 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,7 +89,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; -int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; @@ -5123,7 +5122,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = 
ntohs(th->urg_ptr); - if (ptr && !sysctl_tcp_stdurg) + if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) ptr--; ptr += ntohl(th->seq); From 625357aa175c688d219da43c8cfaa2e1629e0e1a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:02 -0700 Subject: [PATCH 1443/2177] tcp: Namespace-ify sysctl_tcp_rfc1337 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 1 - net/ipv4/tcp_minisocks.c | 2 +- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ffa2cf3dc747..968edce38eb5 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -134,6 +134,7 @@ struct netns_ipv4 { int sysctl_tcp_slow_start_after_idle; int sysctl_tcp_retrans_collapse; int sysctl_tcp_stdurg; + int sysctl_tcp_rfc1337; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index cf3fac7008d7..2aea2b3373b3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_fack; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a34bb75815c1..832e554235df 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_rfc1337", - .data = &sysctl_tcp_rfc1337, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_rfc1337", + .data = &init_net.ipv4.sysctl_tcp_rfc1337, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 14b06963c102..64fde81b0eb7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,7 +89,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; -int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 056009f1c14f..11836667763c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -181,7 +181,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (sysctl_tcp_rfc1337 == 0) { + if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; From 65c9410cf55ecf32da1b720f563365d565d6289a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:03 -0700 Subject: [PATCH 1444/2177] tcp: Namespace-ify sysctl_tcp_abort_on_overflow Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_minisocks.c | 4 +--- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 968edce38eb5..3875fdf6b186 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,7 @@ struct netns_ipv4 { int sysctl_tcp_retrans_collapse; int sysctl_tcp_stdurg; int sysctl_tcp_rfc1337; + int sysctl_tcp_abort_on_overflow; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 2aea2b3373b3..7331281a2292 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 832e554235df..ffd1fd769bba 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -393,13 +393,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_abort_on_overflow", - .data = &sysctl_tcp_abort_on_overflow, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_abort_on_overflow", + .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 11836667763c..3674d63170b2 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -29,8 +29,6 @@ #include #include -int sysctl_tcp_abort_on_overflow __read_mostly; - static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -783,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: - if (!sysctl_tcp_abort_on_overflow) { + if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { inet_rsk(req)->acked = 1; return NULL; } From 0bc65a28ae2aeb14aab7f4a930e0d8cf4cad9dc4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:04 -0700 Subject: [PATCH 1445/2177] tcp: Namespace-ify sysctl_tcp_fack Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_minisocks.c | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3875fdf6b186..f0e792beeea9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -136,6 +136,7 @@ struct netns_ipv4 { int sysctl_tcp_stdurg; int sysctl_tcp_rfc1337; int sysctl_tcp_abort_on_overflow; + int sysctl_tcp_fack; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7331281a2292..e7b15e9f6e28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ffd1fd769bba..1f23be13ce7b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_fack", - .data = &sysctl_tcp_fack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_reordering", .data = &sysctl_tcp_max_reordering, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fack", + .data = &init_net.ipv4.sysctl_tcp_fack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f6e1c00e300e..c7c983f0f817 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2517,7 +2517,7 @@ static int tcp_repair_options_est(struct sock *sk, return -EINVAL; tp->rx_opt.sack_ok |= TCP_SACK_SEEN; - if (sysctl_tcp_fack) + if (sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(tp); break; case TCPOPT_TIMESTAMP: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 64fde81b0eb7..c5b94460793f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,7 +79,6 @@ #include #include -int sysctl_tcp_fack __read_mostly; int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; @@ -5720,7 +5719,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } - if (tcp_is_sack(tp) && sysctl_tcp_fack) + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(tp); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3674d63170b2..3270ab8416ce 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -510,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { - if (sysctl_tcp_fack) + if (sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(newtp); } newtp->window_clamp = req->rsk_window_clamp; From 773d4bb96ceca6829ae9928f6b002b93e2e62cdc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:05 -0700 Subject: [PATCH 1446/2177] tcp: remove stale 
sysctl_tcp_reordering This extern is no longer used. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index e7b15e9f6e28..fc134ba74c7d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_reordering; extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; From c6e218035913e14952b04ceecf1a543205106fdb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:06 -0700 Subject: [PATCH 1447/2177] tcp: Namespace-ify sysctl_tcp_max_reordering Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c | 2 ++ 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0e792beeea9..3f6844665a2f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -137,6 +137,7 @@ struct netns_ipv4 { int sysctl_tcp_rfc1337; int sysctl_tcp_abort_on_overflow; int sysctl_tcp_fack; + int sysctl_tcp_max_reordering; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index fc134ba74c7d..8cd286226a1e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1f23be13ce7b..18cd228a2069 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_max_reordering", - .data = &sysctl_tcp_max_reordering, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_dsack", .data = &sysctl_tcp_dsack, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_max_reordering", + .data = &init_net.ipv4.sysctl_tcp_max_reordering, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5b94460793f..c118657f06ee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,7 +79,6 @@ #include #include -int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@ -889,7 +888,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, return; if (metric > tp->reordering) { - tp->reordering = min(sysctl_tcp_max_reordering, metric); + tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2bc6ba2059d3..c379a242abb3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2488,6 
+2488,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ net->ipv4.sysctl_tcp_retrans_collapse = 1; + net->ipv4.sysctl_tcp_max_reordering = 300; + net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; From 6496f6bde0c323fba5e8c5b5cbf3a7bf28dad7ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:07 -0700 Subject: [PATCH 1448/2177] tcp: Namespace-ify sysctl_tcp_dsack Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3f6844665a2f..956957a77db9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -138,6 +138,7 @@ struct netns_ipv4 { int sysctl_tcp_abort_on_overflow; int sysctl_tcp_fack; int sysctl_tcp_max_reordering; + int sysctl_tcp_dsack; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 8cd286226a1e..8b2ae3e8d79f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 18cd228a2069..7652a9c2a65d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_dsack", - .data = &sysctl_tcp_dsack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_mem", .maxlen = sizeof(sysctl_tcp_mem), @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_dsack", + .data = &init_net.ipv4.sysctl_tcp_dsack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c118657f06ee..fd77037ac800 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,7 +79,6 @@ #include #include -int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); @@ -4150,7 +4149,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4185,7 +4184,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) diff --git a/net/ipv4/tcp_ipv4.c 
b/net/ipv4/tcp_ipv4.c index c379a242abb3..d9d4d191e8f3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2489,6 +2489,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_max_reordering = 300; + net->ipv4.sysctl_tcp_dsack = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From 0c12654ac6d9004b9538b2a969b2b59e9a5ed831 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:08 -0700 Subject: [PATCH 1449/2177] tcp: Namespace-ify sysctl_tcp_app_win Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 956957a77db9..63f91d52cbc0 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -139,6 +139,7 @@ struct netns_ipv4 { int sysctl_tcp_fack; int sysctl_tcp_max_reordering; int sysctl_tcp_dsack; + int sysctl_tcp_app_win; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 8b2ae3e8d79f..7aa3d65062a1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7652a9c2a65d..e057788834a9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one, }, - { - .procname = "tcp_app_win", - .data = &sysctl_tcp_app_win, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_adv_win_scale", .data = &sysctl_tcp_adv_win_scale, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_app_win", + .data = &init_net.ipv4.sysctl_tcp_app_win, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fd77037ac800..6af4b58ac6d5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,7 +79,6 @@ #include #include -int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); @@ -428,6 +427,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) */ void tcp_init_buffer_space(struct sock *sk) { + int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -446,14 +446,14 @@ void tcp_init_buffer_space(struct sock *sk) if (tp->window_clamp >= maxwin) { tp->window_clamp = maxwin; - if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss) + if (tcp_app_win && maxwin > 4 * tp->advmss) tp->window_clamp = max(maxwin - - (maxwin >> sysctl_tcp_app_win), + (maxwin >> tcp_app_win), 4 * tp->advmss); } /* Force reservation of one segment. 
*/ - if (sysctl_tcp_app_win && + if (tcp_app_win && tp->window_clamp > 2 * tp->advmss && tp->window_clamp + tp->advmss > maxwin) tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9d4d191e8f3..189664ebd28e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2490,6 +2490,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_max_reordering = 300; net->ipv4.sysctl_tcp_dsack = 1; + net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From 94f0893e0c27219f4a726932618505aab6795973 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:09 -0700 Subject: [PATCH 1450/2177] tcp: Namespace-ify sysctl_tcp_adv_win_scale Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 9 ++++----- net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv4/tcp_input.c | 13 +++++-------- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 20 insertions(+), 22 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 63f91d52cbc0..9dbb07d4eff4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -140,6 +140,7 @@ struct netns_ipv4 { int sysctl_tcp_max_reordering; int sysctl_tcp_dsack; int sysctl_tcp_app_win; + int sysctl_tcp_adv_win_scale; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7aa3d65062a1..0dc27cd24899 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; @@ -1311,9 +1310,9 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); -static inline int tcp_win_from_space(int space) +static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; return tcp_adv_win_scale <= 0 ? 
(space>>(-tcp_adv_win_scale)) : @@ -1323,13 +1322,13 @@ static inline int tcp_win_from_space(int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf - + return tcp_win_from_space(sk, sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf); + return tcp_win_from_space(sk, sk->sk_rcvbuf); } extern void tcp_openreq_init_rwin(struct request_sock *req, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e057788834a9..a95123e1e7da 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,15 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one, }, - { - .procname = "tcp_adv_win_scale", - .data = &sysctl_tcp_adv_win_scale, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &tcp_adv_win_scale_min, - .extra2 = &tcp_adv_win_scale_max, - }, { .procname = "tcp_frto", .data = &sysctl_tcp_frto, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_adv_win_scale", + .data = &init_net.ipv4.sysctl_tcp_adv_win_scale, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_adv_win_scale_min, + .extra2 = &tcp_adv_win_scale_max, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6af4b58ac6d5..8ee2c84b0bc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,9 +79,6 @@ #include #include -int sysctl_tcp_adv_win_scale __read_mostly = 1; -EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); - /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; @@ -363,8 +360,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ - int truesize = tcp_win_from_space(skb->truesize) >> 1; - int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1; + int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; + int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -389,7 +386,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* Check #2. Increase window, if skb with such overhead * will fit to rcvbuf in future. */ - if (tcp_win_from_space(skb->truesize) <= skb->len) + if (tcp_win_from_space(sk, skb->truesize) <= skb->len) incr = 2 * tp->advmss; else incr = __tcp_grow_window(sk, skb); @@ -630,7 +627,7 @@ void tcp_rcv_space_adjust(struct sock *sk) } rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); - while (tcp_win_from_space(rcvmem) < tp->advmss) + while (tcp_win_from_space(sk, rcvmem) < tp->advmss) rcvmem += 128; rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); @@ -4809,7 +4806,7 @@ restart: * overlaps to the next one. 
*/ if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) && - (tcp_win_from_space(skb->truesize) > skb->len || + (tcp_win_from_space(sk, skb->truesize) > skb->len || before(TCP_SKB_CB(skb)->seq, start))) { end_of_skbs = false; break; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 189664ebd28e..1fe30fb99308 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2491,6 +2491,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_max_reordering = 300; net->ipv4.sysctl_tcp_dsack = 1; net->ipv4.sysctl_tcp_app_win = 31; + net->ipv4.sysctl_tcp_adv_win_scale = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From af9b69a7a6ca6b817e8d6f416e7aa5b2a5bf1d91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:55:10 -0700 Subject: [PATCH 1451/2177] tcp: Namespace-ify sysctl_tcp_frto Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9dbb07d4eff4..f4622e28db3a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -141,6 +141,7 @@ struct netns_ipv4 { int sysctl_tcp_dsack; int sysctl_tcp_app_win; int sysctl_tcp_adv_win_scale; + int sysctl_tcp_frto; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0dc27cd24899..18f047501f53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a95123e1e7da..f1bcb9b7e082 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one, }, - { - .procname = "tcp_frto", - .data = &sysctl_tcp_frto, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_min_rtt_wlen", .data = &sysctl_tcp_min_rtt_wlen, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &tcp_adv_win_scale_min, .extra2 = &tcp_adv_win_scale_max, }, + { + .procname = "tcp_frto", + .data = &init_net.ipv4.sysctl_tcp_frto, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ee2c84b0bc6..90d76f1c8f96 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -83,7 +83,6 @@ int sysctl_tcp_challenge_ack_limit = 1000; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; @@ -2026,7 +2025,7 @@ void tcp_enter_loss(struct sock *sk) * falsely raise the receive window, which results in repeated * timeouts and stop-and-go behavior. 
*/ - tp->frto = sysctl_tcp_frto && + tp->frto = net->ipv4.sysctl_tcp_frto && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1fe30fb99308..49757c758211 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2492,6 +2492,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_dsack = 1; net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_adv_win_scale = 1; + net->ipv4.sysctl_tcp_frto = 2; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From 7c51d17c027e52ef6ccc075e31c2eb44c13e3a32 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:28:29 -0700 Subject: [PATCH 1452/2177] rtlwifi: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Kalle Valo Cc: Larry Finger Cc: Chaoming Li Cc: Ping-Ke Shih Cc: Arvind Yadav Cc: Souptick Joarder Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 20 +++++++++---------- drivers/net/wireless/realtek/rtlwifi/base.h | 4 ++-- drivers/net/wireless/realtek/rtlwifi/core.c | 2 +- drivers/net/wireless/realtek/rtlwifi/ps.c | 2 +- .../wireless/realtek/rtlwifi/rtl8188ee/dm.c | 6 ++++-- .../wireless/realtek/rtlwifi/rtl8188ee/dm.h | 2 +- .../wireless/realtek/rtlwifi/rtl8188ee/hw.c | 7 +++++-- .../wireless/realtek/rtlwifi/rtl8188ee/hw.h | 2 +- .../wireless/realtek/rtlwifi/rtl8188ee/sw.c | 12 ++++------- 9 files changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 0b34886321f1..ea90b460c017 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -457,10 +457,10 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); /* <1> timer */ - setup_timer(&rtlpriv->works.watchdog_timer, - rtl_watch_dog_timer_callback, (unsigned long)hw); - setup_timer(&rtlpriv->works.dualmac_easyconcurrent_retrytimer, - rtl_easy_concurrent_retrytimer_callback, (unsigned long)hw); + timer_setup(&rtlpriv->works.watchdog_timer, + rtl_watch_dog_timer_callback, 0); + timer_setup(&rtlpriv->works.dualmac_easyconcurrent_retrytimer, + rtl_easy_concurrent_retrytimer_callback, 0); /* <2> work queue */ rtlpriv->works.hw = hw; rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name); @@ -2057,10 +2057,9 @@ label_lps_done: rtl_scan_list_expire(hw); } -void rtl_watch_dog_timer_callback(unsigned long data) +void rtl_watch_dog_timer_callback(struct timer_list *t) { - struct ieee80211_hw *hw = (struct ieee80211_hw *)data; - struct rtl_priv *rtlpriv = rtl_priv(hw); + struct rtl_priv *rtlpriv = from_timer(rtlpriv, t, works.watchdog_timer); queue_delayed_work(rtlpriv->works.rtl_wq, &rtlpriv->works.watchdog_wq, 0); @@ -2166,10 +2165,11 @@ void rtl_c2hcmd_wq_callback(void *data) rtl_c2hcmd_launcher(hw, 1); } -void rtl_easy_concurrent_retrytimer_callback(unsigned long data) +void rtl_easy_concurrent_retrytimer_callback(struct timer_list *t) { - struct ieee80211_hw *hw = (struct ieee80211_hw *)data; - struct rtl_priv *rtlpriv = rtl_priv(hw); + struct rtl_priv *rtlpriv = + from_timer(rtlpriv, t, works.dualmac_easyconcurrent_retrytimer); + 
struct ieee80211_hw *hw = rtlpriv->hw; struct rtl_priv *buddy_priv = rtlpriv->buddy_priv; if (buddy_priv == NULL) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.h b/drivers/net/wireless/realtek/rtlwifi/base.h index cfea9fc39a9f..26735319b38f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.h +++ b/drivers/net/wireless/realtek/rtlwifi/base.h @@ -120,7 +120,7 @@ void rtl_init_rx_config(struct ieee80211_hw *hw); void rtl_init_rfkill(struct ieee80211_hw *hw); void rtl_deinit_rfkill(struct ieee80211_hw *hw); -void rtl_watch_dog_timer_callback(unsigned long data); +void rtl_watch_dog_timer_callback(struct timer_list *t); void rtl_deinit_deferred_work(struct ieee80211_hw *hw); bool rtl_action_proc(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx); @@ -173,7 +173,7 @@ int rtl_send_smps_action(struct ieee80211_hw *hw, u8 *rtl_find_ie(u8 *data, unsigned int len, u8 ie); void rtl_recognize_peer(struct ieee80211_hw *hw, u8 *data, unsigned int len); u8 rtl_tid_to_ac(u8 tid); -void rtl_easy_concurrent_retrytimer_callback(unsigned long data); +void rtl_easy_concurrent_retrytimer_callback(struct timer_list *t); extern struct rtl_global_var rtl_global_var; void rtl_phy_scan_operation_backup(struct ieee80211_hw *hw, u8 operation); diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 015a8ec36703..1147327e6f52 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -160,7 +160,7 @@ static int rtl_op_start(struct ieee80211_hw *hw) mutex_lock(&rtlpriv->locks.conf_mutex); err = rtlpriv->intf_ops->adapter_start(hw); if (!err) - rtl_watch_dog_timer_callback((unsigned long)hw); + rtl_watch_dog_timer_callback(&rtlpriv->works.watchdog_timer); mutex_unlock(&rtlpriv->locks.conf_mutex); return err; } diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 07ee3096f50e..24c87fae5382 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -55,7 +55,7 @@ bool rtl_ps_enable_nic(struct ieee80211_hw *hw) rtlpriv->cfg->ops->enable_interrupt(hw); /* */ - rtl_watch_dog_timer_callback((unsigned long)hw); + rtl_watch_dog_timer_callback(&rtlpriv->works.watchdog_timer); return true; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c index 207411d1b015..e05af7d60830 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c @@ -1708,9 +1708,11 @@ static void rtl88e_dm_fast_ant_training(struct ieee80211_hw *hw) } } -void rtl88e_dm_fast_antenna_training_callback(unsigned long data) +void rtl88e_dm_fast_antenna_training_callback(struct timer_list *t) { - struct ieee80211_hw *hw = (struct ieee80211_hw *)data; + struct rtl_priv *rtlpriv = + from_timer(rtlpriv, t, works.fast_antenna_training_timer); + struct ieee80211_hw *hw = rtlpriv->hw; rtl88e_dm_fast_ant_training(hw); } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.h index 0fd2bac14db6..50f26a9a97db 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.h @@ -270,7 +270,7 @@ void rtl88e_dm_set_tx_ant_by_tx_info(struct ieee80211_hw *hw, void rtl88e_dm_ant_sel_statistics(struct ieee80211_hw *hw, u8 antsel_tr_mux, u32 mac_id, u32 rx_pwdb_all); -void 
rtl88e_dm_fast_antenna_training_callback(unsigned long data); +void rtl88e_dm_fast_antenna_training_callback(struct timer_list *t); void rtl88e_dm_init(struct ieee80211_hw *hw); void rtl88e_dm_watchdog(struct ieee80211_hw *hw); void rtl88e_dm_write_dig(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index d31117d52381..2c671364c521 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -253,9 +253,12 @@ static void _rtl88ee_set_fw_ps_rf_off_low_power(struct ieee80211_hw *hw) rpwm_val |= FW_PS_STATE_RF_OFF_LOW_PWR_88E; _rtl88ee_set_fw_clock_off(hw, rpwm_val); } -void rtl88ee_fw_clk_off_timer_callback(unsigned long data) + +void rtl88ee_fw_clk_off_timer_callback(struct timer_list *t) { - struct ieee80211_hw *hw = (struct ieee80211_hw *)data; + struct rtl_priv *rtlpriv = from_timer(rtlpriv, t, + works.fw_clockoff_timer); + struct ieee80211_hw *hw = rtlpriv->hw; _rtl88ee_set_fw_ps_rf_off_low_power(hw); } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h index 719b78a3b7db..ab8488da9409 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h @@ -58,6 +58,6 @@ void rtl8188ee_bt_reg_init(struct ieee80211_hw *hw); void rtl8188ee_bt_hw_init(struct ieee80211_hw *hw); void rtl88ee_suspend(struct ieee80211_hw *hw); void rtl88ee_resume(struct ieee80211_hw *hw); -void rtl88ee_fw_clk_off_timer_callback(unsigned long data); +void rtl88ee_fw_clk_off_timer_callback(struct timer_list *t); #endif diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index 35de3aeafcc9..82681b96ef93 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -190,16 +190,12 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) /*low power */ rtlpriv->psc.low_power_enable = false; if (rtlpriv->psc.low_power_enable) { - init_timer(&rtlpriv->works.fw_clockoff_timer); - setup_timer(&rtlpriv->works.fw_clockoff_timer, - rtl88ee_fw_clk_off_timer_callback, - (unsigned long)hw); + timer_setup(&rtlpriv->works.fw_clockoff_timer, + rtl88ee_fw_clk_off_timer_callback, 0); } - init_timer(&rtlpriv->works.fast_antenna_training_timer); - setup_timer(&rtlpriv->works.fast_antenna_training_timer, - rtl88e_dm_fast_antenna_training_callback, - (unsigned long)hw); + timer_setup(&rtlpriv->works.fast_antenna_training_timer, + rtl88e_dm_fast_antenna_training_callback, 0); return err; } From 7e916cafb4d97227713497723a200be41fe1a4b2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:28:37 -0700 Subject: [PATCH 1453/2177] qtnfmac: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
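The timer conversions in this stretch of the series (rtlwifi through libertas) are all instances of one mechanical pattern, so a small self-contained sketch may help when reading the hunks. The foo_priv structure and its fields below are hypothetical; timer_setup(), from_timer(), mod_timer(), jiffies and HZ are the real kernel interfaces the patches move to.

#include <linux/timer.h>
#include <linux/jiffies.h>

struct foo_priv {
	struct timer_list watchdog;	/* timer embedded in its owning object */
	int missed_beats;
};

/* Old style: the callback gets an opaque cookie and casts it back. */
static void foo_watchdog_old(unsigned long data)
{
	struct foo_priv *priv = (struct foo_priv *)data;

	priv->missed_beats++;
}

/* New style: the callback gets the timer itself and derives the owner. */
static void foo_watchdog_new(struct timer_list *t)
{
	struct foo_priv *priv = from_timer(priv, t, watchdog);

	priv->missed_beats++;
}

static void foo_init(struct foo_priv *priv)
{
	/* was: setup_timer(&priv->watchdog, foo_watchdog_old, (unsigned long)priv); */
	timer_setup(&priv->watchdog, foo_watchdog_new, 0);
	mod_timer(&priv->watchdog, jiffies + HZ);	/* arming the timer is unchanged */
}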
Cc: Kalle Valo Cc: Igor Mitsyanko Cc: Avinash Patil Cc: Sergey Matyukevich Cc: Kamlesh Rath Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 7 +++---- drivers/net/wireless/quantenna/qtnfmac/core.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index ebc3792a95bd..63a540d1216e 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -538,9 +538,9 @@ qtnf_del_station(struct wiphy *wiphy, struct net_device *dev, return ret; } -static void qtnf_scan_timeout(unsigned long data) +static void qtnf_scan_timeout(struct timer_list *t) { - struct qtnf_wmac *mac = (struct qtnf_wmac *)data; + struct qtnf_wmac *mac = from_timer(mac, t, scan_timeout); pr_warn("mac%d scan timed out\n", mac->macid); qtnf_scan_done(mac, true); @@ -559,8 +559,7 @@ qtnf_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) return -EFAULT; } - mac->scan_timeout.data = (unsigned long)mac; - mac->scan_timeout.function = qtnf_scan_timeout; + mac->scan_timeout.function = (TIMER_FUNC_TYPE)qtnf_scan_timeout; mod_timer(&mac->scan_timeout, jiffies + QTNF_SCAN_TIMEOUT_SEC * HZ); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 5e60180482d1..aa7f146278a7 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -289,7 +289,7 @@ static struct qtnf_wmac *qtnf_core_mac_alloc(struct qtnf_bus *bus, mac->iflist[i].vifid = i; qtnf_sta_list_init(&mac->iflist[i].sta_list); mutex_init(&mac->mac_lock); - init_timer(&mac->scan_timeout); + setup_timer(&mac->scan_timeout, NULL, 0); } qtnf_mac_init_primary_intf(mac); From 2b77839b37345693c565c835179c5f6f50a0ed77 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:28:45 -0700 Subject: [PATCH 1454/2177] iwlegacy: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
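The qtnfmac hunk above is the one slight variation in this set: the scan timer's callback is not known when the timer is created, so the patch leaves a bare setup_timer(..., NULL, 0) at MAC allocation and installs the new-style callback only when a scan starts, via the TIMER_FUNC_TYPE cast added for the transition period. The relevant lines from that hunk, pulled together:

	/* qtnf_core_mac_alloc(): timer exists, no callback yet */
	setup_timer(&mac->scan_timeout, NULL, 0);

	/* qtnf_scan(): install the struct timer_list callback, then arm */
	mac->scan_timeout.function = (TIMER_FUNC_TYPE)qtnf_scan_timeout;
	mod_timer(&mac->scan_timeout, jiffies + QTNF_SCAN_TIMEOUT_SEC * HZ);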
Cc: Kalle Valo Cc: Stanislaw Gruszka Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlegacy/3945-mac.c | 2 +- drivers/net/wireless/intel/iwlegacy/3945-rs.c | 10 +++------- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 9 ++++----- drivers/net/wireless/intel/iwlegacy/common.c | 4 ++-- drivers/net/wireless/intel/iwlegacy/common.h | 2 +- 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 329f3a63dadd..4b53ebf00c7f 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -3429,7 +3429,7 @@ il3945_setup_deferred_work(struct il_priv *il) il3945_hw_setup_deferred_work(il); - setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); + timer_setup(&il->watchdog, il_bg_watchdog, 0); tasklet_init(&il->irq_tasklet, (void (*)(unsigned long))il3945_irq_tasklet, diff --git a/drivers/net/wireless/intel/iwlegacy/3945-rs.c b/drivers/net/wireless/intel/iwlegacy/3945-rs.c index b2f35dfbc01b..e8983c6a2b7b 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-rs.c @@ -181,9 +181,9 @@ il3945_rate_scale_flush_wins(struct il3945_rs_sta *rs_sta) #define IL_AVERAGE_PACKETS 1500 static void -il3945_bg_rate_scale_flush(unsigned long data) +il3945_bg_rate_scale_flush(struct timer_list *t) { - struct il3945_rs_sta *rs_sta = (void *)data; + struct il3945_rs_sta *rs_sta = from_timer(rs_sta, t, rate_scale_flush); struct il_priv *il __maybe_unused = rs_sta->il; int unflushed = 0; unsigned long flags; @@ -360,9 +360,6 @@ il3945_rs_rate_init(struct il_priv *il, struct ieee80211_sta *sta, u8 sta_id) rs_sta->flush_time = RATE_FLUSH; rs_sta->last_tx_packets = 0; - rs_sta->rate_scale_flush.data = (unsigned long)rs_sta; - rs_sta->rate_scale_flush.function = il3945_bg_rate_scale_flush; - for (i = 0; i < RATE_COUNT_3945; i++) il3945_clear_win(&rs_sta->win[i]); @@ -415,8 +412,7 @@ il3945_rs_alloc_sta(void *il_priv, struct ieee80211_sta *sta, gfp_t gfp) rs_sta = &psta->rs_sta; spin_lock_init(&rs_sta->lock); - init_timer(&rs_sta->rate_scale_flush); - + timer_setup(&rs_sta->rate_scale_flush, il3945_bg_rate_scale_flush, 0); D_RATE("leave\n"); return rs_sta; diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 65eba2c24292..de63f2518f23 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4074,9 +4074,9 @@ il4965_hdl_alive(struct il_priv *il, struct il_rx_buf *rxb) * used for calibrating the TXPOWER. 
*/ static void -il4965_bg_stats_periodic(unsigned long data) +il4965_bg_stats_periodic(struct timer_list *t) { - struct il_priv *il = (struct il_priv *)data; + struct il_priv *il = from_timer(il, t, stats_periodic); if (test_bit(S_EXIT_PENDING, &il->status)) return; @@ -6258,10 +6258,9 @@ il4965_setup_deferred_work(struct il_priv *il) INIT_WORK(&il->txpower_work, il4965_bg_txpower_work); - setup_timer(&il->stats_periodic, il4965_bg_stats_periodic, - (unsigned long)il); + timer_setup(&il->stats_periodic, il4965_bg_stats_periodic, 0); - setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); + timer_setup(&il->watchdog, il_bg_watchdog, 0); tasklet_init(&il->irq_tasklet, (void (*)(unsigned long))il4965_irq_tasklet, diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index 8d5acda92a9b..558bb16bfd46 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -4844,9 +4844,9 @@ il_check_stuck_queue(struct il_priv *il, int cnt) * we reset the firmware. If everything is fine just rearm the timer. */ void -il_bg_watchdog(unsigned long data) +il_bg_watchdog(struct timer_list *t) { - struct il_priv *il = (struct il_priv *)data; + struct il_priv *il = from_timer(il, t, watchdog); int cnt; unsigned long timeout; diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h index 18c60c92e3a3..dc6a74a05983 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.h +++ b/drivers/net/wireless/intel/iwlegacy/common.h @@ -1832,7 +1832,7 @@ int il_enqueue_hcmd(struct il_priv *il, struct il_host_cmd *cmd); * PCI * *****************************************************/ -void il_bg_watchdog(unsigned long data); +void il_bg_watchdog(struct timer_list *t); u32 il_usecs_to_beacons(struct il_priv *il, u32 usec, u32 beacon_interval); __le32 il_add_beacon_time(struct il_priv *il, u32 base, u32 addon, u32 beacon_interval); From 3e79202b11526f6662d73501877cc18cf95b2ff8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:28:52 -0700 Subject: [PATCH 1455/2177] atmel: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
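A recurring detail in some of these drivers (for example the rtlwifi hunks earlier in the series) is that the old callback cast its cookie to some object other than the structure that embeds the timer. After the conversion, the callback first recovers the embedding structure with from_timer() and then follows a back-pointer the driver already keeps. A minimal sketch, with hypothetical bar_* names standing in for the driver types:

#include <linux/timer.h>

struct bar_hw;				/* the object the callback really wants */

struct bar_priv {
	struct bar_hw *hw;		/* back-pointer maintained by the driver */
	struct timer_list watchdog;
};

static void bar_watchdog_cb(struct timer_list *t)
{
	struct bar_priv *priv = from_timer(priv, t, watchdog);
	struct bar_hw *hw = priv->hw;	/* was: (struct bar_hw *)data */

	(void)hw;			/* ... drive the hardware through hw ... */
}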
Cc: Simon Kelley Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- drivers/net/wireless/atmel/atmel.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c index e816d53c2c05..c9dd5e44c9c6 100644 --- a/drivers/net/wireless/atmel/atmel.c +++ b/drivers/net/wireless/atmel/atmel.c @@ -586,7 +586,7 @@ static int atmel_validate_channel(struct atmel_private *priv, int channel); static void atmel_management_frame(struct atmel_private *priv, struct ieee80211_hdr *header, u16 frame_len, u8 rssi); -static void atmel_management_timer(u_long a); +static void atmel_management_timer(struct timer_list *t); static void atmel_send_command(struct atmel_private *priv, int command, void *cmd, int cmd_size); static int atmel_send_command_wait(struct atmel_private *priv, int command, @@ -1579,8 +1579,7 @@ struct net_device *init_atmel_card(unsigned short irq, unsigned long port, priv->default_beacon_period = priv->beacon_period = 100; priv->listen_interval = 1; - setup_timer(&priv->management_timer, atmel_management_timer, - (unsigned long)dev); + timer_setup(&priv->management_timer, atmel_management_timer, 0); spin_lock_init(&priv->irqlock); spin_lock_init(&priv->timerlock); @@ -3434,10 +3433,9 @@ static void atmel_management_frame(struct atmel_private *priv, } /* run when timer expires */ -static void atmel_management_timer(u_long a) +static void atmel_management_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) a; - struct atmel_private *priv = netdev_priv(dev); + struct atmel_private *priv = from_timer(priv, t, management_timer); unsigned long flags; /* Check if the card has been yanked. */ From e3dcf8bbeb0c58df475cad58e2dc267a5c02ae20 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:29:00 -0700 Subject: [PATCH 1456/2177] cw1200: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
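For reference, from_timer() as used throughout these callbacks is only a thin wrapper around container_of(); its definition in <linux/timer.h> is, approximately:

#define from_timer(var, callback_timer, timer_fieldname) \
	container_of(callback_timer, typeof(*var), timer_fieldname)

so the callback derives the address of the structure that embeds the timer from the struct timer_list pointer it is handed.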
Cc: Solomon Peachy Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- drivers/net/wireless/st/cw1200/main.c | 3 +-- drivers/net/wireless/st/cw1200/queue.c | 6 +++--- drivers/net/wireless/st/cw1200/sta.c | 5 ++--- drivers/net/wireless/st/cw1200/sta.h | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/st/cw1200/main.c b/drivers/net/wireless/st/cw1200/main.c index dc478cedbde0..a186d1df1f29 100644 --- a/drivers/net/wireless/st/cw1200/main.c +++ b/drivers/net/wireless/st/cw1200/main.c @@ -373,8 +373,7 @@ static struct ieee80211_hw *cw1200_init_common(const u8 *macaddr, INIT_WORK(&priv->update_filtering_work, cw1200_update_filtering_work); INIT_WORK(&priv->set_beacon_wakeup_period_work, cw1200_set_beacon_wakeup_period_work); - setup_timer(&priv->mcast_timeout, cw1200_mcast_timeout, - (unsigned long)priv); + timer_setup(&priv->mcast_timeout, cw1200_mcast_timeout, 0); if (cw1200_queue_stats_init(&priv->tx_queue_stats, CW1200_LINK_ID_MAX, diff --git a/drivers/net/wireless/st/cw1200/queue.c b/drivers/net/wireless/st/cw1200/queue.c index 0ba5ef9b3e7b..5153d2cfd991 100644 --- a/drivers/net/wireless/st/cw1200/queue.c +++ b/drivers/net/wireless/st/cw1200/queue.c @@ -130,11 +130,11 @@ static void __cw1200_queue_gc(struct cw1200_queue *queue, } } -static void cw1200_queue_gc(unsigned long arg) +static void cw1200_queue_gc(struct timer_list *t) { LIST_HEAD(list); struct cw1200_queue *queue = - (struct cw1200_queue *)arg; + from_timer(queue, t, gc); spin_lock_bh(&queue->lock); __cw1200_queue_gc(queue, &list, true); @@ -179,7 +179,7 @@ int cw1200_queue_init(struct cw1200_queue *queue, INIT_LIST_HEAD(&queue->pending); INIT_LIST_HEAD(&queue->free_pool); spin_lock_init(&queue->lock); - setup_timer(&queue->gc, cw1200_queue_gc, (unsigned long)queue); + timer_setup(&queue->gc, cw1200_queue_gc, 0); queue->pool = kzalloc(sizeof(struct cw1200_queue_item) * capacity, GFP_KERNEL); diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index a52224836a2b..03687a80d6e9 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -2112,10 +2112,9 @@ void cw1200_multicast_stop_work(struct work_struct *work) } } -void cw1200_mcast_timeout(unsigned long arg) +void cw1200_mcast_timeout(struct timer_list *t) { - struct cw1200_common *priv = - (struct cw1200_common *)arg; + struct cw1200_common *priv = from_timer(priv, t, mcast_timeout); wiphy_warn(priv->hw->wiphy, "Multicast delivery timeout.\n"); diff --git a/drivers/net/wireless/st/cw1200/sta.h b/drivers/net/wireless/st/cw1200/sta.h index a0bacaa39b31..719de34dcbfe 100644 --- a/drivers/net/wireless/st/cw1200/sta.h +++ b/drivers/net/wireless/st/cw1200/sta.h @@ -117,6 +117,6 @@ void cw1200_set_tim_work(struct work_struct *work); void cw1200_set_cts_work(struct work_struct *work); void cw1200_multicast_start_work(struct work_struct *work); void cw1200_multicast_stop_work(struct work_struct *work); -void cw1200_mcast_timeout(unsigned long arg); +void cw1200_mcast_timeout(struct timer_list *t); #endif From dfefb9f8d082b609197f7ea852333e7a5ecc9563 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:29:09 -0700 Subject: [PATCH 1457/2177] drivers/wireless: rsi: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer 
explicitly. Cc: Kalle Valo Cc: Amitkumar Karwar Cc: Prameela Rani Garnepudi Cc: Pavani Muthyala Cc: Karun Eagalapati Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_hal.c | 7 +++---- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 4 ++-- drivers/net/wireless/rsi/rsi_91x_main.c | 4 +--- drivers/net/wireless/rsi/rsi_common.h | 2 +- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 689527d7007a..1176de646942 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -418,9 +418,9 @@ int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb) return 0; } -static void bl_cmd_timeout(unsigned long priv) +static void bl_cmd_timeout(struct timer_list *t) { - struct rsi_hw *adapter = (struct rsi_hw *)priv; + struct rsi_hw *adapter = from_timer(adapter, t, bl_cmd_timer); adapter->blcmd_timer_expired = true; del_timer(&adapter->bl_cmd_timer); @@ -428,8 +428,7 @@ static void bl_cmd_timeout(unsigned long priv) static int bl_start_cmd_timer(struct rsi_hw *adapter, u32 timeout) { - setup_timer(&adapter->bl_cmd_timer, (void *)&bl_cmd_timeout, - (unsigned long)adapter); + timer_setup(&adapter->bl_cmd_timer, bl_cmd_timeout, 0); adapter->bl_cmd_timer.expires = (msecs_to_jiffies(timeout) + jiffies); adapter->blcmd_timer_expired = false; diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index b1f5dbbde3cb..9427c9d7c9c7 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1663,9 +1663,9 @@ static void rsi_resume_conn_channel(struct rsi_common *common) } } -void rsi_roc_timeout(unsigned long data) +void rsi_roc_timeout(struct timer_list *t) { - struct rsi_common *common = (struct rsi_common *)data; + struct rsi_common *common = from_timer(common, t, roc_timer); rsi_dbg(INFO_ZONE, "Remain on channel expired\n"); diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index 71b8cfb8252e..2a1fbb7db6c4 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -262,9 +262,7 @@ struct rsi_hw *rsi_91x_init(void) rsi_default_ps_params(adapter); spin_lock_init(&adapter->ps_lock); - common->roc_timer.data = (unsigned long)common; - common->roc_timer.function = (void *)&rsi_roc_timeout; - init_timer(&common->roc_timer); + timer_setup(&common->roc_timer, rsi_roc_timeout, 0); common->init_done = true; return adapter; diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index 272e18d750ff..29acaea3636d 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -85,5 +85,5 @@ void rsi_91x_deinit(struct rsi_hw *adapter); int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len); struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr); struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac); -void rsi_roc_timeout(unsigned long data); +void rsi_roc_timeout(struct timer_list *t); #endif From 08c2eb8ec800b1b5447728af0ffeebf6346bdcf2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:29:19 -0700 Subject: [PATCH 1458/2177] mwifiex: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using 
the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Kalle Valo Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Cc: Ganapathi Bhat Cc: Xinming Hu Cc: Arvind Yadav Cc: Ingo Molnar Cc: Johannes Berg Cc: "David S. Miller" Cc: Andrew Zaborowski Cc: libertas-dev@lists.infradead.org Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 7 +++---- drivers/net/wireless/marvell/mwifiex/cmdevt.c | 5 ++--- drivers/net/wireless/marvell/mwifiex/init.c | 7 +++---- drivers/net/wireless/marvell/mwifiex/main.c | 3 +-- drivers/net/wireless/marvell/mwifiex/main.h | 4 ++-- drivers/net/wireless/marvell/mwifiex/tdls.c | 7 +++---- drivers/net/wireless/marvell/mwifiex/usb.c | 9 ++++----- 7 files changed, 18 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index d87df2dfcfa4..1edcddaf7b4b 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -312,10 +312,10 @@ mwifiex_11n_find_last_seq_num(struct reorder_tmr_cnxt *ctx) * them and then dumps the Rx reordering table. */ static void -mwifiex_flush_data(unsigned long context) +mwifiex_flush_data(struct timer_list *t) { struct reorder_tmr_cnxt *ctx = - (struct reorder_tmr_cnxt *) context; + from_timer(ctx, t, timer); int start_win, seq_num; ctx->timer_is_set = false; @@ -412,8 +412,7 @@ mwifiex_11n_create_rx_reorder_tbl(struct mwifiex_private *priv, u8 *ta, new_node->timer_context.priv = priv; new_node->timer_context.timer_is_set = false; - setup_timer(&new_node->timer_context.timer, mwifiex_flush_data, - (unsigned long)&new_node->timer_context); + timer_setup(&new_node->timer_context.timer, mwifiex_flush_data, 0); for (i = 0; i < win_size; ++i) new_node->rx_reorder_ptr[i] = NULL; diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index a9a1a736e6e8..dcc529e9c0ef 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -920,10 +920,9 @@ int mwifiex_process_cmdresp(struct mwifiex_adapter *adapter) * It will re-send the same command again. 
*/ void -mwifiex_cmd_timeout_func(unsigned long function_context) +mwifiex_cmd_timeout_func(struct timer_list *t) { - struct mwifiex_adapter *adapter = - (struct mwifiex_adapter *) function_context; + struct mwifiex_adapter *adapter = from_timer(adapter, t, cmd_timer); struct cmd_ctrl_node *cmd_node; adapter->is_cmd_timedout = 1; diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index 1176706ab094..e1aa86042469 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -52,9 +52,9 @@ static int mwifiex_add_bss_prio_tbl(struct mwifiex_private *priv) return 0; } -static void wakeup_timer_fn(unsigned long data) +static void wakeup_timer_fn(struct timer_list *t) { - struct mwifiex_adapter *adapter = (struct mwifiex_adapter *)data; + struct mwifiex_adapter *adapter = from_timer(adapter, t, wakeup_timer); mwifiex_dbg(adapter, ERROR, "Firmware wakeup failed\n"); adapter->hw_status = MWIFIEX_HW_STATUS_RESET; @@ -313,8 +313,7 @@ static void mwifiex_init_adapter(struct mwifiex_adapter *adapter) adapter->iface_limit.uap_intf = MWIFIEX_MAX_UAP_NUM; adapter->iface_limit.p2p_intf = MWIFIEX_MAX_P2P_NUM; adapter->active_scan_triggered = false; - setup_timer(&adapter->wakeup_timer, wakeup_timer_fn, - (unsigned long)adapter); + timer_setup(&adapter->wakeup_timer, wakeup_timer_fn, 0); } /* diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index ee40b739b289..a96bd7e653bf 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -100,8 +100,7 @@ static int mwifiex_register(void *card, struct device *dev, } mwifiex_init_lock_list(adapter); - setup_timer(&adapter->cmd_timer, mwifiex_cmd_timeout_func, - (unsigned long)adapter); + timer_setup(&adapter->cmd_timer, mwifiex_cmd_timeout_func, 0); return 0; diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index a34de8582e91..154c0796c0c5 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -1072,7 +1072,7 @@ int mwifiex_complete_cmd(struct mwifiex_adapter *adapter, int mwifiex_send_cmd(struct mwifiex_private *priv, u16 cmd_no, u16 cmd_action, u32 cmd_oid, void *data_buf, bool sync); -void mwifiex_cmd_timeout_func(unsigned long function_context); +void mwifiex_cmd_timeout_func(struct timer_list *t); int mwifiex_get_debug_info(struct mwifiex_private *, struct mwifiex_debug_info *); @@ -1617,7 +1617,7 @@ void mwifiex_auto_tdls_update_peer_status(struct mwifiex_private *priv, const u8 *mac, u8 link_status); void mwifiex_auto_tdls_update_peer_signal(struct mwifiex_private *priv, u8 *mac, s8 snr, s8 nflr); -void mwifiex_check_auto_tdls(unsigned long context); +void mwifiex_check_auto_tdls(struct timer_list *t); void mwifiex_add_auto_tdls_peer(struct mwifiex_private *priv, const u8 *mac); void mwifiex_setup_auto_tdls_timer(struct mwifiex_private *priv); void mwifiex_clean_auto_tdls(struct mwifiex_private *priv); diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index 9fe0bae957b7..27779d7317fd 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -1389,9 +1389,9 @@ void mwifiex_auto_tdls_update_peer_signal(struct mwifiex_private *priv, spin_unlock_irqrestore(&priv->auto_tdls_lock, flags); } -void mwifiex_check_auto_tdls(unsigned long context) +void 
mwifiex_check_auto_tdls(struct timer_list *t) { - struct mwifiex_private *priv = (struct mwifiex_private *)context; + struct mwifiex_private *priv = from_timer(priv, t, auto_tdls_timer); struct mwifiex_auto_tdls_peer *tdls_peer; unsigned long flags; u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED; @@ -1456,8 +1456,7 @@ void mwifiex_check_auto_tdls(unsigned long context) void mwifiex_setup_auto_tdls_timer(struct mwifiex_private *priv) { - setup_timer(&priv->auto_tdls_timer, mwifiex_check_auto_tdls, - (unsigned long)priv); + timer_setup(&priv->auto_tdls_timer, mwifiex_check_auto_tdls, 0); priv->auto_tdls_timer_active = true; mod_timer(&priv->auto_tdls_timer, jiffies + msecs_to_jiffies(MWIFIEX_TIMER_10S)); diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index f4f2b9b27e32..4bc244801636 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -1096,12 +1096,12 @@ postcopy_cur_buf: return -EINPROGRESS; } -static void mwifiex_usb_tx_aggr_tmo(unsigned long context) +static void mwifiex_usb_tx_aggr_tmo(struct timer_list *t) { struct urb_context *urb_cnxt = NULL; struct sk_buff *skb_send = NULL; struct tx_aggr_tmr_cnxt *timer_context = - (struct tx_aggr_tmr_cnxt *)context; + from_timer(timer_context, t, hold_timer); struct mwifiex_adapter *adapter = timer_context->adapter; struct usb_tx_data_port *port = timer_context->port; unsigned long flags; @@ -1236,9 +1236,8 @@ static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter) port->tx_aggr.timer_cnxt.port = port; port->tx_aggr.timer_cnxt.is_hold_timer_set = false; port->tx_aggr.timer_cnxt.hold_tmo_msecs = 0; - setup_timer(&port->tx_aggr.timer_cnxt.hold_timer, - mwifiex_usb_tx_aggr_tmo, - (unsigned long)&port->tx_aggr.timer_cnxt); + timer_setup(&port->tx_aggr.timer_cnxt.hold_timer, + mwifiex_usb_tx_aggr_tmo, 0); } return 0; From 78ce6a9083c4f5ac1a73aa0b614c852db2d4e982 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:29:28 -0700 Subject: [PATCH 1459/2177] libertas: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Kalle Valo Cc: Arvind Yadav Cc: Ingo Molnar Cc: Johannes Berg Cc: "David S. Miller" Cc: Andrew Zaborowski Cc: libertas-dev@lists.infradead.org Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- .../net/wireless/marvell/libertas/if_usb.c | 6 +++--- drivers/net/wireless/marvell/libertas/main.c | 21 ++++++++----------- .../net/wireless/marvell/libertas_tf/if_usb.c | 6 +++--- .../net/wireless/marvell/libertas_tf/main.c | 7 +++---- 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 16e54c757dd0..ffea610f67e2 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -161,9 +161,9 @@ static void if_usb_setup_firmware(struct lbs_private *priv) } } -static void if_usb_fw_timeo(unsigned long priv) +static void if_usb_fw_timeo(struct timer_list *t) { - struct if_usb_card *cardp = (void *)priv; + struct if_usb_card *cardp = from_timer(cardp, t, fw_timeout); if (cardp->fwdnldover) { lbs_deb_usb("Download complete, no event. 
Assuming success\n"); @@ -205,7 +205,7 @@ static int if_usb_probe(struct usb_interface *intf, if (!cardp) goto error; - setup_timer(&cardp->fw_timeout, if_usb_fw_timeo, (unsigned long)cardp); + timer_setup(&cardp->fw_timeout, if_usb_fw_timeo, 0); init_waitqueue_head(&cardp->fw_wq); cardp->udev = udev; diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c index aefa88f4f29c..f22e1c220cba 100644 --- a/drivers/net/wireless/marvell/libertas/main.c +++ b/drivers/net/wireless/marvell/libertas/main.c @@ -722,9 +722,9 @@ EXPORT_SYMBOL_GPL(lbs_resume); * * @data: &struct lbs_private pointer */ -static void lbs_cmd_timeout_handler(unsigned long data) +static void lbs_cmd_timeout_handler(struct timer_list *t) { - struct lbs_private *priv = (struct lbs_private *)data; + struct lbs_private *priv = from_timer(priv, t, command_timer); unsigned long flags; spin_lock_irqsave(&priv->driver_lock, flags); @@ -756,9 +756,9 @@ out: * * @data: &struct lbs_private pointer */ -static void lbs_tx_lockup_handler(unsigned long data) +static void lbs_tx_lockup_handler(struct timer_list *t) { - struct lbs_private *priv = (struct lbs_private *)data; + struct lbs_private *priv = from_timer(priv, t, tx_lockup_timer); unsigned long flags; spin_lock_irqsave(&priv->driver_lock, flags); @@ -779,9 +779,9 @@ static void lbs_tx_lockup_handler(unsigned long data) * @data: &struct lbs_private pointer * returns: N/A */ -static void auto_deepsleep_timer_fn(unsigned long data) +static void auto_deepsleep_timer_fn(struct timer_list *t) { - struct lbs_private *priv = (struct lbs_private *)data; + struct lbs_private *priv = from_timer(priv, t, auto_deepsleep_timer); if (priv->is_activity_detected) { priv->is_activity_detected = 0; @@ -847,12 +847,9 @@ static int lbs_init_adapter(struct lbs_private *priv) init_waitqueue_head(&priv->fw_waitq); mutex_init(&priv->lock); - setup_timer(&priv->command_timer, lbs_cmd_timeout_handler, - (unsigned long)priv); - setup_timer(&priv->tx_lockup_timer, lbs_tx_lockup_handler, - (unsigned long)priv); - setup_timer(&priv->auto_deepsleep_timer, auto_deepsleep_timer_fn, - (unsigned long)priv); + timer_setup(&priv->command_timer, lbs_cmd_timeout_handler, 0); + timer_setup(&priv->tx_lockup_timer, lbs_tx_lockup_handler, 0); + timer_setup(&priv->auto_deepsleep_timer, auto_deepsleep_timer_fn, 0); INIT_LIST_HEAD(&priv->cmdfreeq); INIT_LIST_HEAD(&priv->cmdpendingq); diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index e9104eca327b..5153922e7ce1 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -115,9 +115,9 @@ static void if_usb_setup_firmware(struct lbtf_private *priv) lbtf_deb_leave(LBTF_DEB_USB); } -static void if_usb_fw_timeo(unsigned long priv) +static void if_usb_fw_timeo(struct timer_list *t) { - struct if_usb_card *cardp = (void *)priv; + struct if_usb_card *cardp = from_timer(cardp, t, fw_timeout); lbtf_deb_enter(LBTF_DEB_USB); if (!cardp->fwdnldover) { @@ -156,7 +156,7 @@ static int if_usb_probe(struct usb_interface *intf, if (!cardp) goto error; - setup_timer(&cardp->fw_timeout, if_usb_fw_timeo, (unsigned long)cardp); + timer_setup(&cardp->fw_timeout, if_usb_fw_timeo, 0); init_waitqueue_head(&cardp->fw_wq); cardp->udev = udev; diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c index 81228bf73043..1d45da187b9b 100644 --- 
a/drivers/net/wireless/marvell/libertas_tf/main.c +++ b/drivers/net/wireless/marvell/libertas_tf/main.c @@ -165,9 +165,9 @@ done: * This function handles the timeout of command sending. * It will re-send the same command again. */ -static void command_timer_fn(unsigned long data) +static void command_timer_fn(struct timer_list *t) { - struct lbtf_private *priv = (struct lbtf_private *)data; + struct lbtf_private *priv = from_timer(priv, t, command_timer); unsigned long flags; lbtf_deb_enter(LBTF_DEB_CMD); @@ -196,8 +196,7 @@ static int lbtf_init_adapter(struct lbtf_private *priv) mutex_init(&priv->lock); priv->vif = NULL; - setup_timer(&priv->command_timer, command_timer_fn, - (unsigned long)priv); + timer_setup(&priv->command_timer, command_timer_fn, 0); INIT_LIST_HEAD(&priv->cmdfreeq); INIT_LIST_HEAD(&priv->cmdpendingq); From 0425f079590c1e10561642c265523e4ccec2d9c7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:13 +0200 Subject: [PATCH 1460/2177] brcmsmac: split up wlc_phy_workarounds_nphy The stack consumption in this driver is still relatively high, with one remaining warning if the warning level is lowered to 1536 bytes: drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c:17135:1: error: the frame size of 1880 bytes is larger than 1536 bytes [-Werror=frame-larger-than=] The affected function is actually a collection of three separate implementations, and each of them is fairly large by itself. Splitting them up is done easily and improves readability at the same time. I'm leaving the original indentation to make the review easier. Acked-by: Arend van Spriel Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmsmac/phy/phy_n.c | 178 ++++++++++-------- 1 file changed, 104 insertions(+), 74 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c index ef685465f80a..ed409a80f3d2 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c @@ -16061,52 +16061,8 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) } } -static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) +static void wlc_phy_workarounds_nphy_rev7(struct brcms_phy *pi) { - static const u8 rfseq_rx2tx_events[] = { - NPHY_RFSEQ_CMD_NOP, - NPHY_RFSEQ_CMD_RXG_FBW, - NPHY_RFSEQ_CMD_TR_SWITCH, - NPHY_RFSEQ_CMD_CLR_HIQ_DIS, - NPHY_RFSEQ_CMD_RXPD_TXPD, - NPHY_RFSEQ_CMD_TX_GAIN, - NPHY_RFSEQ_CMD_EXT_PA - }; - u8 rfseq_rx2tx_dlys[] = { 8, 6, 6, 2, 4, 60, 1 }; - static const u8 rfseq_tx2rx_events[] = { - NPHY_RFSEQ_CMD_NOP, - NPHY_RFSEQ_CMD_EXT_PA, - NPHY_RFSEQ_CMD_TX_GAIN, - NPHY_RFSEQ_CMD_RXPD_TXPD, - NPHY_RFSEQ_CMD_TR_SWITCH, - NPHY_RFSEQ_CMD_RXG_FBW, - NPHY_RFSEQ_CMD_CLR_HIQ_DIS - }; - static const u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; - static const u8 rfseq_tx2rx_events_rev3[] = { - NPHY_REV3_RFSEQ_CMD_EXT_PA, - NPHY_REV3_RFSEQ_CMD_INT_PA_PU, - NPHY_REV3_RFSEQ_CMD_TX_GAIN, - NPHY_REV3_RFSEQ_CMD_RXPD_TXPD, - NPHY_REV3_RFSEQ_CMD_TR_SWITCH, - NPHY_REV3_RFSEQ_CMD_RXG_FBW, - NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, - NPHY_REV3_RFSEQ_CMD_END - }; - static const u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; - u8 rfseq_rx2tx_events_rev3[] = { - NPHY_REV3_RFSEQ_CMD_NOP, - NPHY_REV3_RFSEQ_CMD_RXG_FBW, - NPHY_REV3_RFSEQ_CMD_TR_SWITCH, - NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, - NPHY_REV3_RFSEQ_CMD_RXPD_TXPD, - NPHY_REV3_RFSEQ_CMD_TX_GAIN, - NPHY_REV3_RFSEQ_CMD_INT_PA_PU, 
- NPHY_REV3_RFSEQ_CMD_EXT_PA, - NPHY_REV3_RFSEQ_CMD_END - }; - u8 rfseq_rx2tx_dlys_rev3[] = { 8, 6, 6, 4, 4, 18, 42, 1, 1 }; - static const u8 rfseq_rx2tx_events_rev3_ipa[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, @@ -16120,29 +16076,15 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) }; static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; static const u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; - - s16 alpha0, alpha1, alpha2; - s16 beta0, beta1, beta2; - u32 leg_data_weights, ht_data_weights, nss1_data_weights, - stbc_data_weights; + u32 leg_data_weights; u8 chan_freq_range = 0; static const u16 dac_control = 0x0002; u16 aux_adc_vmid_rev7_core0[] = { 0x8e, 0x96, 0x96, 0x96 }; u16 aux_adc_vmid_rev7_core1[] = { 0x8f, 0x9f, 0x9f, 0x96 }; - u16 aux_adc_vmid_rev4[] = { 0xa2, 0xb4, 0xb4, 0x89 }; - u16 aux_adc_vmid_rev3[] = { 0xa2, 0xb4, 0xb4, 0x89 }; - u16 *aux_adc_vmid; u16 aux_adc_gain_rev7[] = { 0x02, 0x02, 0x02, 0x02 }; - u16 aux_adc_gain_rev4[] = { 0x02, 0x02, 0x02, 0x00 }; - u16 aux_adc_gain_rev3[] = { 0x02, 0x02, 0x02, 0x00 }; - u16 *aux_adc_gain; - static const u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; - static const u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; s32 min_nvar_val = 0x18d; s32 min_nvar_offset_6mbps = 20; u8 pdetrange; - u8 triso; - u16 regval; u16 afectrl_adc_ctrl1_rev7 = 0x20; u16 afectrl_adc_ctrl2_rev7 = 0x0; u16 rfseq_rx2tx_lpf_h_hpc_rev7 = 0x77; @@ -16171,17 +16113,6 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 freq; int coreNum; - if (CHSPEC_IS5G(pi->radio_chanspec)) - wlc_phy_classifier_nphy(pi, NPHY_ClassifierCtrl_cck_en, 0); - else - wlc_phy_classifier_nphy(pi, NPHY_ClassifierCtrl_cck_en, 1); - - if (pi->phyhang_avoid) - wlc_phy_stay_in_carriersearch_nphy(pi, true); - - or_phy_reg(pi, 0xb1, NPHY_IQFlip_ADC1 | NPHY_IQFlip_ADC2); - - if (NREV_GE(pi->pubpi.phy_rev, 7)) { if (NREV_IS(pi->pubpi.phy_rev, 7)) { mod_phy_reg(pi, 0x221, (0x1 << 4), (1 << 4)); @@ -16703,8 +16634,62 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) &aux_adc_gain_rev7); wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x1c, 16, &aux_adc_gain_rev7); +} - } else if (NREV_GE(pi->pubpi.phy_rev, 3)) { +static void wlc_phy_workarounds_nphy_rev3(struct brcms_phy *pi) +{ + static const u8 rfseq_tx2rx_events_rev3[] = { + NPHY_REV3_RFSEQ_CMD_EXT_PA, + NPHY_REV3_RFSEQ_CMD_INT_PA_PU, + NPHY_REV3_RFSEQ_CMD_TX_GAIN, + NPHY_REV3_RFSEQ_CMD_RXPD_TXPD, + NPHY_REV3_RFSEQ_CMD_TR_SWITCH, + NPHY_REV3_RFSEQ_CMD_RXG_FBW, + NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, + NPHY_REV3_RFSEQ_CMD_END + }; + static const u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; + u8 rfseq_rx2tx_events_rev3[] = { + NPHY_REV3_RFSEQ_CMD_NOP, + NPHY_REV3_RFSEQ_CMD_RXG_FBW, + NPHY_REV3_RFSEQ_CMD_TR_SWITCH, + NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, + NPHY_REV3_RFSEQ_CMD_RXPD_TXPD, + NPHY_REV3_RFSEQ_CMD_TX_GAIN, + NPHY_REV3_RFSEQ_CMD_INT_PA_PU, + NPHY_REV3_RFSEQ_CMD_EXT_PA, + NPHY_REV3_RFSEQ_CMD_END + }; + u8 rfseq_rx2tx_dlys_rev3[] = { 8, 6, 6, 4, 4, 18, 42, 1, 1 }; + static const u8 rfseq_rx2tx_events_rev3_ipa[] = { + NPHY_REV3_RFSEQ_CMD_NOP, + NPHY_REV3_RFSEQ_CMD_RXG_FBW, + NPHY_REV3_RFSEQ_CMD_TR_SWITCH, + NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, + NPHY_REV3_RFSEQ_CMD_RXPD_TXPD, + NPHY_REV3_RFSEQ_CMD_TX_GAIN, + NPHY_REV3_RFSEQ_CMD_CLR_RXRX_BIAS, + NPHY_REV3_RFSEQ_CMD_INT_PA_PU, + NPHY_REV3_RFSEQ_CMD_END + }; + static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; + s16 alpha0, alpha1, alpha2; + s16 beta0, beta1, beta2; + u32 
leg_data_weights, ht_data_weights, nss1_data_weights, + stbc_data_weights; + u8 chan_freq_range = 0; + static const u16 dac_control = 0x0002; + u16 aux_adc_vmid_rev4[] = { 0xa2, 0xb4, 0xb4, 0x89 }; + u16 aux_adc_vmid_rev3[] = { 0xa2, 0xb4, 0xb4, 0x89 }; + u16 *aux_adc_vmid; + u16 aux_adc_gain_rev4[] = { 0x02, 0x02, 0x02, 0x00 }; + u16 aux_adc_gain_rev3[] = { 0x02, 0x02, 0x02, 0x00 }; + u16 *aux_adc_gain; + static const u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; + static const u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; + s32 min_nvar_val = 0x18d; + u8 pdetrange; + u8 triso; write_phy_reg(pi, 0x23f, 0x1f8); write_phy_reg(pi, 0x240, 0x1f8); @@ -17030,7 +17015,33 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) MHF4_BPHY_TXCORE0, MHF4_BPHY_TXCORE0, BRCM_BAND_ALL); } - } else { +} + +void wlc_phy_workarounds_nphy_rev1(struct brcms_phy *pi) +{ + static const u8 rfseq_rx2tx_events[] = { + NPHY_RFSEQ_CMD_NOP, + NPHY_RFSEQ_CMD_RXG_FBW, + NPHY_RFSEQ_CMD_TR_SWITCH, + NPHY_RFSEQ_CMD_CLR_HIQ_DIS, + NPHY_RFSEQ_CMD_RXPD_TXPD, + NPHY_RFSEQ_CMD_TX_GAIN, + NPHY_RFSEQ_CMD_EXT_PA + }; + u8 rfseq_rx2tx_dlys[] = { 8, 6, 6, 2, 4, 60, 1 }; + static const u8 rfseq_tx2rx_events[] = { + NPHY_RFSEQ_CMD_NOP, + NPHY_RFSEQ_CMD_EXT_PA, + NPHY_RFSEQ_CMD_TX_GAIN, + NPHY_RFSEQ_CMD_RXPD_TXPD, + NPHY_RFSEQ_CMD_TR_SWITCH, + NPHY_RFSEQ_CMD_RXG_FBW, + NPHY_RFSEQ_CMD_CLR_HIQ_DIS + }; + static const u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; + s16 alpha0, alpha1, alpha2; + s16 beta0, beta1, beta2; + u16 regval; if (pi->sh->boardflags2 & BFL2_SKWRKFEM_BRD || (pi->sh->boardtype == 0x8b)) { @@ -17128,7 +17139,26 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) mod_phy_reg(pi, 0x221, NPHY_FORCESIG_DECODEGATEDCLKS, NPHY_FORCESIG_DECODEGATEDCLKS); - } +} + +static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) +{ + if (CHSPEC_IS5G(pi->radio_chanspec)) + wlc_phy_classifier_nphy(pi, NPHY_ClassifierCtrl_cck_en, 0); + else + wlc_phy_classifier_nphy(pi, NPHY_ClassifierCtrl_cck_en, 1); + + if (pi->phyhang_avoid) + wlc_phy_stay_in_carriersearch_nphy(pi, true); + + or_phy_reg(pi, 0xb1, NPHY_IQFlip_ADC1 | NPHY_IQFlip_ADC2); + + if (NREV_GE(pi->pubpi.phy_rev, 7)) + wlc_phy_workarounds_nphy_rev7(pi); + else if (NREV_GE(pi->pubpi.phy_rev, 3)) + wlc_phy_workarounds_nphy_rev3(pi); + else + wlc_phy_workarounds_nphy_rev1(pi); if (pi->phyhang_avoid) wlc_phy_stay_in_carriersearch_nphy(pi, false); From ad1987d673920dd136006b56c9f3e4ab829356fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:14 +0200 Subject: [PATCH 1461/2177] brcmsmac: reindent split functions In the previous commit I left the indentation alone to help reviewing the patch, this one now runs the three new functions through 'indent -kr -8' with some manual fixups to avoid silliness. No changes other than whitespace are intended here. 
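
The layout the reindent aims for is the kernel's usual one: K&R brace placement, one tab per indentation level, tabs displayed as 8 columns. A minimal sketch of that style on invented names (example_phy and example_workaround are not from this driver, they only illustrate the layout the rewritten functions below follow):

#include <linux/types.h>

struct example_phy {
	u8 phy_rev;		/* invented field, for illustration only */
};

/* Kernel style: K&R braces, one tab (rendered as 8 columns) per level. */
static void example_workaround(struct example_phy *pi)
{
	if (pi->phy_rev >= 7)
		pi->phy_rev = 7;
}

The hunks below are the same statements from the previous patch re-emitted at this indentation depth, which is why the diffstat is large while the object code is unchanged.
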
Signed-off-by: Arnd Bergmann Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmsmac/phy/phy_n.c | 1567 ++++++++--------- 1 file changed, 727 insertions(+), 840 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c index ed409a80f3d2..763e8ba6b178 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c @@ -16074,7 +16074,8 @@ static void wlc_phy_workarounds_nphy_rev7(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_END }; - static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; + static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = + { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; static const u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; u32 leg_data_weights; u8 chan_freq_range = 0; @@ -16114,526 +16115,452 @@ static void wlc_phy_workarounds_nphy_rev7(struct brcms_phy *pi) int coreNum; - if (NREV_IS(pi->pubpi.phy_rev, 7)) { - mod_phy_reg(pi, 0x221, (0x1 << 4), (1 << 4)); + if (NREV_IS(pi->pubpi.phy_rev, 7)) { + mod_phy_reg(pi, 0x221, (0x1 << 4), (1 << 4)); - mod_phy_reg(pi, 0x160, (0x7f << 0), (32 << 0)); - mod_phy_reg(pi, 0x160, (0x7f << 8), (39 << 8)); - mod_phy_reg(pi, 0x161, (0x7f << 0), (46 << 0)); - mod_phy_reg(pi, 0x161, (0x7f << 8), (51 << 8)); - mod_phy_reg(pi, 0x162, (0x7f << 0), (55 << 0)); - mod_phy_reg(pi, 0x162, (0x7f << 8), (58 << 8)); - mod_phy_reg(pi, 0x163, (0x7f << 0), (60 << 0)); - mod_phy_reg(pi, 0x163, (0x7f << 8), (62 << 8)); - mod_phy_reg(pi, 0x164, (0x7f << 0), (62 << 0)); - mod_phy_reg(pi, 0x164, (0x7f << 8), (63 << 8)); - mod_phy_reg(pi, 0x165, (0x7f << 0), (63 << 0)); - mod_phy_reg(pi, 0x165, (0x7f << 8), (64 << 8)); - mod_phy_reg(pi, 0x166, (0x7f << 0), (64 << 0)); - mod_phy_reg(pi, 0x166, (0x7f << 8), (64 << 8)); - mod_phy_reg(pi, 0x167, (0x7f << 0), (64 << 0)); - mod_phy_reg(pi, 0x167, (0x7f << 8), (64 << 8)); - } + mod_phy_reg(pi, 0x160, (0x7f << 0), (32 << 0)); + mod_phy_reg(pi, 0x160, (0x7f << 8), (39 << 8)); + mod_phy_reg(pi, 0x161, (0x7f << 0), (46 << 0)); + mod_phy_reg(pi, 0x161, (0x7f << 8), (51 << 8)); + mod_phy_reg(pi, 0x162, (0x7f << 0), (55 << 0)); + mod_phy_reg(pi, 0x162, (0x7f << 8), (58 << 8)); + mod_phy_reg(pi, 0x163, (0x7f << 0), (60 << 0)); + mod_phy_reg(pi, 0x163, (0x7f << 8), (62 << 8)); + mod_phy_reg(pi, 0x164, (0x7f << 0), (62 << 0)); + mod_phy_reg(pi, 0x164, (0x7f << 8), (63 << 8)); + mod_phy_reg(pi, 0x165, (0x7f << 0), (63 << 0)); + mod_phy_reg(pi, 0x165, (0x7f << 8), (64 << 8)); + mod_phy_reg(pi, 0x166, (0x7f << 0), (64 << 0)); + mod_phy_reg(pi, 0x166, (0x7f << 8), (64 << 8)); + mod_phy_reg(pi, 0x167, (0x7f << 0), (64 << 0)); + mod_phy_reg(pi, 0x167, (0x7f << 8), (64 << 8)); + } - if (NREV_LE(pi->pubpi.phy_rev, 8)) { - write_phy_reg(pi, 0x23f, 0x1b0); - write_phy_reg(pi, 0x240, 0x1b0); - } + if (NREV_LE(pi->pubpi.phy_rev, 8)) { + write_phy_reg(pi, 0x23f, 0x1b0); + write_phy_reg(pi, 0x240, 0x1b0); + } - if (NREV_GE(pi->pubpi.phy_rev, 8)) - mod_phy_reg(pi, 0xbd, (0xff << 0), (114 << 0)); + if (NREV_GE(pi->pubpi.phy_rev, 8)) + mod_phy_reg(pi, 0xbd, (0xff << 0), (114 << 0)); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x00, 16, - &dac_control); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x10, 16, - &dac_control); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x00, 16, + &dac_control); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x10, 16, + &dac_control); 
- wlc_phy_table_read_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, - 1, 0, 32, &leg_data_weights); - leg_data_weights = leg_data_weights & 0xffffff; - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, - 1, 0, 32, &leg_data_weights); + wlc_phy_table_read_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, + 1, 0, 32, &leg_data_weights); + leg_data_weights = leg_data_weights & 0xffffff; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, + 1, 0, 32, &leg_data_weights); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, - 2, 0x15e, 16, - rfseq_rx2tx_dacbufpu_rev7); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 2, 0x16e, 16, - rfseq_rx2tx_dacbufpu_rev7); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, + 2, 0x15e, 16, rfseq_rx2tx_dacbufpu_rev7); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 2, 0x16e, 16, + rfseq_rx2tx_dacbufpu_rev7); - if (PHY_IPA(pi)) - wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_RX2TX, - rfseq_rx2tx_events_rev3_ipa, - rfseq_rx2tx_dlys_rev3_ipa, - ARRAY_SIZE(rfseq_rx2tx_events_rev3_ipa)); + if (PHY_IPA(pi)) + wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_RX2TX, + rfseq_rx2tx_events_rev3_ipa, + rfseq_rx2tx_dlys_rev3_ipa, + ARRAY_SIZE + (rfseq_rx2tx_events_rev3_ipa)); - mod_phy_reg(pi, 0x299, (0x3 << 14), (0x1 << 14)); - mod_phy_reg(pi, 0x29d, (0x3 << 14), (0x1 << 14)); + mod_phy_reg(pi, 0x299, (0x3 << 14), (0x1 << 14)); + mod_phy_reg(pi, 0x29d, (0x3 << 14), (0x1 << 14)); - tx_lpf_bw_ofdm_20mhz = wlc_phy_read_lpf_bw_ctl_nphy(pi, 0x154); - tx_lpf_bw_ofdm_40mhz = wlc_phy_read_lpf_bw_ctl_nphy(pi, 0x159); - tx_lpf_bw_11b = wlc_phy_read_lpf_bw_ctl_nphy(pi, 0x152); + tx_lpf_bw_ofdm_20mhz = wlc_phy_read_lpf_bw_ctl_nphy(pi, 0x154); + tx_lpf_bw_ofdm_40mhz = wlc_phy_read_lpf_bw_ctl_nphy(pi, 0x159); + tx_lpf_bw_11b = wlc_phy_read_lpf_bw_ctl_nphy(pi, 0x152); - if (PHY_IPA(pi)) { + if (PHY_IPA(pi)) { - if (((pi->pubpi.radiorev == 5) - && (CHSPEC_IS40(pi->radio_chanspec) == 1)) - || (pi->pubpi.radiorev == 7) - || (pi->pubpi.radiorev == 8)) { + if (((pi->pubpi.radiorev == 5) + && (CHSPEC_IS40(pi->radio_chanspec) == 1)) + || (pi->pubpi.radiorev == 7) + || (pi->pubpi.radiorev == 8)) { - rccal_bcap_val = - read_radio_reg( - pi, - RADIO_2057_RCCAL_BCAP_VAL); - rccal_scap_val = - read_radio_reg( - pi, - RADIO_2057_RCCAL_SCAP_VAL); + rccal_bcap_val = + read_radio_reg(pi, RADIO_2057_RCCAL_BCAP_VAL); + rccal_scap_val = + read_radio_reg(pi, RADIO_2057_RCCAL_SCAP_VAL); - rccal_tx20_11b_bcap = rccal_bcap_val; - rccal_tx20_11b_scap = rccal_scap_val; + rccal_tx20_11b_bcap = rccal_bcap_val; + rccal_tx20_11b_scap = rccal_scap_val; - if ((pi->pubpi.radiorev == 5) && - (CHSPEC_IS40(pi->radio_chanspec) == 1)) { + if ((pi->pubpi.radiorev == 5) && + (CHSPEC_IS40(pi->radio_chanspec) == 1)) { - rccal_tx20_11n_bcap = rccal_bcap_val; - rccal_tx20_11n_scap = rccal_scap_val; - rccal_tx40_11n_bcap = 0xc; - rccal_tx40_11n_scap = 0xc; + rccal_tx20_11n_bcap = rccal_bcap_val; + rccal_tx20_11n_scap = rccal_scap_val; + rccal_tx40_11n_bcap = 0xc; + rccal_tx40_11n_scap = 0xc; - rccal_ovrd = true; + rccal_ovrd = true; - } else if ((pi->pubpi.radiorev == 7) - || (pi->pubpi.radiorev == 8)) { + } else if ((pi->pubpi.radiorev == 7) + || (pi->pubpi.radiorev == 8)) { - tx_lpf_bw_ofdm_20mhz = 4; - tx_lpf_bw_11b = 1; + tx_lpf_bw_ofdm_20mhz = 4; + tx_lpf_bw_11b = 1; - if (CHSPEC_IS2G(pi->radio_chanspec)) { - rccal_tx20_11n_bcap = 0xc; - rccal_tx20_11n_scap = 0xc; - rccal_tx40_11n_bcap = 0xa; - rccal_tx40_11n_scap = 0xa; - } else { - rccal_tx20_11n_bcap = 0x14; - rccal_tx20_11n_scap = 0x14; - rccal_tx40_11n_bcap = 0xf; - 
rccal_tx40_11n_scap = 0xf; - } - - rccal_ovrd = true; + if (CHSPEC_IS2G(pi->radio_chanspec)) { + rccal_tx20_11n_bcap = 0xc; + rccal_tx20_11n_scap = 0xc; + rccal_tx40_11n_bcap = 0xa; + rccal_tx40_11n_scap = 0xa; + } else { + rccal_tx20_11n_bcap = 0x14; + rccal_tx20_11n_scap = 0x14; + rccal_tx40_11n_bcap = 0xf; + rccal_tx40_11n_scap = 0xf; } - } - - } else { - - if (pi->pubpi.radiorev == 5) { - - tx_lpf_bw_ofdm_20mhz = 1; - tx_lpf_bw_ofdm_40mhz = 3; - - rccal_bcap_val = - read_radio_reg( - pi, - RADIO_2057_RCCAL_BCAP_VAL); - rccal_scap_val = - read_radio_reg( - pi, - RADIO_2057_RCCAL_SCAP_VAL); - - rccal_tx20_11b_bcap = rccal_bcap_val; - rccal_tx20_11b_scap = rccal_scap_val; - - rccal_tx20_11n_bcap = 0x13; - rccal_tx20_11n_scap = 0x11; - rccal_tx40_11n_bcap = 0x13; - rccal_tx40_11n_scap = 0x11; rccal_ovrd = true; } } - if (rccal_ovrd) { + } else { - rx2tx_lpf_rc_lut_tx20_11b = - (rccal_tx20_11b_bcap << 8) | - (rccal_tx20_11b_scap << 3) | - tx_lpf_bw_11b; - rx2tx_lpf_rc_lut_tx20_11n = - (rccal_tx20_11n_bcap << 8) | - (rccal_tx20_11n_scap << 3) | - tx_lpf_bw_ofdm_20mhz; - rx2tx_lpf_rc_lut_tx40_11n = - (rccal_tx40_11n_bcap << 8) | - (rccal_tx40_11n_scap << 3) | - tx_lpf_bw_ofdm_40mhz; + if (pi->pubpi.radiorev == 5) { - for (coreNum = 0; coreNum <= 1; coreNum++) { - wlc_phy_table_write_nphy( - pi, NPHY_TBL_ID_RFSEQ, - 1, - 0x152 + coreNum * 0x10, - 16, - &rx2tx_lpf_rc_lut_tx20_11b); - wlc_phy_table_write_nphy( - pi, NPHY_TBL_ID_RFSEQ, - 1, - 0x153 + coreNum * 0x10, - 16, - &rx2tx_lpf_rc_lut_tx20_11n); - wlc_phy_table_write_nphy( - pi, NPHY_TBL_ID_RFSEQ, - 1, - 0x154 + coreNum * 0x10, - 16, - &rx2tx_lpf_rc_lut_tx20_11n); - wlc_phy_table_write_nphy( - pi, NPHY_TBL_ID_RFSEQ, - 1, - 0x155 + coreNum * 0x10, - 16, - &rx2tx_lpf_rc_lut_tx40_11n); - wlc_phy_table_write_nphy( - pi, NPHY_TBL_ID_RFSEQ, - 1, - 0x156 + coreNum * 0x10, - 16, - &rx2tx_lpf_rc_lut_tx40_11n); - wlc_phy_table_write_nphy( - pi, NPHY_TBL_ID_RFSEQ, - 1, - 0x157 + coreNum * 0x10, - 16, - &rx2tx_lpf_rc_lut_tx40_11n); - wlc_phy_table_write_nphy( - pi, NPHY_TBL_ID_RFSEQ, - 1, - 0x158 + coreNum * 0x10, - 16, - &rx2tx_lpf_rc_lut_tx40_11n); - wlc_phy_table_write_nphy( - pi, NPHY_TBL_ID_RFSEQ, - 1, - 0x159 + coreNum * 0x10, - 16, - &rx2tx_lpf_rc_lut_tx40_11n); - } + tx_lpf_bw_ofdm_20mhz = 1; + tx_lpf_bw_ofdm_40mhz = 3; - wlc_phy_rfctrl_override_nphy_rev7( - pi, (0x1 << 4), - 1, 0x3, 0, - NPHY_REV7_RFCTRLOVERRIDE_ID2); + rccal_bcap_val = + read_radio_reg(pi, RADIO_2057_RCCAL_BCAP_VAL); + rccal_scap_val = + read_radio_reg(pi, RADIO_2057_RCCAL_SCAP_VAL); + + rccal_tx20_11b_bcap = rccal_bcap_val; + rccal_tx20_11b_scap = rccal_scap_val; + + rccal_tx20_11n_bcap = 0x13; + rccal_tx20_11n_scap = 0x11; + rccal_tx40_11n_bcap = 0x13; + rccal_tx40_11n_scap = 0x11; + + rccal_ovrd = true; + } + } + + if (rccal_ovrd) { + + rx2tx_lpf_rc_lut_tx20_11b = + (rccal_tx20_11b_bcap << 8) | + (rccal_tx20_11b_scap << 3) | tx_lpf_bw_11b; + rx2tx_lpf_rc_lut_tx20_11n = + (rccal_tx20_11n_bcap << 8) | + (rccal_tx20_11n_scap << 3) | tx_lpf_bw_ofdm_20mhz; + rx2tx_lpf_rc_lut_tx40_11n = + (rccal_tx40_11n_bcap << 8) | + (rccal_tx40_11n_scap << 3) | tx_lpf_bw_ofdm_40mhz; + + for (coreNum = 0; coreNum <= 1; coreNum++) { + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, + 0x152 + coreNum * 0x10, 16, + &rx2tx_lpf_rc_lut_tx20_11b); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, + 0x153 + coreNum * 0x10, 16, + &rx2tx_lpf_rc_lut_tx20_11n); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, + 0x154 + coreNum * 0x10, 16, + &rx2tx_lpf_rc_lut_tx20_11n); + 
wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, + 0x155 + coreNum * 0x10, 16, + &rx2tx_lpf_rc_lut_tx40_11n); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, + 0x156 + coreNum * 0x10, 16, + &rx2tx_lpf_rc_lut_tx40_11n); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, + 0x157 + coreNum * 0x10, 16, + &rx2tx_lpf_rc_lut_tx40_11n); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, + 0x158 + coreNum * 0x10, 16, + &rx2tx_lpf_rc_lut_tx40_11n); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, + 0x159 + coreNum * 0x10, 16, + &rx2tx_lpf_rc_lut_tx40_11n); } - write_phy_reg(pi, 0x32f, 0x3); + wlc_phy_rfctrl_override_nphy_rev7(pi, (0x1 << 4), 1, 0x3, 0, + NPHY_REV7_RFCTRLOVERRIDE_ID2); + } - if ((pi->pubpi.radiorev == 4) || (pi->pubpi.radiorev == 6)) - wlc_phy_rfctrl_override_nphy_rev7( - pi, (0x1 << 2), - 1, 0x3, 0, - NPHY_REV7_RFCTRLOVERRIDE_ID0); + write_phy_reg(pi, 0x32f, 0x3); - if ((pi->pubpi.radiorev == 3) || (pi->pubpi.radiorev == 4) || - (pi->pubpi.radiorev == 6)) { - if ((pi->sh->sromrev >= 8) - && (pi->sh->boardflags2 & BFL2_IPALVLSHIFT_3P3)) - ipalvlshift_3p3_war_en = 1; + if ((pi->pubpi.radiorev == 4) || (pi->pubpi.radiorev == 6)) + wlc_phy_rfctrl_override_nphy_rev7(pi, (0x1 << 2), 1, 0x3, 0, + NPHY_REV7_RFCTRLOVERRIDE_ID0); - if (ipalvlshift_3p3_war_en) { - write_radio_reg(pi, RADIO_2057_GPAIO_CONFIG, - 0x5); - write_radio_reg(pi, RADIO_2057_GPAIO_SEL1, - 0x30); - write_radio_reg(pi, RADIO_2057_GPAIO_SEL0, 0x0); - or_radio_reg(pi, - RADIO_2057_RXTXBIAS_CONFIG_CORE0, - 0x1); - or_radio_reg(pi, - RADIO_2057_RXTXBIAS_CONFIG_CORE1, - 0x1); + if ((pi->pubpi.radiorev == 3) || (pi->pubpi.radiorev == 4) || + (pi->pubpi.radiorev == 6)) { + if ((pi->sh->sromrev >= 8) + && (pi->sh->boardflags2 & BFL2_IPALVLSHIFT_3P3)) + ipalvlshift_3p3_war_en = 1; - ipa2g_mainbias = 0x1f; + if (ipalvlshift_3p3_war_en) { + write_radio_reg(pi, RADIO_2057_GPAIO_CONFIG, 0x5); + write_radio_reg(pi, RADIO_2057_GPAIO_SEL1, 0x30); + write_radio_reg(pi, RADIO_2057_GPAIO_SEL0, 0x0); + or_radio_reg(pi, RADIO_2057_RXTXBIAS_CONFIG_CORE0, 0x1); + or_radio_reg(pi, RADIO_2057_RXTXBIAS_CONFIG_CORE1, 0x1); - ipa2g_casconv = 0x6f; + ipa2g_mainbias = 0x1f; - ipa2g_biasfilt = 0xaa; - } else { + ipa2g_casconv = 0x6f; - ipa2g_mainbias = 0x2b; + ipa2g_biasfilt = 0xaa; + } else { - ipa2g_casconv = 0x7f; + ipa2g_mainbias = 0x2b; - ipa2g_biasfilt = 0xee; + ipa2g_casconv = 0x7f; + + ipa2g_biasfilt = 0xee; + } + + if (CHSPEC_IS2G(pi->radio_chanspec)) { + for (coreNum = 0; coreNum <= 1; coreNum++) { + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + coreNum, IPA2G_IMAIN, + ipa2g_mainbias); + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + coreNum, IPA2G_CASCONV, + ipa2g_casconv); + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + coreNum, + IPA2G_BIAS_FILTER, + ipa2g_biasfilt); + } + } + } + + if (PHY_IPA(pi)) { + if (CHSPEC_IS2G(pi->radio_chanspec)) { + if ((pi->pubpi.radiorev == 3) + || (pi->pubpi.radiorev == 4) + || (pi->pubpi.radiorev == 6)) + txgm_idac_bleed = 0x7f; + + for (coreNum = 0; coreNum <= 1; coreNum++) { + if (txgm_idac_bleed != 0) + WRITE_RADIO_REG4(pi, RADIO_2057, + CORE, coreNum, + TXGM_IDAC_BLEED, + txgm_idac_bleed); } - if (CHSPEC_IS2G(pi->radio_chanspec)) { + if (pi->pubpi.radiorev == 5) { for (coreNum = 0; coreNum <= 1; coreNum++) { - WRITE_RADIO_REG4(pi, RADIO_2057, CORE, - coreNum, IPA2G_IMAIN, - ipa2g_mainbias); - WRITE_RADIO_REG4(pi, RADIO_2057, CORE, - coreNum, IPA2G_CASCONV, - ipa2g_casconv); - WRITE_RADIO_REG4(pi, RADIO_2057, CORE, - coreNum, + WRITE_RADIO_REG4(pi, RADIO_2057, + CORE, coreNum, + IPA2G_CASCONV, + 0x13); + 
WRITE_RADIO_REG4(pi, RADIO_2057, + CORE, coreNum, + IPA2G_IMAIN, + 0x1f); + WRITE_RADIO_REG4(pi, RADIO_2057, + CORE, coreNum, IPA2G_BIAS_FILTER, - ipa2g_biasfilt); + 0xee); + WRITE_RADIO_REG4(pi, RADIO_2057, + CORE, coreNum, + PAD2G_IDACS, + 0x8a); + WRITE_RADIO_REG4(pi, RADIO_2057, + CORE, coreNum, + PAD_BIAS_FILTER_BWS, + 0x3e); + } + } else if ((pi->pubpi.radiorev == 7) || + (pi->pubpi.radiorev == 8)) { + + if (CHSPEC_IS40(pi->radio_chanspec) == 0) { + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + 0, IPA2G_IMAIN, 0x14); + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + 1, IPA2G_IMAIN, 0x12); + } else { + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + 0, IPA2G_IMAIN, 0x16); + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + 1, IPA2G_IMAIN, 0x16); } } - } - if (PHY_IPA(pi)) { - if (CHSPEC_IS2G(pi->radio_chanspec)) { - if ((pi->pubpi.radiorev == 3) - || (pi->pubpi.radiorev == 4) - || (pi->pubpi.radiorev == 6)) - txgm_idac_bleed = 0x7f; - - for (coreNum = 0; coreNum <= 1; coreNum++) { - if (txgm_idac_bleed != 0) - WRITE_RADIO_REG4( - pi, RADIO_2057, - CORE, coreNum, - TXGM_IDAC_BLEED, - txgm_idac_bleed); - } - - if (pi->pubpi.radiorev == 5) { - - for (coreNum = 0; coreNum <= 1; - coreNum++) { - WRITE_RADIO_REG4(pi, RADIO_2057, - CORE, coreNum, - IPA2G_CASCONV, - 0x13); - WRITE_RADIO_REG4(pi, RADIO_2057, - CORE, coreNum, - IPA2G_IMAIN, - 0x1f); - WRITE_RADIO_REG4( - pi, RADIO_2057, - CORE, coreNum, - IPA2G_BIAS_FILTER, - 0xee); - WRITE_RADIO_REG4(pi, RADIO_2057, - CORE, coreNum, - PAD2G_IDACS, - 0x8a); - WRITE_RADIO_REG4( - pi, RADIO_2057, - CORE, coreNum, - PAD_BIAS_FILTER_BWS, - 0x3e); - } - - } else if ((pi->pubpi.radiorev == 7) - || (pi->pubpi.radiorev == 8)) { - - if (CHSPEC_IS40(pi->radio_chanspec) == - 0) { - WRITE_RADIO_REG4(pi, RADIO_2057, - CORE, 0, - IPA2G_IMAIN, - 0x14); - WRITE_RADIO_REG4(pi, RADIO_2057, - CORE, 1, - IPA2G_IMAIN, - 0x12); - } else { - WRITE_RADIO_REG4(pi, RADIO_2057, - CORE, 0, - IPA2G_IMAIN, - 0x16); - WRITE_RADIO_REG4(pi, RADIO_2057, - CORE, 1, - IPA2G_IMAIN, - 0x16); - } - } - - } else { - freq = CHAN5G_FREQ(CHSPEC_CHANNEL( - pi->radio_chanspec)); - if (((freq >= 5180) && (freq <= 5230)) - || ((freq >= 5745) && (freq <= 5805))) { - WRITE_RADIO_REG4(pi, RADIO_2057, CORE, - 0, IPA5G_BIAS_FILTER, - 0xff); - WRITE_RADIO_REG4(pi, RADIO_2057, CORE, - 1, IPA5G_BIAS_FILTER, - 0xff); - } - } } else { - - if (pi->pubpi.radiorev != 5) { - for (coreNum = 0; coreNum <= 1; coreNum++) { - WRITE_RADIO_REG4(pi, RADIO_2057, CORE, - coreNum, - TXMIX2G_TUNE_BOOST_PU, - 0x61); - WRITE_RADIO_REG4(pi, RADIO_2057, CORE, - coreNum, - TXGM_IDAC_BLEED, 0x70); - } + freq = + CHAN5G_FREQ(CHSPEC_CHANNEL + (pi->radio_chanspec)); + if (((freq >= 5180) && (freq <= 5230)) + || ((freq >= 5745) && (freq <= 5805))) { + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + 0, IPA5G_BIAS_FILTER, 0xff); + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, + 1, IPA5G_BIAS_FILTER, 0xff); } } + } else { - if (pi->pubpi.radiorev == 4) { - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, - 0x05, 16, - &afectrl_adc_ctrl1_rev7); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, - 0x15, 16, - &afectrl_adc_ctrl1_rev7); - + if (pi->pubpi.radiorev != 5) { for (coreNum = 0; coreNum <= 1; coreNum++) { WRITE_RADIO_REG4(pi, RADIO_2057, CORE, coreNum, - AFE_VCM_CAL_MASTER, 0x0); + TXMIX2G_TUNE_BOOST_PU, 0x61); WRITE_RADIO_REG4(pi, RADIO_2057, CORE, coreNum, - AFE_SET_VCM_I, 0x3f); - WRITE_RADIO_REG4(pi, RADIO_2057, CORE, coreNum, - AFE_SET_VCM_Q, 0x3f); + TXGM_IDAC_BLEED, 0x70); } - } else { - mod_phy_reg(pi, 0xa6, (0x1 << 2), (0x1 << 2)); - 
mod_phy_reg(pi, 0x8f, (0x1 << 2), (0x1 << 2)); - mod_phy_reg(pi, 0xa7, (0x1 << 2), (0x1 << 2)); - mod_phy_reg(pi, 0xa5, (0x1 << 2), (0x1 << 2)); - - mod_phy_reg(pi, 0xa6, (0x1 << 0), 0); - mod_phy_reg(pi, 0x8f, (0x1 << 0), (0x1 << 0)); - mod_phy_reg(pi, 0xa7, (0x1 << 0), 0); - mod_phy_reg(pi, 0xa5, (0x1 << 0), (0x1 << 0)); - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, - 0x05, 16, - &afectrl_adc_ctrl2_rev7); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, - 0x15, 16, - &afectrl_adc_ctrl2_rev7); - - mod_phy_reg(pi, 0xa6, (0x1 << 2), 0); - mod_phy_reg(pi, 0x8f, (0x1 << 2), 0); - mod_phy_reg(pi, 0xa7, (0x1 << 2), 0); - mod_phy_reg(pi, 0xa5, (0x1 << 2), 0); } + } - write_phy_reg(pi, 0x6a, 0x2); + if (pi->pubpi.radiorev == 4) { + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x05, 16, + &afectrl_adc_ctrl1_rev7); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x15, 16, + &afectrl_adc_ctrl1_rev7); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 256, 32, - &min_nvar_offset_6mbps); - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 2, 0x138, 16, - &rfseq_pktgn_lpf_hpc_rev7); - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, 0x141, 16, - &rfseq_pktgn_lpf_h_hpc_rev7); - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 3, 0x133, 16, - &rfseq_htpktgn_lpf_hpc_rev7); - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 2, 0x146, 16, - &rfseq_cckpktgn_lpf_hpc_rev7); - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, 0x123, 16, - &rfseq_tx2rx_lpf_h_hpc_rev7); - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, 0x12A, 16, - &rfseq_rx2tx_lpf_h_hpc_rev7); - - if (CHSPEC_IS40(pi->radio_chanspec) == 0) { - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 3, - 32, &min_nvar_val); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, - 127, 32, &min_nvar_val); - } else { - min_nvar_val = noise_var_tbl_rev7[3]; - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 3, - 32, &min_nvar_val); - - min_nvar_val = noise_var_tbl_rev7[127]; - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, - 127, 32, &min_nvar_val); + for (coreNum = 0; coreNum <= 1; coreNum++) { + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, coreNum, + AFE_VCM_CAL_MASTER, 0x0); + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, coreNum, + AFE_SET_VCM_I, 0x3f); + WRITE_RADIO_REG4(pi, RADIO_2057, CORE, coreNum, + AFE_SET_VCM_Q, 0x3f); } + } else { + mod_phy_reg(pi, 0xa6, (0x1 << 2), (0x1 << 2)); + mod_phy_reg(pi, 0x8f, (0x1 << 2), (0x1 << 2)); + mod_phy_reg(pi, 0xa7, (0x1 << 2), (0x1 << 2)); + mod_phy_reg(pi, 0xa5, (0x1 << 2), (0x1 << 2)); - wlc_phy_workarounds_nphy_gainctrl(pi); + mod_phy_reg(pi, 0xa6, (0x1 << 0), 0); + mod_phy_reg(pi, 0x8f, (0x1 << 0), (0x1 << 0)); + mod_phy_reg(pi, 0xa7, (0x1 << 0), 0); + mod_phy_reg(pi, 0xa5, (0x1 << 0), (0x1 << 0)); - pdetrange = - (CHSPEC_IS5G(pi->radio_chanspec)) ? pi->srom_fem5g. 
- pdetrange : pi->srom_fem2g.pdetrange; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x05, 16, + &afectrl_adc_ctrl2_rev7); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x15, 16, + &afectrl_adc_ctrl2_rev7); - if (pdetrange == 0) { - chan_freq_range = - wlc_phy_get_chan_freq_range_nphy(pi, 0); - if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { - aux_adc_vmid_rev7_core0[3] = 0x70; - aux_adc_vmid_rev7_core1[3] = 0x70; - aux_adc_gain_rev7[3] = 2; - } else { - aux_adc_vmid_rev7_core0[3] = 0x80; - aux_adc_vmid_rev7_core1[3] = 0x80; - aux_adc_gain_rev7[3] = 3; - } - } else if (pdetrange == 1) { - if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { - aux_adc_vmid_rev7_core0[3] = 0x7c; - aux_adc_vmid_rev7_core1[3] = 0x7c; - aux_adc_gain_rev7[3] = 2; - } else { - aux_adc_vmid_rev7_core0[3] = 0x8c; - aux_adc_vmid_rev7_core1[3] = 0x8c; - aux_adc_gain_rev7[3] = 1; - } - } else if (pdetrange == 2) { - if (pi->pubpi.radioid == BCM2057_ID) { - if ((pi->pubpi.radiorev == 5) - || (pi->pubpi.radiorev == 7) - || (pi->pubpi.radiorev == 8)) { - if (chan_freq_range == - WL_CHAN_FREQ_RANGE_2G) { - aux_adc_vmid_rev7_core0[3] = - 0x8c; - aux_adc_vmid_rev7_core1[3] = - 0x8c; - aux_adc_gain_rev7[3] = 0; - } else { - aux_adc_vmid_rev7_core0[3] = - 0x96; - aux_adc_vmid_rev7_core1[3] = - 0x96; - aux_adc_gain_rev7[3] = 0; - } + mod_phy_reg(pi, 0xa6, (0x1 << 2), 0); + mod_phy_reg(pi, 0x8f, (0x1 << 2), 0); + mod_phy_reg(pi, 0xa7, (0x1 << 2), 0); + mod_phy_reg(pi, 0xa5, (0x1 << 2), 0); + } + + write_phy_reg(pi, 0x6a, 0x2); + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 256, 32, + &min_nvar_offset_6mbps); + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 2, 0x138, 16, + &rfseq_pktgn_lpf_hpc_rev7); + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, 0x141, 16, + &rfseq_pktgn_lpf_h_hpc_rev7); + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 3, 0x133, 16, + &rfseq_htpktgn_lpf_hpc_rev7); + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 2, 0x146, 16, + &rfseq_cckpktgn_lpf_hpc_rev7); + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, 0x123, 16, + &rfseq_tx2rx_lpf_h_hpc_rev7); + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_RFSEQ, 1, 0x12A, 16, + &rfseq_rx2tx_lpf_h_hpc_rev7); + + if (CHSPEC_IS40(pi->radio_chanspec) == 0) { + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 3, + 32, &min_nvar_val); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, + 127, 32, &min_nvar_val); + } else { + min_nvar_val = noise_var_tbl_rev7[3]; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 3, + 32, &min_nvar_val); + + min_nvar_val = noise_var_tbl_rev7[127]; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, + 127, 32, &min_nvar_val); + } + + wlc_phy_workarounds_nphy_gainctrl(pi); + + pdetrange = (CHSPEC_IS5G(pi->radio_chanspec)) ? 
+ pi->srom_fem5g.pdetrange : pi->srom_fem2g.pdetrange; + + if (pdetrange == 0) { + chan_freq_range = wlc_phy_get_chan_freq_range_nphy(pi, 0); + if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { + aux_adc_vmid_rev7_core0[3] = 0x70; + aux_adc_vmid_rev7_core1[3] = 0x70; + aux_adc_gain_rev7[3] = 2; + } else { + aux_adc_vmid_rev7_core0[3] = 0x80; + aux_adc_vmid_rev7_core1[3] = 0x80; + aux_adc_gain_rev7[3] = 3; + } + } else if (pdetrange == 1) { + if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { + aux_adc_vmid_rev7_core0[3] = 0x7c; + aux_adc_vmid_rev7_core1[3] = 0x7c; + aux_adc_gain_rev7[3] = 2; + } else { + aux_adc_vmid_rev7_core0[3] = 0x8c; + aux_adc_vmid_rev7_core1[3] = 0x8c; + aux_adc_gain_rev7[3] = 1; + } + } else if (pdetrange == 2) { + if (pi->pubpi.radioid == BCM2057_ID) { + if ((pi->pubpi.radiorev == 5) + || (pi->pubpi.radiorev == 7) + || (pi->pubpi.radiorev == 8)) { + if (chan_freq_range == + WL_CHAN_FREQ_RANGE_2G) { + aux_adc_vmid_rev7_core0[3] = 0x8c; + aux_adc_vmid_rev7_core1[3] = 0x8c; + aux_adc_gain_rev7[3] = 0; + } else { + aux_adc_vmid_rev7_core0[3] = 0x96; + aux_adc_vmid_rev7_core1[3] = 0x96; + aux_adc_gain_rev7[3] = 0; } } - - } else if (pdetrange == 3) { - if (chan_freq_range == WL_CHAN_FREQ_RANGE_2G) { - aux_adc_vmid_rev7_core0[3] = 0x89; - aux_adc_vmid_rev7_core1[3] = 0x89; - aux_adc_gain_rev7[3] = 0; - } - - } else if (pdetrange == 5) { - - if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { - aux_adc_vmid_rev7_core0[3] = 0x80; - aux_adc_vmid_rev7_core1[3] = 0x80; - aux_adc_gain_rev7[3] = 3; - } else { - aux_adc_vmid_rev7_core0[3] = 0x70; - aux_adc_vmid_rev7_core1[3] = 0x70; - aux_adc_gain_rev7[3] = 2; - } } - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x08, 16, - &aux_adc_vmid_rev7_core0); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x18, 16, - &aux_adc_vmid_rev7_core1); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x0c, 16, - &aux_adc_gain_rev7); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x1c, 16, - &aux_adc_gain_rev7); + } else if (pdetrange == 3) { + if (chan_freq_range == WL_CHAN_FREQ_RANGE_2G) { + aux_adc_vmid_rev7_core0[3] = 0x89; + aux_adc_vmid_rev7_core1[3] = 0x89; + aux_adc_gain_rev7[3] = 0; + } + + } else if (pdetrange == 5) { + + if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { + aux_adc_vmid_rev7_core0[3] = 0x80; + aux_adc_vmid_rev7_core1[3] = 0x80; + aux_adc_gain_rev7[3] = 3; + } else { + aux_adc_vmid_rev7_core0[3] = 0x70; + aux_adc_vmid_rev7_core1[3] = 0x70; + aux_adc_gain_rev7[3] = 2; + } + } + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x08, 16, + &aux_adc_vmid_rev7_core0); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x18, 16, + &aux_adc_vmid_rev7_core1); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x0c, 16, + &aux_adc_gain_rev7); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, 0x1c, 16, + &aux_adc_gain_rev7); } static void wlc_phy_workarounds_nphy_rev3(struct brcms_phy *pi) @@ -16672,7 +16599,8 @@ static void wlc_phy_workarounds_nphy_rev3(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_END }; - static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; + static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = + { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; s16 alpha0, alpha1, alpha2; s16 beta0, beta1, beta2; u32 leg_data_weights, ht_data_weights, nss1_data_weights, @@ -16691,330 +16619,290 @@ static void wlc_phy_workarounds_nphy_rev3(struct brcms_phy *pi) u8 pdetrange; u8 triso; - write_phy_reg(pi, 0x23f, 0x1f8); - write_phy_reg(pi, 0x240, 0x1f8); + 
write_phy_reg(pi, 0x23f, 0x1f8); + write_phy_reg(pi, 0x240, 0x1f8); - wlc_phy_table_read_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, - 1, 0, 32, &leg_data_weights); - leg_data_weights = leg_data_weights & 0xffffff; - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, - 1, 0, 32, &leg_data_weights); + wlc_phy_table_read_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, + 1, 0, 32, &leg_data_weights); + leg_data_weights = leg_data_weights & 0xffffff; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, + 1, 0, 32, &leg_data_weights); - alpha0 = 293; - alpha1 = 435; - alpha2 = 261; - beta0 = 366; - beta1 = 205; - beta2 = 32; - write_phy_reg(pi, 0x145, alpha0); - write_phy_reg(pi, 0x146, alpha1); - write_phy_reg(pi, 0x147, alpha2); - write_phy_reg(pi, 0x148, beta0); - write_phy_reg(pi, 0x149, beta1); - write_phy_reg(pi, 0x14a, beta2); + alpha0 = 293; + alpha1 = 435; + alpha2 = 261; + beta0 = 366; + beta1 = 205; + beta2 = 32; + write_phy_reg(pi, 0x145, alpha0); + write_phy_reg(pi, 0x146, alpha1); + write_phy_reg(pi, 0x147, alpha2); + write_phy_reg(pi, 0x148, beta0); + write_phy_reg(pi, 0x149, beta1); + write_phy_reg(pi, 0x14a, beta2); - write_phy_reg(pi, 0x38, 0xC); - write_phy_reg(pi, 0x2ae, 0xC); + write_phy_reg(pi, 0x38, 0xC); + write_phy_reg(pi, 0x2ae, 0xC); - wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_TX2RX, - rfseq_tx2rx_events_rev3, - rfseq_tx2rx_dlys_rev3, - ARRAY_SIZE(rfseq_tx2rx_events_rev3)); + wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_TX2RX, + rfseq_tx2rx_events_rev3, + rfseq_tx2rx_dlys_rev3, + ARRAY_SIZE(rfseq_tx2rx_events_rev3)); - if (PHY_IPA(pi)) - wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_RX2TX, - rfseq_rx2tx_events_rev3_ipa, - rfseq_rx2tx_dlys_rev3_ipa, - ARRAY_SIZE(rfseq_rx2tx_events_rev3_ipa)); + if (PHY_IPA(pi)) + wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_RX2TX, + rfseq_rx2tx_events_rev3_ipa, + rfseq_rx2tx_dlys_rev3_ipa, + ARRAY_SIZE (rfseq_rx2tx_events_rev3_ipa)); - if ((pi->sh->hw_phyrxchain != 0x3) && - (pi->sh->hw_phyrxchain != pi->sh->hw_phytxchain)) { + if ((pi->sh->hw_phyrxchain != 0x3) && + (pi->sh->hw_phyrxchain != pi->sh->hw_phytxchain)) { - if (PHY_IPA(pi)) { - rfseq_rx2tx_dlys_rev3[5] = 59; - rfseq_rx2tx_dlys_rev3[6] = 1; - rfseq_rx2tx_events_rev3[7] = - NPHY_REV3_RFSEQ_CMD_END; - } - - wlc_phy_set_rfseq_nphy( - pi, NPHY_RFSEQ_RX2TX, - rfseq_rx2tx_events_rev3, - rfseq_rx2tx_dlys_rev3, - ARRAY_SIZE(rfseq_rx2tx_events_rev3)); + if (PHY_IPA(pi)) { + rfseq_rx2tx_dlys_rev3[5] = 59; + rfseq_rx2tx_dlys_rev3[6] = 1; + rfseq_rx2tx_events_rev3[7] = NPHY_REV3_RFSEQ_CMD_END; } - if (CHSPEC_IS2G(pi->radio_chanspec)) - write_phy_reg(pi, 0x6a, 0x2); - else - write_phy_reg(pi, 0x6a, 0x9c40); + wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_RX2TX, + rfseq_rx2tx_events_rev3, + rfseq_rx2tx_dlys_rev3, + ARRAY_SIZE (rfseq_rx2tx_events_rev3)); + } - mod_phy_reg(pi, 0x294, (0xf << 8), (7 << 8)); + if (CHSPEC_IS2G(pi->radio_chanspec)) + write_phy_reg(pi, 0x6a, 0x2); + else + write_phy_reg(pi, 0x6a, 0x9c40); - if (CHSPEC_IS40(pi->radio_chanspec) == 0) { - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 3, - 32, &min_nvar_val); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, - 127, 32, &min_nvar_val); + mod_phy_reg(pi, 0x294, (0xf << 8), (7 << 8)); + + if (CHSPEC_IS40(pi->radio_chanspec) == 0) { + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 3, + 32, &min_nvar_val); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, + 127, 32, &min_nvar_val); + } else { + min_nvar_val = noise_var_tbl_rev3[3]; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 3, + 32, 
&min_nvar_val); + + min_nvar_val = noise_var_tbl_rev3[127]; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, + 127, 32, &min_nvar_val); + } + + wlc_phy_workarounds_nphy_gainctrl(pi); + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x00, 16, + &dac_control); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x10, 16, + &dac_control); + + pdetrange = (CHSPEC_IS5G(pi->radio_chanspec)) ? + pi->srom_fem5g.pdetrange : pi->srom_fem2g.pdetrange; + + if (pdetrange == 0) { + if (NREV_GE(pi->pubpi.phy_rev, 4)) { + aux_adc_vmid = aux_adc_vmid_rev4; + aux_adc_gain = aux_adc_gain_rev4; } else { - min_nvar_val = noise_var_tbl_rev3[3]; - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, 3, - 32, &min_nvar_val); - - min_nvar_val = noise_var_tbl_rev3[127]; - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_NOISEVAR, 1, - 127, 32, &min_nvar_val); + aux_adc_vmid = aux_adc_vmid_rev3; + aux_adc_gain = aux_adc_gain_rev3; } - - wlc_phy_workarounds_nphy_gainctrl(pi); - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x00, 16, - &dac_control); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x10, 16, - &dac_control); - - pdetrange = - (CHSPEC_IS5G(pi->radio_chanspec)) ? pi->srom_fem5g. - pdetrange : pi->srom_fem2g.pdetrange; - - if (pdetrange == 0) { - if (NREV_GE(pi->pubpi.phy_rev, 4)) { - aux_adc_vmid = aux_adc_vmid_rev4; - aux_adc_gain = aux_adc_gain_rev4; - } else { - aux_adc_vmid = aux_adc_vmid_rev3; - aux_adc_gain = aux_adc_gain_rev3; - } - chan_freq_range = - wlc_phy_get_chan_freq_range_nphy(pi, 0); - if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { - switch (chan_freq_range) { - case WL_CHAN_FREQ_RANGE_5GL: - aux_adc_vmid[3] = 0x89; - aux_adc_gain[3] = 0; - break; - case WL_CHAN_FREQ_RANGE_5GM: - aux_adc_vmid[3] = 0x89; - aux_adc_gain[3] = 0; - break; - case WL_CHAN_FREQ_RANGE_5GH: - aux_adc_vmid[3] = 0x89; - aux_adc_gain[3] = 0; - break; - default: - break; - } - } - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x08, 16, aux_adc_vmid); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x18, 16, aux_adc_vmid); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x0c, 16, aux_adc_gain); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x1c, 16, aux_adc_gain); - } else if (pdetrange == 1) { - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x08, 16, sk_adc_vmid); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x18, 16, sk_adc_vmid); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x0c, 16, sk_adc_gain); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x1c, 16, sk_adc_gain); - } else if (pdetrange == 2) { - - u16 bcm_adc_vmid[] = { 0xa2, 0xb4, 0xb4, 0x74 }; - u16 bcm_adc_gain[] = { 0x02, 0x02, 0x02, 0x04 }; - - if (NREV_GE(pi->pubpi.phy_rev, 6)) { - chan_freq_range = - wlc_phy_get_chan_freq_range_nphy(pi, 0); - if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { - bcm_adc_vmid[3] = 0x8e; - bcm_adc_gain[3] = 0x03; - } else { - bcm_adc_vmid[3] = 0x94; - bcm_adc_gain[3] = 0x03; - } - } else if (NREV_IS(pi->pubpi.phy_rev, 5)) { - bcm_adc_vmid[3] = 0x84; - bcm_adc_gain[3] = 0x02; - } - - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x08, 16, bcm_adc_vmid); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x18, 16, bcm_adc_vmid); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x0c, 16, bcm_adc_gain); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x1c, 16, bcm_adc_gain); - } else if (pdetrange == 3) { - chan_freq_range = - wlc_phy_get_chan_freq_range_nphy(pi, 0); - if 
((NREV_GE(pi->pubpi.phy_rev, 4)) - && (chan_freq_range == WL_CHAN_FREQ_RANGE_2G)) { - - u16 auxadc_vmid[] = { - 0xa2, 0xb4, 0xb4, 0x270 - }; - u16 auxadc_gain[] = { - 0x02, 0x02, 0x02, 0x00 - }; - - wlc_phy_table_write_nphy(pi, - NPHY_TBL_ID_AFECTRL, 4, - 0x08, 16, auxadc_vmid); - wlc_phy_table_write_nphy(pi, - NPHY_TBL_ID_AFECTRL, 4, - 0x18, 16, auxadc_vmid); - wlc_phy_table_write_nphy(pi, - NPHY_TBL_ID_AFECTRL, 4, - 0x0c, 16, auxadc_gain); - wlc_phy_table_write_nphy(pi, - NPHY_TBL_ID_AFECTRL, 4, - 0x1c, 16, auxadc_gain); - } - } else if ((pdetrange == 4) || (pdetrange == 5)) { - u16 bcm_adc_vmid[] = { 0xa2, 0xb4, 0xb4, 0x0 }; - u16 bcm_adc_gain[] = { 0x02, 0x02, 0x02, 0x0 }; - u16 Vmid[2], Av[2]; - - chan_freq_range = - wlc_phy_get_chan_freq_range_nphy(pi, 0); - if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { - Vmid[0] = (pdetrange == 4) ? 0x8e : 0x89; - Vmid[1] = (pdetrange == 4) ? 0x96 : 0x89; - Av[0] = (pdetrange == 4) ? 2 : 0; - Av[1] = (pdetrange == 4) ? 2 : 0; - } else { - Vmid[0] = (pdetrange == 4) ? 0x89 : 0x74; - Vmid[1] = (pdetrange == 4) ? 0x8b : 0x70; - Av[0] = (pdetrange == 4) ? 2 : 0; - Av[1] = (pdetrange == 4) ? 2 : 0; - } - - bcm_adc_vmid[3] = Vmid[0]; - bcm_adc_gain[3] = Av[0]; - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x08, 16, bcm_adc_vmid); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x0c, 16, bcm_adc_gain); - - bcm_adc_vmid[3] = Vmid[1]; - bcm_adc_gain[3] = Av[1]; - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x18, 16, bcm_adc_vmid); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, - 0x1c, 16, bcm_adc_gain); - } - - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_MAST_BIAS | RADIO_2056_RX0), - 0x0); - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_MAST_BIAS | RADIO_2056_RX1), - 0x0); - - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_BIAS_MAIN | RADIO_2056_RX0), - 0x6); - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_BIAS_MAIN | RADIO_2056_RX1), - 0x6); - - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_BIAS_AUX | RADIO_2056_RX0), - 0x7); - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_BIAS_AUX | RADIO_2056_RX1), - 0x7); - - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_LOB_BIAS | RADIO_2056_RX0), - 0x88); - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_LOB_BIAS | RADIO_2056_RX1), - 0x88); - - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_CMFB_IDAC | RADIO_2056_RX0), - 0x0); - write_radio_reg(pi, - (RADIO_2056_RX_MIXA_CMFB_IDAC | RADIO_2056_RX1), - 0x0); - - write_radio_reg(pi, - (RADIO_2056_RX_MIXG_CMFB_IDAC | RADIO_2056_RX0), - 0x0); - write_radio_reg(pi, - (RADIO_2056_RX_MIXG_CMFB_IDAC | RADIO_2056_RX1), - 0x0); - - triso = - (CHSPEC_IS5G(pi->radio_chanspec)) ? pi->srom_fem5g. 
- triso : pi->srom_fem2g.triso; - if (triso == 7) { - wlc_phy_war_force_trsw_to_R_cliplo_nphy(pi, PHY_CORE_0); - wlc_phy_war_force_trsw_to_R_cliplo_nphy(pi, PHY_CORE_1); - } - - wlc_phy_war_txchain_upd_nphy(pi, pi->sh->hw_phytxchain); - - if (((pi->sh->boardflags2 & BFL2_APLL_WAR) && - (CHSPEC_IS5G(pi->radio_chanspec))) || - (((pi->sh->boardflags2 & BFL2_GPLL_WAR) || - (pi->sh->boardflags2 & BFL2_GPLL_WAR2)) && - (CHSPEC_IS2G(pi->radio_chanspec)))) { - nss1_data_weights = 0x00088888; - ht_data_weights = 0x00088888; - stbc_data_weights = 0x00088888; - } else { - nss1_data_weights = 0x88888888; - ht_data_weights = 0x88888888; - stbc_data_weights = 0x88888888; - } - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, - 1, 1, 32, &nss1_data_weights); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, - 1, 2, 32, &ht_data_weights); - wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, - 1, 3, 32, &stbc_data_weights); - - if (NREV_IS(pi->pubpi.phy_rev, 4)) { - if (CHSPEC_IS5G(pi->radio_chanspec)) { - write_radio_reg(pi, - RADIO_2056_TX_GMBB_IDAC | - RADIO_2056_TX0, 0x70); - write_radio_reg(pi, - RADIO_2056_TX_GMBB_IDAC | - RADIO_2056_TX1, 0x70); + chan_freq_range = wlc_phy_get_chan_freq_range_nphy(pi, 0); + if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { + switch (chan_freq_range) { + case WL_CHAN_FREQ_RANGE_5GL: + aux_adc_vmid[3] = 0x89; + aux_adc_gain[3] = 0; + break; + case WL_CHAN_FREQ_RANGE_5GM: + aux_adc_vmid[3] = 0x89; + aux_adc_gain[3] = 0; + break; + case WL_CHAN_FREQ_RANGE_5GH: + aux_adc_vmid[3] = 0x89; + aux_adc_gain[3] = 0; + break; + default: + break; } } + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x08, 16, aux_adc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x18, 16, aux_adc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x0c, 16, aux_adc_gain); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x1c, 16, aux_adc_gain); + } else if (pdetrange == 1) { + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x08, 16, sk_adc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x18, 16, sk_adc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x0c, 16, sk_adc_gain); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x1c, 16, sk_adc_gain); + } else if (pdetrange == 2) { - if (!pi->edcrs_threshold_lock) { - write_phy_reg(pi, 0x224, 0x3eb); - write_phy_reg(pi, 0x225, 0x3eb); - write_phy_reg(pi, 0x226, 0x341); - write_phy_reg(pi, 0x227, 0x341); - write_phy_reg(pi, 0x228, 0x42b); - write_phy_reg(pi, 0x229, 0x42b); - write_phy_reg(pi, 0x22a, 0x381); - write_phy_reg(pi, 0x22b, 0x381); - write_phy_reg(pi, 0x22c, 0x42b); - write_phy_reg(pi, 0x22d, 0x42b); - write_phy_reg(pi, 0x22e, 0x381); - write_phy_reg(pi, 0x22f, 0x381); - } + u16 bcm_adc_vmid[] = { 0xa2, 0xb4, 0xb4, 0x74 }; + u16 bcm_adc_gain[] = { 0x02, 0x02, 0x02, 0x04 }; if (NREV_GE(pi->pubpi.phy_rev, 6)) { - - if (pi->sh->boardflags2 & BFL2_SINGLEANT_CCK) - wlapi_bmac_mhf(pi->sh->physhim, MHF4, - MHF4_BPHY_TXCORE0, - MHF4_BPHY_TXCORE0, BRCM_BAND_ALL); + chan_freq_range = + wlc_phy_get_chan_freq_range_nphy(pi, 0); + if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { + bcm_adc_vmid[3] = 0x8e; + bcm_adc_gain[3] = 0x03; + } else { + bcm_adc_vmid[3] = 0x94; + bcm_adc_gain[3] = 0x03; + } + } else if (NREV_IS(pi->pubpi.phy_rev, 5)) { + bcm_adc_vmid[3] = 0x84; + bcm_adc_gain[3] = 0x02; } + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x08, 16, bcm_adc_vmid); + wlc_phy_table_write_nphy(pi, 
NPHY_TBL_ID_AFECTRL, 4, + 0x18, 16, bcm_adc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x0c, 16, bcm_adc_gain); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x1c, 16, bcm_adc_gain); + } else if (pdetrange == 3) { + chan_freq_range = wlc_phy_get_chan_freq_range_nphy(pi, 0); + if ((NREV_GE(pi->pubpi.phy_rev, 4)) && + (chan_freq_range == WL_CHAN_FREQ_RANGE_2G)) { + u16 auxadc_vmid[] = { 0xa2, 0xb4, 0xb4, 0x270 }; + u16 auxadc_gain[] = { 0x02, 0x02, 0x02, 0x00 }; + + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x08, 16, auxadc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x18, 16, auxadc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x0c, 16, auxadc_gain); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x1c, 16, auxadc_gain); + } + } else if ((pdetrange == 4) || (pdetrange == 5)) { + u16 bcm_adc_vmid[] = { 0xa2, 0xb4, 0xb4, 0x0 }; + u16 bcm_adc_gain[] = { 0x02, 0x02, 0x02, 0x0 }; + u16 Vmid[2], Av[2]; + + chan_freq_range = wlc_phy_get_chan_freq_range_nphy(pi, 0); + if (chan_freq_range != WL_CHAN_FREQ_RANGE_2G) { + Vmid[0] = (pdetrange == 4) ? 0x8e : 0x89; + Vmid[1] = (pdetrange == 4) ? 0x96 : 0x89; + Av[0] = (pdetrange == 4) ? 2 : 0; + Av[1] = (pdetrange == 4) ? 2 : 0; + } else { + Vmid[0] = (pdetrange == 4) ? 0x89 : 0x74; + Vmid[1] = (pdetrange == 4) ? 0x8b : 0x70; + Av[0] = (pdetrange == 4) ? 2 : 0; + Av[1] = (pdetrange == 4) ? 2 : 0; + } + + bcm_adc_vmid[3] = Vmid[0]; + bcm_adc_gain[3] = Av[0]; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x08, 16, bcm_adc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x0c, 16, bcm_adc_gain); + + bcm_adc_vmid[3] = Vmid[1]; + bcm_adc_gain[3] = Av[1]; + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x18, 16, bcm_adc_vmid); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 4, + 0x1c, 16, bcm_adc_gain); + } + + write_radio_reg(pi, (RADIO_2056_RX_MIXA_MAST_BIAS | RADIO_2056_RX0), 0x0); + write_radio_reg(pi, (RADIO_2056_RX_MIXA_MAST_BIAS | RADIO_2056_RX1), 0x0); + + write_radio_reg(pi, (RADIO_2056_RX_MIXA_BIAS_MAIN | RADIO_2056_RX0), 0x6); + write_radio_reg(pi, (RADIO_2056_RX_MIXA_BIAS_MAIN | RADIO_2056_RX1), 0x6); + + write_radio_reg(pi, (RADIO_2056_RX_MIXA_BIAS_AUX | RADIO_2056_RX0), 0x7); + write_radio_reg(pi, (RADIO_2056_RX_MIXA_BIAS_AUX | RADIO_2056_RX1), 0x7); + + write_radio_reg(pi, (RADIO_2056_RX_MIXA_LOB_BIAS | RADIO_2056_RX0), 0x88); + write_radio_reg(pi, (RADIO_2056_RX_MIXA_LOB_BIAS | RADIO_2056_RX1), 0x88); + + write_radio_reg(pi, (RADIO_2056_RX_MIXA_CMFB_IDAC | RADIO_2056_RX0), 0x0); + write_radio_reg(pi, (RADIO_2056_RX_MIXA_CMFB_IDAC | RADIO_2056_RX1), 0x0); + + write_radio_reg(pi, (RADIO_2056_RX_MIXG_CMFB_IDAC | RADIO_2056_RX0), 0x0); + write_radio_reg(pi, (RADIO_2056_RX_MIXG_CMFB_IDAC | RADIO_2056_RX1), 0x0); + + triso = (CHSPEC_IS5G(pi->radio_chanspec)) ? 
+ pi->srom_fem5g.triso : pi->srom_fem2g.triso; + if (triso == 7) { + wlc_phy_war_force_trsw_to_R_cliplo_nphy(pi, PHY_CORE_0); + wlc_phy_war_force_trsw_to_R_cliplo_nphy(pi, PHY_CORE_1); + } + + wlc_phy_war_txchain_upd_nphy(pi, pi->sh->hw_phytxchain); + + if (((pi->sh->boardflags2 & BFL2_APLL_WAR) && + (CHSPEC_IS5G(pi->radio_chanspec))) || + (((pi->sh->boardflags2 & BFL2_GPLL_WAR) || + (pi->sh->boardflags2 & BFL2_GPLL_WAR2)) && + (CHSPEC_IS2G(pi->radio_chanspec)))) { + nss1_data_weights = 0x00088888; + ht_data_weights = 0x00088888; + stbc_data_weights = 0x00088888; + } else { + nss1_data_weights = 0x88888888; + ht_data_weights = 0x88888888; + stbc_data_weights = 0x88888888; + } + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, + 1, 1, 32, &nss1_data_weights); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, + 1, 2, 32, &ht_data_weights); + wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL, + 1, 3, 32, &stbc_data_weights); + + if (NREV_IS(pi->pubpi.phy_rev, 4)) { + if (CHSPEC_IS5G(pi->radio_chanspec)) { + write_radio_reg(pi, + RADIO_2056_TX_GMBB_IDAC | + RADIO_2056_TX0, 0x70); + write_radio_reg(pi, + RADIO_2056_TX_GMBB_IDAC | + RADIO_2056_TX1, 0x70); + } + } + + if (!pi->edcrs_threshold_lock) { + write_phy_reg(pi, 0x224, 0x3eb); + write_phy_reg(pi, 0x225, 0x3eb); + write_phy_reg(pi, 0x226, 0x341); + write_phy_reg(pi, 0x227, 0x341); + write_phy_reg(pi, 0x228, 0x42b); + write_phy_reg(pi, 0x229, 0x42b); + write_phy_reg(pi, 0x22a, 0x381); + write_phy_reg(pi, 0x22b, 0x381); + write_phy_reg(pi, 0x22c, 0x42b); + write_phy_reg(pi, 0x22d, 0x42b); + write_phy_reg(pi, 0x22e, 0x381); + write_phy_reg(pi, 0x22f, 0x381); + } + + if (NREV_GE(pi->pubpi.phy_rev, 6)) { + + if (pi->sh->boardflags2 & BFL2_SINGLEANT_CCK) + wlapi_bmac_mhf(pi->sh->physhim, MHF4, + MHF4_BPHY_TXCORE0, + MHF4_BPHY_TXCORE0, BRCM_BAND_ALL); + } } void wlc_phy_workarounds_nphy_rev1(struct brcms_phy *pi) @@ -17043,102 +16931,101 @@ void wlc_phy_workarounds_nphy_rev1(struct brcms_phy *pi) s16 beta0, beta1, beta2; u16 regval; - if (pi->sh->boardflags2 & BFL2_SKWRKFEM_BRD || - (pi->sh->boardtype == 0x8b)) { - uint i; - u8 war_dlys[] = { 1, 6, 6, 2, 4, 20, 1 }; - for (i = 0; i < ARRAY_SIZE(rfseq_rx2tx_dlys); i++) - rfseq_rx2tx_dlys[i] = war_dlys[i]; - } + if (pi->sh->boardflags2 & BFL2_SKWRKFEM_BRD || + (pi->sh->boardtype == 0x8b)) { + uint i; + u8 war_dlys[] = { 1, 6, 6, 2, 4, 20, 1 }; + for (i = 0; i < ARRAY_SIZE(rfseq_rx2tx_dlys); i++) + rfseq_rx2tx_dlys[i] = war_dlys[i]; + } - if (CHSPEC_IS5G(pi->radio_chanspec) && pi->phy_5g_pwrgain) { - and_radio_reg(pi, RADIO_2055_CORE1_TX_RF_SPARE, 0xf7); - and_radio_reg(pi, RADIO_2055_CORE2_TX_RF_SPARE, 0xf7); - } else { - or_radio_reg(pi, RADIO_2055_CORE1_TX_RF_SPARE, 0x8); - or_radio_reg(pi, RADIO_2055_CORE2_TX_RF_SPARE, 0x8); - } + if (CHSPEC_IS5G(pi->radio_chanspec) && pi->phy_5g_pwrgain) { + and_radio_reg(pi, RADIO_2055_CORE1_TX_RF_SPARE, 0xf7); + and_radio_reg(pi, RADIO_2055_CORE2_TX_RF_SPARE, 0xf7); + } else { + or_radio_reg(pi, RADIO_2055_CORE1_TX_RF_SPARE, 0x8); + or_radio_reg(pi, RADIO_2055_CORE2_TX_RF_SPARE, 0x8); + } - regval = 0x000a; - wlc_phy_table_write_nphy(pi, 8, 1, 0, 16, ®val); - wlc_phy_table_write_nphy(pi, 8, 1, 0x10, 16, ®val); + regval = 0x000a; + wlc_phy_table_write_nphy(pi, 8, 1, 0, 16, ®val); + wlc_phy_table_write_nphy(pi, 8, 1, 0x10, 16, ®val); - if (NREV_LT(pi->pubpi.phy_rev, 3)) { - regval = 0xcdaa; - wlc_phy_table_write_nphy(pi, 8, 1, 0x02, 16, ®val); - wlc_phy_table_write_nphy(pi, 8, 1, 0x12, 16, ®val); - } + if 
(NREV_LT(pi->pubpi.phy_rev, 3)) { + regval = 0xcdaa; + wlc_phy_table_write_nphy(pi, 8, 1, 0x02, 16, ®val); + wlc_phy_table_write_nphy(pi, 8, 1, 0x12, 16, ®val); + } - if (NREV_LT(pi->pubpi.phy_rev, 2)) { - regval = 0x0000; - wlc_phy_table_write_nphy(pi, 8, 1, 0x08, 16, ®val); - wlc_phy_table_write_nphy(pi, 8, 1, 0x18, 16, ®val); + if (NREV_LT(pi->pubpi.phy_rev, 2)) { + regval = 0x0000; + wlc_phy_table_write_nphy(pi, 8, 1, 0x08, 16, ®val); + wlc_phy_table_write_nphy(pi, 8, 1, 0x18, 16, ®val); - regval = 0x7aab; - wlc_phy_table_write_nphy(pi, 8, 1, 0x07, 16, ®val); - wlc_phy_table_write_nphy(pi, 8, 1, 0x17, 16, ®val); + regval = 0x7aab; + wlc_phy_table_write_nphy(pi, 8, 1, 0x07, 16, ®val); + wlc_phy_table_write_nphy(pi, 8, 1, 0x17, 16, ®val); - regval = 0x0800; - wlc_phy_table_write_nphy(pi, 8, 1, 0x06, 16, ®val); - wlc_phy_table_write_nphy(pi, 8, 1, 0x16, 16, ®val); - } + regval = 0x0800; + wlc_phy_table_write_nphy(pi, 8, 1, 0x06, 16, ®val); + wlc_phy_table_write_nphy(pi, 8, 1, 0x16, 16, ®val); + } - write_phy_reg(pi, 0xf8, 0x02d8); - write_phy_reg(pi, 0xf9, 0x0301); - write_phy_reg(pi, 0xfa, 0x02d8); - write_phy_reg(pi, 0xfb, 0x0301); + write_phy_reg(pi, 0xf8, 0x02d8); + write_phy_reg(pi, 0xf9, 0x0301); + write_phy_reg(pi, 0xfa, 0x02d8); + write_phy_reg(pi, 0xfb, 0x0301); - wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_RX2TX, rfseq_rx2tx_events, - rfseq_rx2tx_dlys, - ARRAY_SIZE(rfseq_rx2tx_events)); + wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_RX2TX, rfseq_rx2tx_events, + rfseq_rx2tx_dlys, + ARRAY_SIZE(rfseq_rx2tx_events)); - wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_TX2RX, rfseq_tx2rx_events, - rfseq_tx2rx_dlys, - ARRAY_SIZE(rfseq_tx2rx_events)); + wlc_phy_set_rfseq_nphy(pi, NPHY_RFSEQ_TX2RX, rfseq_tx2rx_events, + rfseq_tx2rx_dlys, + ARRAY_SIZE(rfseq_tx2rx_events)); - wlc_phy_workarounds_nphy_gainctrl(pi); + wlc_phy_workarounds_nphy_gainctrl(pi); - if (NREV_LT(pi->pubpi.phy_rev, 2)) { + if (NREV_LT(pi->pubpi.phy_rev, 2)) { - if (read_phy_reg(pi, 0xa0) & NPHY_MLenable) - wlapi_bmac_mhf(pi->sh->physhim, MHF3, - MHF3_NPHY_MLADV_WAR, - MHF3_NPHY_MLADV_WAR, - BRCM_BAND_ALL); + if (read_phy_reg(pi, 0xa0) & NPHY_MLenable) + wlapi_bmac_mhf(pi->sh->physhim, MHF3, + MHF3_NPHY_MLADV_WAR, + MHF3_NPHY_MLADV_WAR, BRCM_BAND_ALL); - } else if (NREV_IS(pi->pubpi.phy_rev, 2)) { - write_phy_reg(pi, 0x1e3, 0x0); - write_phy_reg(pi, 0x1e4, 0x0); - } + } else if (NREV_IS(pi->pubpi.phy_rev, 2)) { + write_phy_reg(pi, 0x1e3, 0x0); + write_phy_reg(pi, 0x1e4, 0x0); + } - if (NREV_LT(pi->pubpi.phy_rev, 2)) - mod_phy_reg(pi, 0x90, (0x1 << 7), 0); + if (NREV_LT(pi->pubpi.phy_rev, 2)) + mod_phy_reg(pi, 0x90, (0x1 << 7), 0); - alpha0 = 293; - alpha1 = 435; - alpha2 = 261; - beta0 = 366; - beta1 = 205; - beta2 = 32; - write_phy_reg(pi, 0x145, alpha0); - write_phy_reg(pi, 0x146, alpha1); - write_phy_reg(pi, 0x147, alpha2); - write_phy_reg(pi, 0x148, beta0); - write_phy_reg(pi, 0x149, beta1); - write_phy_reg(pi, 0x14a, beta2); + alpha0 = 293; + alpha1 = 435; + alpha2 = 261; + beta0 = 366; + beta1 = 205; + beta2 = 32; + write_phy_reg(pi, 0x145, alpha0); + write_phy_reg(pi, 0x146, alpha1); + write_phy_reg(pi, 0x147, alpha2); + write_phy_reg(pi, 0x148, beta0); + write_phy_reg(pi, 0x149, beta1); + write_phy_reg(pi, 0x14a, beta2); - if (NREV_LT(pi->pubpi.phy_rev, 3)) { - mod_phy_reg(pi, 0x142, (0xf << 12), 0); + if (NREV_LT(pi->pubpi.phy_rev, 3)) { + mod_phy_reg(pi, 0x142, (0xf << 12), 0); - write_phy_reg(pi, 0x192, 0xb5); - write_phy_reg(pi, 0x193, 0xa4); - write_phy_reg(pi, 0x194, 0x0); - } + write_phy_reg(pi, 0x192, 0xb5); + 
write_phy_reg(pi, 0x193, 0xa4); + write_phy_reg(pi, 0x194, 0x0); + } - if (NREV_IS(pi->pubpi.phy_rev, 2)) - mod_phy_reg(pi, 0x221, - NPHY_FORCESIG_DECODEGATEDCLKS, - NPHY_FORCESIG_DECODEGATEDCLKS); + if (NREV_IS(pi->pubpi.phy_rev, 2)) + mod_phy_reg(pi, 0x221, + NPHY_FORCESIG_DECODEGATEDCLKS, + NPHY_FORCESIG_DECODEGATEDCLKS); } static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) From 758f7e06063a87b67c118794a3ba8931cced67c6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 18 Oct 2017 22:45:27 -0700 Subject: [PATCH 1462/2177] bcma: Use bcma_debug and not pr_cont in MIPS driver Commit 66cc04424960 ("bcma: use bcma_debug and pr_cont in MIPS driver") converted a printk(KERN_DEBUG to bcma_debug. bcma_debug is guarded by a #define DEBUG via pr_debug. This means that the bcma_debug will generally not be emitted but any pr_cont following the bcma_debug will be emitted. Correct this by removing the uses of pr_cont by using a temporary. Signed-off-by: Joe Perches Signed-off-by: Kalle Valo --- drivers/bcma/driver_mips.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 5904ef1aa624..f040aba48d50 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -184,11 +184,14 @@ static void bcma_core_mips_print_irq(struct bcma_device *dev, unsigned int irq) { int i; static const char *irq_name[] = {"2(S)", "3", "4", "5", "6", "D", "I"}; + char interrupts[20]; + char *ints = interrupts; - bcma_debug(dev->bus, "core 0x%04x, irq :", dev->id.id); - for (i = 0; i <= 6; i++) - pr_cont(" %s%s", irq_name[i], i == irq ? "*" : " "); - pr_cont("\n"); + for (i = 0; i < ARRAY_SIZE(irq_name); i++) + ints += sprintf(ints, " %s%c", + irq_name[i], i == irq ? '*' : ' '); + + bcma_debug(dev->bus, "core 0x%04x, irq:%s\n", dev->id.id, interrupts); } static void bcma_core_mips_dump_irq(struct bcma_bus *bus) From 62689167261d4f64eecf4077db247264144df147 Mon Sep 17 00:00:00 2001 From: Nik Nyby Date: Mon, 23 Oct 2017 21:49:09 -0400 Subject: [PATCH 1463/2177] rtlwifi: rtl8821ae: Fix typo in variable name In _rtl8821ae_dbi_write(), wrtie_addr should be write_addr. 
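
Returning for a moment to the bcma_debug() change above: the underlying issue generalizes. pr_debug()-based macros are normally silent (compiled out, or disabled by default under dynamic debug), while pr_cont() always prints, so a continuation can land in the log without the line it was meant to extend. The cure used above is to format the variable part into a local buffer and emit one complete line. A minimal sketch of that pattern with invented names (example_dev and example_print_irq are not from the driver; scnprintf() is used here only to keep the sketch obviously bounded):

#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/types.h>

struct example_dev {
	u16 id;			/* invented stand-in for dev->id.id */
};

static void example_print_irq(struct example_dev *dev, unsigned int irq)
{
	static const char * const irq_name[] = {
		"2(S)", "3", "4", "5", "6", "D", "I"
	};
	char buf[32];		/* comfortably fits all seven entries */
	char *p = buf;
	unsigned int i;

	/* Build the whole message first instead of pr_cont()ing pieces. */
	for (i = 0; i < ARRAY_SIZE(irq_name); i++)
		p += scnprintf(p, buf + sizeof(buf) - p, " %s%c",
			       irq_name[i], i == irq ? '*' : ' ');

	/* One self-contained line; silent when debugging is disabled. */
	pr_debug("core 0x%04x, irq:%s\n", dev->id, buf);
}

With this shape, whether anything reaches the log is decided once, by the debug macro itself, rather than partly by it and partly by pr_cont().
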
Signed-off-by: Nik Nyby Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 6408bc8456f3..60c82a5b51cd 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1131,13 +1131,13 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) static void _rtl8821ae_dbi_write(struct rtl_priv *rtlpriv, u16 addr, u8 data) { u8 tmp = 0, count = 0; - u16 wrtie_addr, remainder = addr % 4; + u16 write_addr, remainder = addr % 4; - wrtie_addr = REG_DBI_WDATA + remainder; - rtl_write_byte(rtlpriv, wrtie_addr, data); + write_addr = REG_DBI_WDATA + remainder; + rtl_write_byte(rtlpriv, write_addr, data); - wrtie_addr = (addr & 0xfffc) | (BIT(0) << (remainder + 12)); - rtl_write_word(rtlpriv, REG_DBI_ADDR, wrtie_addr); + write_addr = (addr & 0xfffc) | (BIT(0) << (remainder + 12)); + rtl_write_word(rtlpriv, REG_DBI_ADDR, write_addr); rtl_write_byte(rtlpriv, REG_DBI_FLAG, 0x1); From 1d1aa8f1ea247debf59c857f0d1bee324e43c9ea Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Oct 2017 10:03:31 +0800 Subject: [PATCH 1464/2177] rtlwifi: Remove seq_number from rtl_tid_data Since mac80211 maintains the sequence number for each STA/TID, driver doesn't need to maintain a copy. Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 6 ++---- drivers/net/wireless/realtek/rtlwifi/pci.c | 17 ----------------- drivers/net/wireless/realtek/rtlwifi/usb.c | 17 ----------------- drivers/net/wireless/realtek/rtlwifi/wifi.h | 1 - 4 files changed, 2 insertions(+), 39 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index ea90b460c017..7e3107f9e37f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1618,9 +1618,8 @@ int rtl_tx_agg_start(struct ieee80211_hw *hw, struct ieee80211_vif *vif, RT_TRACE(rtlpriv, COMP_SEND, DBG_DMESG, "on ra = %pM tid = %d seq:%d\n", sta->addr, tid, - tid_data->seq_number); + *ssn); - *ssn = tid_data->seq_number; tid_data->agg.agg_state = RTL_AGG_START; ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); @@ -1679,8 +1678,7 @@ int rtl_rx_agg_start(struct ieee80211_hw *hw, tid_data = &sta_entry->tids[tid]; RT_TRACE(rtlpriv, COMP_RECV, DBG_DMESG, - "on ra = %pM tid = %d seq:%d\n", sta->addr, tid, - tid_data->seq_number); + "on ra = %pM tid = %d\n", sta->addr, tid); tid_data->agg.rx_agg_state = RTL_RX_AGG_START; return 0; diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index b9a6d23364be..eb12818b46b3 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1623,7 +1623,6 @@ static int rtl_pci_tx(struct ieee80211_hw *hw, struct rtl_tcb_desc *ptcb_desc) { struct rtl_priv *rtlpriv = rtl_priv(hw); - struct rtl_sta_info *sta_entry = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct rtl8192_tx_ring *ring; struct rtl_tx_desc *pdesc; @@ -1635,9 +1634,6 @@ static int rtl_pci_tx(struct ieee80211_hw *hw, __le16 fc = rtl_get_fc(skb); u8 *pda_addr = hdr->addr1; struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - /*ssn */ - u8 tid = 0; - u16 seq_number = 0; u8 own; u8 temp_one = 1; @@ -1699,19 +1695,6 @@ static int 
rtl_pci_tx(struct ieee80211_hw *hw, return skb->len; } - if (ieee80211_is_data_qos(fc)) { - tid = rtl_get_tid(skb); - if (sta) { - sta_entry = (struct rtl_sta_info *)sta->drv_priv; - seq_number = (le16_to_cpu(hdr->seq_ctrl) & - IEEE80211_SCTL_SEQ) >> 4; - seq_number += 1; - - if (!ieee80211_has_morefrags(hdr->frame_control)) - sta_entry->tids[tid].seq_number = seq_number; - } - } - if (ieee80211_is_data(fc)) rtlpriv->cfg->ops->led_control(hw, LED_CTL_TX); diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 5590d07d0918..39b033b3b53a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -952,17 +952,12 @@ static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw, u16 hw_queue) { struct rtl_priv *rtlpriv = rtl_priv(hw); - struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct rtl_tx_desc *pdesc = NULL; struct rtl_tcb_desc tcb_desc; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)(skb->data); __le16 fc = hdr->frame_control; u8 *pda_addr = hdr->addr1; - /* ssn */ - u8 *qc = NULL; - u8 tid = 0; - u16 seq_number = 0; memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc)); if (ieee80211_is_auth(fc)) { @@ -983,20 +978,8 @@ static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw, rtlpriv->stats.txbytesbroadcast += skb->len; else rtlpriv->stats.txbytesunicast += skb->len; - if (ieee80211_is_data_qos(fc)) { - qc = ieee80211_get_qos_ctl(hdr); - tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK; - seq_number = (le16_to_cpu(hdr->seq_ctrl) & - IEEE80211_SCTL_SEQ) >> 4; - seq_number += 1; - seq_number <<= 4; - } rtlpriv->cfg->ops->fill_tx_desc(hw, hdr, (u8 *)pdesc, NULL, info, sta, skb, hw_queue, &tcb_desc); - if (!ieee80211_has_morefrags(hdr->frame_control)) { - if (qc) - mac->tids[tid].seq_number = seq_number; - } if (ieee80211_is_data(fc)) rtlpriv->cfg->ops->led_control(hw, LED_CTL_TX); } diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 7dfc73b554f1..22afc14c3da6 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -1324,7 +1324,6 @@ struct rssi_sta { }; struct rtl_tid_data { - u16 seq_number; struct rtl_ht_agg agg; }; From 399ba77a94e1ee01f747b168c429a121164ac962 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 25 Oct 2017 17:32:05 -0700 Subject: [PATCH 1465/2177] net: dsa: Simplify dsa_slave_phy_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the code that tried to identify if a PHY designated by Device Tree required diversion through the DSA-created MDIO bus. This was created mainly for the bcm_sf2.c driver back when it did not have its own MDIO bus driver, which it now has since 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus"). Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Reviewed-by: Andrew Lunn Tested-by: Martin Hundebøll Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d0ae7010ea45..808e205227c3 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1060,28 +1060,10 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) phy_flags = ds->ops->get_phy_flags(ds, dp->index); if (phy_dn) { - int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); - - /* If this PHY address is part of phys_mii_mask, which means - * that we need to divert reads and writes to/from it, then we - * want to bind this device using the slave MII bus created by - * DSA to make that happen. - */ - if (!phy_is_fixed && phy_id >= 0 && - (ds->phys_mii_mask & (1 << phy_id))) { - ret = dsa_slave_phy_connect(slave_dev, phy_id); - if (ret) { - netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret); - of_node_put(phy_dn); - return ret; - } - } else { - slave_dev->phydev = of_phy_connect(slave_dev, phy_dn, - dsa_slave_adjust_link, - phy_flags, - p->phy_interface); - } - + slave_dev->phydev = of_phy_connect(slave_dev, phy_dn, + dsa_slave_adjust_link, + phy_flags, + p->phy_interface); of_node_put(phy_dn); } From c0c21458d7d242adf9c3a3e2f3f95392164325ae Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 25 Oct 2017 18:01:05 -0700 Subject: [PATCH 1466/2177] net: systemport: Check DSA notifier master against ourself Check that the master network device that is signaled through the DSA notifier is actually going to be ourself, otherwise, we could just be de-referencing garbage from other drivers. Fixes: 84ff33eeb23d ("net: systemport: Establish DSA network device queue mapping") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 33 ++++++++++++---------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 1d9d5f986e14..dcee843d05d7 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2046,6 +2046,21 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb, return tx_ring->index; } +static const struct net_device_ops bcm_sysport_netdev_ops = { + .ndo_start_xmit = bcm_sysport_xmit, + .ndo_tx_timeout = bcm_sysport_tx_timeout, + .ndo_open = bcm_sysport_open, + .ndo_stop = bcm_sysport_stop, + .ndo_set_features = bcm_sysport_set_features, + .ndo_set_rx_mode = bcm_sysport_set_rx_mode, + .ndo_set_mac_address = bcm_sysport_change_mac, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = bcm_sysport_poll_controller, +#endif + .ndo_get_stats64 = bcm_sysport_get_stats64, + .ndo_select_queue = bcm_sysport_select_queue, +}; + static int bcm_sysport_map_queues(struct net_device *dev, struct dsa_notifier_register_info *info) { @@ -2061,6 +2076,9 @@ static int bcm_sysport_map_queues(struct net_device *dev, if (info->switch_number) return 0; + if (dev->netdev_ops != &bcm_sysport_netdev_ops) + return 0; + port = info->port_number; slave_dev = info->info.dev; @@ -2112,21 +2130,6 @@ static int bcm_sysport_dsa_notifier(struct notifier_block *unused, return notifier_from_errno(bcm_sysport_map_queues(info->master, info)); } -static const struct net_device_ops bcm_sysport_netdev_ops = { - .ndo_start_xmit = bcm_sysport_xmit, - .ndo_tx_timeout = bcm_sysport_tx_timeout, - .ndo_open = bcm_sysport_open, - .ndo_stop = bcm_sysport_stop, - .ndo_set_features = bcm_sysport_set_features, - 
.ndo_set_rx_mode = bcm_sysport_set_rx_mode, - .ndo_set_mac_address = bcm_sysport_change_mac, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = bcm_sysport_poll_controller, -#endif - .ndo_get_stats64 = bcm_sysport_get_stats64, - .ndo_select_queue = bcm_sysport_select_queue, -}; - #define REV_FMT "v%2x.%02x" static const struct bcm_sysport_hw_params bcm_sysport_params[] = { From 392209fa833287a1c5532ffbb098bba584a69dbc Mon Sep 17 00:00:00 2001 From: Felix Manlunas Date: Wed, 25 Oct 2017 18:04:56 -0700 Subject: [PATCH 1467/2177] liquidio: deprecate 1-bit flag indicating watchdog kernel thread is running Deprecate the 1-bit flag (bit 2 in the SLI_SCRATCH_1 Octeon register) that indicates that the liquidio watchdog kernel thread is running for this NIC. Reason is: it is incompatible with the firmware's use for SLI_SCRATCH_1. In lieu of checking that now-deprecated flag, check the value of oct_dev->adapter_refcount to determine whether or not to create the watchdog kernel thread. Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index b4f753c56308..accd038f3f34 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1196,19 +1196,13 @@ liquidio_probe(struct pci_dev *pdev, } if (OCTEON_CN23XX_PF(oct_dev)) { - u64 scratch1; u8 bus, device, function; - scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1); - if (!(scratch1 & 4ULL)) { - /* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that - * the lio watchdog kernel thread is running for this - * NIC. Each NIC gets one watchdog kernel thread. + if (atomic_read(oct_dev->adapter_refcount) == 1) { + /* Each NIC gets one watchdog kernel thread. The first + * PF (of each NIC) that gets pci_driver->probe()'d + * creates that thread. */ - scratch1 |= 4ULL; - octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1, - scratch1); - bus = pdev->bus->number; device = PCI_SLOT(pdev->devfn); function = PCI_FUNC(pdev->devfn); From 035226b964c820f65e201cdf123705a8f1d7c670 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Thu, 26 Oct 2017 01:47:42 +0000 Subject: [PATCH 1468/2177] bpf: remove tail_call and get_stackid helper declarations from bpf.h commit afdb09c720b6 ("security: bpf: Add LSM hooks for bpf object related syscall") included linux/bpf.h in linux/security.h. As a result, bpf programs including bpf_helpers.h and some other header that ends up pulling in also security.h, such as several examples under samples/bpf, fail to compile because bpf_tail_call and bpf_get_stackid are now "redefined as different kind of symbol". >From bpf.h: u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); Whereas in bpf_helpers.h they are: static void (*bpf_tail_call)(void *ctx, void *map, int index); static int (*bpf_get_stackid)(void *ctx, void *map, int flags); Fix this by removing the unused declaration of bpf_tail_call and moving the declaration of bpf_get_stackid in bpf_trace.c, which is the only place where it's needed. Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 3 --- kernel/trace/bpf_trace.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 172be7faf7ba..520aeebe0d93 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -231,9 +231,6 @@ struct bpf_event_entry { struct rcu_head rcu; }; -u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); -u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); - bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b65011d320e3..136aa6bb0422 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -15,6 +15,8 @@ #include #include "trace.h" +u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + /** * trace_call_bpf - invoke BPF program * @call: tracepoint event From 20665a9076d48e9abd9a2db13d307f58f7ef6647 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 19 Oct 2017 11:45:19 -0700 Subject: [PATCH 1469/2177] ath10k: fix build errors with !CONFIG_PM Build errors have been reported with CONFIG_PM=n: drivers/net/wireless/ath/ath10k/pci.c:3416:8: error: implicit declaration of function 'ath10k_pci_suspend' [-Werror=implicit-function-declaration] drivers/net/wireless/ath/ath10k/pci.c:3428:8: error: implicit declaration of function 'ath10k_pci_resume' [-Werror=implicit-function-declaration] These are caused by the combination of the following two commits: 6af1de2e4ec4 ("ath10k: mark PM functions as __maybe_unused") 96378bd2c6cd ("ath10k: fix core PCI suspend when WoWLAN is supported but disabled") Both build fine on their own. But now that ath10k_pci_pm_{suspend,resume}() is compiled unconditionally, we should also compile ath10k_pci_{suspend,resume}() unconditionally. And drop the #ifdef around ath10k_pci_hif_{suspend,resume}() too; they are trivial (empty), so we're not saving much space by compiling them out. And the alternatives would be to sprinkle more __maybe_unused, or spread the #ifdef's further. Build tested with the following combinations: CONFIG_PM=y && CONFIG_PM_SLEEP=y CONFIG_PM=y && CONFIG_PM_SLEEP=n CONFIG_PM=n Fixes: 96378bd2c6cd ("ath10k: fix core PCI suspend when WoWLAN is supported but disabled") Fixes: 096ad2a15fd8 ("Merge branch 'ath-next'") Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index b18a9b690df4..d790ea20b95d 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -2577,8 +2577,6 @@ void ath10k_pci_hif_power_down(struct ath10k *ar) */ } -#ifdef CONFIG_PM - static int ath10k_pci_hif_suspend(struct ath10k *ar) { /* Nothing to do; the important stuff is in the driver suspend. 
*/ @@ -2627,7 +2625,6 @@ static int ath10k_pci_resume(struct ath10k *ar) return ret; } -#endif static bool ath10k_pci_validate_cal(void *data, size_t size) { @@ -2782,10 +2779,8 @@ static const struct ath10k_hif_ops ath10k_pci_hif_ops = { .power_down = ath10k_pci_hif_power_down, .read32 = ath10k_pci_read32, .write32 = ath10k_pci_write32, -#ifdef CONFIG_PM .suspend = ath10k_pci_hif_suspend, .resume = ath10k_pci_hif_resume, -#endif .fetch_cal_eeprom = ath10k_pci_hif_fetch_cal_eeprom, }; From dd0f9cd6bc6a6d663b23f1d1a84c970d1f042b1c Mon Sep 17 00:00:00 2001 From: Anilkumar Kolli Date: Fri, 27 Oct 2017 11:12:27 +0300 Subject: [PATCH 1470/2177] ath10k: move ath10k_mac_tdls_vif*() functions To be able to use ath10k_mac_tdls_vif_stations_count() in ath10k_hw_scan() in the following patch, move the functions earlier in the file. This commit is pure code move, no functional changes. Signed-off-by: Anilkumar Kolli Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 106 +++++++++++++------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 6dbf8a2453ba..485d1966a387 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5585,6 +5585,59 @@ static void ath10k_mac_op_set_coverage_class(struct ieee80211_hw *hw, s16 value) ar->hw_params.hw_ops->set_coverage_class(ar, value); } +struct ath10k_mac_tdls_iter_data { + u32 num_tdls_stations; + struct ieee80211_vif *curr_vif; +}; + +static void ath10k_mac_tdls_vif_stations_count_iter(void *data, + struct ieee80211_sta *sta) +{ + struct ath10k_mac_tdls_iter_data *iter_data = data; + struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; + struct ieee80211_vif *sta_vif = arsta->arvif->vif; + + if (sta->tdls && sta_vif == iter_data->curr_vif) + iter_data->num_tdls_stations++; +} + +static int ath10k_mac_tdls_vif_stations_count(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ath10k_mac_tdls_iter_data data = {}; + + data.curr_vif = vif; + + ieee80211_iterate_stations_atomic(hw, + ath10k_mac_tdls_vif_stations_count_iter, + &data); + return data.num_tdls_stations; +} + +static void ath10k_mac_tdls_vifs_count_iter(void *data, u8 *mac, + struct ieee80211_vif *vif) +{ + struct ath10k_vif *arvif = (void *)vif->drv_priv; + int *num_tdls_vifs = data; + + if (vif->type != NL80211_IFTYPE_STATION) + return; + + if (ath10k_mac_tdls_vif_stations_count(arvif->ar->hw, vif) > 0) + (*num_tdls_vifs)++; +} + +static int ath10k_mac_tdls_vifs_count(struct ieee80211_hw *hw) +{ + int num_tdls_vifs = 0; + + ieee80211_iterate_active_interfaces_atomic(hw, + IEEE80211_IFACE_ITER_NORMAL, + ath10k_mac_tdls_vifs_count_iter, + &num_tdls_vifs); + return num_tdls_vifs; +} + static int ath10k_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_scan_request *hw_req) @@ -6013,59 +6066,6 @@ static void ath10k_mac_dec_num_stations(struct ath10k_vif *arvif, ar->num_stations--; } -struct ath10k_mac_tdls_iter_data { - u32 num_tdls_stations; - struct ieee80211_vif *curr_vif; -}; - -static void ath10k_mac_tdls_vif_stations_count_iter(void *data, - struct ieee80211_sta *sta) -{ - struct ath10k_mac_tdls_iter_data *iter_data = data; - struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; - struct ieee80211_vif *sta_vif = arsta->arvif->vif; - - if (sta->tdls && sta_vif == iter_data->curr_vif) - iter_data->num_tdls_stations++; -} - -static int ath10k_mac_tdls_vif_stations_count(struct 
ieee80211_hw *hw, - struct ieee80211_vif *vif) -{ - struct ath10k_mac_tdls_iter_data data = {}; - - data.curr_vif = vif; - - ieee80211_iterate_stations_atomic(hw, - ath10k_mac_tdls_vif_stations_count_iter, - &data); - return data.num_tdls_stations; -} - -static void ath10k_mac_tdls_vifs_count_iter(void *data, u8 *mac, - struct ieee80211_vif *vif) -{ - struct ath10k_vif *arvif = (void *)vif->drv_priv; - int *num_tdls_vifs = data; - - if (vif->type != NL80211_IFTYPE_STATION) - return; - - if (ath10k_mac_tdls_vif_stations_count(arvif->ar->hw, vif) > 0) - (*num_tdls_vifs)++; -} - -static int ath10k_mac_tdls_vifs_count(struct ieee80211_hw *hw) -{ - int num_tdls_vifs = 0; - - ieee80211_iterate_active_interfaces_atomic(hw, - IEEE80211_IFACE_ITER_NORMAL, - ath10k_mac_tdls_vifs_count_iter, - &num_tdls_vifs); - return num_tdls_vifs; -} - static int ath10k_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, From a60809313b96c9489780944e6df45357b20d2ef5 Mon Sep 17 00:00:00 2001 From: Anilkumar Kolli Date: Fri, 27 Oct 2017 11:12:28 +0300 Subject: [PATCH 1471/2177] ath10k: block offchannel operations if TDLS session is active Do not allow off channel operations like scans/roc when there are active TDLS sessions. The Current firmware 10.4-3.5.1-00035 on QCA9888 does not supports any offchannel operations on active TDLS sessions, either driver needs to block the offchannel operation requests or should teardown the TDLS connection. Signed-off-by: Anilkumar Kolli Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 485d1966a387..0a947eef348d 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5651,6 +5651,11 @@ static int ath10k_hw_scan(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); + if (ath10k_mac_tdls_vif_stations_count(hw, vif) > 0) { + ret = -EBUSY; + goto exit; + } + spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: @@ -6490,6 +6495,11 @@ static int ath10k_remain_on_channel(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); + if (ath10k_mac_tdls_vif_stations_count(hw, vif) > 0) { + ret = -EBUSY; + goto exit; + } + spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: From e5b6853e9922f415765dd8032761e726be94d8b0 Mon Sep 17 00:00:00 2001 From: Anilkumar Kolli Date: Fri, 27 Oct 2017 11:12:29 +0300 Subject: [PATCH 1472/2177] ath10k: fix sending wmi cmd during the tdls teardown The current firmware 10.4-3.5.1-00035 on QCA9888 supports TDLS explicit mode, it expects WMI_TDLS_ENABLE_PASSIVE for tdls setup and WMI_TDLS_DISABLE for tdls teardown. 
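A minimal sketch of the intended state selection, assuming the existing enum wmi_tdls_state values; the helper name below is hypothetical and only illustrates the condition added in the diff that follows, it is not the driver's own function:

/* Remap an enable request to explicit (passive) mode when the firmware
 * only supports that mode, but let a teardown request (WMI_TDLS_DISABLE)
 * reach the firmware unchanged.
 */
static enum wmi_tdls_state
pick_tdls_state(bool explicit_mode_only, enum wmi_tdls_state requested)
{
	if (explicit_mode_only && requested == WMI_TDLS_ENABLE_ACTIVE)
		return WMI_TDLS_ENABLE_PASSIVE;

	return requested;
}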
Signed-off-by: Anilkumar Kolli Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 38a97086708b..cad2e42dcef6 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -7870,7 +7870,8 @@ ath10k_wmi_10_4_gen_update_fw_tdls_state(struct ath10k *ar, u32 vdev_id, if (!skb) return ERR_PTR(-ENOMEM); - if (test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map)) + if (test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map) && + state == WMI_TDLS_ENABLE_ACTIVE) state = WMI_TDLS_ENABLE_PASSIVE; if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map)) From d83c0d3293bbf3fe40ce9e1ac273309a44bbde91 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Fri, 27 Oct 2017 11:12:35 +0300 Subject: [PATCH 1473/2177] ath10k: spectral: remove redundant check in write_file_spectral_count() Variable val is unsigned, so checking whether it is less than zero is redundant. Signed-off-by: Christos Gkekas Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/spectral.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/spectral.c b/drivers/net/wireless/ath/ath10k/spectral.c index dd9cc0939ea8..2048b1e5262b 100644 --- a/drivers/net/wireless/ath/ath10k/spectral.c +++ b/drivers/net/wireless/ath/ath10k/spectral.c @@ -406,7 +406,7 @@ static ssize_t write_file_spectral_count(struct file *file, if (kstrtoul(buf, 0, &val)) return -EINVAL; - if (val < 0 || val > 255) + if (val > 255) return -EINVAL; mutex_lock(&ar->conf_mutex); From 9d414949a3061ba1a76fbc7900e00902968df4ef Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Fri, 27 Oct 2017 11:12:37 +0300 Subject: [PATCH 1474/2177] ath9k: debug: Remove redundant check Variable val is unsigned, so checking whether it is less than zero is redundant. Signed-off-by: Christos Gkekas Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 5a0a05abd51a..9e8aed5c478c 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -1167,7 +1167,7 @@ static ssize_t write_file_tpc(struct file *file, const char __user *user_buf, if (kstrtoul(buf, 0, &val)) return -EINVAL; - if (val < 0 || val > 1) + if (val > 1) return -EINVAL; tpc_enabled = !!val; From 6bac0d2cda715893568d896d248d3bbd083b8288 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Oct 2017 11:12:38 +0300 Subject: [PATCH 1475/2177] wil6210: remove wil6210_uapi.h from MAINTAINERS This file doesn't exist. 
Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f218fe1e43fe..a28f0aabed05 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14565,7 +14565,6 @@ L: wil6210@qca.qualcomm.com S: Supported W: http://wireless.kernel.org/en/users/Drivers/wil6210 F: drivers/net/wireless/ath/wil6210/ -F: include/uapi/linux/wil6210_uapi.h WIMAX STACK M: Inaky Perez-Gonzalez From 698dbbf82bbf549593b78dfb6b8269522adfb62d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Oct 2017 11:12:41 +0300 Subject: [PATCH 1476/2177] wil6210: remove SSID debugfs This driver shouldn't be using wdev->ssid to start with, as it's more or less an internal field in cfg80211 used for various purposes. Reading it is possible through nl80211, even if that's not really what we should be doing there for anything but AP type interfaces. It *really* shouldn't allow modifying it! Remove the whole debugfs entry. Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/debugfs.c | 45 ---------------------- 1 file changed, 45 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index 6db00c167d2e..e58dc6dc1f9c 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1048,50 +1048,6 @@ static const struct file_operations fops_bf = { .llseek = seq_lseek, }; -/*---------SSID------------*/ -static ssize_t wil_read_file_ssid(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct wil6210_priv *wil = file->private_data; - struct wireless_dev *wdev = wil_to_wdev(wil); - - return simple_read_from_buffer(user_buf, count, ppos, - wdev->ssid, wdev->ssid_len); -} - -static ssize_t wil_write_file_ssid(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct wil6210_priv *wil = file->private_data; - struct wireless_dev *wdev = wil_to_wdev(wil); - struct net_device *ndev = wil_to_ndev(wil); - - if (*ppos != 0) { - wil_err(wil, "Unable to set SSID substring from [%d]\n", - (int)*ppos); - return -EINVAL; - } - - if (count > sizeof(wdev->ssid)) { - wil_err(wil, "SSID too long, len = %d\n", (int)count); - return -EINVAL; - } - if (netif_running(ndev)) { - wil_err(wil, "Unable to change SSID on running interface\n"); - return -EINVAL; - } - - wdev->ssid_len = count; - return simple_write_to_buffer(wdev->ssid, wdev->ssid_len, ppos, - buf, count); -} - -static const struct file_operations fops_ssid = { - .read = wil_read_file_ssid, - .write = wil_write_file_ssid, - .open = simple_open, -}; - /*---------temp------------*/ static void print_temp(struct seq_file *s, const char *prefix, u32 t) { @@ -1695,7 +1651,6 @@ static const struct { {"stations", 0444, &fops_sta}, {"desc", 0444, &fops_txdesc}, {"bf", 0444, &fops_bf}, - {"ssid", 0644, &fops_ssid}, {"mem_val", 0644, &fops_memread}, {"reset", 0244, &fops_reset}, {"rxon", 0244, &fops_rxon}, From 7ac767645ab1f2f96b2c84ee8857d72d25c4eadd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:29:54 -0700 Subject: [PATCH 1477/2177] ath: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
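For readers new to the API being adopted here, a minimal before/after sketch of the conversion; foo_priv, foo_poll_old and foo_poll_new are hypothetical names used only for illustration and do not appear in any of the drivers touched below:

#include <linux/timer.h>

struct foo_priv {
	struct timer_list poll_timer;
	unsigned long polls;
};

/* Old style: the callback takes an opaque unsigned long that the driver
 * casts back to its private structure.
 */
static void foo_poll_old(unsigned long data)
{
	struct foo_priv *priv = (struct foo_priv *)data;

	priv->polls++;
}
/* setup_timer(&priv->poll_timer, foo_poll_old, (unsigned long)priv); */

/* New style: the callback receives the struct timer_list pointer and
 * from_timer() (a container_of() wrapper) recovers the enclosing
 * structure from the embedded timer field.
 */
static void foo_poll_new(struct timer_list *t)
{
	struct foo_priv *priv = from_timer(priv, t, poll_timer);

	priv->polls++;
}
/* timer_setup(&priv->poll_timer, foo_poll_new, 0); */

The third argument of timer_setup() is a flags word rather than the old callback data; it is 0 in every hunk of this patch.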
Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ar5523/ar5523.c | 7 +++---- drivers/net/wireless/ath/ath10k/htt_rx.c | 6 +++--- drivers/net/wireless/ath/ath10k/pci.c | 17 ++++++++--------- drivers/net/wireless/ath/ath10k/pci.h | 2 +- drivers/net/wireless/ath/ath6kl/cfg80211.c | 6 ++---- drivers/net/wireless/ath/ath6kl/core.h | 2 +- drivers/net/wireless/ath/ath6kl/main.c | 5 ++--- drivers/net/wireless/ath/ath6kl/txrx.c | 6 +++--- drivers/net/wireless/ath/ath6kl/wmi.c | 4 ++-- drivers/net/wireless/ath/ath6kl/wmi.h | 2 +- drivers/net/wireless/ath/ath9k/ath9k.h | 4 ++-- drivers/net/wireless/ath/ath9k/channel.c | 14 ++++++-------- drivers/net/wireless/ath/ath9k/gpio.c | 14 ++++++-------- drivers/net/wireless/ath/ath9k/htc.h | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 3 +-- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 4 ++-- drivers/net/wireless/ath/ath9k/init.c | 4 ++-- drivers/net/wireless/ath/ath9k/link.c | 6 +++--- drivers/net/wireless/ath/ath9k/main.c | 4 ++-- drivers/net/wireless/ath/wil6210/main.c | 15 +++++++-------- drivers/net/wireless/ath/wil6210/p2p.c | 4 ++-- drivers/net/wireless/ath/wil6210/wil6210.h | 2 +- 22 files changed, 61 insertions(+), 72 deletions(-) diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 68f0463ed8df..b94759daeacc 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -889,9 +889,9 @@ static void ar5523_tx_work(struct work_struct *work) mutex_unlock(&ar->mutex); } -static void ar5523_tx_wd_timer(unsigned long arg) +static void ar5523_tx_wd_timer(struct timer_list *t) { - struct ar5523 *ar = (struct ar5523 *) arg; + struct ar5523 *ar = from_timer(ar, t, tx_wd_timer); ar5523_dbg(ar, "TX watchdog timer triggered\n"); ieee80211_queue_work(ar->hw, &ar->tx_wd_work); @@ -1599,8 +1599,7 @@ static int ar5523_probe(struct usb_interface *intf, mutex_init(&ar->mutex); INIT_DELAYED_WORK(&ar->stat_work, ar5523_stat_work); - init_timer(&ar->tx_wd_timer); - setup_timer(&ar->tx_wd_timer, ar5523_tx_wd_timer, (unsigned long) ar); + timer_setup(&ar->tx_wd_timer, ar5523_tx_wd_timer, 0); INIT_WORK(&ar->tx_wd_work, ar5523_tx_wd_work); INIT_WORK(&ar->tx_work, ar5523_tx_work); INIT_LIST_HEAD(&ar->tx_queue_pending); diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index a3f5dc78353f..f068376ec565 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -200,9 +200,9 @@ static void ath10k_htt_rx_msdu_buff_replenish(struct ath10k_htt *htt) spin_unlock_bh(&htt->rx_ring.lock); } -static void ath10k_htt_rx_ring_refill_retry(unsigned long arg) +static void ath10k_htt_rx_ring_refill_retry(struct timer_list *t) { - struct ath10k_htt *htt = (struct ath10k_htt *)arg; + struct ath10k_htt *htt = from_timer(htt, t, rx_ring.refill_retry_timer); ath10k_htt_rx_msdu_buff_replenish(htt); } @@ -507,7 +507,7 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) *htt->rx_ring.alloc_idx.vaddr = 0; /* Initialize the Rx refill retry timer */ - setup_timer(timer, ath10k_htt_rx_ring_refill_retry, (unsigned long)htt); + timer_setup(timer, ath10k_htt_rx_ring_refill_retry, 0); spin_lock_init(&htt->rx_ring.lock); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index d790ea20b95d..ffea348b2190 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ 
b/drivers/net/wireless/ath/ath10k/pci.c @@ -585,10 +585,10 @@ skip: spin_unlock_irqrestore(&ar_pci->ps_lock, flags); } -static void ath10k_pci_ps_timer(unsigned long ptr) +static void ath10k_pci_ps_timer(struct timer_list *t) { - struct ath10k *ar = (void *)ptr; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + struct ath10k_pci *ar_pci = from_timer(ar_pci, t, ps_timer); + struct ath10k *ar = ar_pci->ar; unsigned long flags; spin_lock_irqsave(&ar_pci->ps_lock, flags); @@ -838,9 +838,10 @@ void ath10k_pci_rx_post(struct ath10k *ar) ath10k_pci_rx_post_pipe(&ar_pci->pipe_info[i]); } -void ath10k_pci_rx_replenish_retry(unsigned long ptr) +void ath10k_pci_rx_replenish_retry(struct timer_list *t) { - struct ath10k *ar = (void *)ptr; + struct ath10k_pci *ar_pci = from_timer(ar_pci, t, rx_post_retry); + struct ath10k *ar = ar_pci->ar; ath10k_pci_rx_post(ar); } @@ -3164,8 +3165,7 @@ int ath10k_pci_setup_resource(struct ath10k *ar) spin_lock_init(&ce->ce_lock); spin_lock_init(&ar_pci->ps_lock); - setup_timer(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, - (unsigned long)ar); + timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0); if (QCA_REV_6174(ar) || QCA_REV_9377(ar)) ath10k_pci_override_ce_config(ar); @@ -3291,8 +3291,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev, ar->id.subsystem_vendor = pdev->subsystem_vendor; ar->id.subsystem_device = pdev->subsystem_device; - setup_timer(&ar_pci->ps_timer, ath10k_pci_ps_timer, - (unsigned long)ar); + timer_setup(&ar_pci->ps_timer, ath10k_pci_ps_timer, 0); ret = ath10k_pci_setup_resource(ar); if (ret) { diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h index 424ff323b2dc..08704fbc11e3 100644 --- a/drivers/net/wireless/ath/ath10k/pci.h +++ b/drivers/net/wireless/ath/ath10k/pci.h @@ -278,7 +278,7 @@ void ath10k_pci_hif_power_down(struct ath10k *ar); int ath10k_pci_alloc_pipes(struct ath10k *ar); void ath10k_pci_free_pipes(struct ath10k *ar); void ath10k_pci_free_pipes(struct ath10k *ar); -void ath10k_pci_rx_replenish_retry(unsigned long ptr); +void ath10k_pci_rx_replenish_retry(struct timer_list *t); void ath10k_pci_ce_deinit(struct ath10k *ar); void ath10k_pci_init_napi(struct ath10k *ar); int ath10k_pci_init_pipes(struct ath10k *ar); diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 414b5b596efc..b53eb2b85f02 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -3589,10 +3589,8 @@ static int ath6kl_cfg80211_vif_init(struct ath6kl_vif *vif) return -ENOMEM; } - setup_timer(&vif->disconnect_timer, disconnect_timer_handler, - (unsigned long) vif->ndev); - setup_timer(&vif->sched_scan_timer, ath6kl_wmi_sscan_timer, - (unsigned long) vif); + timer_setup(&vif->disconnect_timer, disconnect_timer_handler, 0); + timer_setup(&vif->sched_scan_timer, ath6kl_wmi_sscan_timer, 0); set_bit(WMM_ENABLED, &vif->flags); spin_lock_init(&vif->if_lock); diff --git a/drivers/net/wireless/ath/ath6kl/core.h b/drivers/net/wireless/ath/ath6kl/core.h index 87e99c12d4ba..e23d450babd2 100644 --- a/drivers/net/wireless/ath/ath6kl/core.h +++ b/drivers/net/wireless/ath/ath6kl/core.h @@ -893,7 +893,7 @@ static inline u32 ath6kl_get_hi_item_addr(struct ath6kl *ar, int ath6kl_configure_target(struct ath6kl *ar); void ath6kl_detect_error(unsigned long ptr); -void disconnect_timer_handler(unsigned long ptr); +void disconnect_timer_handler(struct timer_list *t); void init_netdev(struct net_device *dev); void 
ath6kl_cookie_init(struct ath6kl *ar); void ath6kl_cookie_cleanup(struct ath6kl *ar); diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c index b90c77ef792e..db95f85751e3 100644 --- a/drivers/net/wireless/ath/ath6kl/main.c +++ b/drivers/net/wireless/ath/ath6kl/main.c @@ -494,10 +494,9 @@ void ath6kl_connect_ap_mode_sta(struct ath6kl_vif *vif, u16 aid, u8 *mac_addr, netif_wake_queue(vif->ndev); } -void disconnect_timer_handler(unsigned long ptr) +void disconnect_timer_handler(struct timer_list *t) { - struct net_device *dev = (struct net_device *)ptr; - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = from_timer(vif, t, disconnect_timer); ath6kl_init_profile_info(vif); ath6kl_disconnect(vif); diff --git a/drivers/net/wireless/ath/ath6kl/txrx.c b/drivers/net/wireless/ath/ath6kl/txrx.c index 4e5cc2b7045a..1379906bf849 100644 --- a/drivers/net/wireless/ath/ath6kl/txrx.c +++ b/drivers/net/wireless/ath/ath6kl/txrx.c @@ -1620,10 +1620,10 @@ void ath6kl_rx(struct htc_target *target, struct htc_packet *packet) ath6kl_deliver_frames_to_nw_stack(vif->ndev, skb); } -static void aggr_timeout(unsigned long arg) +static void aggr_timeout(struct timer_list *t) { u8 i, j; - struct aggr_info_conn *aggr_conn = (struct aggr_info_conn *) arg; + struct aggr_info_conn *aggr_conn = from_timer(aggr_conn, t, timer); struct rxtid *rxtid; struct rxtid_stats *stats; @@ -1753,7 +1753,7 @@ void aggr_conn_init(struct ath6kl_vif *vif, struct aggr_info *aggr_info, aggr_conn->aggr_sz = AGGR_SZ_DEFAULT; aggr_conn->dev = vif->ndev; - setup_timer(&aggr_conn->timer, aggr_timeout, (unsigned long)aggr_conn); + timer_setup(&aggr_conn->timer, aggr_timeout, 0); aggr_conn->aggr_info = aggr_info; aggr_conn->timer_scheduled = false; diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index bfc20b45b806..777acc564ac9 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -1078,9 +1078,9 @@ static int ath6kl_wmi_tkip_micerr_event_rx(struct wmi *wmi, u8 *datap, int len, return 0; } -void ath6kl_wmi_sscan_timer(unsigned long ptr) +void ath6kl_wmi_sscan_timer(struct timer_list *t) { - struct ath6kl_vif *vif = (struct ath6kl_vif *) ptr; + struct ath6kl_vif *vif = from_timer(vif, t, sched_scan_timer); cfg80211_sched_scan_results(vif->ar->wiphy, 0); } diff --git a/drivers/net/wireless/ath/ath6kl/wmi.h b/drivers/net/wireless/ath/ath6kl/wmi.h index 3af464a73b58..a60bb49fe920 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.h +++ b/drivers/net/wireless/ath/ath6kl/wmi.h @@ -2719,7 +2719,7 @@ int ath6kl_wmi_set_appie_cmd(struct wmi *wmi, u8 if_idx, u8 mgmt_frm_type, int ath6kl_wmi_set_inact_period(struct wmi *wmi, u8 if_idx, int inact_timeout); -void ath6kl_wmi_sscan_timer(unsigned long ptr); +void ath6kl_wmi_sscan_timer(struct timer_list *t); int ath6kl_wmi_get_challenge_resp_cmd(struct wmi *wmi, u32 cookie, u32 source); diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index cf076719c27e..ef0de4f1312c 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -750,14 +750,14 @@ void ath_reset_work(struct work_struct *work); bool ath_hw_check(struct ath_softc *sc); void ath_hw_pll_work(struct work_struct *work); void ath_paprd_calibrate(struct work_struct *work); -void ath_ani_calibrate(unsigned long data); +void ath_ani_calibrate(struct timer_list *t); void ath_start_ani(struct ath_softc *sc); void ath_stop_ani(struct 
ath_softc *sc); void ath_check_ani(struct ath_softc *sc); int ath_update_survey_stats(struct ath_softc *sc); void ath_update_survey_nf(struct ath_softc *sc, int channel); void ath9k_queue_reset(struct ath_softc *sc, enum ath_reset_type type); -void ath_ps_full_sleep(unsigned long data); +void ath_ps_full_sleep(struct timer_list *t); void __ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop, bool sw_pending, bool timeout_override); diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index fad020aa222e..dfb26f03c1a2 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -1043,9 +1043,9 @@ static void ath_scan_channel_start(struct ath_softc *sc) mod_timer(&sc->offchannel.timer, jiffies + sc->offchannel.duration); } -static void ath_chanctx_timer(unsigned long data) +static void ath_chanctx_timer(struct timer_list *t) { - struct ath_softc *sc = (struct ath_softc *) data; + struct ath_softc *sc = from_timer(sc, t, sched.timer); struct ath_common *common = ath9k_hw_common(sc->sc_ah); ath_dbg(common, CHAN_CTX, @@ -1054,9 +1054,9 @@ static void ath_chanctx_timer(unsigned long data) ath_chanctx_event(sc, NULL, ATH_CHANCTX_EVENT_TSF_TIMER); } -static void ath_offchannel_timer(unsigned long data) +static void ath_offchannel_timer(struct timer_list *t) { - struct ath_softc *sc = (struct ath_softc *)data; + struct ath_softc *sc = from_timer(sc, t, offchannel.timer); struct ath_chanctx *ctx; struct ath_common *common = ath9k_hw_common(sc->sc_ah); @@ -1362,10 +1362,8 @@ void ath9k_init_channel_context(struct ath_softc *sc) { INIT_WORK(&sc->chanctx_work, ath_chanctx_work); - setup_timer(&sc->offchannel.timer, ath_offchannel_timer, - (unsigned long)sc); - setup_timer(&sc->sched.timer, ath_chanctx_timer, - (unsigned long)sc); + timer_setup(&sc->offchannel.timer, ath_offchannel_timer, 0); + timer_setup(&sc->sched.timer, ath_chanctx_timer, 0); init_completion(&sc->go_beacon); } diff --git a/drivers/net/wireless/ath/ath9k/gpio.c b/drivers/net/wireless/ath/ath9k/gpio.c index ddb28861e7fe..b457e52dd365 100644 --- a/drivers/net/wireless/ath/ath9k/gpio.c +++ b/drivers/net/wireless/ath/ath9k/gpio.c @@ -191,9 +191,9 @@ static void ath_mci_ftp_adjust(struct ath_softc *sc) * 45ms, bt traffic will be given priority during 55% of this * period while wlan gets remaining 45% */ -static void ath_btcoex_period_timer(unsigned long data) +static void ath_btcoex_period_timer(struct timer_list *t) { - struct ath_softc *sc = (struct ath_softc *) data; + struct ath_softc *sc = from_timer(sc, t, btcoex.period_timer); struct ath_hw *ah = sc->sc_ah; struct ath_btcoex *btcoex = &sc->btcoex; enum ath_stomp_type stomp_type; @@ -252,9 +252,9 @@ skip_hw_wakeup: * Generic tsf based hw timer which configures weight * registers to time slice between wlan and bt traffic */ -static void ath_btcoex_no_stomp_timer(unsigned long arg) +static void ath_btcoex_no_stomp_timer(struct timer_list *t) { - struct ath_softc *sc = (struct ath_softc *)arg; + struct ath_softc *sc = from_timer(sc, t, btcoex.no_stomp_timer); struct ath_hw *ah = sc->sc_ah; struct ath_btcoex *btcoex = &sc->btcoex; @@ -284,10 +284,8 @@ static void ath_init_btcoex_timer(struct ath_softc *sc) btcoex->btcoex_period / 100; btcoex->bt_stomp_type = ATH_BTCOEX_STOMP_LOW; - setup_timer(&btcoex->period_timer, ath_btcoex_period_timer, - (unsigned long) sc); - setup_timer(&btcoex->no_stomp_timer, ath_btcoex_no_stomp_timer, - (unsigned long) sc); + timer_setup(&btcoex->period_timer, 
ath_btcoex_period_timer, 0); + timer_setup(&btcoex->no_stomp_timer, ath_btcoex_no_stomp_timer, 0); spin_lock_init(&btcoex->btcoex_lock); } diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 16dff4b89a86..9f64e32381f9 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -584,7 +584,7 @@ void ath9k_htc_tx_clear_slot(struct ath9k_htc_priv *priv, int slot); void ath9k_htc_tx_drain(struct ath9k_htc_priv *priv); void ath9k_htc_txstatus(struct ath9k_htc_priv *priv, void *wmi_event); void ath9k_tx_failed_tasklet(unsigned long data); -void ath9k_htc_tx_cleanup_timer(unsigned long data); +void ath9k_htc_tx_cleanup_timer(struct timer_list *t); bool ath9k_htc_csa_is_finished(struct ath9k_htc_priv *priv); int ath9k_rx_init(struct ath9k_htc_priv *priv); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index 57de6fa6ff03..e89e5ef2c2a4 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -654,8 +654,7 @@ static int ath9k_init_priv(struct ath9k_htc_priv *priv, INIT_DELAYED_WORK(&priv->ani_work, ath9k_htc_ani_work); INIT_WORK(&priv->ps_work, ath9k_ps_work); INIT_WORK(&priv->fatal_work, ath9k_fatal_work); - setup_timer(&priv->tx.cleanup_timer, ath9k_htc_tx_cleanup_timer, - (unsigned long)priv); + timer_setup(&priv->tx.cleanup_timer, ath9k_htc_tx_cleanup_timer, 0); /* * Cache line size is used to size and align various diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 2682da02da54..585736a837ed 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -752,9 +752,9 @@ static void ath9k_htc_tx_cleanup_queue(struct ath9k_htc_priv *priv, } } -void ath9k_htc_tx_cleanup_timer(unsigned long data) +void ath9k_htc_tx_cleanup_timer(struct timer_list *t) { - struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) data; + struct ath9k_htc_priv *priv = from_timer(priv, t, tx.cleanup_timer); struct ath_common *common = ath9k_hw_common(priv->ah); struct ath9k_htc_tx_event *event, *tmp; struct sk_buff *skb; diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 4bc403620cf9..fa58a32227f5 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -369,7 +369,7 @@ static void ath9k_init_misc(struct ath_softc *sc) struct ath_common *common = ath9k_hw_common(sc->sc_ah); int i = 0; - setup_timer(&common->ani.timer, ath_ani_calibrate, (unsigned long)sc); + timer_setup(&common->ani.timer, ath_ani_calibrate, 0); common->last_rssi = ATH_RSSI_DUMMY_MARKER; memcpy(common->bssidmask, ath_bcast_mac, ETH_ALEN); @@ -678,7 +678,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, tasklet_init(&sc->bcon_tasklet, ath9k_beacon_tasklet, (unsigned long)sc); - setup_timer(&sc->sleep_timer, ath_ps_full_sleep, (unsigned long)sc); + timer_setup(&sc->sleep_timer, ath_ps_full_sleep, 0); INIT_WORK(&sc->hw_reset_work, ath_reset_work); INIT_WORK(&sc->paprd_work, ath_paprd_calibrate); INIT_DELAYED_WORK(&sc->hw_pll_work, ath_hw_pll_work); diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index 3f4f01c829f0..9d84003db800 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -301,11 +301,11 @@ fail_paprd: * When the task is complete, it reschedules itself 
depending on the * appropriate interval that was calculated. */ -void ath_ani_calibrate(unsigned long data) +void ath_ani_calibrate(struct timer_list *t) { - struct ath_softc *sc = (struct ath_softc *)data; + struct ath_common *common = from_timer(common, t, ani.timer); + struct ath_softc *sc = (struct ath_softc *)common->priv; struct ath_hw *ah = sc->sc_ah; - struct ath_common *common = ath9k_hw_common(ah); bool longcal = false; bool shortcal = false; bool aniflag = false; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 918773a9231b..a3be8add56e1 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -93,9 +93,9 @@ static bool ath9k_setpower(struct ath_softc *sc, enum ath9k_power_mode mode) return ret; } -void ath_ps_full_sleep(unsigned long data) +void ath_ps_full_sleep(struct timer_list *t) { - struct ath_softc *sc = (struct ath_softc *) data; + struct ath_softc *sc = from_timer(sc, t, sleep_timer); struct ath_common *common = ath9k_hw_common(sc->sc_ah); unsigned long flags; bool reset; diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index bac829aa950d..885924abf61c 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -336,9 +336,9 @@ static void wil_disconnect_worker(struct work_struct *work) clear_bit(wil_status_fwconnecting, wil->status); } -static void wil_connect_timer_fn(ulong x) +static void wil_connect_timer_fn(struct timer_list *t) { - struct wil6210_priv *wil = (void *)x; + struct wil6210_priv *wil = from_timer(wil, t, connect_timer); bool q; wil_err(wil, "Connect timeout detected, disconnect station\n"); @@ -351,9 +351,9 @@ static void wil_connect_timer_fn(ulong x) wil_dbg_wmi(wil, "queue_work of disconnect_worker -> %d\n", q); } -static void wil_scan_timer_fn(ulong x) +static void wil_scan_timer_fn(struct timer_list *t) { - struct wil6210_priv *wil = (void *)x; + struct wil6210_priv *wil = from_timer(wil, t, scan_timer); clear_bit(wil_status_fwready, wil->status); wil_err(wil, "Scan timeout detected, start fw error recovery\n"); @@ -540,10 +540,9 @@ int wil_priv_init(struct wil6210_priv *wil) init_completion(&wil->halp.comp); wil->bcast_vring = -1; - setup_timer(&wil->connect_timer, wil_connect_timer_fn, (ulong)wil); - setup_timer(&wil->scan_timer, wil_scan_timer_fn, (ulong)wil); - setup_timer(&wil->p2p.discovery_timer, wil_p2p_discovery_timer_fn, - (ulong)wil); + timer_setup(&wil->connect_timer, wil_connect_timer_fn, 0); + timer_setup(&wil->scan_timer, wil_scan_timer_fn, 0); + timer_setup(&wil->p2p.discovery_timer, wil_p2p_discovery_timer_fn, 0); INIT_WORK(&wil->disconnect_worker, wil_disconnect_worker); INIT_WORK(&wil->wmi_event_worker, wmi_event_worker); diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c index 792484756654..7dbee2c3e482 100644 --- a/drivers/net/wireless/ath/wil6210/p2p.c +++ b/drivers/net/wireless/ath/wil6210/p2p.c @@ -65,9 +65,9 @@ bool wil_p2p_is_social_scan(struct cfg80211_scan_request *request) (request->channels[0]->hw_value == P2P_DMG_SOCIAL_CHANNEL); } -void wil_p2p_discovery_timer_fn(ulong x) +void wil_p2p_discovery_timer_fn(struct timer_list *t) { - struct wil6210_priv *wil = (void *)x; + struct wil6210_priv *wil = from_timer(wil, t, p2p.discovery_timer); wil_dbg_misc(wil, "p2p_discovery_timer_fn\n"); diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 
315ec8b59662..1e340d04bd70 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -918,7 +918,7 @@ void wil6210_mask_halp(struct wil6210_priv *wil); /* P2P */ bool wil_p2p_is_social_scan(struct cfg80211_scan_request *request); -void wil_p2p_discovery_timer_fn(ulong x); +void wil_p2p_discovery_timer_fn(struct timer_list *t); int wil_p2p_search(struct wil6210_priv *wil, struct cfg80211_scan_request *request); int wil_p2p_listen(struct wil6210_priv *wil, struct wireless_dev *wdev, From 9deef43ddfb1dfa5f42e4066610cfe606212b0a1 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Thu, 26 Oct 2017 10:55:32 +0200 Subject: [PATCH 1478/2177] mlxsw: spectrum: Change stats cache to be local Change the HW stats cache to be local. Rename it for better clarity. It holds the results of the last result of HW stats that are being read periodically, in order to have answer for stats request immediately. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 24 ++++++------------- .../net/ethernet/mellanox/mlxsw/spectrum.h | 4 ++-- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 12b6ac487d8d..021926974da6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1328,16 +1328,16 @@ static void update_stats_cache(struct work_struct *work) { struct mlxsw_sp_port *mlxsw_sp_port = container_of(work, struct mlxsw_sp_port, - hw_stats.update_dw.work); + periodic_hw_stats.update_dw.work); if (!netif_carrier_ok(mlxsw_sp_port->dev)) goto out; mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, - mlxsw_sp_port->hw_stats.cache); + &mlxsw_sp_port->periodic_hw_stats.stats); out: - mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, + mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, MLXSW_HW_STATS_UPDATE_TIME); } @@ -1350,7 +1350,7 @@ mlxsw_sp_port_get_stats64(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); + memcpy(stats, &mlxsw_sp_port->periodic_hw_stats.stats, sizeof(*stats)); } static int __mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -2905,14 +2905,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_sample; } - mlxsw_sp_port->hw_stats.cache = - kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL); - - if (!mlxsw_sp_port->hw_stats.cache) { - err = -ENOMEM; - goto err_alloc_hw_stats; - } - INIT_DELAYED_WORK(&mlxsw_sp_port->hw_stats.update_dw, + INIT_DELAYED_WORK(&mlxsw_sp_port->periodic_hw_stats.update_dw, &update_stats_cache); dev->netdev_ops = &mlxsw_sp_port_netdev_ops; @@ -3026,7 +3019,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port, mlxsw_sp_port, dev, mlxsw_sp_port->split, module); - mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0); return 0; err_register_netdev: @@ -3049,8 +3042,6 @@ err_dev_addr_init: err_port_swid_set: mlxsw_sp_port_module_unmap(mlxsw_sp_port); err_port_module_map: - kfree(mlxsw_sp_port->hw_stats.cache); -err_alloc_hw_stats: kfree(mlxsw_sp_port->sample); err_alloc_sample: free_percpu(mlxsw_sp_port->pcpu_stats); @@ -3065,7 +3056,7 
@@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; - cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw); + cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -3075,7 +3066,6 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp_port); - kfree(mlxsw_sp_port->hw_stats.cache); kfree(mlxsw_sp_port->sample); free_percpu(mlxsw_sp_port->pcpu_stats); WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vlans_list)); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index dc1b739c3ae1..aa0cefb25e18 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -231,9 +231,9 @@ struct mlxsw_sp_port { struct list_head mall_tc_list; struct { #define MLXSW_HW_STATS_UPDATE_TIME HZ - struct rtnl_link_stats64 *cache; + struct rtnl_link_stats64 stats; struct delayed_work update_dw; - } hw_stats; + } periodic_hw_stats; struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; }; From 3e8c1fd31840bd84c31f08e69aa9c338b2856047 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Thu, 26 Oct 2017 10:55:33 +0200 Subject: [PATCH 1479/2177] mlxsw: reg: Avoid magic number in PPCNT Replace recurring magic number in PPCNT register with a define. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 72 +++++++++++++---------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index a3f31f425550..5a26702267bd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3123,6 +3123,7 @@ static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port) */ #define MLXSW_REG_PPCNT_ID 0x5008 #define MLXSW_REG_PPCNT_LEN 0x100 +#define MLXSW_REG_PPCNT_COUNTERS_OFFSET 0x08 MLXSW_REG_DEFINE(ppcnt, MLXSW_REG_PPCNT_ID, MLXSW_REG_PPCNT_LEN); @@ -3200,162 +3201,171 @@ MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5); * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_frames_transmitted_ok, - 0x08 + 0x00, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); /* reg_ppcnt_a_frames_received_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_frames_received_ok, - 0x08 + 0x08, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); /* reg_ppcnt_a_frame_check_sequence_errors * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_frame_check_sequence_errors, - 0x08 + 0x10, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64); /* reg_ppcnt_a_alignment_errors * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_alignment_errors, - 0x08 + 0x18, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x18, 0, 64); /* reg_ppcnt_a_octets_transmitted_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_octets_transmitted_ok, - 0x08 + 0x20, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64); /* reg_ppcnt_a_octets_received_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_octets_received_ok, - 0x08 + 0x28, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64); /* reg_ppcnt_a_multicast_frames_xmitted_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_xmitted_ok, - 0x08 + 0x30, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64); /* reg_ppcnt_a_broadcast_frames_xmitted_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_xmitted_ok, - 0x08 + 0x38, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64); /* reg_ppcnt_a_multicast_frames_received_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_received_ok, - 0x08 + 0x40, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); /* reg_ppcnt_a_broadcast_frames_received_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_received_ok, - 0x08 + 0x48, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64); /* reg_ppcnt_a_in_range_length_errors * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_in_range_length_errors, - 0x08 + 0x50, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64); /* reg_ppcnt_a_out_of_range_length_field * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_out_of_range_length_field, - 0x08 + 0x58, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); /* reg_ppcnt_a_frame_too_long_errors * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_frame_too_long_errors, - 0x08 + 0x60, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); /* reg_ppcnt_a_symbol_error_during_carrier * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_symbol_error_during_carrier, - 0x08 + 0x68, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); /* reg_ppcnt_a_mac_control_frames_transmitted * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_transmitted, - 0x08 + 0x70, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); /* reg_ppcnt_a_mac_control_frames_received * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_received, - 0x08 + 0x78, 0, 64); + 
MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x78, 0, 64); /* reg_ppcnt_a_unsupported_opcodes_received * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_unsupported_opcodes_received, - 0x08 + 0x80, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x80, 0, 64); /* reg_ppcnt_a_pause_mac_ctrl_frames_received * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, - 0x08 + 0x88, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x88, 0, 64); /* reg_ppcnt_a_pause_mac_ctrl_frames_transmitted * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, - 0x08 + 0x90, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64); /* Ethernet Per Priority Group Counters */ /* reg_ppcnt_rx_octets * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, rx_octets, 0x08 + 0x00, 0, 64); +MLXSW_ITEM64(reg, ppcnt, rx_octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); /* reg_ppcnt_rx_frames * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, rx_frames, 0x08 + 0x20, 0, 64); +MLXSW_ITEM64(reg, ppcnt, rx_frames, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64); /* reg_ppcnt_tx_octets * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_octets, 0x08 + 0x28, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64); /* reg_ppcnt_tx_frames * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_frames, 0x08 + 0x48, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_frames, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64); /* reg_ppcnt_rx_pause * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, rx_pause, 0x08 + 0x50, 0, 64); +MLXSW_ITEM64(reg, ppcnt, rx_pause, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64); /* reg_ppcnt_rx_pause_duration * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, 0x08 + 0x58, 0, 64); +MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); /* reg_ppcnt_tx_pause * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_pause, 0x08 + 0x60, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_pause, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); /* reg_ppcnt_tx_pause_duration * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, 0x08 + 0x68, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); /* reg_ppcnt_rx_pause_transition * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); /* Ethernet Per Traffic Group Counters */ @@ -3365,14 +3375,16 @@ MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64); * The field cannot be cleared. * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, 0x08 + 0x00, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); /* reg_ppcnt_tc_no_buffer_discard_uc * The number of unicast packets dropped due to lack of shared * buffer resources. * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, 0x08 + 0x08, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, enum mlxsw_reg_ppcnt_grp grp, From 356c3e9afac0cc19c3d3b0cbc67106ce8efa0743 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Thu, 26 Oct 2017 11:00:48 +0200 Subject: [PATCH 1480/2177] net: dsa: lan9303: Move struct lan9303 to include/linux/dsa/lan9303.h The next patch require net/dsa/tag_lan9303.c to access struct lan9303. 
Therefore move struct lan9303 definitions from drivers/net/dsa/lan9303.h to new file include/linux/dsa/lan9303.h. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- MAINTAINERS | 1 + drivers/net/dsa/lan9303.h | 34 +--------------------------------- include/linux/dsa/lan9303.h | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 33 deletions(-) create mode 100644 include/linux/dsa/lan9303.h diff --git a/MAINTAINERS b/MAINTAINERS index e3a7ca9d2783..c9ee7abf4627 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9415,6 +9415,7 @@ M: Florian Fainelli S: Maintained F: net/dsa/ F: include/net/dsa.h +F: include/linux/dsa/ F: drivers/net/dsa/ NETWORKING [GENERAL] diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h index d807b1be35f2..b868e5040830 100644 --- a/drivers/net/dsa/lan9303.h +++ b/drivers/net/dsa/lan9303.h @@ -2,39 +2,7 @@ #include #include -struct lan9303; - -struct lan9303_phy_ops { - /* PHY 1 and 2 access*/ - int (*phy_read)(struct lan9303 *chip, int port, int regnum); - int (*phy_write)(struct lan9303 *chip, int port, - int regnum, u16 val); -}; - -#define LAN9303_NUM_ALR_RECORDS 512 -struct lan9303_alr_cache_entry { - u8 mac_addr[ETH_ALEN]; - u8 port_map; /* Bitmap of ports. Zero if unused entry */ - u8 stp_override; /* non zero if set ALR_DAT1_AGE_OVERRID */ -}; - -struct lan9303 { - struct device *dev; - struct regmap *regmap; - struct regmap_irq_chip_data *irq_data; - struct gpio_desc *reset_gpio; - u32 reset_duration; /* in [ms] */ - bool phy_addr_sel_strap; - struct dsa_switch *ds; - struct mutex indirect_mutex; /* protect indexed register access */ - const struct lan9303_phy_ops *ops; - bool is_bridged; /* true if port 1 and 2 are bridged */ - u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */ - /* LAN9303 do not offer reading specific ALR entry. Cache all - * static entries in a flat table - **/ - struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS]; -}; +#include extern const struct regmap_access_table lan9303_register_set; extern const struct lan9303_phy_ops lan9303_indirect_phy_ops; diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h new file mode 100644 index 000000000000..05d8d136baab --- /dev/null +++ b/include/linux/dsa/lan9303.h @@ -0,0 +1,36 @@ +/* Included by drivers/net/dsa/lan9303.h and net/dsa/tag_lan9303.c */ +#include + +struct lan9303; + +struct lan9303_phy_ops { + /* PHY 1 and 2 access*/ + int (*phy_read)(struct lan9303 *chip, int port, int regnum); + int (*phy_write)(struct lan9303 *chip, int port, + int regnum, u16 val); +}; + +#define LAN9303_NUM_ALR_RECORDS 512 +struct lan9303_alr_cache_entry { + u8 mac_addr[ETH_ALEN]; + u8 port_map; /* Bitmap of ports. Zero if unused entry */ + u8 stp_override; /* non zero if set ALR_DAT1_AGE_OVERRID */ +}; + +struct lan9303 { + struct device *dev; + struct regmap *regmap; + struct regmap_irq_chip_data *irq_data; + struct gpio_desc *reset_gpio; + u32 reset_duration; /* in [ms] */ + bool phy_addr_sel_strap; + struct dsa_switch *ds; + struct mutex indirect_mutex; /* protect indexed register access */ + const struct lan9303_phy_ops *ops; + bool is_bridged; /* true if port 1 and 2 are bridged */ + u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */ + /* LAN9303 do not offer reading specific ALR entry. 
Cache all + * static entries in a flat table + **/ + struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS]; +}; From 535f010d4ba798214bbd88ce18a95cd99cd4a8cb Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Thu, 26 Oct 2017 11:00:49 +0200 Subject: [PATCH 1481/2177] net: dsa: lan9303: Learn addresses on CPU port when bridged When CPU transmit directly to port using tag, the LAN9303 does not learn MAC addresses received on the CPU port into the ALR. ALR learning is performed only when transmitting using ALR lookup. Solution: If the two external ports are bridged and the packet is not STP BPDU, then use ALR lookup to allow ALR learning on CPU port. Otherwise transmit directly to port with STP state override. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/tag_lan9303.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 57519597c6fc..64092325aac3 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -11,6 +11,7 @@ * GNU General Public License for more details. * */ +#include #include #include #include @@ -39,6 +40,23 @@ */ #define LAN9303_TAG_LEN 4 +# define LAN9303_TAG_TX_USE_ALR BIT(3) +# define LAN9303_TAG_TX_STP_OVERRIDE BIT(4) +#define eth_stp_addr eth_reserved_addr_base + +/* Decide whether to transmit using ALR lookup, or transmit directly to + * port using tag. ALR learning is performed only when using ALR lookup. + * If the two external ports are bridged and the packet is not STP BPDU, + * then use ALR lookup to allow ALR learning on CPU port. + * Otherwise transmit directly to port with STP state override. + * See also: lan9303_separate_ports() and lan9303.pdf 6.4.10.1 + */ +static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr) +{ + struct lan9303 *chip = dp->ds->priv; + + return chip->is_bridged && !ether_addr_equal(dest_addr, eth_stp_addr); +} static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -62,7 +80,10 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN); lan9303_tag[0] = htons(ETH_P_8021Q); - lan9303_tag[1] = htons(dp->index | BIT(4)); + lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ? + LAN9303_TAG_TX_USE_ALR : + dp->index | LAN9303_TAG_TX_STP_OVERRIDE; + lan9303_tag[1] = htons(lan9303_tag[1]); return skb; } From 27887bc7cb7fc5a0a3b8f4b0f27b332c8121515b Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Oct 2017 17:18:33 +0530 Subject: [PATCH 1482/2177] cxgb4: collect hardware LA dumps Collect CIM, CIM_MA, ULP_RX, TP, CIM_PIF, and ULP_TX logic analyzer dumps. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 26 +++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 6 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 164 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 18 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 32 ++++ drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 4 + 6 files changed, 250 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index d7f3392f618f..50540a6379a4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -45,6 +45,32 @@ struct ireg_buf { u32 outbuf[32]; }; +struct cudbg_ulprx_la { + u32 data[ULPRX_LA_SIZE * 8]; + u32 size; +}; + +struct cudbg_tp_la { + u32 size; + u32 mode; + u8 data[0]; +}; + +struct cudbg_cim_pif_la { + int size; + u8 data[0]; +}; + +#define CUDBG_NUM_ULPTX 11 +#define CUDBG_NUM_ULPTX_READ 512 + +struct cudbg_ulptx_la { + u32 rdptr[CUDBG_NUM_ULPTX]; + u32 wrptr[CUDBG_NUM_ULPTX]; + u32 rddata[CUDBG_NUM_ULPTX]; + u32 rd_data[CUDBG_NUM_ULPTX][CUDBG_NUM_ULPTX_READ]; +}; + #define IREG_NUM_ELEM 4 static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 9b8005e67811..f65db1b89fdc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -29,6 +29,8 @@ enum cudbg_dbg_entity_type { CUDBG_REG_DUMP = 1, CUDBG_DEV_LOG = 2, + CUDBG_CIM_LA = 3, + CUDBG_CIM_MA_LA = 4, CUDBG_CIM_IBQ_TP0 = 6, CUDBG_CIM_IBQ_TP1 = 7, CUDBG_CIM_IBQ_ULP = 8, @@ -45,11 +47,15 @@ enum cudbg_dbg_entity_type { CUDBG_EDC1 = 19, CUDBG_TP_INDIRECT = 36, CUDBG_SGE_INDIRECT = 37, + CUDBG_ULPRX_LA = 41, + CUDBG_TP_LA = 43, + CUDBG_CIM_PIF_LA = 45, CUDBG_CIM_OBQ_RXQ0 = 47, CUDBG_CIM_OBQ_RXQ1 = 48, CUDBG_PCIE_INDIRECT = 50, CUDBG_PM_INDIRECT = 51, CUDBG_MA_INDIRECT = 61, + CUDBG_ULPTX_LA = 62, CUDBG_UP_CIM_INDIRECT = 64, CUDBG_MBOX_LOG = 66, CUDBG_HMA_INDIRECT = 67, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 19da54f83e52..8b5a12b19844 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -129,6 +129,69 @@ int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int size, rc; + u32 cfg = 0; + + if (is_t6(padap->params.chip)) { + size = padap->params.cim_la_size / 10 + 1; + size *= 11 * sizeof(u32); + } else { + size = padap->params.cim_la_size / 8; + size *= 8 * sizeof(u32); + } + + size += sizeof(cfg); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + rc = t4_cim_read(padap, UP_UP_DBG_LA_CFG_A, 1, &cfg); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + + memcpy((char *)temp_buff.data, &cfg, sizeof(cfg)); + rc = t4_cim_read_la(padap, + (u32 *)((char *)temp_buff.data + sizeof(cfg)), + NULL); + if (rc < 0) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct 
adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int size, rc; + + size = 2 * CIM_MALA_SIZE * 5 * sizeof(u32); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + t4_cim_read_ma_la(padap, + (u32 *)temp_buff.data, + (u32 *)((char *)temp_buff.data + + 5 * CIM_MALA_SIZE)); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err, int qid) @@ -574,6 +637,72 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_ulprx_la *ulprx_la_buff; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_ulprx_la), + &temp_buff); + if (rc) + return rc; + + ulprx_la_buff = (struct cudbg_ulprx_la *)temp_buff.data; + t4_ulprx_read_la(padap, (u32 *)ulprx_la_buff->data); + ulprx_la_buff->size = ULPRX_LA_SIZE; + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_tp_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_tp_la *tp_la_buff; + int size, rc; + + size = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + tp_la_buff = (struct cudbg_tp_la *)temp_buff.data; + tp_la_buff->mode = DBGLAMODE_G(t4_read_reg(padap, TP_DBG_LA_CONFIG_A)); + t4_tp_read_la(padap, (u64 *)tp_la_buff->data, NULL); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct cudbg_cim_pif_la *cim_pif_la_buff; + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int size, rc; + + size = sizeof(struct cudbg_cim_pif_la) + + 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + cim_pif_la_buff = (struct cudbg_cim_pif_la *)temp_buff.data; + cim_pif_la_buff->size = CIM_PIFLA_SIZE; + t4_cim_read_pif_la(padap, (u32 *)cim_pif_la_buff->data, + (u32 *)cim_pif_la_buff->data + 6 * CIM_PIFLA_SIZE, + NULL, NULL); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) @@ -743,6 +872,41 @@ int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_ulptx_la *ulptx_la_buff; + u32 i, j; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_ulptx_la), + &temp_buff); + if (rc) + return rc; + + ulptx_la_buff = (struct cudbg_ulptx_la *)temp_buff.data; + for (i = 0; i < CUDBG_NUM_ULPTX; i++) { + ulptx_la_buff->rdptr[i] = t4_read_reg(padap, + ULP_TX_LA_RDPTR_0_A + + 0x10 * i); + ulptx_la_buff->wrptr[i] = t4_read_reg(padap, + ULP_TX_LA_WRPTR_0_A + + 0x10 * i); + ulptx_la_buff->rddata[i] = t4_read_reg(padap, + ULP_TX_LA_RDDATA_0_A + + 0x10 * i); 
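+		/* The loop below performs CUDBG_NUM_ULPTX_READ back-to-back
+		 * reads of this channel's RDDATA register, storing one word
+		 * of ULP_TX logic analyzer data per read in rd_data[i][].
+		 */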
+ for (j = 0; j < CUDBG_NUM_ULPTX_READ; j++) + ulptx_la_buff->rd_data[i][j] = + t4_read_reg(padap, + ULP_TX_LA_RDDATA_0_A + 0x10 * i); + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index df24c409c82f..ad6eff3c33c3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -24,6 +24,12 @@ int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); @@ -72,6 +78,15 @@ int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_tp_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); @@ -87,6 +102,9 @@ int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 59740ac7e46e..8bc1b1decf30 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -29,6 +29,8 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_MBOX_LOG, cudbg_collect_mbox_log }, { CUDBG_DEV_LOG, cudbg_collect_fw_devlog }, { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, + { CUDBG_CIM_LA, cudbg_collect_cim_la }, + { CUDBG_CIM_MA_LA, cudbg_collect_cim_ma_la }, { CUDBG_CIM_IBQ_TP0, cudbg_collect_cim_ibq_tp0 }, { CUDBG_CIM_IBQ_TP1, cudbg_collect_cim_ibq_tp1 }, { CUDBG_CIM_IBQ_ULP, cudbg_collect_cim_ibq_ulp }, @@ -43,11 +45,15 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_CIM_OBQ_NCSI, cudbg_collect_cim_obq_ncsi }, { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect }, { CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect }, + { CUDBG_ULPRX_LA, cudbg_collect_ulprx_la }, + { CUDBG_TP_LA, cudbg_collect_tp_la }, + { CUDBG_CIM_PIF_LA, cudbg_collect_cim_pif_la }, { CUDBG_CIM_OBQ_RXQ0, cudbg_collect_obq_sge_rx_q0 }, { CUDBG_CIM_OBQ_RXQ1, cudbg_collect_obq_sge_rx_q1 }, { CUDBG_PCIE_INDIRECT, 
cudbg_collect_pcie_indirect }, { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect }, { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, + { CUDBG_ULPTX_LA, cudbg_collect_ulptx_la }, { CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect }, { CUDBG_HMA_INDIRECT, cudbg_collect_hma_indirect }, }; @@ -73,6 +79,19 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) case CUDBG_DEV_LOG: len = adap->params.devlog.size; break; + case CUDBG_CIM_LA: + if (is_t6(adap->params.chip)) { + len = adap->params.cim_la_size / 10 + 1; + len *= 11 * sizeof(u32); + } else { + len = adap->params.cim_la_size / 8; + len *= 8 * sizeof(u32); + } + len += sizeof(u32); /* for reading CIM LA configuration */ + break; + case CUDBG_CIM_MA_LA: + len = 2 * CIM_MALA_SIZE * 5 * sizeof(u32); + break; case CUDBG_CIM_IBQ_TP0: case CUDBG_CIM_IBQ_TP1: case CUDBG_CIM_IBQ_ULP: @@ -142,6 +161,16 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) case CUDBG_SGE_INDIRECT: len = sizeof(struct ireg_buf) * 2; break; + case CUDBG_ULPRX_LA: + len = sizeof(struct cudbg_ulprx_la); + break; + case CUDBG_TP_LA: + len = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64); + break; + case CUDBG_CIM_PIF_LA: + len = sizeof(struct cudbg_cim_pif_la); + len += 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32); + break; case CUDBG_PCIE_INDIRECT: n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); len = sizeof(struct ireg_buf) * n * 2; @@ -157,6 +186,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) len = sizeof(struct ireg_buf) * n * 2; } break; + case CUDBG_ULPTX_LA: + len = sizeof(struct cudbg_ulptx_la); + break; case CUDBG_UP_CIM_INDIRECT: n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32)); len = sizeof(struct ireg_buf) * n; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 82614e078f50..b5cd9a5ad808 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1629,6 +1629,10 @@ #define IESPI_PAR_ERROR_V(x) ((x) << IESPI_PAR_ERROR_S) #define IESPI_PAR_ERROR_F IESPI_PAR_ERROR_V(1U) +#define ULP_TX_LA_RDPTR_0_A 0x8ec0 +#define ULP_TX_LA_RDDATA_0_A 0x8ec4 +#define ULP_TX_LA_WRPTR_0_A 0x8ec8 + #define PMRX_E_PCMD_PAR_ERROR_S 0 #define PMRX_E_PCMD_PAR_ERROR_V(x) ((x) << PMRX_E_PCMD_PAR_ERROR_S) #define PMRX_E_PCMD_PAR_ERROR_F PMRX_E_PCMD_PAR_ERROR_V(1U) From 3044d0fb016ecd953724c966bede8c8626f32bd5 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Oct 2017 17:18:34 +0530 Subject: [PATCH 1483/2177] cxgb4: collect CIM queue configuration dump Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 9 +++++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 1 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 39 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 3 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 4 ++ 5 files changed, 56 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 50540a6379a4..ab15c3dfa04e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -33,6 +33,15 @@ struct cudbg_mbox_log { u32 lo[MBOX_LEN / 8]; }; +struct cudbg_cim_qcfg { + u8 chip; + u16 base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; + u16 size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; + u16 thres[CIM_NUM_IBQ]; + u32 obq_wr[2 * CIM_NUM_OBQ_T5]; + u32 stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)]; +}; + struct ireg_field { u32 ireg_addr; u32 ireg_data; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index f65db1b89fdc..be031aba2706 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -31,6 +31,7 @@ enum cudbg_dbg_entity_type { CUDBG_DEV_LOG = 2, CUDBG_CIM_LA = 3, CUDBG_CIM_MA_LA = 4, + CUDBG_CIM_QCFG = 5, CUDBG_CIM_IBQ_TP0 = 6, CUDBG_CIM_IBQ_TP1 = 7, CUDBG_CIM_IBQ_ULP = 8, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 8b5a12b19844..596f2b8e41cf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -192,6 +192,45 @@ int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_cim_qcfg *cim_qcfg_data; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_cim_qcfg), + &temp_buff); + if (rc) + return rc; + + cim_qcfg_data = (struct cudbg_cim_qcfg *)temp_buff.data; + cim_qcfg_data->chip = padap->params.chip; + rc = t4_cim_read(padap, UP_IBQ_0_RDADDR_A, + ARRAY_SIZE(cim_qcfg_data->stat), cim_qcfg_data->stat); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + + rc = t4_cim_read(padap, UP_OBQ_0_REALADDR_A, + ARRAY_SIZE(cim_qcfg_data->obq_wr), + cim_qcfg_data->obq_wr); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + + t4_read_cimq_cfg(padap, cim_qcfg_data->base, cim_qcfg_data->size, + cim_qcfg_data->thres); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err, int qid) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index ad6eff3c33c3..f42b7420ff09 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -30,6 +30,9 @@ int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init, struct 
cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 8bc1b1decf30..611ece7b7e5a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -31,6 +31,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, { CUDBG_CIM_LA, cudbg_collect_cim_la }, { CUDBG_CIM_MA_LA, cudbg_collect_cim_ma_la }, + { CUDBG_CIM_QCFG, cudbg_collect_cim_qcfg }, { CUDBG_CIM_IBQ_TP0, cudbg_collect_cim_ibq_tp0 }, { CUDBG_CIM_IBQ_TP1, cudbg_collect_cim_ibq_tp1 }, { CUDBG_CIM_IBQ_ULP, cudbg_collect_cim_ibq_ulp }, @@ -92,6 +93,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) case CUDBG_CIM_MA_LA: len = 2 * CIM_MALA_SIZE * 5 * sizeof(u32); break; + case CUDBG_CIM_QCFG: + len = sizeof(struct cudbg_cim_qcfg); + break; case CUDBG_CIM_IBQ_TP0: case CUDBG_CIM_IBQ_TP1: case CUDBG_CIM_IBQ_ULP: From 28b445561fbac2e3c9886231b0a414336878e20f Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Oct 2017 17:18:35 +0530 Subject: [PATCH 1484/2177] cxgb4: collect RSS dumps Collect RSS table and RSS VF configuration dumps. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 5 ++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 2 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 46 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 6 +++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 9 ++++ 5 files changed, 68 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index ab15c3dfa04e..b24175b2bb3c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -42,6 +42,11 @@ struct cudbg_cim_qcfg { u32 stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)]; }; +struct cudbg_rss_vf_conf { + u32 rss_vf_vfl; + u32 rss_vf_vfh; +}; + struct ireg_field { u32 ireg_addr; u32 ireg_data; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index be031aba2706..34f241892fca 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -46,6 +46,8 @@ enum cudbg_dbg_entity_type { CUDBG_CIM_OBQ_NCSI = 17, CUDBG_EDC0 = 18, CUDBG_EDC1 = 19, + CUDBG_RSS = 22, + CUDBG_RSS_VF_CONF = 25, CUDBG_TP_INDIRECT = 36, CUDBG_SGE_INDIRECT = 37, CUDBG_ULPRX_LA = 41, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 596f2b8e41cf..efab94465337 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -528,6 +528,52 @@ int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, MEM_EDC1); } +int cudbg_collect_rss(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int rc; + + rc = cudbg_get_buff(dbg_buff, RSS_NENTRIES * sizeof(u16), &temp_buff); + if (rc) + return rc; + + rc = t4_read_rss(padap, (u16 *)temp_buff.data); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_rss_vf_config(struct 
cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_rss_vf_conf *vfconf; + int vf, rc, vf_count; + + vf_count = padap->params.arch.vfcount; + rc = cudbg_get_buff(dbg_buff, + vf_count * sizeof(struct cudbg_rss_vf_conf), + &temp_buff); + if (rc) + return rc; + + vfconf = (struct cudbg_rss_vf_conf *)temp_buff.data; + for (vf = 0; vf < vf_count; vf++) + t4_read_rss_vf_config(padap, vf, &vfconf[vf].rss_vf_vfl, + &vfconf[vf].rss_vf_vfh, true); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index f42b7420ff09..634c643fdd39 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -75,6 +75,12 @@ int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init, int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_rss(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 611ece7b7e5a..7996220db485 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -44,6 +44,8 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_CIM_OBQ_ULP3, cudbg_collect_cim_obq_ulp3 }, { CUDBG_CIM_OBQ_SGE, cudbg_collect_cim_obq_sge }, { CUDBG_CIM_OBQ_NCSI, cudbg_collect_cim_obq_ncsi }, + { CUDBG_RSS, cudbg_collect_rss }, + { CUDBG_RSS_VF_CONF, cudbg_collect_rss_vf_config }, { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect }, { CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect }, { CUDBG_ULPRX_LA, cudbg_collect_ulprx_la }, @@ -144,6 +146,13 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) } len = cudbg_mbytes_to_bytes(len); break; + case CUDBG_RSS: + len = RSS_NENTRIES * sizeof(u16); + break; + case CUDBG_RSS_VF_CONF: + len = adap->params.arch.vfcount * + sizeof(struct cudbg_rss_vf_conf); + break; case CUDBG_TP_INDIRECT: switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { case CHELSIO_T5: From 9030e49897f57dea3126e35d97a33588c5307aa1 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Oct 2017 17:18:36 +0530 Subject: [PATCH 1485/2177] cxgb4: collect TID info dump Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 39 +++++++++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 1 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 85 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 3 + .../ethernet/chelsio/cxgb4/cudbg_lib_common.h | 6 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 4 + drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 1 + drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 5 +- 8 files changed, 143 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index b24175b2bb3c..d023e3c5a029 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -21,6 +21,8 @@ #define EDC0_FLAG 3 #define EDC1_FLAG 4 +#define CUDBG_ENTITY_SIGNATURE 0xCCEDB001 + struct card_mem { u16 size_edc0; u16 size_edc1; @@ -75,6 +77,43 @@ struct cudbg_cim_pif_la { u8 data[0]; }; +struct cudbg_tid_info_region { + u32 ntids; + u32 nstids; + u32 stid_base; + u32 hash_base; + + u32 natids; + u32 nftids; + u32 ftid_base; + u32 aftid_base; + u32 aftid_end; + + u32 sftid_base; + u32 nsftids; + + u32 uotid_base; + u32 nuotids; + + u32 sb; + u32 flags; + u32 le_db_conf; + u32 ip_users; + u32 ipv6_users; + + u32 hpftid_base; + u32 nhpftids; +}; + +#define CUDBG_TID_INFO_REV 1 + +struct cudbg_tid_info_region_rev1 { + struct cudbg_ver_hdr ver_hdr; + struct cudbg_tid_info_region tid; + u32 tid_start; + u32 reserved[16]; +}; + #define CUDBG_NUM_ULPTX 11 #define CUDBG_NUM_ULPTX_READ 512 diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 34f241892fca..655259f153f3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -57,6 +57,7 @@ enum cudbg_dbg_entity_type { CUDBG_CIM_OBQ_RXQ1 = 48, CUDBG_PCIE_INDIRECT = 50, CUDBG_PM_INDIRECT = 51, + CUDBG_TID_INFO = 54, CUDBG_MA_INDIRECT = 61, CUDBG_ULPTX_LA = 62, CUDBG_UP_CIM_INDIRECT = 64, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index efab94465337..4697f113f9c6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -902,6 +902,91 @@ int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_tid(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_tid_info_region_rev1 *tid1; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_tid_info_region *tid; + u32 para[2], val[2]; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_tid_info_region_rev1), + &temp_buff); + if (rc) + return rc; + + tid1 = (struct cudbg_tid_info_region_rev1 *)temp_buff.data; + tid = &tid1->tid; + tid1->ver_hdr.signature = CUDBG_ENTITY_SIGNATURE; + tid1->ver_hdr.revision = CUDBG_TID_INFO_REV; + tid1->ver_hdr.size = sizeof(struct cudbg_tid_info_region_rev1) - + sizeof(struct cudbg_ver_hdr); + +#define FW_PARAM_PFVF_A(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \ + FW_PARAMS_PARAM_Y_V(0) | \ + FW_PARAMS_PARAM_Z_V(0)) + + para[0] = FW_PARAM_PFVF_A(ETHOFLD_START); + para[1] = FW_PARAM_PFVF_A(ETHOFLD_END); + rc = t4_query_params(padap, padap->mbox, padap->pf, 0, 2, para, val); + if (rc < 0) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + 
return rc; + } + tid->uotid_base = val[0]; + tid->nuotids = val[1] - val[0] + 1; + + if (is_t5(padap->params.chip)) { + tid->sb = t4_read_reg(padap, LE_DB_SERVER_INDEX_A) / 4; + } else if (is_t6(padap->params.chip)) { + tid1->tid_start = + t4_read_reg(padap, LE_DB_ACTIVE_TABLE_START_INDEX_A); + tid->sb = t4_read_reg(padap, LE_DB_SRVR_START_INDEX_A); + + para[0] = FW_PARAM_PFVF_A(HPFILTER_START); + para[1] = FW_PARAM_PFVF_A(HPFILTER_END); + rc = t4_query_params(padap, padap->mbox, padap->pf, 0, 2, + para, val); + if (rc < 0) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + tid->hpftid_base = val[0]; + tid->nhpftids = val[1] - val[0] + 1; + } + + tid->ntids = padap->tids.ntids; + tid->nstids = padap->tids.nstids; + tid->stid_base = padap->tids.stid_base; + tid->hash_base = padap->tids.hash_base; + + tid->natids = padap->tids.natids; + tid->nftids = padap->tids.nftids; + tid->ftid_base = padap->tids.ftid_base; + tid->aftid_base = padap->tids.aftid_base; + tid->aftid_end = padap->tids.aftid_end; + + tid->sftid_base = padap->tids.sftid_base; + tid->nsftids = padap->tids.nsftids; + + tid->flags = padap->flags; + tid->le_db_conf = t4_read_reg(padap, LE_DB_CONFIG_A); + tid->ip_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV4_A); + tid->ipv6_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV6_A); + +#undef FW_PARAM_PFVF_A + + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 634c643fdd39..f3bfd7cf4186 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -108,6 +108,9 @@ int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_tid(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h index b150c5d1f7c0..24b33f28e548 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h @@ -57,6 +57,12 @@ struct cudbg_entity_hdr { u32 reserved[5]; }; +struct cudbg_ver_hdr { + u32 signature; + u16 revision; + u16 size; +}; + struct cudbg_buffer { u32 size; u32 offset; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 7996220db485..c5371e2ecf3c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -55,6 +55,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_CIM_OBQ_RXQ1, cudbg_collect_obq_sge_rx_q1 }, { CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect }, { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect }, + { CUDBG_TID_INFO, cudbg_collect_tid }, { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, { CUDBG_ULPTX_LA, cudbg_collect_ulptx_la }, { CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect }, @@ -192,6 +193,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) n = sizeof(t5_pm_rx_array) / 
(IREG_NUM_ELEM * sizeof(u32)); len = sizeof(struct ireg_buf) * n * 2; break; + case CUDBG_TID_INFO: + len = sizeof(struct cudbg_tid_info_region_rev1); + break; case CUDBG_MA_INDIRECT: if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { n = sizeof(t6_ma_ireg_array) / diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index b5cd9a5ad808..57a36a048313 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -2856,6 +2856,7 @@ #define T6_LIPMISS_F T6_LIPMISS_V(1U) #define LE_DB_CONFIG_A 0x19c04 +#define LE_DB_ACTIVE_TABLE_START_INDEX_A 0x19c10 #define LE_DB_SERVER_INDEX_A 0x19c18 #define LE_DB_SRVR_START_INDEX_A 0x19c18 #define LE_DB_ACT_CNT_IPV4_A 0x19c20 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 875d4a72b3ef..2ba890926c73 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1244,9 +1244,12 @@ enum fw_params_param_pfvf { FW_PARAMS_PARAM_PFVF_EQ_END = 0x2C, FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_START = 0x2D, FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_END = 0x2E, + FW_PARAMS_PARAM_PFVF_ETHOFLD_START = 0x2F, FW_PARAMS_PARAM_PFVF_ETHOFLD_END = 0x30, FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP = 0x31, - FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x32, + FW_PARAMS_PARAM_PFVF_HPFILTER_START = 0x32, + FW_PARAMS_PARAM_PFVF_HPFILTER_END = 0x33, + FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x39, FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A, }; From b289593e1398480f5ac1a1df2dae479516a21372 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Oct 2017 17:18:37 +0530 Subject: [PATCH 1486/2177] cxgb4: collect MPS-TCAM dump Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 21 ++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 1 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 184 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 5 + drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 16 ++ 6 files changed, 230 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index d023e3c5a029..1860cf2b5286 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -114,6 +114,27 @@ struct cudbg_tid_info_region_rev1 { u32 reserved[16]; }; +#define CUDBG_MAX_RPLC_SIZE 128 + +struct cudbg_mps_tcam { + u64 mask; + u32 rplc[8]; + u32 idx; + u32 cls_lo; + u32 cls_hi; + u32 rplc_size; + u32 vniy; + u32 vnix; + u32 dip_hit; + u32 vlan_vld; + u32 repli; + u16 ivlan; + u8 addr[ETH_ALEN]; + u8 lookup_type; + u8 port_num; + u8 reserved[2]; +}; + #define CUDBG_NUM_ULPTX 11 #define CUDBG_NUM_ULPTX_READ 512 diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 655259f153f3..ce7bb909b8fd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -58,6 +58,7 @@ enum cudbg_dbg_entity_type { CUDBG_PCIE_INDIRECT = 50, CUDBG_PM_INDIRECT = 51, CUDBG_TID_INFO = 54, + CUDBG_MPS_TCAM = 57, CUDBG_MA_INDIRECT = 61, CUDBG_ULPTX_LA = 62, CUDBG_UP_CIM_INDIRECT = 64, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 4697f113f9c6..03f1ec0c0160 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -987,6 +987,190 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init, return rc; } +static inline void cudbg_tcamxy2valmask(u64 x, u64 y, u8 *addr, u64 *mask) +{ + *mask = x | y; + y = (__force u64)cpu_to_be64(y); + memcpy(addr, (char *)&y + 2, ETH_ALEN); +} + +static void cudbg_mps_rpl_backdoor(struct adapter *padap, + struct fw_ldst_mps_rplc *mps_rplc) +{ + if (is_t5(padap->params.chip)) { + mps_rplc->rplc255_224 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP3_A)); + mps_rplc->rplc223_192 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP2_A)); + mps_rplc->rplc191_160 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP1_A)); + mps_rplc->rplc159_128 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP0_A)); + } else { + mps_rplc->rplc255_224 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP7_A)); + mps_rplc->rplc223_192 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP6_A)); + mps_rplc->rplc191_160 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP5_A)); + mps_rplc->rplc159_128 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP4_A)); + } + mps_rplc->rplc127_96 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP3_A)); + mps_rplc->rplc95_64 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP2_A)); + mps_rplc->rplc63_32 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP1_A)); + mps_rplc->rplc31_0 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP0_A)); +} + +static int cudbg_collect_tcam_index(struct adapter *padap, + struct cudbg_mps_tcam *tcam, u32 idx) +{ + u64 tcamy, tcamx, val; + u32 ctl, data2; + int rc = 0; + + if (CHELSIO_CHIP_VERSION(padap->params.chip) >= CHELSIO_T6) { + /* CtlReqID - 1: use Host Driver Requester ID + * CtlCmdType - 0: Read, 1: Write + * CtlTcamSel - 0: TCAM0, 1: TCAM1 + * CtlXYBitSel- 0: Y bit, 1: X bit + */ + + /* Read tcamy */ + ctl = CTLREQID_V(1) | 
CTLCMDTYPE_V(0) | CTLXYBITSEL_V(0); + if (idx < 256) + ctl |= CTLTCAMINDEX_V(idx) | CTLTCAMSEL_V(0); + else + ctl |= CTLTCAMINDEX_V(idx - 256) | CTLTCAMSEL_V(1); + + t4_write_reg(padap, MPS_CLS_TCAM_DATA2_CTL_A, ctl); + val = t4_read_reg(padap, MPS_CLS_TCAM_RDATA1_REQ_ID1_A); + tcamy = DMACH_G(val) << 32; + tcamy |= t4_read_reg(padap, MPS_CLS_TCAM_RDATA0_REQ_ID1_A); + data2 = t4_read_reg(padap, MPS_CLS_TCAM_RDATA2_REQ_ID1_A); + tcam->lookup_type = DATALKPTYPE_G(data2); + + /* 0 - Outer header, 1 - Inner header + * [71:48] bit locations are overloaded for + * outer vs. inner lookup types. + */ + if (tcam->lookup_type && tcam->lookup_type != DATALKPTYPE_M) { + /* Inner header VNI */ + tcam->vniy = (data2 & DATAVIDH2_F) | DATAVIDH1_G(data2); + tcam->vniy = (tcam->vniy << 16) | VIDL_G(val); + tcam->dip_hit = data2 & DATADIPHIT_F; + } else { + tcam->vlan_vld = data2 & DATAVIDH2_F; + tcam->ivlan = VIDL_G(val); + } + + tcam->port_num = DATAPORTNUM_G(data2); + + /* Read tcamx. Change the control param */ + ctl |= CTLXYBITSEL_V(1); + t4_write_reg(padap, MPS_CLS_TCAM_DATA2_CTL_A, ctl); + val = t4_read_reg(padap, MPS_CLS_TCAM_RDATA1_REQ_ID1_A); + tcamx = DMACH_G(val) << 32; + tcamx |= t4_read_reg(padap, MPS_CLS_TCAM_RDATA0_REQ_ID1_A); + data2 = t4_read_reg(padap, MPS_CLS_TCAM_RDATA2_REQ_ID1_A); + if (tcam->lookup_type && tcam->lookup_type != DATALKPTYPE_M) { + /* Inner header VNI mask */ + tcam->vnix = (data2 & DATAVIDH2_F) | DATAVIDH1_G(data2); + tcam->vnix = (tcam->vnix << 16) | VIDL_G(val); + } + } else { + tcamy = t4_read_reg64(padap, MPS_CLS_TCAM_Y_L(idx)); + tcamx = t4_read_reg64(padap, MPS_CLS_TCAM_X_L(idx)); + } + + /* If no entry, return */ + if (tcamx & tcamy) + return rc; + + tcam->cls_lo = t4_read_reg(padap, MPS_CLS_SRAM_L(idx)); + tcam->cls_hi = t4_read_reg(padap, MPS_CLS_SRAM_H(idx)); + + if (is_t5(padap->params.chip)) + tcam->repli = (tcam->cls_lo & REPLICATE_F); + else if (is_t6(padap->params.chip)) + tcam->repli = (tcam->cls_lo & T6_REPLICATE_F); + + if (tcam->repli) { + struct fw_ldst_cmd ldst_cmd; + struct fw_ldst_mps_rplc mps_rplc; + + memset(&ldst_cmd, 0, sizeof(ldst_cmd)); + ldst_cmd.op_to_addrspace = + htonl(FW_CMD_OP_V(FW_LDST_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_MPS)); + ldst_cmd.cycles_to_len16 = htonl(FW_LEN16(ldst_cmd)); + ldst_cmd.u.mps.rplc.fid_idx = + htons(FW_LDST_CMD_FID_V(FW_LDST_MPS_RPLC) | + FW_LDST_CMD_IDX_V(idx)); + + rc = t4_wr_mbox(padap, padap->mbox, &ldst_cmd, sizeof(ldst_cmd), + &ldst_cmd); + if (rc) + cudbg_mps_rpl_backdoor(padap, &mps_rplc); + else + mps_rplc = ldst_cmd.u.mps.rplc; + + tcam->rplc[0] = ntohl(mps_rplc.rplc31_0); + tcam->rplc[1] = ntohl(mps_rplc.rplc63_32); + tcam->rplc[2] = ntohl(mps_rplc.rplc95_64); + tcam->rplc[3] = ntohl(mps_rplc.rplc127_96); + if (padap->params.arch.mps_rplc_size > CUDBG_MAX_RPLC_SIZE) { + tcam->rplc[4] = ntohl(mps_rplc.rplc159_128); + tcam->rplc[5] = ntohl(mps_rplc.rplc191_160); + tcam->rplc[6] = ntohl(mps_rplc.rplc223_192); + tcam->rplc[7] = ntohl(mps_rplc.rplc255_224); + } + } + cudbg_tcamxy2valmask(tcamx, tcamy, tcam->addr, &tcam->mask); + tcam->idx = idx; + tcam->rplc_size = padap->params.arch.mps_rplc_size; + return rc; +} + +int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + u32 size = 0, i, n, total_size = 0; + struct cudbg_mps_tcam *tcam; + int rc; + + n = padap->params.arch.mps_tcam_size; + size = 
sizeof(struct cudbg_mps_tcam) * n; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + tcam = (struct cudbg_mps_tcam *)temp_buff.data; + for (i = 0; i < n; i++) { + rc = cudbg_collect_tcam_index(padap, tcam, i); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + total_size += sizeof(struct cudbg_mps_tcam); + tcam++; + } + + if (!total_size) { + rc = CUDBG_SYSTEM_ERROR; + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index f3bfd7cf4186..280423f9869d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -111,6 +111,9 @@ int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, int cudbg_collect_tid(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index c5371e2ecf3c..f1a1ece45b97 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -56,6 +56,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect }, { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect }, { CUDBG_TID_INFO, cudbg_collect_tid }, + { CUDBG_MPS_TCAM, cudbg_collect_mps_tcam }, { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, { CUDBG_ULPTX_LA, cudbg_collect_ulptx_la }, { CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect }, @@ -196,6 +197,10 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) case CUDBG_TID_INFO: len = sizeof(struct cudbg_tid_info_region_rev1); break; + case CUDBG_MPS_TCAM: + len = sizeof(struct cudbg_mps_tcam) * + adap->params.arch.mps_tcam_size; + break; case CUDBG_MA_INDIRECT: if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { n = sizeof(t6_ma_ireg_array) / diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 57a36a048313..775a591c0fba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -2439,6 +2439,18 @@ #define MPS_CLS_TCAM_DATA0_A 0xf000 #define MPS_CLS_TCAM_DATA1_A 0xf004 +#define CTLREQID_S 30 +#define CTLREQID_V(x) ((x) << CTLREQID_S) + +#define MPS_VF_RPLCT_MAP0_A 0x1111c +#define MPS_VF_RPLCT_MAP1_A 0x11120 +#define MPS_VF_RPLCT_MAP2_A 0x11124 +#define MPS_VF_RPLCT_MAP3_A 0x11128 +#define MPS_VF_RPLCT_MAP4_A 0x11300 +#define MPS_VF_RPLCT_MAP5_A 0x11304 +#define MPS_VF_RPLCT_MAP6_A 0x11308 +#define MPS_VF_RPLCT_MAP7_A 0x1130c + #define VIDL_S 16 #define VIDL_M 0xffffU #define VIDL_G(x) (((x) >> VIDL_S) & VIDL_M) @@ -2463,6 +2475,10 @@ #define DATAVIDH1_M 0x7fU #define DATAVIDH1_G(x) (((x) >> DATAVIDH1_S) & DATAVIDH1_M) +#define MPS_CLS_TCAM_RDATA0_REQ_ID1_A 0xf020 +#define MPS_CLS_TCAM_RDATA1_REQ_ID1_A 0xf024 +#define MPS_CLS_TCAM_RDATA2_REQ_ID1_A 0xf028 + #define 
USED_S 16 #define USED_M 0x7ffU #define USED_G(x) (((x) >> USED_S) & USED_M) From db8cd7ce208a7e7d440856b5c3e4e96af6dd9917 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Oct 2017 17:18:38 +0530 Subject: [PATCH 1487/2177] cxgb4: collect PBT tables dump Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 15 ++++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 1 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 68 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 4 ++ 5 files changed, 91 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 1860cf2b5286..22853372abdf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -145,6 +145,21 @@ struct cudbg_ulptx_la { u32 rd_data[CUDBG_NUM_ULPTX][CUDBG_NUM_ULPTX_READ]; }; +#define CUDBG_CHAC_PBT_ADDR 0x2800 +#define CUDBG_CHAC_PBT_LRF 0x3000 +#define CUDBG_CHAC_PBT_DATA 0x3800 +#define CUDBG_PBT_DYNAMIC_ENTRIES 8 +#define CUDBG_PBT_STATIC_ENTRIES 16 +#define CUDBG_LRF_ENTRIES 8 +#define CUDBG_PBT_DATA_ENTRIES 512 + +struct cudbg_pbt_tables { + u32 pbt_dynamic[CUDBG_PBT_DYNAMIC_ENTRIES]; + u32 pbt_static[CUDBG_PBT_STATIC_ENTRIES]; + u32 lrf_table[CUDBG_LRF_ENTRIES]; + u32 pbt_data[CUDBG_PBT_DATA_ENTRIES]; +}; + #define IREG_NUM_ELEM 4 static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index ce7bb909b8fd..f672799cc8d3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -62,6 +62,7 @@ enum cudbg_dbg_entity_type { CUDBG_MA_INDIRECT = 61, CUDBG_ULPTX_LA = 62, CUDBG_UP_CIM_INDIRECT = 64, + CUDBG_PBT_TABLE = 65, CUDBG_MBOX_LOG = 66, CUDBG_HMA_INDIRECT = 67, CUDBG_MAX_ENTITY = 70, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 03f1ec0c0160..c4096967c434 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1310,6 +1310,74 @@ int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_pbt_tables *pbt; + int i, rc; + u32 addr; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_pbt_tables), + &temp_buff); + if (rc) + return rc; + + pbt = (struct cudbg_pbt_tables *)temp_buff.data; + /* PBT dynamic entries */ + addr = CUDBG_CHAC_PBT_ADDR; + for (i = 0; i < CUDBG_PBT_DYNAMIC_ENTRIES; i++) { + rc = t4_cim_read(padap, addr + (i * 4), 1, + &pbt->pbt_dynamic[i]); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + + /* PBT static entries */ + /* static entries start when bit 6 is set */ + addr = CUDBG_CHAC_PBT_ADDR + (1 << 6); + for (i = 0; i < CUDBG_PBT_STATIC_ENTRIES; i++) { + rc = t4_cim_read(padap, addr + (i * 4), 1, + &pbt->pbt_static[i]); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + + /* LRF entries */ + addr = CUDBG_CHAC_PBT_LRF; + for (i = 0; i < CUDBG_LRF_ENTRIES; i++) { + rc = t4_cim_read(padap, 
addr + (i * 4), 1, + &pbt->lrf_table[i]); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + + /* PBT data entries */ + addr = CUDBG_CHAC_PBT_DATA; + for (i = 0; i < CUDBG_PBT_DATA_ENTRIES; i++) { + rc = t4_cim_read(padap, addr + (i * 4), 1, + &pbt->pbt_data[i]); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 280423f9869d..311b330bc3b2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -123,6 +123,9 @@ int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init, int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index f1a1ece45b97..003deb345ff2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -60,6 +60,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, { CUDBG_ULPTX_LA, cudbg_collect_ulptx_la }, { CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect }, + { CUDBG_PBT_TABLE, cudbg_collect_pbt_tables }, { CUDBG_HMA_INDIRECT, cudbg_collect_hma_indirect }, }; @@ -215,6 +216,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32)); len = sizeof(struct ireg_buf) * n; break; + case CUDBG_PBT_TABLE: + len = sizeof(struct cudbg_pbt_tables); + break; case CUDBG_MBOX_LOG: len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size; break; From 08c4901bfe0b3beb12e7a5d7749e3522d7b1471e Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Oct 2017 17:18:39 +0530 Subject: [PATCH 1488/2177] cxgb4: collect hardware scheduler dumps Collect hardware TX traffic scheduler and pace tables. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 8 +++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 2 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 25 ++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 3 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 9 +++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 4 ++ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 57 +++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 1 + drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 10 ++++ 9 files changed, 119 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 22853372abdf..9757609a86b9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -49,6 +49,14 @@ struct cudbg_rss_vf_conf { u32 rss_vf_vfh; }; +struct cudbg_hw_sched { + u32 kbps[NTX_SCHED]; + u32 ipg[NTX_SCHED]; + u32 pace_tab[NTX_SCHED]; + u32 mode; + u32 map; +}; + struct ireg_field { u32 ireg_addr; u32 ireg_data; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index f672799cc8d3..e5c44b96d0a7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -22,6 +22,7 @@ #define CUDBG_STATUS_NO_MEM -19 #define CUDBG_STATUS_ENTITY_NOT_FOUND -24 #define CUDBG_SYSTEM_ERROR -29 +#define CUDBG_STATUS_CCLK_NOT_DEFINED -32 #define CUDBG_MAJOR_VERSION 1 #define CUDBG_MINOR_VERSION 14 @@ -48,6 +49,7 @@ enum cudbg_dbg_entity_type { CUDBG_EDC1 = 19, CUDBG_RSS = 22, CUDBG_RSS_VF_CONF = 25, + CUDBG_HW_SCHED = 31, CUDBG_TP_INDIRECT = 36, CUDBG_SGE_INDIRECT = 37, CUDBG_ULPRX_LA = 41, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index c4096967c434..0e01a2916ab8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -574,6 +574,31 @@ int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_hw_sched *hw_sched_buff; + int i, rc = 0; + + if (!padap->params.vpd.cclk) + return CUDBG_STATUS_CCLK_NOT_DEFINED; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_hw_sched), + &temp_buff); + hw_sched_buff = (struct cudbg_hw_sched *)temp_buff.data; + hw_sched_buff->map = t4_read_reg(padap, TP_TX_MOD_QUEUE_REQ_MAP_A); + hw_sched_buff->mode = TIMERMODE_G(t4_read_reg(padap, TP_MOD_CONFIG_A)); + t4_read_pace_tbl(padap, hw_sched_buff->pace_tab); + for (i = 0; i < NTX_SCHED; ++i) + t4_get_tx_sched(padap, i, &hw_sched_buff->kbps[i], + &hw_sched_buff->ipg[i], true); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 311b330bc3b2..3f62c1900fe3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -84,6 +84,9 @@ int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_hw_sched(struct 
cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 92a0b022687e..a57761b28edc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1335,6 +1335,12 @@ static inline unsigned int core_ticks_to_us(const struct adapter *adapter, adapter->params.vpd.cclk); } +static inline unsigned int dack_ticks_to_usec(const struct adapter *adap, + unsigned int ticks) +{ + return (ticks << adap->params.tp.dack_re) / core_ticks_per_usec(adap); +} + void t4_set_reg_field(struct adapter *adap, unsigned int addr, u32 mask, u32 val); @@ -1636,6 +1642,9 @@ void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp, int filter_index, int *enabled); int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); +void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED]); +void t4_get_tx_sched(struct adapter *adap, unsigned int sched, + unsigned int *kbps, unsigned int *ipg, bool sleep_ok); int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int class, int minrate, int maxrate, int weight, int pktsize); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 003deb345ff2..35575e4d020c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -46,6 +46,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_CIM_OBQ_NCSI, cudbg_collect_cim_obq_ncsi }, { CUDBG_RSS, cudbg_collect_rss }, { CUDBG_RSS_VF_CONF, cudbg_collect_rss_vf_config }, + { CUDBG_HW_SCHED, cudbg_collect_hw_sched }, { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect }, { CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect }, { CUDBG_ULPRX_LA, cudbg_collect_ulprx_la }, @@ -156,6 +157,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) len = adap->params.arch.vfcount * sizeof(struct cudbg_rss_vf_conf); break; + case CUDBG_HW_SCHED: + len = sizeof(struct cudbg_hw_sched); + break; case CUDBG_TP_INDIRECT: switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { case CHELSIO_T5: diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 006414758f65..c289ca1efc1b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -9547,6 +9547,63 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd); } +/** + * t4_read_pace_tbl - read the pace table + * @adap: the adapter + * @pace_vals: holds the returned values + * + * Returns the values of TP's pace table in microseconds. 
+ */ +void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED]) +{ + unsigned int i, v; + + for (i = 0; i < NTX_SCHED; i++) { + t4_write_reg(adap, TP_PACE_TABLE_A, 0xffff0000 + i); + v = t4_read_reg(adap, TP_PACE_TABLE_A); + pace_vals[i] = dack_ticks_to_usec(adap, v); + } +} + +/** + * t4_get_tx_sched - get the configuration of a Tx HW traffic scheduler + * @adap: the adapter + * @sched: the scheduler index + * @kbps: the byte rate in Kbps + * @ipg: the interpacket delay in tenths of nanoseconds + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Return the current configuration of a HW Tx scheduler. + */ +void t4_get_tx_sched(struct adapter *adap, unsigned int sched, + unsigned int *kbps, unsigned int *ipg, bool sleep_ok) +{ + unsigned int v, addr, bpt, cpt; + + if (kbps) { + addr = TP_TX_MOD_Q1_Q0_RATE_LIMIT_A - sched / 2; + t4_tp_tm_pio_read(adap, &v, 1, addr, sleep_ok); + if (sched & 1) + v >>= 16; + bpt = (v >> 8) & 0xff; + cpt = v & 0xff; + if (!cpt) { + *kbps = 0; /* scheduler disabled */ + } else { + v = (adap->params.vpd.cclk * 1000) / cpt; /* ticks/s */ + *kbps = (v * bpt) / 125; + } + } + if (ipg) { + addr = TP_TX_MOD_Q1_Q0_TIMER_SEPARATOR_A - sched / 2; + t4_tp_tm_pio_read(adap, &v, 1, addr, sleep_ok); + if (sched & 1) + v >>= 16; + v &= 0xffff; + *ipg = (10000 * v) / core_ticks_per_usec(adap); + } +} + int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int class, int minrate, int maxrate, int weight, int pktsize) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 7f59ca458431..7c6af14905c2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -47,6 +47,7 @@ enum { TCB_SIZE = 128, /* TCB size */ NMTUS = 16, /* size of MTU table */ NCCTRL_WIN = 32, /* # of congestion control windows */ + NTX_SCHED = 8, /* # of HW Tx scheduling queues */ PM_NSTATS = 5, /* # of PM stats */ T6_PM_NSTATS = 7, /* # of PM stats in T6 */ MBOX_LEN = 64, /* mailbox size in bytes */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 775a591c0fba..483fb7644355 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1415,6 +1415,7 @@ #define ROWINDEX_V(x) ((x) << ROWINDEX_S) #define TP_CCTRL_TABLE_A 0x7ddc +#define TP_PACE_TABLE_A 0x7dd8 #define TP_MTU_TABLE_A 0x7de4 #define MTUINDEX_S 24 @@ -1449,6 +1450,15 @@ #define TP_TM_PIO_ADDR_A 0x7e18 #define TP_TM_PIO_DATA_A 0x7e1c +#define TP_MOD_CONFIG_A 0x7e24 + +#define TIMERMODE_S 8 +#define TIMERMODE_M 0xffU +#define TIMERMODE_G(x) (((x) >> TIMERMODE_S) & TIMERMODE_M) + +#define TP_TX_MOD_Q1_Q0_TIMER_SEPARATOR_A 0x3 +#define TP_TX_MOD_Q1_Q0_RATE_LIMIT_A 0x8 + #define TP_PIO_ADDR_A 0x7e40 #define TP_PIO_DATA_A 0x7e44 #define TP_MIB_INDEX_A 0x7e50 From 6f92a6544f1a4ed2d495a937283f01ee7d590fec Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Oct 2017 17:18:40 +0530 Subject: [PATCH 1489/2177] cxgb4: collect hardware misc dumps Collect path mtu, PM stats, TP clock info, congestion control, and VPD data dumps. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
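A standalone sketch, not taken from the patch above, of the rate arithmetic t4_get_tx_sched() applies: each 16-bit scheduler half packs bpt (bytes per tick, bits 15:8) and cpt (core clocks per tick, bits 7:0), and the byte rate follows from the core clock. decode_tx_sched_kbps() is a hypothetical helper name for offline decoding of a cudbg_hw_sched blob, and cclk_khz is assumed to be the core clock in kHz as the driver keeps it in adap->params.vpd.cclk.

#include <stdio.h>

static unsigned int decode_tx_sched_kbps(unsigned int raw_word,
					 unsigned int sched,
					 unsigned int cclk_khz)
{
	unsigned int v = (sched & 1) ? raw_word >> 16 : raw_word;
	unsigned int bpt = (v >> 8) & 0xff;	/* bytes per tick */
	unsigned int cpt = v & 0xff;		/* core clocks per tick */
	unsigned int ticks_per_sec;

	if (!cpt)
		return 0;			/* scheduler disabled */

	ticks_per_sec = (cclk_khz * 1000) / cpt;
	return (ticks_per_sec * bpt) / 125;	/* bytes/s -> kbit/s */
}

int main(void)
{
	/* Example: 250000 kHz core clock, bpt = 16, cpt = 100 -> 320000 kbps */
	printf("%u kbps\n", decode_tx_sched_kbps((16 << 8) | 100, 0, 250000));
	return 0;
}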
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 36 +++++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 5 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 135 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 15 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 20 +++ 5 files changed, 211 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 9757609a86b9..239c43084e77 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -49,6 +49,13 @@ struct cudbg_rss_vf_conf { u32 rss_vf_vfh; }; +struct cudbg_pm_stats { + u32 tx_cnt[T6_PM_NSTATS]; + u32 rx_cnt[T6_PM_NSTATS]; + u64 tx_cyc[T6_PM_NSTATS]; + u64 rx_cyc[T6_PM_NSTATS]; +}; + struct cudbg_hw_sched { u32 kbps[NTX_SCHED]; u32 ipg[NTX_SCHED]; @@ -85,6 +92,22 @@ struct cudbg_cim_pif_la { u8 data[0]; }; +struct cudbg_clk_info { + u64 retransmit_min; + u64 retransmit_max; + u64 persist_timer_min; + u64 persist_timer_max; + u64 keepalive_idle_timer; + u64 keepalive_interval; + u64 initial_srtt; + u64 finwait2_timer; + u32 dack_timer; + u32 res; + u32 cclk_ps; + u32 tre; + u32 dack_re; +}; + struct cudbg_tid_info_region { u32 ntids; u32 nstids; @@ -143,6 +166,19 @@ struct cudbg_mps_tcam { u8 reserved[2]; }; +struct cudbg_vpd_data { + u8 sn[SERNUM_LEN + 1]; + u8 bn[PN_LEN + 1]; + u8 na[MACADDR_LEN + 1]; + u8 mn[ID_LEN + 1]; + u16 fw_major; + u16 fw_minor; + u16 fw_micro; + u16 fw_build; + u32 scfg_vers; + u32 vpd_vers; +}; + #define CUDBG_NUM_ULPTX 11 #define CUDBG_NUM_ULPTX_READ 512 diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index e5c44b96d0a7..e484c514e9ae 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -49,18 +49,23 @@ enum cudbg_dbg_entity_type { CUDBG_EDC1 = 19, CUDBG_RSS = 22, CUDBG_RSS_VF_CONF = 25, + CUDBG_PATH_MTU = 27, + CUDBG_PM_STATS = 30, CUDBG_HW_SCHED = 31, CUDBG_TP_INDIRECT = 36, CUDBG_SGE_INDIRECT = 37, CUDBG_ULPRX_LA = 41, CUDBG_TP_LA = 43, CUDBG_CIM_PIF_LA = 45, + CUDBG_CLK = 46, CUDBG_CIM_OBQ_RXQ0 = 47, CUDBG_CIM_OBQ_RXQ1 = 48, CUDBG_PCIE_INDIRECT = 50, CUDBG_PM_INDIRECT = 51, CUDBG_TID_INFO = 54, CUDBG_MPS_TCAM = 57, + CUDBG_VPD_DATA = 58, + CUDBG_CCTRL = 60, CUDBG_MA_INDIRECT = 61, CUDBG_ULPTX_LA = 62, CUDBG_UP_CIM_INDIRECT = 64, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 0e01a2916ab8..fe3a9ef0ec3f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -574,6 +574,44 @@ int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_path_mtu(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int rc; + + rc = cudbg_get_buff(dbg_buff, NMTUS * sizeof(u16), &temp_buff); + if (rc) + return rc; + + t4_read_mtu_tbl(padap, (u16 *)temp_buff.data, NULL); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_pm_stats *pm_stats_buff; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_pm_stats), + 
&temp_buff); + if (rc) + return rc; + + pm_stats_buff = (struct cudbg_pm_stats *)temp_buff.data; + t4_pmtx_get_stats(padap, pm_stats_buff->tx_cnt, pm_stats_buff->tx_cyc); + t4_pmrx_get_stats(padap, pm_stats_buff->rx_cnt, pm_stats_buff->rx_cyc); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) @@ -813,6 +851,55 @@ int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_clk_info(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_clk_info *clk_info_buff; + u64 tp_tick_us; + int rc; + + if (!padap->params.vpd.cclk) + return CUDBG_STATUS_CCLK_NOT_DEFINED; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_clk_info), + &temp_buff); + if (rc) + return rc; + + clk_info_buff = (struct cudbg_clk_info *)temp_buff.data; + clk_info_buff->cclk_ps = 1000000000 / padap->params.vpd.cclk; /* psec */ + clk_info_buff->res = t4_read_reg(padap, TP_TIMER_RESOLUTION_A); + clk_info_buff->tre = TIMERRESOLUTION_G(clk_info_buff->res); + clk_info_buff->dack_re = DELAYEDACKRESOLUTION_G(clk_info_buff->res); + tp_tick_us = (clk_info_buff->cclk_ps << clk_info_buff->tre) / 1000000; + + clk_info_buff->dack_timer = + (clk_info_buff->cclk_ps << clk_info_buff->dack_re) / 1000000 * + t4_read_reg(padap, TP_DACK_TIMER_A); + clk_info_buff->retransmit_min = + tp_tick_us * t4_read_reg(padap, TP_RXT_MIN_A); + clk_info_buff->retransmit_max = + tp_tick_us * t4_read_reg(padap, TP_RXT_MAX_A); + clk_info_buff->persist_timer_min = + tp_tick_us * t4_read_reg(padap, TP_PERS_MIN_A); + clk_info_buff->persist_timer_max = + tp_tick_us * t4_read_reg(padap, TP_PERS_MAX_A); + clk_info_buff->keepalive_idle_timer = + tp_tick_us * t4_read_reg(padap, TP_KEEP_IDLE_A); + clk_info_buff->keepalive_interval = + tp_tick_us * t4_read_reg(padap, TP_KEEP_INTVL_A); + clk_info_buff->initial_srtt = + tp_tick_us * INITSRTT_G(t4_read_reg(padap, TP_INIT_SRTT_A)); + clk_info_buff->finwait2_timer = + tp_tick_us * t4_read_reg(padap, TP_FINWAIT2_TIMER_A); + + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) @@ -1196,6 +1283,54 @@ int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, return rc; } +int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_vpd_data *vpd_data; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_vpd_data), + &temp_buff); + if (rc) + return rc; + + vpd_data = (struct cudbg_vpd_data *)temp_buff.data; + memcpy(vpd_data->sn, padap->params.vpd.sn, SERNUM_LEN + 1); + memcpy(vpd_data->bn, padap->params.vpd.pn, PN_LEN + 1); + memcpy(vpd_data->na, padap->params.vpd.na, MACADDR_LEN + 1); + memcpy(vpd_data->mn, padap->params.vpd.id, ID_LEN + 1); + vpd_data->scfg_vers = padap->params.scfg_vers; + vpd_data->vpd_vers = padap->params.vpd_vers; + vpd_data->fw_major = FW_HDR_FW_VER_MAJOR_G(padap->params.fw_vers); + vpd_data->fw_minor = FW_HDR_FW_VER_MINOR_G(padap->params.fw_vers); + vpd_data->fw_micro = FW_HDR_FW_VER_MICRO_G(padap->params.fw_vers); + vpd_data->fw_build = FW_HDR_FW_VER_BUILD_G(padap->params.fw_vers); + 
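A brief sketch, not part of the patch, of the unit conversion cudbg_collect_clk_info() performs above: the core-clock period in picoseconds (1000000000 / cclk_khz) shifted left by the TIMERRESOLUTION exponent gives one TP timer tick, and each timer register holds a count of such ticks. tp_timer_to_us() and its arguments are hypothetical names; only the formula mirrors the driver code.

#include <stdio.h>

/* Core-clock period in picoseconds, e.g. 250000 kHz -> 4000 ps. */
static unsigned long long cclk_ps(unsigned int cclk_khz)
{
	return 1000000000ULL / cclk_khz;
}

/* Convert a TP timer register value (a count of 2^tre core ticks) to us. */
static unsigned long long tp_timer_to_us(unsigned int cclk_khz,
					 unsigned int tre,
					 unsigned long long reg_val)
{
	unsigned long long tick_us = (cclk_ps(cclk_khz) << tre) / 1000000;

	return tick_us * reg_val;
}

int main(void)
{
	/* 250 MHz core clock, tre = 10 -> 4 us per TP tick after integer
	 * division (the kernel rounds the same way), so 1000 ticks ~ 4000 us.
	 */
	printf("rxt_min ~ %llu us\n", tp_timer_to_us(250000, 10, 1000));
	return 0;
}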
cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cctrl(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + u32 size; + int rc; + + size = sizeof(u16) * NMTUS * NCCTRL_WIN; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + t4_read_cong_tbl(padap, (void *)temp_buff.data); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 3f62c1900fe3..230ba88a6a81 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -84,6 +84,12 @@ int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_path_mtu(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); @@ -99,6 +105,9 @@ int cudbg_collect_tp_la(struct cudbg_init *pdbg_init, int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_clk_info(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); @@ -117,6 +126,12 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init, int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cctrl(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 35575e4d020c..7373617da635 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -46,18 +46,23 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_CIM_OBQ_NCSI, cudbg_collect_cim_obq_ncsi }, { CUDBG_RSS, cudbg_collect_rss }, { CUDBG_RSS_VF_CONF, cudbg_collect_rss_vf_config }, + { CUDBG_PATH_MTU, cudbg_collect_path_mtu }, + { CUDBG_PM_STATS, cudbg_collect_pm_stats }, { CUDBG_HW_SCHED, cudbg_collect_hw_sched }, { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect }, { CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect }, { CUDBG_ULPRX_LA, cudbg_collect_ulprx_la }, { CUDBG_TP_LA, cudbg_collect_tp_la }, { CUDBG_CIM_PIF_LA, cudbg_collect_cim_pif_la }, + { CUDBG_CLK, cudbg_collect_clk_info }, { CUDBG_CIM_OBQ_RXQ0, cudbg_collect_obq_sge_rx_q0 }, { CUDBG_CIM_OBQ_RXQ1, cudbg_collect_obq_sge_rx_q1 }, { CUDBG_PCIE_INDIRECT, 
cudbg_collect_pcie_indirect }, { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect }, { CUDBG_TID_INFO, cudbg_collect_tid }, { CUDBG_MPS_TCAM, cudbg_collect_mps_tcam }, + { CUDBG_VPD_DATA, cudbg_collect_vpd_data }, + { CUDBG_CCTRL, cudbg_collect_cctrl }, { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, { CUDBG_ULPTX_LA, cudbg_collect_ulptx_la }, { CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect }, @@ -157,6 +162,12 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) len = adap->params.arch.vfcount * sizeof(struct cudbg_rss_vf_conf); break; + case CUDBG_PATH_MTU: + len = NMTUS * sizeof(u16); + break; + case CUDBG_PM_STATS: + len = sizeof(struct cudbg_pm_stats); + break; case CUDBG_HW_SCHED: len = sizeof(struct cudbg_hw_sched); break; @@ -191,6 +202,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) len = sizeof(struct cudbg_cim_pif_la); len += 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32); break; + case CUDBG_CLK: + len = sizeof(struct cudbg_clk_info); + break; case CUDBG_PCIE_INDIRECT: n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); len = sizeof(struct ireg_buf) * n * 2; @@ -206,6 +220,12 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) len = sizeof(struct cudbg_mps_tcam) * adap->params.arch.mps_tcam_size; break; + case CUDBG_VPD_DATA: + len = sizeof(struct cudbg_vpd_data); + break; + case CUDBG_CCTRL: + len = sizeof(u16) * NMTUS * NCCTRL_WIN; + break; case CUDBG_MA_INDIRECT: if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { n = sizeof(t6_ma_ireg_array) / From 4fa112f6b5fb055478e76f2b446673565d2ea35f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 26 Oct 2017 07:16:01 -0500 Subject: [PATCH 1490/2177] net: bcmgenet: Use BUG_ON instead of if condition followed by BUG Use BUG_ON instead of if condition followed by BUG. Something to notice in this particular case is that unlikely() is already being called inside BUG_ON macro. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 9713374ebf14..24b4f4ceceef 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1580,8 +1580,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i <= nr_frags; i++) { tx_cb_ptr = bcmgenet_get_txcb(priv, ring); - if (unlikely(!tx_cb_ptr)) - BUG(); + BUG_ON(!tx_cb_ptr); if (!i) { /* Transmit single SKB or head of fragment list */ From 5bca178eed601cd4584c38c5290f7abbcacf3fb3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 26 Oct 2017 07:27:45 -0500 Subject: [PATCH 1491/2177] net: faraday: ftmac100: Use BUG_ON instead of if condition followed by BUG. Notice that in this particular case unlikely() is already being called inside BUG_ON macro. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- drivers/net/ethernet/faraday/ftmac100.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 66928a922824..aecc76504b69 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -402,6 +402,7 @@ static bool ftmac100_rx_packet(struct ftmac100 *priv, int *processed) struct page *page; dma_addr_t map; int length; + bool ret; rxdes = ftmac100_rx_locate_first_segment(priv); if (!rxdes) @@ -416,8 +417,8 @@ static bool ftmac100_rx_packet(struct ftmac100 *priv, int *processed) * It is impossible to get multi-segment packets * because we always provide big enough receive buffers. */ - if (unlikely(!ftmac100_rxdes_last_segment(rxdes))) - BUG(); + ret = ftmac100_rxdes_last_segment(rxdes); + BUG_ON(!ret); /* start processing */ skb = netdev_alloc_skb_ip_align(netdev, 128); From bff7b688d5b10a8cb8cecefdea5e255408b78f2f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:51 -0400 Subject: [PATCH 1492/2177] net: dsa: add dsa_is_unused_port helper As the comment above the chunk states, the b53 driver attempts to disable the unused ports. But using ds->enabled_port_mask is misleading, because this mask reports in fact the user ports. To avoid confusion and fix this, this patch introduces an explicit dsa_is_unused_port helper which ensures the corresponding bit is not masked in any of the switch port masks. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 2 +- include/net/dsa.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b48cf0487b43..c74a50112551 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -873,7 +873,7 @@ static int b53_setup(struct dsa_switch *ds) for (port = 0; port < dev->num_ports; port++) { if (dsa_is_cpu_port(ds, port)) b53_enable_cpu_port(dev, port); - else if (!(BIT(port) & ds->enabled_port_mask)) + else if (dsa_is_unused_port(ds, port)) b53_disable_port(ds, port, NULL); } diff --git a/include/net/dsa.h b/include/net/dsa.h index 38961ef91d3d..6b1bc1c8f7e2 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -254,6 +254,13 @@ struct dsa_switch { struct dsa_port ports[]; }; +static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) +{ + u32 m = ds->enabled_port_mask | ds->dsa_port_mask | ds->cpu_port_mask; + + return !(m & BIT(p)); +} + static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { return !!(ds->cpu_port_mask & (1 << p)); From 91dee144817e51bbfbdc06af4dc1acce6e92bdda Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:52 -0400 Subject: [PATCH 1493/2177] net: dsa: mv88e6xxx: skip unused ports The unused ports are currently configured in normal mode. This does not prevent the switch from being functional, but it is unnecessary. Skip unused ports. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 677d6902807e..2d8cf66e8f74 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2005,6 +2005,9 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) /* Setup Switch Port Registers */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { + if (dsa_is_unused_port(ds, i)) + continue; + err = mv88e6xxx_setup_port(chip, i); if (err) goto unlock; From deb8ee0b51204273c120a3b3848efbb5695ad658 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:53 -0400 Subject: [PATCH 1494/2177] net: dsa: fix dsa_is_normal_port helper In order to know if a port is of type user, dsa_is_normal_port checks that the given port is not of type DSA nor CPU. This is not enough because a port can be unused. Without the previous fix, this caused the unused mv88e6xxx ports to be configured in normal mode. The ds->enabled_port_mask reports the user ports, so check this instead. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 6b1bc1c8f7e2..4ad432ad2d40 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -273,7 +273,7 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) { - return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); + return !!(ds->enabled_port_mask & BIT(p)); } static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) From 2b3e9891cb607f7c7d5a4b11fb5a6e775e7f3ef4 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:54 -0400 Subject: [PATCH 1495/2177] net: dsa: rename dsa_is_normal_port helper This patch renames dsa_is_normal_port to dsa_is_user_port because "user" is the correct term in the DSA terminology, not "normal". Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- include/net/dsa.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 2d8cf66e8f74..09a66d4d9492 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1676,7 +1676,7 @@ static int mv88e6xxx_setup_port_mode(struct mv88e6xxx_chip *chip, int port) if (dsa_is_dsa_port(chip->ds, port)) return mv88e6xxx_set_port_mode_dsa(chip, port); - if (dsa_is_normal_port(chip->ds, port)) + if (dsa_is_user_port(chip->ds, port)) return mv88e6xxx_set_port_mode_normal(chip, port); /* Setup CPU port mode depending on its supported tag format */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 4ad432ad2d40..49701d958663 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -271,7 +271,7 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) return !!((ds->dsa_port_mask) & (1 << p)); } -static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) +static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) { return !!(ds->enabled_port_mask & BIT(p)); } From 4a5b85ffe2a001b52d165931ad05d2d620daca3c Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:55 -0400 Subject: [PATCH 1496/2177] net: dsa: use dsa_is_user_port everywhere Most of the DSA code still check ds->enabled_port_mask directly to inspect a given port type instead of using the provided dsa_is_user_port helper. Change this. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 9 ++++----- drivers/net/dsa/bcm_sf2_cfp.c | 2 +- drivers/net/dsa/mt7530.c | 4 ++-- drivers/net/dsa/qca8k.c | 4 ++-- net/dsa/dsa.c | 2 +- net/dsa/legacy.c | 4 ++-- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 2574a52ee161..7f47400e557e 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -652,8 +652,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) * bcm_sf2_sw_setup */ for (port = 0; port < DSA_MAX_PORTS; port++) { - if ((1 << port) & ds->enabled_port_mask || - dsa_is_cpu_port(ds, port)) + if (dsa_is_user_port(ds, port) || dsa_is_cpu_port(ds, port)) bcm_sf2_port_disable(ds, port, NULL); } @@ -676,7 +675,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) bcm_sf2_gphy_enable_set(ds, true); for (port = 0; port < DSA_MAX_PORTS; port++) { - if ((1 << port) & ds->enabled_port_mask) + if (dsa_is_user_port(ds, port)) bcm_sf2_port_setup(ds, port, NULL); else if (dsa_is_cpu_port(ds, port)) bcm_sf2_imp_setup(ds, port); @@ -771,7 +770,7 @@ static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds) bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_CLEAR); for (port = 0; port < priv->hw_params.num_ports; port++) { - if (!((1 << port) & ds->enabled_port_mask)) + if (!dsa_is_user_port(ds, port)) continue; core_writel(priv, 1, CORE_DEFAULT_1Q_TAG_P(port)); @@ -786,7 +785,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) /* Enable all valid ports and disable those unused */ for (port = 0; port < priv->hw_params.num_ports; port++) { /* IMP port receives special treatment */ - if ((1 << port) & ds->enabled_port_mask) + if (dsa_is_user_port(ds, port)) bcm_sf2_port_setup(ds, port, NULL); else if (dsa_is_cpu_port(ds, port)) bcm_sf2_imp_setup(ds, port); diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 4feb507eeee0..b721a2009b50 100644 --- 
a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -750,7 +750,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES; if (fs->ring_cookie == RX_CLS_FLOW_DISC || - !(BIT(port_num) & ds->enabled_port_mask) || + !dsa_is_user_port(ds, port_num) || port_num >= priv->hw_params.num_ports) return -EINVAL; /* diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 21431be2831e..d1d4eea6a875 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -781,7 +781,7 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port, * same bridge. If the port is disabled, port matrix is kept * and not being setup until the port becomes enabled. */ - if (ds->enabled_port_mask & BIT(i) && i != port) { + if (dsa_is_user_port(ds, i) && i != port) { if (dsa_to_port(ds, i)->bridge_dev != bridge) continue; if (priv->ports[i].enable) @@ -818,7 +818,7 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, * in the same bridge. If the port is disabled, port matrix * is kept and not being setup until the port becomes enabled. */ - if (ds->enabled_port_mask & BIT(i) && i != port) { + if (dsa_is_user_port(ds, i) && i != port) { if (dsa_to_port(ds, i)->bridge_dev != bridge) continue; if (priv->ports[i].enable) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index d1b0b1fb632f..37125858fe80 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -536,7 +536,7 @@ qca8k_setup(struct dsa_switch *ds) /* Disable MAC by default on all user ports */ for (i = 1; i < QCA8K_NUM_PORTS; i++) - if (ds->enabled_port_mask & BIT(i)) + if (dsa_is_user_port(ds, i)) qca8k_port_set_status(priv, i, 0); /* Forward all unknown frames to CPU port for Linux processing */ @@ -556,7 +556,7 @@ qca8k_setup(struct dsa_switch *ds) } /* Invividual user ports get connected to CPU port only */ - if (ds->enabled_port_mask & BIT(i)) { + if (dsa_is_user_port(ds, i)) { int shift = 16 * (i % 2); qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index a3abf7a7b9a2..fe0081730305 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -201,7 +201,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_PM_SLEEP static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { - return ds->enabled_port_mask & (1 << p) && ds->ports[p].slave; + return dsa_is_user_port(ds, p) && ds->ports[p].slave; } int dsa_switch_suspend(struct dsa_switch *ds) diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 93c1c43bcc58..0b79c6171d0d 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -190,7 +190,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, ds->ports[i].dn = cd->port_dn[i]; ds->ports[i].cpu_dp = dst->cpu_dp; - if (!(ds->enabled_port_mask & (1 << i))) + if (dsa_is_user_port(ds, i)) continue; ret = dsa_slave_create(&ds->ports[i], cd->port_names[i]); @@ -258,7 +258,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) /* Destroy network devices for physical switch ports. */ for (port = 0; port < ds->num_ports; port++) { - if (!(ds->enabled_port_mask & (1 << port))) + if (!dsa_is_user_port(ds, port)) continue; if (!ds->ports[port].slave) From 02bc6e546e858b209c3ebe380a13a73b333b1b3f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:56 -0400 Subject: [PATCH 1497/2177] net: dsa: introduce dsa_user_ports helper Introduce a dsa_user_ports() helper to return the ds->enabled_port_mask mask which is more explicit. 
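The DSA patches in this stretch replace open-coded ds->enabled_port_mask tests with named predicates such as dsa_is_user_port() and, in the commit above, dsa_user_ports(); two patches further on the same predicates are re-expressed over an enumerated per-port type. Below is a tiny userspace model of the semantics the series converges on, using a made-up five-port layout; the real helpers live in include/net/dsa.h and operate on struct dsa_switch.

#include <stdio.h>

enum port_type { PORT_UNUSED, PORT_CPU, PORT_DSA, PORT_USER };

struct model_switch {
	int num_ports;
	enum port_type type[8];
};

static int is_user_port(const struct model_switch *ds, int p)
{
	return ds->type[p] == PORT_USER;
}

/* Mirrors dsa_user_ports(): a bitmask of all user-facing ports. */
static unsigned int user_ports(const struct model_switch *ds)
{
	unsigned int mask = 0;
	int p;

	for (p = 0; p < ds->num_ports; p++)
		if (is_user_port(ds, p))
			mask |= 1u << p;
	return mask;
}

int main(void)
{
	struct model_switch ds = {
		.num_ports = 5,
		.type = { PORT_USER, PORT_USER, PORT_UNUSED, PORT_USER, PORT_CPU },
	};

	/* Ports 0, 1 and 3 are user ports -> mask 0x0b. */
	printf("user port mask: 0x%02x\n", user_ports(&ds));
	return 0;
}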
This will also minimize diffs when touching this internal mask. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 2 +- drivers/net/dsa/mv88e6060.c | 5 ++--- drivers/net/dsa/qca8k.c | 3 +-- include/net/dsa.h | 5 +++++ net/dsa/dsa2.c | 2 +- net/dsa/legacy.c | 2 +- 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d1d4eea6a875..627c039f12ca 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -688,7 +688,7 @@ mt7530_cpu_port_enable(struct mt7530_priv *priv, * the switch */ mt7530_write(priv, MT7530_PCR_P(port), - PCR_MATRIX(priv->ds->enabled_port_mask)); + PCR_MATRIX(dsa_user_ports(priv->ds))); return 0; } diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index f78b9e13be1c..45768e3c5bc5 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -175,9 +175,8 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) */ REG_WRITE(addr, PORT_VLAN_MAP, ((p & 0xf) << PORT_VLAN_MAP_DBNUM_SHIFT) | - (dsa_is_cpu_port(ds, p) ? - ds->enabled_port_mask : - BIT(dsa_to_port(ds, p)->cpu_dp->index))); + (dsa_is_cpu_port(ds, p) ? dsa_user_ports(ds) : + BIT(dsa_to_port(ds, p)->cpu_dp->index))); /* Port Association Vector: when learning source addresses * of packets, add the address to the address database using diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 37125858fe80..cf72e274275f 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -551,8 +551,7 @@ qca8k_setup(struct dsa_switch *ds) /* CPU port gets connected to all user ports of the switch */ if (dsa_is_cpu_port(ds, i)) { qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), - QCA8K_PORT_LOOKUP_MEMBER, - ds->enabled_port_mask); + QCA8K_PORT_LOOKUP_MEMBER, dsa_user_ports(ds)); } /* Invividual user ports get connected to CPU port only */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 49701d958663..dc7728062396 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -276,6 +276,11 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) return !!(ds->enabled_port_mask & BIT(p)); } +static inline u32 dsa_user_ports(struct dsa_switch *ds) +{ + return ds->enabled_port_mask; +} + static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) { return &ds->ports[p]; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 62485a57dbfc..d43c59c91058 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -312,7 +312,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) * the slave MDIO bus driver rely on these values for probing PHY * devices or not */ - ds->phys_mii_mask = ds->enabled_port_mask; + ds->phys_mii_mask |= dsa_user_ports(ds); /* Add the switch to devlink before calling setup, so that setup can * add dpipe tables diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 0b79c6171d0d..fa543c4a6061 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -136,7 +136,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, /* Make the built-in MII bus mask match the number of ports, * switch drivers can override this later */ - ds->phys_mii_mask = ds->enabled_port_mask; + ds->phys_mii_mask |= dsa_user_ports(ds); /* * If the CPU connects to this switch, set the switch tree From 057cad2c59d73b0c4a6638546f3099d6fb444094 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:57 -0400 Subject: [PATCH 1498/2177] net: dsa: define port types Introduce an enumerated type 
for ports, which will be way more explicit to identify a port type instead of digging into switch port masks. A port can be of type CPU, DSA, user, or unused by default. This is a static parsed information that cannot be changed at runtime. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ net/dsa/dsa2.c | 3 +++ net/dsa/legacy.c | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index dc7728062396..8da20c4a6552 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -180,6 +180,13 @@ struct dsa_port { struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); + enum { + DSA_PORT_TYPE_UNUSED = 0, + DSA_PORT_TYPE_CPU, + DSA_PORT_TYPE_DSA, + DSA_PORT_TYPE_USER, + } type; + struct dsa_switch *ds; unsigned int index; const char *name; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index d43c59c91058..dd6f35b92937 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -185,6 +185,7 @@ static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds) return err; ds->dsa_port_mask |= BIT(index); + port->type = DSA_PORT_TYPE_DSA; } return 0; @@ -504,6 +505,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, * net/dsa/dsa.c::dsa_switch_setup_one does. */ ds->cpu_port_mask |= BIT(index); + port->type = DSA_PORT_TYPE_CPU; tag_protocol = ds->ops->get_tag_protocol(ds); tag_ops = dsa_resolve_tag_protocol(tag_protocol); @@ -543,6 +545,7 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) * net/dsa/dsa.c::dsa_switch_setup_one does. */ ds->enabled_port_mask |= BIT(index); + port->type = DSA_PORT_TYPE_USER; } } diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index fa543c4a6061..9fd5b3adab1e 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -101,6 +101,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; int index = ds->index; + struct dsa_port *dp; int i, ret; /* @@ -109,6 +110,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, for (i = 0; i < ds->num_ports; i++) { char *name; + dp = &ds->ports[i]; + name = cd->port_names[i]; if (name == NULL) continue; @@ -122,10 +125,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, dst->cpu_dp = &ds->ports[i]; dst->cpu_dp->master = master; ds->cpu_port_mask |= 1 << i; + dp->type = DSA_PORT_TYPE_CPU; } else if (!strcmp(name, "dsa")) { ds->dsa_port_mask |= 1 << i; + dp->type = DSA_PORT_TYPE_DSA; } else { ds->enabled_port_mask |= 1 << i; + dp->type = DSA_PORT_TYPE_USER; } valid_name_found = true; } From c38c5a66506e4e8223fd03e950b1bde99190701e Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:58 -0400 Subject: [PATCH 1499/2177] net: dsa: use new port type in helpers Now that DSA exposes an enumerated type for the ports, we can use them directly instead of checking bitmaps, which is more consistent. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 8da20c4a6552..07dfbd7f4fd5 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -261,36 +261,41 @@ struct dsa_switch { struct dsa_port ports[]; }; +static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) +{ + return &ds->ports[p]; +} + static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { - u32 m = ds->enabled_port_mask | ds->dsa_port_mask | ds->cpu_port_mask; - - return !(m & BIT(p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; } static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { - return !!(ds->cpu_port_mask & (1 << p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_CPU; } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) { - return !!((ds->dsa_port_mask) & (1 << p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_DSA; } static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) { - return !!(ds->enabled_port_mask & BIT(p)); + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER; } static inline u32 dsa_user_ports(struct dsa_switch *ds) { - return ds->enabled_port_mask; -} + u32 mask = 0; + int p; -static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) -{ - return &ds->ports[p]; + for (p = 0; p < ds->num_ports; p++) + if (dsa_is_user_port(ds, p)) + mask |= BIT(p); + + return mask; } static inline u8 dsa_upstream_port(struct dsa_switch *ds) From 5749f0f3772b9d98f37e3a92539f49fafaa64eca Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 11:22:59 -0400 Subject: [PATCH 1500/2177] net: dsa: remove port masks Now that DSA core provides port types, there is no need to keep this information at the switch level. This is a static information that is part of a DSA core dsa_port structure. Remove them. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 3 --- net/dsa/dsa2.c | 11 ----------- net/dsa/legacy.c | 3 --- 3 files changed, 17 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 07dfbd7f4fd5..50e276dc4c01 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -240,9 +240,6 @@ struct dsa_switch { /* * Slave mii_bus and devices for the individual ports. */ - u32 dsa_port_mask; - u32 cpu_port_mask; - u32 enabled_port_mask; u32 phys_mii_mask; struct mii_bus *slave_mii_bus; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index dd6f35b92937..ec58654a71cd 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -184,7 +184,6 @@ static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err != 0) return err; - ds->dsa_port_mask |= BIT(index); port->type = DSA_PORT_TYPE_DSA; } @@ -500,11 +499,6 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, dst->cpu_dp->master = ethernet_dev; } - /* Initialize cpu_port_mask now for drv->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. - */ - ds->cpu_port_mask |= BIT(index); port->type = DSA_PORT_TYPE_CPU; tag_protocol = ds->ops->get_tag_protocol(ds); @@ -540,11 +534,6 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err) return err; } else { - /* Initialize enabled_port_mask now for drv->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. 
- */ - ds->enabled_port_mask |= BIT(index); port->type = DSA_PORT_TYPE_USER; } diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 9fd5b3adab1e..93e1b116ef83 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -124,13 +124,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, } dst->cpu_dp = &ds->ports[i]; dst->cpu_dp->master = master; - ds->cpu_port_mask |= 1 << i; dp->type = DSA_PORT_TYPE_CPU; } else if (!strcmp(name, "dsa")) { - ds->dsa_port_mask |= 1 << i; dp->type = DSA_PORT_TYPE_DSA; } else { - ds->enabled_port_mask |= 1 << i; dp->type = DSA_PORT_TYPE_USER; } valid_name_found = true; From 57922b0a2f7ef9effbcdbbf7d1f8dad95aa567f7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 26 Oct 2017 11:51:19 -0400 Subject: [PATCH 1501/2177] bnxt_en: Update firmware interface to 1.8.3.1 Vxlan encap/decap filters are added to this firmware spec. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 420 +++++++++++++++++- 1 file changed, 396 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index cb04cc76e8ad..c99f4d0880e4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -11,21 +11,21 @@ #ifndef BNXT_HSI_H #define BNXT_HSI_H -/* HSI and HWRM Specification 1.8.1 */ +/* HSI and HWRM Specification 1.8.3 */ #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 8 -#define HWRM_VERSION_UPDATE 1 +#define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 4 /* non-zero means beta version */ +#define HWRM_VERSION_RSVD 1 /* non-zero means beta version */ -#define HWRM_VERSION_STR "1.8.1.4" +#define HWRM_VERSION_STR "1.8.3.1" /* * Following is the signature for HWRM message field that indicates not * applicable (All F's). Need to cast it the size of the field if needed. */ #define HWRM_NA_SIGNATURE ((__le32)(-1)) #define HWRM_MAX_REQ_LEN (128) /* hwrm_func_buf_rgtr */ -#define HWRM_MAX_RESP_LEN (248) /* hwrm_selftest_qlist */ +#define HWRM_MAX_RESP_LEN (280) /* hwrm_selftest_qlist */ #define HW_HASH_INDEX_SIZE 0x80 /* 7 bit indirection table index. 
*/ #define HW_HASH_KEY_SIZE 40 #define HWRM_RESP_VALID_KEY 1 /* valid key for HWRM response */ @@ -111,6 +111,7 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL #define ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL #define ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE 0x33UL + #define ASYNC_EVENT_CMPL_EVENT_ID_LLFC_PFC_CHANGE 0x34UL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; u8 opaque_v; @@ -835,8 +836,7 @@ struct hwrm_func_qcfg_output { u8 port_pf_cnt; #define FUNC_QCFG_RESP_PORT_PF_CNT_UNAVAIL 0x0UL __le16 dflt_vnic_id; - u8 unused_0; - u8 unused_1; + __le16 max_mtu_configured; __le32 min_bw; #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0 @@ -873,12 +873,12 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL #define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL #define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL - u8 unused_2; + u8 unused_0; __le16 alloc_vfs; __le32 alloc_mcast_filters; __le32 alloc_hw_ring_grps; __le16 alloc_sp_tx_rings; - u8 unused_3; + u8 unused_1; u8 valid; }; @@ -3407,6 +3407,7 @@ struct hwrm_vnic_cfg_input { #define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE 0x8UL #define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE 0x10UL #define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE 0x20UL + #define VNIC_CFG_REQ_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE 0x40UL __le32 enables; #define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP 0x1UL #define VNIC_CFG_REQ_ENABLES_RSS_RULE 0x2UL @@ -3463,6 +3464,7 @@ struct hwrm_vnic_qcaps_output { #define VNIC_QCAPS_RESP_FLAGS_ROCE_DUAL_VNIC_CAP 0x8UL #define VNIC_QCAPS_RESP_FLAGS_ROCE_ONLY_VNIC_CAP 0x10UL #define VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP 0x20UL + #define VNIC_QCAPS_RESP_FLAGS_ROCE_MIRROING_CAPABLE_VNIC_CAP 0x40UL __le32 unused_2; u8 unused_3; u8 unused_4; @@ -3994,6 +3996,7 @@ struct hwrm_cfa_l2_filter_alloc_input { #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_7; __le16 dst_id; @@ -4122,6 +4125,14 @@ struct hwrm_cfa_l2_set_rx_mask_output { u8 valid; }; +/* Command specific Error Codes (8 bytes) */ +struct hwrm_cfa_l2_set_rx_mask_cmd_err { + u8 code; + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL + u8 unused_0[7]; +}; + /* hwrm_cfa_tunnel_filter_alloc */ /* Input (88 bytes) */ struct hwrm_cfa_tunnel_filter_alloc_input { @@ -4161,6 +4172,7 @@ struct hwrm_cfa_tunnel_filter_alloc_input { #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_0; __le32 vni; @@ -4323,6 +4335,7 @@ struct hwrm_cfa_ntuple_filter_alloc_input { #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 pri_hint; #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL @@ -4355,6 +4368,14 @@ struct 
hwrm_cfa_ntuple_filter_alloc_output { u8 valid; }; +/* Command specific Error Codes (8 bytes) */ +struct hwrm_cfa_ntuple_filter_alloc_cmd_err { + u8 code; + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL + u8 unused_0[7]; +}; + /* hwrm_cfa_ntuple_filter_free */ /* Input (24 bytes) */ struct hwrm_cfa_ntuple_filter_free_input { @@ -4413,6 +4434,116 @@ struct hwrm_cfa_ntuple_filter_cfg_output { u8 valid; }; +/* hwrm_cfa_decap_filter_alloc */ +/* Input (104 bytes) */ +struct hwrm_cfa_decap_filter_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL 0x1UL + __le32 enables; + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE 0x1UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID 0x2UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR 0x8UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_OVLAN_VID 0x10UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IVLAN_VID 0x20UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_OVLAN_VID 0x40UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID 0x80UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE 0x100UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR 0x200UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR 0x400UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE 0x800UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL 0x1000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_PORT 0x2000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT 0x4000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_ID 0x8000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID 0x10000UL + __be32 tunnel_id; + u8 tunnel_type; + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL + u8 unused_0; + __le16 unused_1; + u8 src_macaddr[6]; + u8 unused_2; + u8 unused_3; + u8 dst_macaddr[6]; + __be16 ovlan_vid; + __be16 ivlan_vid; + __be16 t_ovlan_vid; + __be16 t_ivlan_vid; + __be16 ethertype; + u8 ip_addr_type; + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL + u8 ip_protocol; + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x6UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x11UL + u8 unused_4; + u8 unused_5; + u8 unused_6[3]; + u8 unused_7; + __be32 src_ipaddr[4]; + __be32 dst_ipaddr[4]; + __be16 src_port; + __be16 dst_port; + __le16 dst_id; + __le16 l2_ctxt_ref_id; +}; + +/* Output (16 bytes) */ +struct hwrm_cfa_decap_filter_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + 
__le32 decap_filter_id; + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 valid; +}; + +/* hwrm_cfa_decap_filter_free */ +/* Input (24 bytes) */ +struct hwrm_cfa_decap_filter_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 decap_filter_id; + __le32 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_cfa_decap_filter_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_cfa_flow_alloc */ /* Input (128 bytes) */ struct hwrm_cfa_flow_alloc_input { @@ -4634,6 +4765,7 @@ struct hwrm_tunnel_dst_port_query_input { u8 tunnel_type; #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL u8 unused_0[7]; }; @@ -4662,9 +4794,10 @@ struct hwrm_tunnel_dst_port_alloc_input { u8 tunnel_type; #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL u8 unused_0; __be16 tunnel_dst_port_val; - __le32 unused_1; + __be32 unused_1; }; /* Output (16 bytes) */ @@ -4693,6 +4826,7 @@ struct hwrm_tunnel_dst_port_free_input { u8 tunnel_type; #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL u8 unused_0; __le16 tunnel_dst_port_id; __le32 unused_1; @@ -4848,6 +4982,8 @@ struct hwrm_fw_reset_input { #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP 0x5UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP 0x6UL u8 selfrst_status; #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL @@ -4888,6 +5024,8 @@ struct hwrm_fw_qstatus_input { #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_AP 0x5UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_CHIP 0x6UL u8 unused_0[7]; }; @@ -5324,6 +5462,32 @@ struct hwrm_wol_reason_qcfg_output { u8 valid; }; +/* hwrm_dbg_read_direct */ +/* Input (32 bytes) */ +struct hwrm_dbg_read_direct_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_dest_addr; + __le32 read_addr; + __le32 read_len32; +}; + +/* Output (16 bytes) */ +struct hwrm_dbg_read_direct_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_nvm_read */ /* Input (40 bytes) */ struct hwrm_nvm_read_input { @@ -5676,6 +5840,105 @@ struct hwrm_nvm_install_update_cmd_err { u8 unused_0[7]; }; +/* hwrm_nvm_get_variable */ +/* Input (40 bytes) */ +struct hwrm_nvm_get_variable_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 dest_data_addr; + __le16 data_len; + __le16 option_num; + #define NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_0 0x0UL + #define NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF 0xffffUL + __le16 dimensions; + __le16 index_0; + __le16 index_1; + __le16 index_2; + __le16 index_3; 
+ u8 flags; + #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + u8 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_nvm_get_variable_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 data_len; + __le16 option_num; + #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0 0x0UL + #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF 0xffffUL + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 valid; +}; + +/* Command specific Error Codes (8 bytes) */ +struct hwrm_nvm_get_variable_cmd_err { + u8 code; + #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + u8 unused_0[7]; +}; + +/* hwrm_nvm_set_variable */ +/* Input (40 bytes) */ +struct hwrm_nvm_set_variable_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 src_data_addr; + __le16 data_len; + __le16 option_num; + #define NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_0 0x0UL + #define NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF 0xffffUL + __le16 dimensions; + __le16 index_0; + __le16 index_1; + __le16 index_2; + __le16 index_3; + u8 flags; + #define NVM_SET_VARIABLE_REQ_FLAGS_FORCE_FLUSH 0x1UL + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_MASK 0xeUL + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_SFT 1 + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_NONE (0x0UL << 1) + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1 (0x1UL << 1) + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_LAST NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1 + u8 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_nvm_set_variable_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + +/* Command specific Error Codes (8 bytes) */ +struct hwrm_nvm_set_variable_cmd_err { + u8 code; + #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + u8 unused_0[7]; +}; + /* hwrm_selftest_qlist */ /* Input (16 bytes) */ struct hwrm_selftest_qlist_input { @@ -5686,7 +5949,7 @@ struct hwrm_selftest_qlist_input { __le64 resp_addr; }; -/* Output (248 bytes) */ +/* Output (280 bytes) */ struct hwrm_selftest_qlist_output { __le16 error_code; __le16 req_type; @@ -5698,15 +5961,15 @@ struct hwrm_selftest_qlist_output { #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_LINK_TEST 0x2UL #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_REGISTER_TEST 0x4UL #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_MEMORY_TEST 0x8UL - #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_ETHERNET_EYE_TEST 0x20UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_ETHERNET_SERDES_TEST 0x20UL u8 offline_tests; #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_NVM_TEST 0x1UL #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_LINK_TEST 0x2UL #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_REGISTER_TEST 0x4UL #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_MEMORY_TEST 0x8UL - #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_ETHERNET_EYE_TEST 0x20UL + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_PCIE_SERDES_TEST 0x10UL + #define 
SELFTEST_QLIST_RESP_OFFLINE_TESTS_ETHERNET_SERDES_TEST 0x20UL u8 unused_0; __le16 test_timeout; u8 unused_1; @@ -5719,6 +5982,11 @@ struct hwrm_selftest_qlist_output { char test5_name[32]; char test6_name[32]; char test7_name[32]; + __le32 unused_3; + u8 unused_4; + u8 unused_5; + u8 unused_6; + u8 valid; }; /* hwrm_selftest_exec */ @@ -5734,8 +6002,8 @@ struct hwrm_selftest_exec_input { #define SELFTEST_EXEC_REQ_FLAGS_LINK_TEST 0x2UL #define SELFTEST_EXEC_REQ_FLAGS_REGISTER_TEST 0x4UL #define SELFTEST_EXEC_REQ_FLAGS_MEMORY_TEST 0x8UL - #define SELFTEST_EXEC_REQ_FLAGS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_EYE_TEST 0x20UL + #define SELFTEST_EXEC_REQ_FLAGS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_SERDES_TEST 0x20UL u8 unused_0[7]; }; @@ -5750,16 +6018,21 @@ struct hwrm_selftest_exec_output { #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_LINK_TEST 0x2UL #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_REGISTER_TEST 0x4UL #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_MEMORY_TEST 0x8UL - #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_ETHERNET_EYE_TEST 0x20UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_ETHERNET_SERDES_TEST 0x20UL u8 test_success; #define SELFTEST_EXEC_RESP_TEST_SUCCESS_NVM_TEST 0x1UL #define SELFTEST_EXEC_RESP_TEST_SUCCESS_LINK_TEST 0x2UL #define SELFTEST_EXEC_RESP_TEST_SUCCESS_REGISTER_TEST 0x4UL #define SELFTEST_EXEC_RESP_TEST_SUCCESS_MEMORY_TEST 0x8UL - #define SELFTEST_EXEC_RESP_TEST_SUCCESS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_EXEC_RESP_TEST_SUCCESS_ETHERNET_EYE_TEST 0x20UL - __le16 unused_0[3]; + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_ETHERNET_SERDES_TEST 0x20UL + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 unused_4; + u8 valid; }; /* hwrm_selftest_irq */ @@ -5772,12 +6045,50 @@ struct hwrm_selftest_irq_input { __le64 resp_addr; }; -/* Output (8 bytes) */ +/* Output (16 bytes) */ struct hwrm_selftest_irq_output { __le16 error_code; __le16 req_type; __le16 seq_id; __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + +/* hwrm_selftest_retrieve_serdes_data */ +/* Input (32 bytes) */ +struct hwrm_selftest_retrieve_serdes_data_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 resp_data_addr; + __le32 resp_data_offset; + __le16 data_len; + u8 flags; + #define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_UNUSED_TEST_MASK 0xfUL + #define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_UNUSED_TEST_SFT 0 + #define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_ETHERNET_SERDES_TEST 0x20UL + u8 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_selftest_retrieve_serdes_data_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 total_data_len; + __le16 copied_data_len; + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 valid; }; /* Hardware Resource Manager Specification */ @@ -5938,10 +6249,16 @@ struct cmd_nums { #define HWRM_CFA_DECAP_FILTER_ALLOC (0x108UL) #define HWRM_CFA_DECAP_FILTER_FREE (0x109UL) #define HWRM_CFA_VLAN_ANTISPOOF_QCFG (0x10aUL) + #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC (0x10bUL) + #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE (0x10cUL) + #define HWRM_CFA_PAIR_ALLOC (0x10dUL) + #define HWRM_CFA_PAIR_FREE 
(0x10eUL) + #define HWRM_CFA_PAIR_INFO (0x10fUL) + #define HWRM_FW_IPC_MSG (0x110UL) #define HWRM_SELFTEST_QLIST (0x200UL) #define HWRM_SELFTEST_EXEC (0x201UL) #define HWRM_SELFTEST_IRQ (0x202UL) - #define HWRM_SELFTEST_RETREIVE_EYE_DATA (0x203UL) + #define HWRM_SELFTEST_RETRIEVE_SERDES_DATA (0x203UL) #define HWRM_DBG_READ_DIRECT (0xff10UL) #define HWRM_DBG_READ_INDIRECT (0xff11UL) #define HWRM_DBG_WRITE_DIRECT (0xff12UL) @@ -5949,6 +6266,9 @@ struct cmd_nums { #define HWRM_DBG_DUMP (0xff14UL) #define HWRM_DBG_ERASE_NVM (0xff15UL) #define HWRM_DBG_CFG (0xff16UL) + #define HWRM_DBG_COREDUMP_LIST (0xff17UL) + #define HWRM_DBG_COREDUMP_INITIATE (0xff18UL) + #define HWRM_DBG_COREDUMP_RETRIEVE (0xff19UL) #define HWRM_NVM_FACTORY_DEFAULTS (0xffeeUL) #define HWRM_NVM_VALIDATE_OPTION (0xffefUL) #define HWRM_NVM_FLUSH (0xfff0UL) @@ -6123,6 +6443,58 @@ struct rx_port_stats { __le64 rx_stat_err; }; +/* VXLAN IPv4 encapsulation structure (16 bytes) */ +struct hwrm_vxlan_ipv4_hdr { + u8 ver_hlen; + #define VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_MASK 0xfUL + #define VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT 0 + #define VXLAN_IPV4_HDR_VER_HLEN_VERSION_MASK 0xf0UL + #define VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT 4 + u8 tos; + __be16 ip_id; + __be16 flags_frag_offset; + u8 ttl; + u8 protocol; + __be32 src_ip_addr; + __be32 dest_ip_addr; +}; + +/* VXLAN IPv6 encapsulation structure (32 bytes) */ +struct hwrm_vxlan_ipv6_hdr { + __be32 ver_tc_flow_label; + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_SFT 0x1cUL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_MASK 0xf0000000UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_SFT 0x14UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_MASK 0xff00000UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_SFT 0x0UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK 0xfffffUL + __be16 payload_len; + u8 next_hdr; + u8 ttl; + __be32 src_ip_addr[4]; + __be32 dest_ip_addr[4]; +}; + +/* VXLAN encapsulation structure (72 bytes) */ +struct hwrm_cfa_encap_data_vxlan { + u8 src_mac_addr[6]; + __le16 unused_0; + u8 dst_mac_addr[6]; + u8 num_vlan_tags; + u8 unused_1; + __be16 ovlan_tpid; + __be16 ovlan_tci; + __be16 ivlan_tpid; + __be16 ivlan_tci; + __le32 l3[10]; + #define CFA_ENCAP_DATA_VXLAN_L3_VER_MASK 0xfUL + #define CFA_ENCAP_DATA_VXLAN_L3_VER_IPV4 0x4UL + #define CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6 0x6UL + __be16 src_port; + __be16 dst_port; + __be32 vni; +}; + /* Periodic Statistics Context DMA to host (160 bytes) */ struct ctx_hw_stats { __le64 rx_ucast_pkts; From 8ed693b7bbd179949f6947adaae5eff2e386a534 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Thu, 26 Oct 2017 11:51:20 -0400 Subject: [PATCH 1502/2177] bnxt_en: Add PCIe device ID for bcm58804 Add new PCIe device ID and chip number for bcm58804 Signed-off-by: Ray Jui Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 24d55724ceff..a9db4e677d24 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -108,6 +108,7 @@ enum board_idx { BCM57452, BCM57454, BCM58802, + BCM58804, BCM58808, NETXTREME_E_VF, NETXTREME_C_VF, @@ -146,6 +147,7 @@ static const struct { [BCM57452] = { "Broadcom BCM57452 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet" }, [BCM57454] = { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" }, + [BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" }, [NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" }, @@ -186,6 +188,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16f0), .driver_data = BCM58808 }, { PCI_VDEVICE(BROADCOM, 0x16f1), .driver_data = BCM57452 }, { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 }, + { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 }, #ifdef CONFIG_BNXT_SRIOV { PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF }, { PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF }, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index c911e69ff25f..d193923d20a6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1013,6 +1013,7 @@ struct bnxt { #define CHIP_NUM_5745X 0xd730 #define CHIP_NUM_58802 0xd802 +#define CHIP_NUM_58804 0xd804 #define CHIP_NUM_58808 0xd808 #define BNXT_CHIP_NUM_5730X(chip_num) \ @@ -1048,6 +1049,7 @@ struct bnxt { #define BNXT_CHIP_NUM_588XX(chip_num) \ ((chip_num) == CHIP_NUM_58802 || \ + (chip_num) == CHIP_NUM_58804 || \ (chip_num) == CHIP_NUM_58808) struct net_device *dev; From 618784e3ee1870e43e50e1c7922cc123cc050566 Mon Sep 17 00:00:00 2001 From: Rob Miller Date: Thu, 26 Oct 2017 11:51:21 -0400 Subject: [PATCH 1503/2177] bnxt_en: adding PCI ID for SMARTNIC VF support Signed-off-by: Rob Miller Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a9db4e677d24..d2403f47e102 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -112,6 +112,7 @@ enum board_idx { BCM58808, NETXTREME_E_VF, NETXTREME_C_VF, + NETXTREME_S_VF, }; /* indexed by enum above */ @@ -151,6 +152,7 @@ static const struct { [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" }, [NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" }, + [NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { @@ -198,6 +200,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF }, { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF }, { PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF }, #endif { 0 } }; @@ -222,7 +225,8 @@ static struct workqueue_struct *bnxt_pf_wq; static bool bnxt_vf_pciid(enum board_idx idx) { - return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF); + return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF || + idx == NETXTREME_S_VF); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) From e0ad8fc5980b362028cfd63ec037f4b491e726c6 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 26 Oct 2017 11:51:22 -0400 Subject: [PATCH 1504/2177] bnxt_en: Check for zero length value in bnxt_get_nvram_item(). Return -EINVAL if the length is zero and not proceed to do essentially nothing. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 3cbe771b3352..85be1cb41f1a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1773,6 +1773,9 @@ static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset, dma_addr_t dma_handle; struct hwrm_nvm_read_input req = {0}; + if (!length) + return -EINVAL; + buf = dma_alloc_coherent(&bp->pdev->dev, length, &dma_handle, GFP_KERNEL); if (!buf) { From 431aa1eb20d8ae2674723292adb832b968da868e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 26 Oct 2017 11:51:23 -0400 Subject: [PATCH 1505/2177] bnxt_en: Get firmware package version one time. The current code retrieves the firmware package version from firmware everytime ethtool -i is run. There is no reason to do that as the firmware will not change while the driver is loaded. Get the version once at init time. Also, display the full 4-part firmware version string and remove the less useful interface spec version. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +-- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 30 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d2403f47e102..ffc6c494f6ea 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4892,9 +4892,9 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) resp->hwrm_intf_upd); netdev_warn(bp->dev, "Please update firmware with HWRM interface 1.0.0 or newer.\n"); } - snprintf(bp->fw_ver_str, BC_HWRM_STR_LEN, "%d.%d.%d/%d.%d.%d", + snprintf(bp->fw_ver_str, BC_HWRM_STR_LEN, "%d.%d.%d.%d", resp->hwrm_fw_maj, resp->hwrm_fw_min, resp->hwrm_fw_bld, - resp->hwrm_intf_maj, resp->hwrm_intf_min, resp->hwrm_intf_upd); + resp->hwrm_fw_rsvd); bp->hwrm_cmd_timeout = le16_to_cpu(resp->def_req_timeout); if (!bp->hwrm_cmd_timeout) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 85be1cb41f1a..fe93625e0539 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -26,8 +26,6 @@ #define FLASH_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) #define INSTALL_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) -static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen); - static u32 bnxt_get_msglevel(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -822,20 +820,10 @@ static void bnxt_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct bnxt *bp = netdev_priv(dev); - char *pkglog; - char *pkgver = NULL; - pkglog = kmalloc(BNX_PKG_LOG_MAX_LENGTH, GFP_KERNEL); - if (pkglog) - pkgver = bnxt_get_pkgver(dev, pkglog, BNX_PKG_LOG_MAX_LENGTH); strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); - if (pkgver && *pkgver != 0 && isdigit(*pkgver)) - snprintf(info->fw_version, sizeof(info->fw_version) - 1, - "%s pkg %s", bp->fw_ver_str, pkgver); - else - strlcpy(info->fw_version, bp->fw_ver_str, - sizeof(info->fw_version)); + strlcpy(info->fw_version, bp->fw_ver_str, sizeof(info->fw_version)); strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info)); info->n_stats = bnxt_get_num_stats(bp); info->testinfo_len = bp->num_tests; @@ -843,7 +831,6 @@ static void bnxt_get_drvinfo(struct net_device *dev, info->eedump_len = 0; /* TODO CHIMP FW: reg dump details */ info->regdump_len = 0; - kfree(pkglog); } static void bnxt_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) @@ -2503,8 +2490,23 @@ void bnxt_ethtool_init(struct bnxt *bp) struct hwrm_selftest_qlist_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_selftest_qlist_input req = {0}; struct bnxt_test_info *test_info; + struct net_device *dev = bp->dev; + char *pkglog; int i, rc; + pkglog = kzalloc(BNX_PKG_LOG_MAX_LENGTH, GFP_KERNEL); + if (pkglog) { + char *pkgver; + int len; + + pkgver = bnxt_get_pkgver(dev, pkglog, BNX_PKG_LOG_MAX_LENGTH); + if (pkgver && *pkgver != 0 && isdigit(*pkgver)) { + len = strlen(bp->fw_ver_str); + snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1, + "/pkg %s", pkgver); + } + kfree(pkglog); + } if (bp->hwrm_spec_code < 0x10704 || !BNXT_SINGLE_PF(bp)) return; From c1a7bdff17247332ecff7f243e42d269b3f74c65 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 26 Oct 2017 11:51:24 -0400 Subject: [PATCH 1506/2177] bnxt_en: Optimize 
.ndo_set_mac_address() for VFs. No need to call bnxt_approve_mac() which will send a message to the PF if the MAC address hasn't changed. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ffc6c494f6ea..ef5a5e52eb04 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7249,13 +7249,13 @@ static int bnxt_change_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + if (ether_addr_equal(addr->sa_data, dev->dev_addr)) + return 0; + rc = bnxt_approve_mac(bp, addr->sa_data); if (rc) return rc; - if (ether_addr_equal(addr->sa_data, dev->dev_addr)) - return 0; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); if (netif_running(dev)) { bnxt_close_nic(bp, false, false); From 7eb9bb3a0c7c29741df2249cc3b99f06a7978d61 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 26 Oct 2017 11:51:25 -0400 Subject: [PATCH 1507/2177] bnxt_en: Check maximum supported MTU from firmware. Some NICs have a firmware enforced maximum MTU setting by management firmware. Set up netdev->max_mtu accordingly. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ef5a5e52eb04..285863ba51a3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2835,7 +2835,8 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) if (page_mode) { if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) return -EOPNOTSUPP; - bp->dev->max_mtu = BNXT_MAX_PAGE_MODE_MTU; + bp->dev->max_mtu = + min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU); bp->flags &= ~BNXT_FLAG_AGG_RINGS; bp->flags |= BNXT_FLAG_NO_AGG_RINGS | BNXT_FLAG_RX_PAGE_MODE; bp->dev->hw_features &= ~NETIF_F_LRO; @@ -2843,7 +2844,7 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) bp->rx_dir = DMA_BIDIRECTIONAL; bp->rx_skb_func = bnxt_rx_page_skb; } else { - bp->dev->max_mtu = BNXT_MAX_MTU; + bp->dev->max_mtu = bp->max_mtu; bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE; bp->rx_dir = DMA_FROM_DEVICE; bp->rx_skb_func = bnxt_rx_skb; @@ -4732,6 +4733,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) else bp->br_mode = BRIDGE_MODE_UNDEF; + bp->max_mtu = le16_to_cpu(resp->max_mtu_configured); + if (!bp->max_mtu) + bp->max_mtu = BNXT_MAX_MTU; + func_qcfg_exit: mutex_unlock(&bp->hwrm_cmd_lock); return rc; @@ -8095,10 +8100,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->features |= dev->hw_features | NETIF_F_HIGHDMA; dev->priv_flags |= IFF_UNICAST_FLT; - /* MTU range: 60 - 9500 */ - dev->min_mtu = ETH_ZLEN; - dev->max_mtu = BNXT_MAX_MTU; - #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); mutex_init(&bp->sriov_lock); @@ -8146,6 +8147,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_ethtool_init(bp); bnxt_dcb_init(bp); + /* MTU range: 60 - FW defined max */ + dev->min_mtu = ETH_ZLEN; + dev->max_mtu = bp->max_mtu; + rc = bnxt_probe_phy(bp); if (rc) goto init_err_pci_clean; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d193923d20a6..5f1fce4a724d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1172,6 +1172,7 @@ struct bnxt { int nr_vnics; u32 rss_hash_cfg; + u16 max_mtu; u8 max_tc; u8 max_lltc; /* lossless TCs */ struct bnxt_queue_info q_info[BNXT_MAX_QUEUE]; From 49f7972fd16407b3d1f03c2d447d2f1e1b95e9ba Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 26 Oct 2017 11:51:26 -0400 Subject: [PATCH 1508/2177] bnxt_en: Add ethtool reset method This is a firmware internal reset after driver is unloaded. Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 37 ++++++++++++++++++- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.h | 2 + 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index fe93625e0539..fc32df7f979f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1337,7 +1337,6 @@ static int bnxt_firmware_reset(struct net_device *dev, bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1); - /* TODO: Support ASAP ChiMP self-reset (e.g. upon PF driver unload) */ /* TODO: Address self-reset of APE/KONG/BONO/TANG or ungraceful reset */ /* (e.g. when firmware isn't already running) */ switch (dir_type) { @@ -1363,6 +1362,10 @@ static int bnxt_firmware_reset(struct net_device *dev, case BNX_DIR_TYPE_BONO_PATCH: req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE; break; + case BNXT_FW_RESET_CHIP: + req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP; + req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP; + break; default: return -EINVAL; } @@ -2485,6 +2488,37 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } } +static int bnxt_reset(struct net_device *dev, u32 *flags) +{ + struct bnxt *bp = netdev_priv(dev); + int rc = 0; + + if (!BNXT_PF(bp)) { + netdev_err(dev, "Reset is not supported from a VF\n"); + return -EOPNOTSUPP; + } + + if (pci_vfs_assigned(bp->pdev)) { + netdev_err(dev, + "Reset not allowed when VFs are assigned to VMs\n"); + return -EBUSY; + } + + if (*flags == ETH_RESET_ALL) { + /* This feature is not supported in older firmware versions */ + if (bp->hwrm_spec_code < 0x10803) + return -EOPNOTSUPP; + + rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_CHIP); + if (!rc) + netdev_info(dev, "Reset request successful. 
Reload driver to complete reset\n"); + } else { + rc = -EINVAL; + } + + return rc; +} + void bnxt_ethtool_init(struct bnxt *bp) { struct hwrm_selftest_qlist_output *resp = bp->hwrm_cmd_resp_addr; @@ -2597,4 +2631,5 @@ const struct ethtool_ops bnxt_ethtool_ops = { .nway_reset = bnxt_nway_reset, .set_phys_id = bnxt_set_phys_id, .self_test = bnxt_self_test, + .reset = bnxt_reset, }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h index f1bc90b6fb5b..ff601b42fcc8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h @@ -34,6 +34,8 @@ struct bnxt_led_cfg { #define BNXT_LED_DFLT_ENABLES(x) \ cpu_to_le32(BNXT_LED_DFLT_ENA << (BNXT_LED_DFLT_ENA_SHIFT * (x))) +#define BNXT_FW_RESET_CHIP 0xffff + extern const struct ethtool_ops bnxt_ethtool_ops; u32 _bnxt_fw_to_ethtool_adv_spds(u16, u8); From 18775aa8a91fcd4cd07c722d575b4b852e3624c3 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 26 Oct 2017 11:51:27 -0400 Subject: [PATCH 1509/2177] bnxt_en: Reorganize the coalescing parameters. The current IRQ coalescing logic is a little messy. The ethtool parameters are mapped to hardware parameters in a way that is difficult to understand. The first step is to better organize the parameters by adding the new structure bnxt_coal. The structure is used by both the RX and TX sets of coalescing parameters. Adjust the default coal_ticks to 14 us and 28 us for RX and TX. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 60 +++++++++++-------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 21 ++++--- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 46 ++++++++------ 3 files changed, 77 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 285863ba51a3..52fff1605172 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4569,34 +4569,31 @@ int bnxt_hwrm_set_coal(struct bnxt *bp) /* Each rx completion (2 records) should be DMAed immediately. * DMA 1/4 of the completion buffers at a time. */ - max_buf = min_t(u16, bp->rx_coal_bufs / 4, 2); + max_buf = min_t(u16, bp->rx_coal.coal_bufs / 4, 2); /* max_buf must not be zero */ max_buf = clamp_t(u16, max_buf, 1, 63); - max_buf_irq = clamp_t(u16, bp->rx_coal_bufs_irq, 1, 63); - buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal_ticks); + max_buf_irq = clamp_t(u16, bp->rx_coal.coal_bufs_irq, 1, 63); + buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal.coal_ticks); /* buf timer set to 1/4 of interrupt timer */ buf_tmr = max_t(u16, buf_tmr / 4, 1); - buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal_ticks_irq); + buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal.coal_ticks_irq); buf_tmr_irq = max_t(u16, buf_tmr_irq, 1); flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET; - /* RING_IDLE generates more IRQs for lower latency. Enable it only - * if coal_ticks is less than 25 us. 
- */ - if (bp->rx_coal_ticks < 25) + if (bp->rx_coal.coal_ticks < bp->rx_coal.idle_thresh) flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE; bnxt_hwrm_set_coal_params(bp, max_buf_irq << 16 | max_buf, buf_tmr_irq << 16 | buf_tmr, flags, &req_rx); /* max_buf must not be zero */ - max_buf = clamp_t(u16, bp->tx_coal_bufs, 1, 63); - max_buf_irq = clamp_t(u16, bp->tx_coal_bufs_irq, 1, 63); - buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal_ticks); + max_buf = clamp_t(u16, bp->tx_coal.coal_bufs, 1, 63); + max_buf_irq = clamp_t(u16, bp->tx_coal.coal_bufs_irq, 1, 63); + buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal.coal_ticks); /* buf timer set to 1/4 of interrupt timer */ buf_tmr = max_t(u16, buf_tmr / 4, 1); - buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal_ticks_irq); + buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal.coal_ticks_irq); buf_tmr_irq = max_t(u16, buf_tmr_irq, 1); flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET; @@ -7146,6 +7143,32 @@ static void bnxt_cleanup_pci(struct bnxt *bp) pci_disable_device(bp->pdev); } +static void bnxt_init_dflt_coal(struct bnxt *bp) +{ + struct bnxt_coal *coal; + + /* Tick values in micro seconds. + * 1 coal_buf x bufs_per_record = 1 completion record. + */ + coal = &bp->rx_coal; + coal->coal_ticks = 14; + coal->coal_bufs = 30; + coal->coal_ticks_irq = 1; + coal->coal_bufs_irq = 2; + coal->idle_thresh = 25; + coal->bufs_per_record = 2; + coal->budget = 64; /* NAPI budget */ + + coal = &bp->tx_coal; + coal->coal_ticks = 28; + coal->coal_bufs = 30; + coal->coal_ticks_irq = 2; + coal->coal_bufs_irq = 2; + coal->bufs_per_record = 1; + + bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; +} + static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) { int rc; @@ -7214,18 +7237,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) bp->rx_ring_size = BNXT_DEFAULT_RX_RING_SIZE; bp->tx_ring_size = BNXT_DEFAULT_TX_RING_SIZE; - /* tick values in micro seconds */ - bp->rx_coal_ticks = 12; - bp->rx_coal_bufs = 30; - bp->rx_coal_ticks_irq = 1; - bp->rx_coal_bufs_irq = 2; - - bp->tx_coal_ticks = 25; - bp->tx_coal_bufs = 30; - bp->tx_coal_ticks_irq = 2; - bp->tx_coal_bufs_irq = 2; - - bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; + bnxt_init_dflt_coal(bp); setup_timer(&bp->timer, bnxt_timer, (unsigned long)bp); bp->current_interval = BNXT_TIMER_INTERVAL; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 5f1fce4a724d..2188f1606209 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -944,6 +944,17 @@ struct bnxt_test_info { #define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 #define BNXT_CAG_REG_BASE 0x300000 +struct bnxt_coal { + u16 coal_ticks; + u16 coal_ticks_irq; + u16 coal_bufs; + u16 coal_bufs_irq; + /* RING_IDLE enabled when coal ticks < idle_thresh */ + u16 idle_thresh; + u8 bufs_per_record; + u8 budget; +}; + struct bnxt_tc_info { bool enabled; @@ -1235,14 +1246,8 @@ struct bnxt { u8 port_count; u16 br_mode; - u16 rx_coal_ticks; - u16 rx_coal_ticks_irq; - u16 rx_coal_bufs; - u16 rx_coal_bufs_irq; - u16 tx_coal_ticks; - u16 tx_coal_ticks_irq; - u16 tx_coal_bufs; - u16 tx_coal_bufs_irq; + struct bnxt_coal rx_coal; + struct bnxt_coal tx_coal; #define BNXT_USEC_TO_COAL_TIMER(x) ((x) * 25 / 2) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index fc32df7f979f..5cd1a501c62b 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -44,19 +44,24 @@ static int bnxt_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct bnxt *bp = netdev_priv(dev); + struct bnxt_coal *hw_coal; + u16 mult; memset(coal, 0, sizeof(*coal)); - coal->rx_coalesce_usecs = bp->rx_coal_ticks; - /* 2 completion records per rx packet */ - coal->rx_max_coalesced_frames = bp->rx_coal_bufs / 2; - coal->rx_coalesce_usecs_irq = bp->rx_coal_ticks_irq; - coal->rx_max_coalesced_frames_irq = bp->rx_coal_bufs_irq / 2; + hw_coal = &bp->rx_coal; + mult = hw_coal->bufs_per_record; + coal->rx_coalesce_usecs = hw_coal->coal_ticks; + coal->rx_max_coalesced_frames = hw_coal->coal_bufs / mult; + coal->rx_coalesce_usecs_irq = hw_coal->coal_ticks_irq; + coal->rx_max_coalesced_frames_irq = hw_coal->coal_bufs_irq / mult; - coal->tx_coalesce_usecs = bp->tx_coal_ticks; - coal->tx_max_coalesced_frames = bp->tx_coal_bufs; - coal->tx_coalesce_usecs_irq = bp->tx_coal_ticks_irq; - coal->tx_max_coalesced_frames_irq = bp->tx_coal_bufs_irq; + hw_coal = &bp->tx_coal; + mult = hw_coal->bufs_per_record; + coal->tx_coalesce_usecs = hw_coal->coal_ticks; + coal->tx_max_coalesced_frames = hw_coal->coal_bufs / mult; + coal->tx_coalesce_usecs_irq = hw_coal->coal_ticks_irq; + coal->tx_max_coalesced_frames_irq = hw_coal->coal_bufs_irq / mult; coal->stats_block_coalesce_usecs = bp->stats_coal_ticks; @@ -68,18 +73,23 @@ static int bnxt_set_coalesce(struct net_device *dev, { struct bnxt *bp = netdev_priv(dev); bool update_stats = false; + struct bnxt_coal *hw_coal; int rc = 0; + u16 mult; - bp->rx_coal_ticks = coal->rx_coalesce_usecs; - /* 2 completion records per rx packet */ - bp->rx_coal_bufs = coal->rx_max_coalesced_frames * 2; - bp->rx_coal_ticks_irq = coal->rx_coalesce_usecs_irq; - bp->rx_coal_bufs_irq = coal->rx_max_coalesced_frames_irq * 2; + hw_coal = &bp->rx_coal; + mult = hw_coal->bufs_per_record; + hw_coal->coal_ticks = coal->rx_coalesce_usecs; + hw_coal->coal_bufs = coal->rx_max_coalesced_frames * mult; + hw_coal->coal_ticks_irq = coal->rx_coalesce_usecs_irq; + hw_coal->coal_bufs_irq = coal->rx_max_coalesced_frames_irq * mult; - bp->tx_coal_ticks = coal->tx_coalesce_usecs; - bp->tx_coal_bufs = coal->tx_max_coalesced_frames; - bp->tx_coal_ticks_irq = coal->tx_coalesce_usecs_irq; - bp->tx_coal_bufs_irq = coal->tx_max_coalesced_frames_irq; + hw_coal = &bp->rx_coal; + mult = hw_coal->bufs_per_record; + hw_coal->coal_ticks = coal->tx_coalesce_usecs; + hw_coal->coal_bufs = coal->tx_max_coalesced_frames * mult; + hw_coal->coal_ticks_irq = coal->tx_coalesce_usecs_irq; + hw_coal->coal_bufs_irq = coal->tx_max_coalesced_frames_irq * mult; if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) { u32 stats_ticks = coal->stats_block_coalesce_usecs; From f8503969d27b2b26ff0adbce4b7d7cf4ba5e43c2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 26 Oct 2017 11:51:28 -0400 Subject: [PATCH 1510/2177] bnxt_en: Refactor and simplify coalescing code. The mapping of the ethtool coalescing parameters to hardware parameters is now done in bnxt_hwrm_set_coal_params(). The same function can handle both RX and TX settings. The code is now more clear. Some adjustments have been made to get better hardware settings. The coal_frames setting is now accurately set in hardware. The max_timer is set to coal_ticks value. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 81 ++++++++++------------- 1 file changed, 35 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 52fff1605172..c76729122143 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4537,19 +4537,42 @@ static int bnxt_hwrm_check_tx_rings(struct bnxt *bp, int tx_rings) return 0; } -static void bnxt_hwrm_set_coal_params(struct bnxt *bp, u32 max_bufs, - u32 buf_tmrs, u16 flags, +static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal, struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req) { + u16 val, tmr, max, flags; + + max = hw_coal->bufs_per_record * 128; + if (hw_coal->budget) + max = hw_coal->bufs_per_record * hw_coal->budget; + + val = clamp_t(u16, hw_coal->coal_bufs, 1, max); + req->num_cmpl_aggr_int = cpu_to_le16(val); + req->num_cmpl_dma_aggr = cpu_to_le16(val); + + val = clamp_t(u16, hw_coal->coal_bufs_irq, 1, max); + req->num_cmpl_dma_aggr_during_int = cpu_to_le16(val); + + tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks); + tmr = max_t(u16, tmr, 1); + req->int_lat_tmr_max = cpu_to_le16(tmr); + + /* min timer set to 1/2 of interrupt timer */ + val = tmr / 2; + req->int_lat_tmr_min = cpu_to_le16(val); + + /* buf timer set to 1/4 of interrupt timer */ + val = max_t(u16, tmr / 4, 1); + req->cmpl_aggr_dma_tmr = cpu_to_le16(val); + + tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks_irq); + tmr = max_t(u16, tmr, 1); + req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(tmr); + + flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET; + if (hw_coal->idle_thresh && hw_coal->coal_ticks < hw_coal->idle_thresh) + flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE; req->flags = cpu_to_le16(flags); - req->num_cmpl_dma_aggr = cpu_to_le16((u16)max_bufs); - req->num_cmpl_dma_aggr_during_int = cpu_to_le16(max_bufs >> 16); - req->cmpl_aggr_dma_tmr = cpu_to_le16((u16)buf_tmrs); - req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(buf_tmrs >> 16); - /* Minimum time between 2 interrupts set to buf_tmr x 2 */ - req->int_lat_tmr_min = cpu_to_le16((u16)buf_tmrs * 2); - req->int_lat_tmr_max = cpu_to_le16((u16)buf_tmrs * 4); - req->num_cmpl_aggr_int = cpu_to_le16((u16)max_bufs * 4); } int bnxt_hwrm_set_coal(struct bnxt *bp) @@ -4557,48 +4580,14 @@ int bnxt_hwrm_set_coal(struct bnxt *bp) int i, rc = 0; struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req_rx = {0}, req_tx = {0}, *req; - u16 max_buf, max_buf_irq; - u16 buf_tmr, buf_tmr_irq; - u32 flags; bnxt_hwrm_cmd_hdr_init(bp, &req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1); bnxt_hwrm_cmd_hdr_init(bp, &req_tx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1); - /* Each rx completion (2 records) should be DMAed immediately. - * DMA 1/4 of the completion buffers at a time. 
- */ - max_buf = min_t(u16, bp->rx_coal.coal_bufs / 4, 2); - /* max_buf must not be zero */ - max_buf = clamp_t(u16, max_buf, 1, 63); - max_buf_irq = clamp_t(u16, bp->rx_coal.coal_bufs_irq, 1, 63); - buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal.coal_ticks); - /* buf timer set to 1/4 of interrupt timer */ - buf_tmr = max_t(u16, buf_tmr / 4, 1); - buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal.coal_ticks_irq); - buf_tmr_irq = max_t(u16, buf_tmr_irq, 1); - - flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET; - - if (bp->rx_coal.coal_ticks < bp->rx_coal.idle_thresh) - flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE; - - bnxt_hwrm_set_coal_params(bp, max_buf_irq << 16 | max_buf, - buf_tmr_irq << 16 | buf_tmr, flags, &req_rx); - - /* max_buf must not be zero */ - max_buf = clamp_t(u16, bp->tx_coal.coal_bufs, 1, 63); - max_buf_irq = clamp_t(u16, bp->tx_coal.coal_bufs_irq, 1, 63); - buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal.coal_ticks); - /* buf timer set to 1/4 of interrupt timer */ - buf_tmr = max_t(u16, buf_tmr / 4, 1); - buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal.coal_ticks_irq); - buf_tmr_irq = max_t(u16, buf_tmr_irq, 1); - - flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET; - bnxt_hwrm_set_coal_params(bp, max_buf_irq << 16 | max_buf, - buf_tmr_irq << 16 | buf_tmr, flags, &req_tx); + bnxt_hwrm_set_coal_params(&bp->rx_coal, &req_rx); + bnxt_hwrm_set_coal_params(&bp->tx_coal, &req_tx); mutex_lock(&bp->hwrm_cmd_lock); for (i = 0; i < bp->cp_nr_rings; i++) { From 8c95f773b4a367f7b9bcca7ab5f85675cfc812e9 Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Thu, 26 Oct 2017 11:51:29 -0400 Subject: [PATCH 1511/2177] bnxt_en: add support for Flower based vxlan encap/decap offload This patch adds IPv4 vxlan encap/decap action support to TC-flower offload. For vxlan encap, the driver maintains a tunnel encap hash-table. When a new flow with a tunnel encap action arrives, this table is looked up; if an encap entry exists, it uses the already programmed encap_record_handle as the tunnel_handle in the hwrm_cfa_flow_alloc cmd. Else, a new encap node is added and the L2 header fields are queried via a route lookup. hwrm_cfa_encap_record_alloc cmd is used to create a new encap record and the encap_record_handle is used as the tunnel_handle while adding the flow. For vxlan decap, the driver maintains a tunnel decap hash-table. When a new flow with a tunnel decap action arrives, this table is looked up; if a decap entry exists, it uses the already programmed decap_filter_handle as the tunnel_handle in the hwrm_cfa_flow_alloc cmd. Else, a new decap node is added and a decap_filter_handle is alloc'd via the hwrm_cfa_decap_filter_alloc cmd. This handle is used as the tunnel_handle while adding the flow. The code to issue the HWRM FW cmds is introduced in a follow-up patch. Signed-off-by: Sathya Perla Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 9 + .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 568 +++++++++++++++++- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 66 +- 4 files changed, 632 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 2188f1606209..d88d864db7d4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -965,6 +965,15 @@ struct bnxt_tc_info { /* hash table to store L2 keys of TC flows */ struct rhashtable l2_table; struct rhashtable_params l2_ht_params; + /* hash table to store L2 keys for TC tunnel decap */ + struct rhashtable decap_l2_table; + struct rhashtable_params decap_l2_ht_params; + /* hash table to store tunnel decap entries */ + struct rhashtable decap_table; + struct rhashtable_params decap_ht_params; + /* hash table to store tunnel encap entries */ + struct rhashtable encap_table; + struct rhashtable_params encap_ht_params; /* lock to atomically add/del an l2 node when a flow is * added or deleted. diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index f3f6aa868d6c..402fa32f7a88 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -29,7 +29,7 @@ int bnxt_dl_register(struct bnxt *bp) if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV)) return 0; - if (bp->hwrm_spec_code < 0x10800) { + if (bp->hwrm_spec_code < 0x10803) { netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n"); return -ENOTSUPP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index a9cb653b4d29..f14edc9c1412 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "bnxt_hsi.h" #include "bnxt.h" @@ -89,6 +90,23 @@ static void bnxt_tc_parse_vlan(struct bnxt *bp, } } +static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, + struct bnxt_tc_actions *actions, + const struct tc_action *tc_act) +{ + struct ip_tunnel_info *tun_info = tcf_tunnel_info(tc_act); + struct ip_tunnel_key *tun_key = &tun_info->key; + + if (ip_tunnel_info_af(tun_info) != AF_INET) { + netdev_info(bp->dev, "only IPv4 tunnel-encap is supported"); + return -EOPNOTSUPP; + } + + actions->tun_encap_key = *tun_key; + actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP; + return 0; +} + static int bnxt_tc_parse_actions(struct bnxt *bp, struct bnxt_tc_actions *actions, struct tcf_exts *tc_exts) @@ -123,9 +141,35 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, bnxt_tc_parse_vlan(bp, actions, tc_act); continue; } + + /* Tunnel encap */ + if (is_tcf_tunnel_set(tc_act)) { + rc = bnxt_tc_parse_tunnel_set(bp, actions, tc_act); + if (rc) + return rc; + continue; + } + + /* Tunnel decap */ + if (is_tcf_tunnel_release(tc_act)) { + actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP; + continue; + } } - return 0; + if (rc) + return rc; + + /* Tunnel encap/decap action must be accompanied by a redirect action */ + if ((actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP || + actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) && + !(actions->flags & BNXT_TC_ACTION_FLAG_FWD)) { + netdev_info(bp->dev, + "error: no redir action along with encap/decap"); + return -EINVAL; + } + + return rc; } #define GET_KEY(flow_cmd, 
key_type) \ @@ -252,6 +296,54 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, flow->l4_mask.icmp.code = mask->code; } + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_control *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_CONTROL); + + addr_type = key->addr_type; + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + struct flow_dissector_key_ipv4_addrs *mask = + GET_MASK(tc_flow_cmd, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS; + flow->tun_key.u.ipv4.dst = key->dst; + flow->tun_mask.u.ipv4.dst = mask->dst; + flow->tun_key.u.ipv4.src = key->src; + flow->tun_mask.u.ipv4.src = mask->src; + } else if (dissector_uses_key(dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + return -EOPNOTSUPP; + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID); + struct flow_dissector_key_keyid *mask = + GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID); + + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID; + flow->tun_key.tun_id = key32_to_tunnel_id(key->keyid); + flow->tun_mask.tun_id = key32_to_tunnel_id(mask->keyid); + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS); + struct flow_dissector_key_ports *mask = + GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS); + + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS; + flow->tun_key.tp_dst = key->dst; + flow->tun_mask.tp_dst = mask->dst; + flow->tun_key.tp_src = key->src; + flow->tun_mask.tp_src = mask->src; + } + return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts); } @@ -293,7 +385,8 @@ static bool is_wildcard(void *mask, int len) } static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, - __le16 ref_flow_handle, __le16 *flow_handle) + __le16 ref_flow_handle, + __le32 tunnel_handle, __le16 *flow_handle) { struct hwrm_cfa_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr; struct bnxt_tc_actions *actions = &flow->actions; @@ -307,6 +400,14 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, req.src_fid = cpu_to_le16(flow->src_fid); req.ref_flow_handle = ref_flow_handle; + + if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP || + actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) { + req.tunnel_handle = tunnel_handle; + flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL; + action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL; + } + req.ethertype = flow->l2_key.ether_type; req.ip_proto = flow->l4_key.ip_proto; @@ -478,6 +579,35 @@ static int bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, return rc; } +static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_l2_key *l2_info, + __le32 ref_decap_handle, + __le32 *decap_filter_handle) +{ + return 0; +} + +static int hwrm_cfa_decap_filter_free(struct bnxt *bp, + __le32 decap_filter_handle) +{ + return 0; +} + +static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, + struct ip_tunnel_key *encap_key, + struct bnxt_tc_l2_key *l2_info, + __le32 *encap_record_handle) +{ + return 0; +} + +static int hwrm_cfa_encap_record_free(struct bnxt *bp, + __le32 encap_record_handle) +{ + return 0; +} + static int bnxt_tc_put_l2_node(struct bnxt *bp, struct 
bnxt_tc_flow_node *flow_node) { @@ -519,7 +649,7 @@ bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table, rc = rhashtable_insert_fast(l2_table, &l2_node->node, ht_params); if (rc) { - kfree(l2_node); + kfree_rcu(l2_node, rcu); netdev_err(bp->dev, "Error: %s: rhashtable_insert_fast: %d", __func__, rc); @@ -588,6 +718,376 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow) return true; } +/* Returns the final refcount of the node on success + * or a -ve error code on failure + */ +static int bnxt_tc_put_tunnel_node(struct bnxt *bp, + struct rhashtable *tunnel_table, + struct rhashtable_params *ht_params, + struct bnxt_tc_tunnel_node *tunnel_node) +{ + int rc; + + if (--tunnel_node->refcount == 0) { + rc = rhashtable_remove_fast(tunnel_table, &tunnel_node->node, + *ht_params); + if (rc) { + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + rc = -1; + } + kfree_rcu(tunnel_node, rcu); + return rc; + } else { + return tunnel_node->refcount; + } +} + +/* Get (or add) either encap or decap tunnel node from/to the supplied + * hash table. + */ +static struct bnxt_tc_tunnel_node * +bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table, + struct rhashtable_params *ht_params, + struct ip_tunnel_key *tun_key) +{ + struct bnxt_tc_tunnel_node *tunnel_node; + int rc; + + tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params); + if (!tunnel_node) { + tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL); + if (!tunnel_node) { + rc = -ENOMEM; + goto err; + } + + tunnel_node->key = *tun_key; + tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE; + rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node, + *ht_params); + if (rc) { + kfree_rcu(tunnel_node, rcu); + goto err; + } + } + tunnel_node->refcount++; + return tunnel_node; +err: + netdev_info(bp->dev, "error rc=%d", rc); + return NULL; +} + +static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_l2_key *l2_key, + struct bnxt_tc_flow_node *flow_node, + __le32 *ref_decap_handle) +{ + struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_flow_node *ref_flow_node; + struct bnxt_tc_l2_node *decap_l2_node; + + decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table, + tc_info->decap_l2_ht_params, + l2_key); + if (!decap_l2_node) + return -1; + + /* If any other flow is using this decap_l2_node, use it's decap_handle + * as the ref_decap_handle + */ + if (decap_l2_node->refcount > 0) { + ref_flow_node = + list_first_entry(&decap_l2_node->common_l2_flows, + struct bnxt_tc_flow_node, + decap_l2_list_node); + *ref_decap_handle = ref_flow_node->decap_node->tunnel_handle; + } else { + *ref_decap_handle = INVALID_TUNNEL_HANDLE; + } + + /* Insert the l2_node into the flow_node so that subsequent flows + * with a matching decap l2 key can use the decap_filter_handle of + * this flow as their ref_decap_handle + */ + flow_node->decap_l2_node = decap_l2_node; + list_add(&flow_node->decap_l2_list_node, + &decap_l2_node->common_l2_flows); + decap_l2_node->refcount++; + return 0; +} + +static void bnxt_tc_put_decap_l2_node(struct bnxt *bp, + struct bnxt_tc_flow_node *flow_node) +{ + struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node; + struct bnxt_tc_info *tc_info = &bp->tc_info; + int rc; + + /* remove flow_node from the decap L2 sharing flow list */ + list_del(&flow_node->decap_l2_list_node); + if (--decap_l2_node->refcount == 0) { + rc = rhashtable_remove_fast(&tc_info->decap_l2_table, + 
&decap_l2_node->node, + tc_info->decap_l2_ht_params); + if (rc) + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + kfree_rcu(decap_l2_node, rcu); + } +} + +static void bnxt_tc_put_decap_handle(struct bnxt *bp, + struct bnxt_tc_flow_node *flow_node) +{ + __le32 decap_handle = flow_node->decap_node->tunnel_handle; + struct bnxt_tc_info *tc_info = &bp->tc_info; + int rc; + + if (flow_node->decap_l2_node) + bnxt_tc_put_decap_l2_node(bp, flow_node); + + rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + flow_node->decap_node); + if (!rc && decap_handle != INVALID_TUNNEL_HANDLE) + hwrm_cfa_decap_filter_free(bp, decap_handle); +} + +static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, + struct ip_tunnel_key *tun_key, + struct bnxt_tc_l2_key *l2_info, + struct net_device *real_dst_dev) +{ + struct flowi4 flow = { {0} }; + struct net_device *dst_dev; + struct neighbour *nbr; + struct rtable *rt; + int rc; + + flow.flowi4_proto = IPPROTO_UDP; + flow.fl4_dport = tun_key->tp_dst; + flow.daddr = tun_key->u.ipv4.dst; + + rt = ip_route_output_key(dev_net(real_dst_dev), &flow); + if (IS_ERR(rt)) { + netdev_info(bp->dev, "no route to %pI4b", &flow.daddr); + return -EOPNOTSUPP; + } + + /* The route must either point to the real_dst_dev or a dst_dev that + * uses the real_dst_dev. + */ + dst_dev = rt->dst.dev; + if (is_vlan_dev(dst_dev)) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev); + + if (vlan->real_dev != real_dst_dev) { + netdev_info(bp->dev, + "dst_dev(%s) doesn't use PF-if(%s)", + netdev_name(dst_dev), + netdev_name(real_dst_dev)); + rc = -EOPNOTSUPP; + goto put_rt; + } + l2_info->inner_vlan_tci = htons(vlan->vlan_id); + l2_info->inner_vlan_tpid = vlan->vlan_proto; + l2_info->num_vlans = 1; + } else if (dst_dev != real_dst_dev) { + netdev_info(bp->dev, + "dst_dev(%s) for %pI4b is not PF-if(%s)", + netdev_name(dst_dev), &flow.daddr, + netdev_name(real_dst_dev)); + rc = -EOPNOTSUPP; + goto put_rt; + } + + nbr = dst_neigh_lookup(&rt->dst, &flow.daddr); + if (!nbr) { + netdev_info(bp->dev, "can't lookup neighbor for %pI4b", + &flow.daddr); + rc = -EOPNOTSUPP; + goto put_rt; + } + + tun_key->u.ipv4.src = flow.saddr; + tun_key->ttl = ip4_dst_hoplimit(&rt->dst); + neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev); + ether_addr_copy(l2_info->smac, dst_dev->dev_addr); + neigh_release(nbr); + ip_rt_put(rt); + + return 0; +put_rt: + ip_rt_put(rt); + return rc; +} + +static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *decap_filter_handle) +{ + struct ip_tunnel_key *decap_key = &flow->tun_key; + struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_l2_key l2_info = { {0} }; + struct bnxt_tc_tunnel_node *decap_node; + struct ip_tunnel_key tun_key = { 0 }; + struct bnxt_tc_l2_key *decap_l2_info; + __le32 ref_decap_handle; + int rc; + + /* Check if there's another flow using the same tunnel decap. 
+ * If not, add this tunnel to the table and resolve the other + * tunnel header fileds + */ + decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + decap_key); + if (!decap_node) + return -ENOMEM; + + flow_node->decap_node = decap_node; + + if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE) + goto done; + + /* Resolve the L2 fields for tunnel decap + * Resolve the route for remote vtep (saddr) of the decap key + * Find it's next-hop mac addrs + */ + tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src; + tun_key.tp_dst = flow->tun_key.tp_dst; + rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info, bp->dev); + if (rc) + goto put_decap; + + decap_key->ttl = tun_key.ttl; + decap_l2_info = &decap_node->l2_info; + ether_addr_copy(decap_l2_info->dmac, l2_info.smac); + ether_addr_copy(decap_l2_info->smac, l2_info.dmac); + if (l2_info.num_vlans) { + decap_l2_info->num_vlans = l2_info.num_vlans; + decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid; + decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci; + } + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS; + + /* For getting a decap_filter_handle we first need to check if + * there are any other decap flows that share the same tunnel L2 + * key and if so, pass that flow's decap_filter_handle as the + * ref_decap_handle for this flow. + */ + rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node, + &ref_decap_handle); + if (rc) + goto put_decap; + + /* Issue the hwrm cmd to allocate a decap filter handle */ + rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info, + ref_decap_handle, + &decap_node->tunnel_handle); + if (rc) + goto put_decap_l2; + +done: + *decap_filter_handle = decap_node->tunnel_handle; + return 0; + +put_decap_l2: + bnxt_tc_put_decap_l2_node(bp, flow_node); +put_decap: + bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + flow_node->decap_node); + return rc; +} + +static void bnxt_tc_put_encap_handle(struct bnxt *bp, + struct bnxt_tc_tunnel_node *encap_node) +{ + __le32 encap_handle = encap_node->tunnel_handle; + struct bnxt_tc_info *tc_info = &bp->tc_info; + int rc; + + rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, encap_node); + if (!rc && encap_handle != INVALID_TUNNEL_HANDLE) + hwrm_cfa_encap_record_free(bp, encap_handle); +} + +/* Lookup the tunnel encap table and check if there's an encap_handle + * alloc'd already. + * If not, query L2 info via a route lookup and issue an encap_record_alloc + * cmd to FW. + */ +static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *encap_handle) +{ + struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key; + struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_tunnel_node *encap_node; + int rc; + + /* Check if there's another flow using the same tunnel encap. 
+ * If not, add this tunnel to the table and resolve the other + * tunnel header fileds + */ + encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, + encap_key); + if (!encap_node) + return -ENOMEM; + + flow_node->encap_node = encap_node; + + if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE) + goto done; + + rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info, + flow->actions.dst_dev); + if (rc) + goto put_encap; + + /* Allocate a new tunnel encap record */ + rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info, + &encap_node->tunnel_handle); + if (rc) + goto put_encap; + +done: + *encap_handle = encap_node->tunnel_handle; + return 0; + +put_encap: + bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, encap_node); + return rc; +} + +static void bnxt_tc_put_tunnel_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node) +{ + if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) + bnxt_tc_put_decap_handle(bp, flow_node); + else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) + bnxt_tc_put_encap_handle(bp, flow_node->encap_node); +} + +static int bnxt_tc_get_tunnel_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *tunnel_handle) +{ + if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) + return bnxt_tc_get_decap_handle(bp, flow, flow_node, + tunnel_handle); + else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) + return bnxt_tc_get_encap_handle(bp, flow, flow_node, + tunnel_handle); + else + return 0; +} static int __bnxt_tc_del_flow(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { @@ -599,6 +1099,9 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, mutex_lock(&tc_info->lock); + /* release references to any tunnel encap/decap nodes */ + bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node); + /* release reference to l2 node */ bnxt_tc_put_l2_node(bp, flow_node); @@ -633,6 +1136,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, struct bnxt_tc_flow_node *new_node, *old_node; struct bnxt_tc_info *tc_info = &bp->tc_info; struct bnxt_tc_flow *flow; + __le32 tunnel_handle = 0; __le16 ref_flow_handle; int rc; @@ -670,12 +1174,17 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, if (rc) goto unlock; - /* send HWRM cmd to alloc the flow */ - rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle, - &new_node->flow_handle); + /* If the flow involves tunnel encap/decap, get tunnel_handle */ + rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle); if (rc) goto put_l2; + /* send HWRM cmd to alloc the flow */ + rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle, + tunnel_handle, &new_node->flow_handle); + if (rc) + goto put_tunnel; + /* add new flow to flow-table */ rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node, tc_info->flow_ht_params); @@ -687,12 +1196,14 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, hwrm_flow_free: bnxt_hwrm_cfa_flow_free(bp, new_node->flow_handle); +put_tunnel: + bnxt_tc_put_tunnel_handle(bp, flow, new_node); put_l2: bnxt_tc_put_l2_node(bp, new_node); unlock: mutex_unlock(&tc_info->lock); free_node: - kfree(new_node); + kfree_rcu(new_node, rcu); done: netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d", __func__, tc_flow_cmd->cookie, rc); @@ -781,6 +1292,20 @@ static const struct rhashtable_params bnxt_tc_l2_ht_params = { .automatic_shrinking = true }; +static const 
struct rhashtable_params bnxt_tc_decap_l2_ht_params = { + .head_offset = offsetof(struct bnxt_tc_l2_node, node), + .key_offset = offsetof(struct bnxt_tc_l2_node, key), + .key_len = BNXT_TC_L2_KEY_LEN, + .automatic_shrinking = true +}; + +static const struct rhashtable_params bnxt_tc_tunnel_ht_params = { + .head_offset = offsetof(struct bnxt_tc_tunnel_node, node), + .key_offset = offsetof(struct bnxt_tc_tunnel_node, key), + .key_len = sizeof(struct ip_tunnel_key), + .automatic_shrinking = true +}; + /* convert counter width in bits to a mask */ #define mask(width) ((u64)~0 >> (64 - (width))) @@ -789,7 +1314,7 @@ int bnxt_init_tc(struct bnxt *bp) struct bnxt_tc_info *tc_info = &bp->tc_info; int rc; - if (bp->hwrm_spec_code < 0x10800) { + if (bp->hwrm_spec_code < 0x10803) { netdev_warn(bp->dev, "Firmware does not support TC flower offload.\n"); return -ENOTSUPP; @@ -810,11 +1335,35 @@ int bnxt_init_tc(struct bnxt *bp) if (rc) goto destroy_flow_table; + tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params; + rc = rhashtable_init(&tc_info->decap_l2_table, + &tc_info->decap_l2_ht_params); + if (rc) + goto destroy_l2_table; + + tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params; + rc = rhashtable_init(&tc_info->decap_table, + &tc_info->decap_ht_params); + if (rc) + goto destroy_decap_l2_table; + + tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params; + rc = rhashtable_init(&tc_info->encap_table, + &tc_info->encap_ht_params); + if (rc) + goto destroy_decap_table; + tc_info->enabled = true; bp->dev->hw_features |= NETIF_F_HW_TC; bp->dev->features |= NETIF_F_HW_TC; return 0; +destroy_decap_table: + rhashtable_destroy(&tc_info->decap_table); +destroy_decap_l2_table: + rhashtable_destroy(&tc_info->decap_l2_table); +destroy_l2_table: + rhashtable_destroy(&tc_info->l2_table); destroy_flow_table: rhashtable_destroy(&tc_info->flow_table); return rc; @@ -829,4 +1378,7 @@ void bnxt_shutdown_tc(struct bnxt *bp) rhashtable_destroy(&tc_info->flow_table); rhashtable_destroy(&tc_info->l2_table); + rhashtable_destroy(&tc_info->decap_l2_table); + rhashtable_destroy(&tc_info->decap_table); + rhashtable_destroy(&tc_info->encap_table); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 6c4c1ed279ef..2beccd41c886 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -12,6 +12,8 @@ #ifdef CONFIG_BNXT_FLOWER_OFFLOAD +#include + /* Structs used for storing the filter/actions of the TC cmd. 
*/ struct bnxt_tc_l2_key { @@ -50,6 +52,13 @@ struct bnxt_tc_l4_key { }; }; +struct bnxt_tc_tunnel_key { + struct bnxt_tc_l2_key l2; + struct bnxt_tc_l3_key l3; + struct bnxt_tc_l4_key l4; + __be32 id; +}; + struct bnxt_tc_actions { u32 flags; #define BNXT_TC_ACTION_FLAG_FWD BIT(0) @@ -57,11 +66,16 @@ struct bnxt_tc_actions { #define BNXT_TC_ACTION_FLAG_PUSH_VLAN BIT(3) #define BNXT_TC_ACTION_FLAG_POP_VLAN BIT(4) #define BNXT_TC_ACTION_FLAG_DROP BIT(5) +#define BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP BIT(6) +#define BNXT_TC_ACTION_FLAG_TUNNEL_DECAP BIT(7) u16 dst_fid; struct net_device *dst_dev; __be16 push_vlan_tpid; __be16 push_vlan_tci; + + /* tunnel encap */ + struct ip_tunnel_key tun_encap_key; }; struct bnxt_tc_flow_stats { @@ -76,6 +90,16 @@ struct bnxt_tc_flow { #define BNXT_TC_FLOW_FLAGS_IPV6_ADDRS BIT(3) #define BNXT_TC_FLOW_FLAGS_PORTS BIT(4) #define BNXT_TC_FLOW_FLAGS_ICMP BIT(5) +#define BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS BIT(6) +#define BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS BIT(7) +#define BNXT_TC_FLOW_FLAGS_TUNL_IPV6_ADDRS BIT(8) +#define BNXT_TC_FLOW_FLAGS_TUNL_PORTS BIT(9) +#define BNXT_TC_FLOW_FLAGS_TUNL_ID BIT(10) +#define BNXT_TC_FLOW_FLAGS_TUNNEL (BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS | \ + BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS | \ + BNXT_TC_FLOW_FLAGS_TUNL_IPV6_ADDRS |\ + BNXT_TC_FLOW_FLAGS_TUNL_PORTS |\ + BNXT_TC_FLOW_FLAGS_TUNL_ID) /* flow applicable to pkts ingressing on this fid */ u16 src_fid; @@ -85,6 +109,8 @@ struct bnxt_tc_flow { struct bnxt_tc_l3_key l3_mask; struct bnxt_tc_l4_key l4_key; struct bnxt_tc_l4_key l4_mask; + struct ip_tunnel_key tun_key; + struct ip_tunnel_key tun_mask; struct bnxt_tc_actions actions; @@ -95,11 +121,33 @@ struct bnxt_tc_flow { unsigned long lastused; /* jiffies */ }; +/* Tunnel encap/decap hash table + * This table is used to maintain a list of flows that use + * the same tunnel encap/decap params (ip_daddrs, vni, udp_dport) + * and the FW returned handle. + * A separate table is maintained for encap and decap + */ +struct bnxt_tc_tunnel_node { + struct ip_tunnel_key key; + struct rhash_head node; + + /* tunnel l2 info */ + struct bnxt_tc_l2_key l2_info; + +#define INVALID_TUNNEL_HANDLE cpu_to_le32(0xffffffff) + /* tunnel handle returned by FW */ + __le32 tunnel_handle; + + u32 refcount; + struct rcu_head rcu; +}; + /* L2 hash table - * This data-struct is used for L2-flow table. - * The L2 part of a flow is stored in a hash table. + * The same data-struct is used for L2-flow table and L2-tunnel table. + * The L2 part of a flow or tunnel is stored in a hash table. * A flow that shares the same L2 key/mask with an - * already existing flow must refer to it's flow handle. + * already existing flow/tunnel must refer to it's flow handle or + * decap_filter_id respectively. 
*/ struct bnxt_tc_l2_node { /* hash key: first 16b of key */ @@ -110,7 +158,7 @@ struct bnxt_tc_l2_node { /* a linked list of flows that share the same l2 key */ struct list_head common_l2_flows; - /* number of flows sharing the l2 key */ + /* number of flows/tunnels sharing the l2 key */ u16 refcount; struct rcu_head rcu; @@ -130,6 +178,16 @@ struct bnxt_tc_flow_node { /* for the shared_flows list maintained in l2_node */ struct list_head l2_list_node; + /* tunnel encap related */ + struct bnxt_tc_tunnel_node *encap_node; + + /* tunnel decap related */ + struct bnxt_tc_tunnel_node *decap_node; + /* L2 node in tunnel-l2 hashtable that shares flow's tunnel l2 key */ + struct bnxt_tc_l2_node *decap_l2_node; + /* for the shared_flows list maintained in tunnel decap l2_node */ + struct list_head decap_l2_list_node; + struct rcu_head rcu; }; From f484f6782e013138946122ae09c100c9e4b547e3 Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Thu, 26 Oct 2017 11:51:30 -0400 Subject: [PATCH 1512/2177] bnxt_en: add hwrm FW cmds for cfa_encap_record and decap_filter Add routines for issuing the hwrm_cfa_encap_record_alloc/free and hwrm_cfa_decap_filter_alloc/free FW cmds needed for supporting vxlan encap/decap offload. Signed-off-by: Sathya Perla Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 129 ++++++++++++++++++- 1 file changed, 125 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index f14edc9c1412..0d258d303eef 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -585,13 +585,85 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, __le32 ref_decap_handle, __le32 *decap_filter_handle) { - return 0; + struct hwrm_cfa_decap_filter_alloc_output *resp = + bp->hwrm_cmd_resp_addr; + struct hwrm_cfa_decap_filter_alloc_input req = { 0 }; + struct ip_tunnel_key *tun_key = &flow->tun_key; + u32 enables = 0; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1); + + req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL); + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE | + CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL; + req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN; + req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP; + + if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID; + /* tunnel_id is wrongly defined in hsi defn. 
as __le32 */ + req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id); + } + + if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR | + CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR; + ether_addr_copy(req.dst_macaddr, l2_info->dmac); + ether_addr_copy(req.src_macaddr, l2_info->smac); + } + if (l2_info->num_vlans) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID; + req.t_ivlan_vid = l2_info->inner_vlan_tci; + } + + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE; + req.ethertype = htons(ETH_P_IP); + + if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR | + CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR | + CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE; + req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4; + req.dst_ipaddr[0] = tun_key->u.ipv4.dst; + req.src_ipaddr[0] = tun_key->u.ipv4.src; + } + + if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT; + req.dst_port = tun_key->tp_dst; + } + + /* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc + * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16. + */ + req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle; + req.enables = cpu_to_le32(enables); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + *decap_filter_handle = resp->decap_filter_id; + else + netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + mutex_unlock(&bp->hwrm_cmd_lock); + + return rc; } static int hwrm_cfa_decap_filter_free(struct bnxt *bp, __le32 decap_filter_handle) { - return 0; + struct hwrm_cfa_decap_filter_free_input req = { 0 }; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1); + req.decap_filter_id = decap_filter_handle; + + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + return rc; } static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, @@ -599,13 +671,62 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, struct bnxt_tc_l2_key *l2_info, __le32 *encap_record_handle) { - return 0; + struct hwrm_cfa_encap_record_alloc_output *resp = + bp->hwrm_cmd_resp_addr; + struct hwrm_cfa_encap_record_alloc_input req = { 0 }; + struct hwrm_cfa_encap_data_vxlan *encap = + (struct hwrm_cfa_encap_data_vxlan *)&req.encap_data; + struct hwrm_vxlan_ipv4_hdr *encap_ipv4 = + (struct hwrm_vxlan_ipv4_hdr *)encap->l3; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1); + + req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN; + + ether_addr_copy(encap->dst_mac_addr, l2_info->dmac); + ether_addr_copy(encap->src_mac_addr, l2_info->smac); + if (l2_info->num_vlans) { + encap->num_vlan_tags = l2_info->num_vlans; + encap->ovlan_tci = l2_info->inner_vlan_tci; + encap->ovlan_tpid = l2_info->inner_vlan_tpid; + } + + encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT; + encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT; + encap_ipv4->ttl = encap_key->ttl; + + encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst; + encap_ipv4->src_ip_addr = encap_key->u.ipv4.src; + encap_ipv4->protocol = IPPROTO_UDP; + + encap->dst_port = encap_key->tp_dst; + encap->vni = tunnel_id_to_key32(encap_key->tun_id); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + 
if (!rc) + *encap_record_handle = resp->encap_record_id; + else + netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + mutex_unlock(&bp->hwrm_cmd_lock); + + return rc; } static int hwrm_cfa_encap_record_free(struct bnxt *bp, __le32 encap_record_handle) { - return 0; + struct hwrm_cfa_encap_record_free_input req = { 0 }; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1); + req.encap_record_id = encap_record_handle; + + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + return rc; } static int bnxt_tc_put_l2_node(struct bnxt *bp, From 5a84acbebb22f93dfc9ce1e5f0427c45c94acb33 Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Thu, 26 Oct 2017 11:51:31 -0400 Subject: [PATCH 1513/2177] bnxt_en: query cfa flow stats periodically to compute 'lastused' attribute This patch implements periodic querying of cfa flow stats in batches to compute the 'lastused' attribute of TC flow stats. Signed-off-by: Sathya Perla Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 14 + drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 256 +++++++++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 24 +- 4 files changed, 218 insertions(+), 85 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c76729122143..bbf6da389f86 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6979,6 +6979,11 @@ static void bnxt_timer(unsigned long data) set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event); bnxt_queue_sp_work(bp); } + + if (bnxt_tc_flower_enabled(bp)) { + set_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event); + bnxt_queue_sp_work(bp); + } bnxt_restart_timer: mod_timer(&bp->timer, jiffies + bp->current_interval); } @@ -7069,6 +7074,10 @@ static void bnxt_sp_task(struct work_struct *work) bnxt_get_port_module_status(bp); mutex_unlock(&bp->link_lock); } + + if (test_and_clear_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event)) + bnxt_tc_flow_stats_work(bp); + /* These functions below will clear BNXT_STATE_IN_SP_TASK. They * must be the last functions to be called before exiting. 
*/ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d88d864db7d4..b8343ee4182c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -955,6 +955,11 @@ struct bnxt_coal { u8 budget; }; +struct bnxt_tc_flow_stats { + u64 packets; + u64 bytes; +}; + struct bnxt_tc_info { bool enabled; @@ -980,6 +985,14 @@ struct bnxt_tc_info { */ struct mutex lock; + /* Fields used for batching stats query */ + struct rhashtable_iter iter; +#define BNXT_FLOW_STATS_BATCH_MAX 10 + struct bnxt_tc_stats_batch { + void *flow_node; + struct bnxt_tc_flow_stats hw_stats; + } stats_batch[BNXT_FLOW_STATS_BATCH_MAX]; + /* Stat counter mask (width) */ u64 bytes_mask; u64 packets_mask; @@ -1282,6 +1295,7 @@ struct bnxt { #define BNXT_GENEVE_ADD_PORT_SP_EVENT 12 #define BNXT_GENEVE_DEL_PORT_SP_EVENT 13 #define BNXT_LINK_SPEED_CHNG_SP_EVENT 14 +#define BNXT_FLOW_STATS_SP_EVENT 15 struct bnxt_pf_info pf; #ifdef CONFIG_BNXT_SRIOV diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 0d258d303eef..71828a5beefe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -504,81 +504,6 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, return rc; } -/* Add val to accum while handling a possible wraparound - * of val. Eventhough val is of type u64, its actual width - * is denoted by mask and will wrap-around beyond that width. - */ -static void accumulate_val(u64 *accum, u64 val, u64 mask) -{ -#define low_bits(x, mask) ((x) & (mask)) -#define high_bits(x, mask) ((x) & ~(mask)) - bool wrapped = val < low_bits(*accum, mask); - - *accum = high_bits(*accum, mask) + val; - if (wrapped) - *accum += (mask + 1); -} - -/* The HW counters' width is much less than 64bits. - * Handle possible wrap-around while updating the stat counters - */ -static void bnxt_flow_stats_fix_wraparound(struct bnxt_tc_info *tc_info, - struct bnxt_tc_flow_stats *stats, - struct bnxt_tc_flow_stats *hw_stats) -{ - accumulate_val(&stats->bytes, hw_stats->bytes, tc_info->bytes_mask); - accumulate_val(&stats->packets, hw_stats->packets, - tc_info->packets_mask); -} - -/* Fix possible wraparound of the stats queried from HW, calculate - * the delta from prev_stats, and also update the prev_stats. - * The HW flow stats are fetched under the hwrm_cmd_lock mutex. - * This routine is best called while under the mutex so that the - * stats processing happens atomically. 
- */ -static void bnxt_flow_stats_calc(struct bnxt_tc_info *tc_info, - struct bnxt_tc_flow *flow, - struct bnxt_tc_flow_stats *stats) -{ - struct bnxt_tc_flow_stats *acc_stats, *prev_stats; - - acc_stats = &flow->stats; - bnxt_flow_stats_fix_wraparound(tc_info, acc_stats, stats); - - prev_stats = &flow->prev_stats; - stats->bytes = acc_stats->bytes - prev_stats->bytes; - stats->packets = acc_stats->packets - prev_stats->packets; - *prev_stats = *acc_stats; -} - -static int bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, - __le16 flow_handle, - struct bnxt_tc_flow *flow, - struct bnxt_tc_flow_stats *stats) -{ - struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr; - struct hwrm_cfa_flow_stats_input req = { 0 }; - int rc; - - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1); - req.num_flows = cpu_to_le16(1); - req.flow_handle_0 = flow_handle; - - mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - if (!rc) { - stats->packets = le64_to_cpu(resp->packet_0); - stats->bytes = le64_to_cpu(resp->byte_0); - bnxt_flow_stats_calc(&bp->tc_info, flow, stats); - } else { - netdev_info(bp->dev, "error rc=%d", rc); - } - - mutex_unlock(&bp->hwrm_cmd_lock); - return rc; -} - static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, struct bnxt_tc_l2_key *l2_info, @@ -1306,6 +1231,8 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, if (rc) goto put_tunnel; + flow->lastused = jiffies; + spin_lock_init(&flow->stats_lock); /* add new flow to flow-table */ rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node, tc_info->flow_ht_params); @@ -1352,10 +1279,11 @@ static int bnxt_tc_del_flow(struct bnxt *bp, static int bnxt_tc_get_flow_stats(struct bnxt *bp, struct tc_cls_flower_offload *tc_flow_cmd) { + struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats; struct bnxt_tc_info *tc_info = &bp->tc_info; struct bnxt_tc_flow_node *flow_node; - struct bnxt_tc_flow_stats stats; - int rc; + struct bnxt_tc_flow *flow; + unsigned long lastused; flow_node = rhashtable_lookup_fast(&tc_info->flow_table, &tc_flow_cmd->cookie, @@ -1366,15 +1294,183 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp, return -1; } - rc = bnxt_hwrm_cfa_flow_stats_get(bp, flow_node->flow_handle, - &flow_node->flow, &stats); + flow = &flow_node->flow; + curr_stats = &flow->stats; + prev_stats = &flow->prev_stats; + + spin_lock(&flow->stats_lock); + stats.packets = curr_stats->packets - prev_stats->packets; + stats.bytes = curr_stats->bytes - prev_stats->bytes; + *prev_stats = *curr_stats; + lastused = flow->lastused; + spin_unlock(&flow->stats_lock); + + tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets, + lastused); + return 0; +} + +static int +bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows, + struct bnxt_tc_stats_batch stats_batch[]) +{ + struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_cfa_flow_stats_input req = { 0 }; + __le16 *req_flow_handles = &req.flow_handle_0; + int rc, i; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1); + req.num_flows = cpu_to_le16(num_flows); + for (i = 0; i < num_flows; i++) { + struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node; + + req_flow_handles[i] = flow_node->flow_handle; + } + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) { + __le64 *resp_packets = &resp->packet_0; + __le64 *resp_bytes = &resp->byte_0; + + for (i = 0; i < 
num_flows; i++) { + stats_batch[i].hw_stats.packets = + le64_to_cpu(resp_packets[i]); + stats_batch[i].hw_stats.bytes = + le64_to_cpu(resp_bytes[i]); + } + } else { + netdev_info(bp->dev, "error rc=%d", rc); + } + + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + +/* Add val to accum while handling a possible wraparound + * of val. Eventhough val is of type u64, its actual width + * is denoted by mask and will wrap-around beyond that width. + */ +static void accumulate_val(u64 *accum, u64 val, u64 mask) +{ +#define low_bits(x, mask) ((x) & (mask)) +#define high_bits(x, mask) ((x) & ~(mask)) + bool wrapped = val < low_bits(*accum, mask); + + *accum = high_bits(*accum, mask) + val; + if (wrapped) + *accum += (mask + 1); +} + +/* The HW counters' width is much less than 64bits. + * Handle possible wrap-around while updating the stat counters + */ +static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info, + struct bnxt_tc_flow_stats *acc_stats, + struct bnxt_tc_flow_stats *hw_stats) +{ + accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask); + accumulate_val(&acc_stats->packets, hw_stats->packets, + tc_info->packets_mask); +} + +static int +bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows, + struct bnxt_tc_stats_batch stats_batch[]) +{ + struct bnxt_tc_info *tc_info = &bp->tc_info; + int rc, i; + + rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch); if (rc) return rc; - tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets, 0); + for (i = 0; i < num_flows; i++) { + struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node; + struct bnxt_tc_flow *flow = &flow_node->flow; + + spin_lock(&flow->stats_lock); + bnxt_flow_stats_accum(tc_info, &flow->stats, + &stats_batch[i].hw_stats); + if (flow->stats.packets != flow->prev_stats.packets) + flow->lastused = jiffies; + spin_unlock(&flow->stats_lock); + } + return 0; } +static int +bnxt_tc_flow_stats_batch_prep(struct bnxt *bp, + struct bnxt_tc_stats_batch stats_batch[], + int *num_flows) +{ + struct bnxt_tc_info *tc_info = &bp->tc_info; + struct rhashtable_iter *iter = &tc_info->iter; + void *flow_node; + int rc, i; + + rc = rhashtable_walk_start(iter); + if (rc && rc != -EAGAIN) { + i = 0; + goto done; + } + + rc = 0; + for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) { + flow_node = rhashtable_walk_next(iter); + if (IS_ERR(flow_node)) { + i = 0; + if (PTR_ERR(flow_node) == -EAGAIN) { + continue; + } else { + rc = PTR_ERR(flow_node); + goto done; + } + } + + /* No more flows */ + if (!flow_node) + goto done; + + stats_batch[i].flow_node = flow_node; + } +done: + rhashtable_walk_stop(iter); + *num_flows = i; + return rc; +} + +void bnxt_tc_flow_stats_work(struct bnxt *bp) +{ + struct bnxt_tc_info *tc_info = &bp->tc_info; + int num_flows, rc; + + num_flows = atomic_read(&tc_info->flow_table.nelems); + if (!num_flows) + return; + + rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter); + + for (;;) { + rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch, + &num_flows); + if (rc) { + if (rc == -EAGAIN) + continue; + break; + } + + if (!num_flows) + break; + + bnxt_tc_flow_stats_batch_update(bp, num_flows, + tc_info->stats_batch); + } + + rhashtable_walk_exit(&tc_info->iter); +} + int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, struct tc_cls_flower_offload *cls_flower) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 2beccd41c886..404f98dea0ab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h 
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -78,11 +78,6 @@ struct bnxt_tc_actions { struct ip_tunnel_key tun_encap_key; }; -struct bnxt_tc_flow_stats { - u64 packets; - u64 bytes; -}; - struct bnxt_tc_flow { u32 flags; #define BNXT_TC_FLOW_FLAGS_ETH_ADDRS BIT(1) @@ -119,6 +114,10 @@ struct bnxt_tc_flow { /* previous snap-shot of stats */ struct bnxt_tc_flow_stats prev_stats; unsigned long lastused; /* jiffies */ + /* for calculating delta from prev_stats and + * updating prev_stats atomically. + */ + spinlock_t stats_lock; }; /* Tunnel encap/decap hash table @@ -195,6 +194,12 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, struct tc_cls_flower_offload *cls_flower); int bnxt_init_tc(struct bnxt *bp); void bnxt_shutdown_tc(struct bnxt *bp); +void bnxt_tc_flow_stats_work(struct bnxt *bp); + +static inline bool bnxt_tc_flower_enabled(struct bnxt *bp) +{ + return bp->tc_info.enabled; +} #else /* CONFIG_BNXT_FLOWER_OFFLOAD */ @@ -212,5 +217,14 @@ static inline int bnxt_init_tc(struct bnxt *bp) static inline void bnxt_shutdown_tc(struct bnxt *bp) { } + +static inline void bnxt_tc_flow_stats_work(struct bnxt *bp) +{ +} + +static inline bool bnxt_tc_flower_enabled(struct bnxt *bp) +{ + return false; +} #endif /* CONFIG_BNXT_FLOWER_OFFLOAD */ #endif /* BNXT_TC_H */ From cd66358e52f74585f043ef63089727273b3421d3 Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Thu, 26 Oct 2017 11:51:32 -0400 Subject: [PATCH 1514/2177] bnxt_en: alloc tc_info{} struct only when tc flower is enabled TC flower is not enabled on VFs and when there's no FW support. Alloc the tc_info{} struct at init time only when TC flower is being enabled. Signed-off-by: Sathya Perla Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 47 +++++++++++-------- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 3 ++ 5 files changed, 34 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bbf6da389f86..5ce950629ce9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7347,7 +7347,7 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct bnxt *bp = cb_priv; - if (BNXT_VF(bp)) + if (!bnxt_tc_flower_enabled(bp)) return -EOPNOTSUPP; switch (type) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index b8343ee4182c..5359a1f0045f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1346,7 +1346,7 @@ struct bnxt { enum devlink_eswitch_mode eswitch_mode; struct bnxt_vf_rep **vf_reps; /* array of vf-rep ptrs */ u16 *cfa_code_map; /* cfa_code -> vf_idx map */ - struct bnxt_tc_info tc_info; + struct bnxt_tc_info *tc_info; }; #define BNXT_RX_STATS_OFFSET(counter) \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 71828a5beefe..798d13964274 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -658,7 +658,7 @@ static int bnxt_tc_put_l2_node(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { struct bnxt_tc_l2_node *l2_node = flow_node->l2_node; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int rc; 
/* remove flow_node from the L2 shared flow list */ @@ -714,7 +714,7 @@ bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, struct bnxt_tc_flow_node *flow_node, __le16 *ref_flow_handle) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *ref_flow_node; struct bnxt_tc_l2_node *l2_node; @@ -829,7 +829,7 @@ static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node, __le32 *ref_decap_handle) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *ref_flow_node; struct bnxt_tc_l2_node *decap_l2_node; @@ -867,7 +867,7 @@ static void bnxt_tc_put_decap_l2_node(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int rc; /* remove flow_node from the decap L2 sharing flow list */ @@ -886,7 +886,7 @@ static void bnxt_tc_put_decap_handle(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { __le32 decap_handle = flow_node->decap_node->tunnel_handle; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int rc; if (flow_node->decap_l2_node) @@ -973,7 +973,7 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, __le32 *decap_filter_handle) { struct ip_tunnel_key *decap_key = &flow->tun_key; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_l2_key l2_info = { {0} }; struct bnxt_tc_tunnel_node *decap_node; struct ip_tunnel_key tun_key = { 0 }; @@ -1051,7 +1051,7 @@ static void bnxt_tc_put_encap_handle(struct bnxt *bp, struct bnxt_tc_tunnel_node *encap_node) { __le32 encap_handle = encap_node->tunnel_handle; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int rc; rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table, @@ -1070,7 +1070,7 @@ static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, __le32 *encap_handle) { struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_tunnel_node *encap_node; int rc; @@ -1137,7 +1137,7 @@ static int bnxt_tc_get_tunnel_handle(struct bnxt *bp, static int __bnxt_tc_del_flow(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int rc; /* send HWRM cmd to free the flow-id */ @@ -1180,7 +1180,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, struct tc_cls_flower_offload *tc_flow_cmd) { struct bnxt_tc_flow_node *new_node, *old_node; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow *flow; __le32 tunnel_handle = 0; __le16 ref_flow_handle; @@ -1261,7 +1261,7 @@ done: static int bnxt_tc_del_flow(struct bnxt *bp, struct tc_cls_flower_offload *tc_flow_cmd) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *flow_node; flow_node = rhashtable_lookup_fast(&tc_info->flow_table, @@ -1280,7 +1280,7 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp, struct tc_cls_flower_offload *tc_flow_cmd) { struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats; - struct bnxt_tc_info *tc_info = 
&bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *flow_node; struct bnxt_tc_flow *flow; unsigned long lastused; @@ -1378,7 +1378,7 @@ static int bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows, struct bnxt_tc_stats_batch stats_batch[]) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int rc, i; rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch); @@ -1405,7 +1405,7 @@ bnxt_tc_flow_stats_batch_prep(struct bnxt *bp, struct bnxt_tc_stats_batch stats_batch[], int *num_flows) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct rhashtable_iter *iter = &tc_info->iter; void *flow_node; int rc, i; @@ -1443,7 +1443,7 @@ done: void bnxt_tc_flow_stats_work(struct bnxt *bp) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int num_flows, rc; num_flows = atomic_read(&tc_info->flow_table.nelems); @@ -1528,7 +1528,7 @@ static const struct rhashtable_params bnxt_tc_tunnel_ht_params = { int bnxt_init_tc(struct bnxt *bp) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info; int rc; if (bp->hwrm_spec_code < 0x10803) { @@ -1536,6 +1536,10 @@ int bnxt_init_tc(struct bnxt *bp) "Firmware does not support TC flower offload.\n"); return -ENOTSUPP; } + + tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL); + if (!tc_info) + return -ENOMEM; mutex_init(&tc_info->lock); /* Counter widths are programmed by FW */ @@ -1545,7 +1549,7 @@ int bnxt_init_tc(struct bnxt *bp) tc_info->flow_ht_params = bnxt_tc_flow_ht_params; rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params); if (rc) - return rc; + goto free_tc_info; tc_info->l2_ht_params = bnxt_tc_l2_ht_params; rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params); @@ -1573,6 +1577,7 @@ int bnxt_init_tc(struct bnxt *bp) tc_info->enabled = true; bp->dev->hw_features |= NETIF_F_HW_TC; bp->dev->features |= NETIF_F_HW_TC; + bp->tc_info = tc_info; return 0; destroy_decap_table: @@ -1583,14 +1588,16 @@ destroy_l2_table: rhashtable_destroy(&tc_info->l2_table); destroy_flow_table: rhashtable_destroy(&tc_info->flow_table); +free_tc_info: + kfree(tc_info); return rc; } void bnxt_shutdown_tc(struct bnxt *bp) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; - if (!tc_info->enabled) + if (!bnxt_tc_flower_enabled(bp)) return; rhashtable_destroy(&tc_info->flow_table); @@ -1598,4 +1605,6 @@ void bnxt_shutdown_tc(struct bnxt *bp) rhashtable_destroy(&tc_info->decap_l2_table); rhashtable_destroy(&tc_info->decap_table); rhashtable_destroy(&tc_info->encap_table); + kfree(tc_info); + bp->tc_info = NULL; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 404f98dea0ab..97e09a880693 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -198,7 +198,7 @@ void bnxt_tc_flow_stats_work(struct bnxt *bp); static inline bool bnxt_tc_flower_enabled(struct bnxt *bp) { - return bp->tc_info.enabled; + return bp->tc_info && bp->tc_info->enabled; } #else /* CONFIG_BNXT_FLOWER_OFFLOAD */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 4ae935999ebe..c1761ed5785e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -124,6 +124,9 @@ static int bnxt_vf_rep_setup_tc_block_cb(enum 
tc_setup_type type, struct bnxt *bp = vf_rep->bp; int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid; + if (!bnxt_tc_flower_enabled(vf_rep->bp)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSFLOWER: return bnxt_tc_setup_flower(bp, vf_fid, type_data); From cc49c8ff68144d64de18fc5d0039356e0060c0e4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Oct 2017 09:11:48 -0700 Subject: [PATCH 1515/2177] MAINAINTERS: Add Doug as GENET maintainer Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c9ee7abf4627..3e95dd7db02c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2897,6 +2897,7 @@ F: drivers/gpio/gpio-brcmstb.c F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt BROADCOM GENET ETHERNET DRIVER +M: Doug Berger M: Florian Fainelli L: netdev@vger.kernel.org S: Supported From 2ffbbf0f91288f909b3d495cbf029d8e4cc7db66 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 26 Oct 2017 11:06:46 -0600 Subject: [PATCH 1516/2177] net: qualcomm: rmnet: Fix the return value of rmnet_rx_handler() Since packet is always consumed by rmnet_rx_handler(), we always return RX_HANDLER_CONSUMED. There is no need to pass on this value through multiple functions. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- .../ethernet/qualcomm/rmnet/rmnet_handlers.c | 31 +++++++------------ .../net/ethernet/qualcomm/rmnet/rmnet_map.h | 3 +- .../qualcomm/rmnet/rmnet_map_command.c | 4 +-- 3 files changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index df3d2d16ce55..5dd186d4d0e4 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -43,7 +43,7 @@ static void rmnet_set_skb_proto(struct sk_buff *skb) /* Generic handler */ -static rx_handler_result_t +static void rmnet_deliver_skb(struct sk_buff *skb) { skb_reset_transport_header(skb); @@ -53,12 +53,11 @@ rmnet_deliver_skb(struct sk_buff *skb) skb->pkt_type = PACKET_HOST; skb_set_mac_header(skb, 0); netif_receive_skb(skb); - return RX_HANDLER_CONSUMED; } /* MAP handler */ -static rx_handler_result_t +static void __rmnet_map_ingress_handler(struct sk_buff *skb, struct rmnet_port *port) { @@ -91,31 +90,27 @@ __rmnet_map_ingress_handler(struct sk_buff *skb, skb_pull(skb, sizeof(struct rmnet_map_header)); skb_trim(skb, len); rmnet_set_skb_proto(skb); - return rmnet_deliver_skb(skb); + rmnet_deliver_skb(skb); + return; free_skb: kfree_skb(skb); - return RX_HANDLER_CONSUMED; } -static rx_handler_result_t +static void rmnet_map_ingress_handler(struct sk_buff *skb, struct rmnet_port *port) { struct sk_buff *skbn; - int rc; if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) { while ((skbn = rmnet_map_deaggregate(skb)) != NULL) __rmnet_map_ingress_handler(skbn, port); consume_skb(skb); - rc = RX_HANDLER_CONSUMED; } else { - rc = __rmnet_map_ingress_handler(skb, port); + __rmnet_map_ingress_handler(skb, port); } - - return rc; } static int rmnet_map_egress_handler(struct sk_buff *skb, @@ -149,15 +144,13 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, return RMNET_MAP_SUCCESS; } -static rx_handler_result_t +static void rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev) { if (bridge_dev) { skb->dev = bridge_dev; dev_queue_xmit(skb); } - - return RX_HANDLER_CONSUMED; } /* 
Ingress / Egress Entry Points */ @@ -168,13 +161,12 @@ rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev) */ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) { - int rc = RX_HANDLER_CONSUMED; struct sk_buff *skb = *pskb; struct rmnet_port *port; struct net_device *dev; if (!skb) - return RX_HANDLER_CONSUMED; + goto done; dev = skb->dev; port = rmnet_get_port(dev); @@ -182,14 +174,15 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) switch (port->rmnet_mode) { case RMNET_EPMODE_VND: if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) - rc = rmnet_map_ingress_handler(skb, port); + rmnet_map_ingress_handler(skb, port); break; case RMNET_EPMODE_BRIDGE: - rc = rmnet_bridge_handler(skb, port->bridge_ep); + rmnet_bridge_handler(skb, port->bridge_ep); break; } - return rc; +done: + return RX_HANDLER_CONSUMED; } /* Modifies packet as per logical endpoint configuration and egress data format diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h index ce2302c25b12..3af3fe7b5457 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h @@ -80,7 +80,6 @@ u8 rmnet_map_demultiplex(struct sk_buff *skb); struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb); struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, int hdrlen, int pad); -rx_handler_result_t rmnet_map_command(struct sk_buff *skb, - struct rmnet_port *port); +void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port); #endif /* _RMNET_MAP_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c index 74d362f71cce..51e604923ac1 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c @@ -76,8 +76,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb, /* Process MAP command frame and send N/ACK message as appropriate. Message cmd * name is decoded here and appropriate handler is called. */ -rx_handler_result_t rmnet_map_command(struct sk_buff *skb, - struct rmnet_port *port) +void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port) { struct rmnet_map_control_command *cmd; unsigned char command_name; @@ -102,5 +101,4 @@ rx_handler_result_t rmnet_map_command(struct sk_buff *skb, } if (rc == RMNET_MAP_COMMAND_ACK) rmnet_map_send_ack(skb, rc); - return RX_HANDLER_CONSUMED; } From 85355d775ff70d9ba77a5f94dd786b23852e1c72 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 26 Oct 2017 11:06:47 -0600 Subject: [PATCH 1517/2177] net: qualcomm: rmnet: Always assign rmnet dev in deaggregation path The rmnet device needs to assigned for all packets in the deaggregation path based on the mux id, so the check is not needed. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 5dd186d4d0e4..1ea978335da3 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -83,8 +83,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb, if (!ep) goto free_skb; - if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING) - skb->dev = ep->egress_dev; + skb->dev = ep->egress_dev; /* Subtract MAP header */ skb_pull(skb, sizeof(struct rmnet_map_header)); From 192c4b5d48f2ae25a4ce323b4cb8b024fac3efd2 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 26 Oct 2017 11:06:48 -0600 Subject: [PATCH 1518/2177] net: qualcomm: rmnet: Add support for 64 bit stats Implement 64 bit per cpu stats. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- .../ethernet/qualcomm/rmnet/rmnet_config.h | 14 ++++ .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 76 +++++++++++++++++-- 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 60115e69e415..9586703d2d58 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -41,9 +41,23 @@ struct rmnet_port { extern struct rtnl_link_ops rmnet_link_ops; +struct rmnet_vnd_stats { + u64 rx_pkts; + u64 rx_bytes; + u64 tx_pkts; + u64 tx_bytes; + u32 tx_drops; +}; + +struct rmnet_pcpu_stats { + struct rmnet_vnd_stats stats; + struct u64_stats_sync syncp; +}; + struct rmnet_priv { u8 mux_id; struct net_device *real_dev; + struct rmnet_pcpu_stats __percpu *pcpu_stats; }; struct rmnet_port *rmnet_get_port(struct net_device *real_dev); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 12bd0bbd5235..b0befa18cb10 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -27,14 +27,28 @@ void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev) { - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + struct rmnet_priv *priv = netdev_priv(dev); + struct rmnet_pcpu_stats *pcpu_ptr; + + pcpu_ptr = this_cpu_ptr(priv->pcpu_stats); + + u64_stats_update_begin(&pcpu_ptr->syncp); + pcpu_ptr->stats.rx_pkts++; + pcpu_ptr->stats.rx_bytes += skb->len; + u64_stats_update_end(&pcpu_ptr->syncp); } void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev) { - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + struct rmnet_priv *priv = netdev_priv(dev); + struct rmnet_pcpu_stats *pcpu_ptr; + + pcpu_ptr = this_cpu_ptr(priv->pcpu_stats); + + u64_stats_update_begin(&pcpu_ptr->syncp); + pcpu_ptr->stats.tx_pkts++; + pcpu_ptr->stats.tx_bytes += skb->len; + u64_stats_update_end(&pcpu_ptr->syncp); } /* Network Device Operations */ @@ -48,7 +62,7 @@ static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb, if (priv->real_dev) { rmnet_egress_handler(skb); } else { - dev->stats.tx_dropped++; + this_cpu_inc(priv->pcpu_stats->stats.tx_drops); kfree_skb(skb); } return NETDEV_TX_OK; @@ -70,12 +84,64 @@ static int rmnet_vnd_get_iflink(const struct net_device *dev) return priv->real_dev->ifindex; } +static int rmnet_vnd_init(struct net_device *dev) +{ + struct rmnet_priv *priv = netdev_priv(dev); 
+ + priv->pcpu_stats = alloc_percpu(struct rmnet_pcpu_stats); + if (!priv->pcpu_stats) + return -ENOMEM; + + return 0; +} + +static void rmnet_vnd_uninit(struct net_device *dev) +{ + struct rmnet_priv *priv = netdev_priv(dev); + + free_percpu(priv->pcpu_stats); +} + +static void rmnet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + struct rmnet_priv *priv = netdev_priv(dev); + struct rmnet_vnd_stats total_stats; + struct rmnet_pcpu_stats *pcpu_ptr; + unsigned int cpu, start; + + memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats)); + + for_each_possible_cpu(cpu) { + pcpu_ptr = this_cpu_ptr(priv->pcpu_stats); + + do { + start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); + total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts; + total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes; + total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts; + total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes; + } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start)); + + total_stats.tx_drops += pcpu_ptr->stats.tx_drops; + } + + s->rx_packets = total_stats.rx_pkts; + s->rx_bytes = total_stats.rx_bytes; + s->tx_packets = total_stats.tx_pkts; + s->tx_bytes = total_stats.tx_bytes; + s->tx_dropped = total_stats.tx_drops; +} + static const struct net_device_ops rmnet_vnd_ops = { .ndo_start_xmit = rmnet_vnd_start_xmit, .ndo_change_mtu = rmnet_vnd_change_mtu, .ndo_get_iflink = rmnet_vnd_get_iflink, .ndo_add_slave = rmnet_add_bridge, .ndo_del_slave = rmnet_del_bridge, + .ndo_init = rmnet_vnd_init, + .ndo_uninit = rmnet_vnd_uninit, + .ndo_get_stats64 = rmnet_get_stats64, }; /* Called by kernel whenever a new rmnet device is created. Sets MTU, From ca32fb034c19e00cfb5e0fd7217eb92f81302048 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 26 Oct 2017 11:06:49 -0600 Subject: [PATCH 1519/2177] net: qualcomm: rmnet: Add support for GRO Add gro_cells so that rmnet devices can call gro_cells_receive instead of netif_receive_skb. Signed-off-by: Subash Abhinov Kasiviswanathan Cc: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/rmnet/Kconfig | 1 + drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 2 ++ drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 4 +++- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 8 ++++++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/Kconfig b/drivers/net/ethernet/qualcomm/rmnet/Kconfig index 6e2587af47a4..9bb06d284644 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/Kconfig +++ b/drivers/net/ethernet/qualcomm/rmnet/Kconfig @@ -5,6 +5,7 @@ menuconfig RMNET tristate "RmNet MAP driver" default n + select GRO_CELLS ---help--- If you select this, you will enable the RMNET module which is used for handling data in the multiplexing and aggregation protocol (MAP) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 9586703d2d58..c19259eea99e 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -14,6 +14,7 @@ */ #include +#include #ifndef _RMNET_CONFIG_H_ #define _RMNET_CONFIG_H_ @@ -58,6 +59,7 @@ struct rmnet_priv { u8 mux_id; struct net_device *real_dev; struct rmnet_pcpu_stats __percpu *pcpu_stats; + struct gro_cells gro_cells; }; struct rmnet_port *rmnet_get_port(struct net_device *real_dev); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 1ea978335da3..29842ccc91a9 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -46,13 +46,15 @@ static void rmnet_set_skb_proto(struct sk_buff *skb) static void rmnet_deliver_skb(struct sk_buff *skb) { + struct rmnet_priv *priv = netdev_priv(skb->dev); + skb_reset_transport_header(skb); skb_reset_network_header(skb); rmnet_vnd_rx_fixup(skb, skb->dev); skb->pkt_type = PACKET_HOST; skb_set_mac_header(skb, 0); - netif_receive_skb(skb); + gro_cells_receive(&priv->gro_cells, skb); } /* MAP handler */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index b0befa18cb10..9caa5e387450 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -87,11 +87,18 @@ static int rmnet_vnd_get_iflink(const struct net_device *dev) static int rmnet_vnd_init(struct net_device *dev) { struct rmnet_priv *priv = netdev_priv(dev); + int err; priv->pcpu_stats = alloc_percpu(struct rmnet_pcpu_stats); if (!priv->pcpu_stats) return -ENOMEM; + err = gro_cells_init(&priv->gro_cells, dev); + if (err) { + free_percpu(priv->pcpu_stats); + return err; + } + return 0; } @@ -99,6 +106,7 @@ static void rmnet_vnd_uninit(struct net_device *dev) { struct rmnet_priv *priv = netdev_priv(dev); + gro_cells_destroy(&priv->gro_cells); free_percpu(priv->pcpu_stats); } From c26eba03e4073bd32ef6c0ea2ba2a3ff5eed11da Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 26 Oct 2017 16:23:25 -0500 Subject: [PATCH 1520/2177] ibmvnic: Update reset infrastructure to support tunable parameters Update ibmvnic reset infrastructure to include a new reset option that will allow changing of tunable parameters. There currently is no way to request different capabilities from the vnic server on the fly so this patch achieves this by resetting the driver and attempting to log in with the requested changes. 
If the reset operation fails, the old values of the tunable parameters are stored in the "fallback" struct and we attempt to login with the fallback values. Signed-off-by: John Allen Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 211 +++++++++++++++++++++++++---- drivers/net/ethernet/ibm/ibmvnic.h | 24 +++- 2 files changed, 208 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 11eba8277132..3d0280196fdc 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -115,6 +115,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *); static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); static int ibmvnic_init(struct ibmvnic_adapter *); static void release_crq_queue(struct ibmvnic_adapter *); +static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -926,6 +927,11 @@ static int ibmvnic_open(struct net_device *netdev) mutex_lock(&adapter->reset_lock); + if (adapter->mac_change_pending) { + __ibmvnic_set_mac(netdev, &adapter->desired.mac); + adapter->mac_change_pending = false; + } + if (adapter->state != VNIC_CLOSED) { rc = ibmvnic_login(netdev); if (rc) { @@ -1426,7 +1432,7 @@ static void ibmvnic_set_multi(struct net_device *netdev) } } -static int ibmvnic_set_mac(struct net_device *netdev, void *p) +static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); struct sockaddr *addr = p; @@ -1444,6 +1450,22 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) return 0; } +static int ibmvnic_set_mac(struct net_device *netdev, void *p) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + struct sockaddr *addr = p; + + if (adapter->state != VNIC_OPEN) { + memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr)); + adapter->mac_change_pending = true; + return 0; + } + + __ibmvnic_set_mac(netdev, addr); + + return 0; +} + /** * do_reset returns zero if we are able to keep processing reset events, or * non-zero if we hit a fatal error and must halt. @@ -1470,6 +1492,13 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (rc) return rc; + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM || + adapter->wait_for_reset) { + release_resources(adapter); + release_sub_crqs(adapter); + release_crq_queue(adapter); + } + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) { /* remove the closed state so when we call open it appears * we are coming from the probed state. 
@@ -1492,16 +1521,23 @@ static int do_reset(struct ibmvnic_adapter *adapter, return 0; } - rc = reset_tx_pools(adapter); - if (rc) - return rc; + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM || + adapter->wait_for_reset) { + rc = init_resources(adapter); + if (rc) + return rc; + } else { + rc = reset_tx_pools(adapter); + if (rc) + return rc; - rc = reset_rx_pools(adapter); - if (rc) - return rc; + rc = reset_rx_pools(adapter); + if (rc) + return rc; - if (reset_state == VNIC_CLOSED) - return 0; + if (reset_state == VNIC_CLOSED) + return 0; + } } rc = __ibmvnic_open(netdev); @@ -1561,7 +1597,7 @@ static void __ibmvnic_reset(struct work_struct *work) struct ibmvnic_adapter *adapter; struct net_device *netdev; u32 reset_state; - int rc; + int rc = 0; adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); netdev = adapter->netdev; @@ -1580,6 +1616,12 @@ static void __ibmvnic_reset(struct work_struct *work) rwi = get_next_rwi(adapter); } + if (adapter->wait_for_reset) { + adapter->wait_for_reset = false; + adapter->reset_done_rc = rc; + complete(&adapter->reset_done); + } + if (rc) { netdev_dbg(adapter->netdev, "Reset failed\n"); free_all_rwi(adapter); @@ -1759,9 +1801,42 @@ static void ibmvnic_netpoll_controller(struct net_device *dev) } #endif +static int wait_for_reset(struct ibmvnic_adapter *adapter) +{ + adapter->fallback.mtu = adapter->req_mtu; + adapter->fallback.rx_queues = adapter->req_rx_queues; + adapter->fallback.tx_queues = adapter->req_tx_queues; + adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq; + adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq; + + init_completion(&adapter->reset_done); + ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); + adapter->wait_for_reset = true; + wait_for_completion(&adapter->reset_done); + + if (adapter->reset_done_rc) { + adapter->desired.mtu = adapter->fallback.mtu; + adapter->desired.rx_queues = adapter->fallback.rx_queues; + adapter->desired.tx_queues = adapter->fallback.tx_queues; + adapter->desired.rx_entries = adapter->fallback.rx_entries; + adapter->desired.tx_entries = adapter->fallback.tx_entries; + + init_completion(&adapter->reset_done); + ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); + wait_for_completion(&adapter->reset_done); + } + adapter->wait_for_reset = false; + + return adapter->reset_done_rc; +} + static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu) { - return -EOPNOTSUPP; + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->desired.mtu = new_mtu + ETH_HLEN; + + return wait_for_reset(adapter); } static const struct net_device_ops ibmvnic_netdev_ops = { @@ -1849,6 +1924,27 @@ static void ibmvnic_get_ringparam(struct net_device *netdev, ring->rx_jumbo_pending = 0; } +static int ibmvnic_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq || + ring->tx_pending > adapter->max_tx_entries_per_subcrq) { + netdev_err(netdev, "Invalid request.\n"); + netdev_err(netdev, "Max tx buffers = %llu\n", + adapter->max_rx_add_entries_per_subcrq); + netdev_err(netdev, "Max rx buffers = %llu\n", + adapter->max_tx_entries_per_subcrq); + return -EINVAL; + } + + adapter->desired.rx_entries = ring->rx_pending; + adapter->desired.tx_entries = ring->tx_pending; + + return wait_for_reset(adapter); +} + static void ibmvnic_get_channels(struct net_device *netdev, struct ethtool_channels *channels) { @@ -1864,6 
+1960,17 @@ static void ibmvnic_get_channels(struct net_device *netdev, channels->combined_count = 0; } +static int ibmvnic_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->desired.rx_queues = channels->rx_count; + adapter->desired.tx_queues = channels->tx_count; + + return wait_for_reset(adapter); +} + static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct ibmvnic_adapter *adapter = netdev_priv(dev); @@ -1960,7 +2067,9 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = { .set_msglevel = ibmvnic_set_msglevel, .get_link = ibmvnic_get_link, .get_ringparam = ibmvnic_get_ringparam, + .set_ringparam = ibmvnic_set_ringparam, .get_channels = ibmvnic_get_channels, + .set_channels = ibmvnic_set_channels, .get_strings = ibmvnic_get_strings, .get_sset_count = ibmvnic_get_sset_count, .get_ethtool_stats = ibmvnic_get_ethtool_stats, @@ -2426,6 +2535,7 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) { struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; + int max_entries; if (!retry) { /* Sub-CRQ entries are 32 byte long */ @@ -2437,21 +2547,60 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) return; } - /* Get the minimum between the queried max and the entries - * that fit in our PAGE_SIZE - */ - adapter->req_tx_entries_per_subcrq = - adapter->max_tx_entries_per_subcrq > entries_page ? - entries_page : adapter->max_tx_entries_per_subcrq; - adapter->req_rx_add_entries_per_subcrq = - adapter->max_rx_add_entries_per_subcrq > entries_page ? - entries_page : adapter->max_rx_add_entries_per_subcrq; + if (adapter->desired.mtu) + adapter->req_mtu = adapter->desired.mtu; + else + adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; + + if (!adapter->desired.tx_entries) + adapter->desired.tx_entries = + adapter->max_tx_entries_per_subcrq; + if (!adapter->desired.rx_entries) + adapter->desired.rx_entries = + adapter->max_rx_add_entries_per_subcrq; + + max_entries = IBMVNIC_MAX_LTB_SIZE / + (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); + + if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * + adapter->desired.tx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.tx_entries = max_entries; + } + + if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * + adapter->desired.rx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.rx_entries = max_entries; + } + + if (adapter->desired.tx_entries) + adapter->req_tx_entries_per_subcrq = + adapter->desired.tx_entries; + else + adapter->req_tx_entries_per_subcrq = + adapter->max_tx_entries_per_subcrq; + + if (adapter->desired.rx_entries) + adapter->req_rx_add_entries_per_subcrq = + adapter->desired.rx_entries; + else + adapter->req_rx_add_entries_per_subcrq = + adapter->max_rx_add_entries_per_subcrq; + + if (adapter->desired.tx_queues) + adapter->req_tx_queues = + adapter->desired.tx_queues; + else + adapter->req_tx_queues = + adapter->opt_tx_comp_sub_queues; + + if (adapter->desired.rx_queues) + adapter->req_rx_queues = + adapter->desired.rx_queues; + else + adapter->req_rx_queues = + adapter->opt_rx_comp_queues; - adapter->req_tx_queues = adapter->opt_tx_comp_sub_queues; - adapter->req_rx_queues = adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; - - adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; } memset(&crq, 0, sizeof(crq)); @@ -3272,6 +3421,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, struct ibmvnic_adapter 
*adapter) { struct device *dev = &adapter->vdev->dev; + struct net_device *netdev = adapter->netdev; struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf; struct ibmvnic_login_buffer *login = adapter->login_buf; int i; @@ -3291,6 +3441,8 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, return 0; } + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", @@ -3846,7 +3998,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) unsigned long timeout = msecs_to_jiffies(30000); int rc; - if (adapter->resetting) { + if (adapter->resetting && !adapter->wait_for_reset) { rc = ibmvnic_reset_crq(adapter); if (!rc) rc = vio_enable_interrupts(adapter->vdev); @@ -3880,7 +4032,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) return -1; } - if (adapter->resetting) + if (adapter->resetting && !adapter->wait_for_reset) rc = reset_sub_crq_queues(adapter); else rc = init_sub_crqs(adapter); @@ -3949,6 +4101,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) mutex_init(&adapter->rwi_lock); adapter->resetting = false; + adapter->mac_change_pending = false; + do { rc = ibmvnic_init(adapter); if (rc && rc != EAGAIN) @@ -3956,6 +4110,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } while (rc == EAGAIN); netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; rc = device_create_file(&dev->dev, &dev_attr_failover); if (rc) @@ -3970,6 +4126,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) dev_info(&dev->dev, "ibmvnic registered\n"); adapter->state = VNIC_PROBED; + + adapter->wait_for_reset = false; + return 0; ibmvnic_register_fail: diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 7aa347a21e78..27107f33755b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -42,6 +42,9 @@ #define IBMVNIC_TSO_BUF_SZ 65536 #define IBMVNIC_TSO_BUFS 64 +#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE) +#define IBMVNIC_BUFFER_HLEN 500 + struct ibmvnic_login_buffer { __be32 len; __be32 version; @@ -945,13 +948,23 @@ enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1, VNIC_RESET_MOBILITY, VNIC_RESET_FATAL, VNIC_RESET_NON_FATAL, - VNIC_RESET_TIMEOUT}; + VNIC_RESET_TIMEOUT, + VNIC_RESET_CHANGE_PARAM}; struct ibmvnic_rwi { enum ibmvnic_reset_reason reset_reason; struct list_head list; }; +struct ibmvnic_tunables { + u64 rx_queues; + u64 tx_queues; + u64 rx_entries; + u64 tx_entries; + u64 mtu; + struct sockaddr mac; +}; + struct ibmvnic_adapter { struct vio_dev *vdev; struct net_device *netdev; @@ -1012,6 +1025,10 @@ struct ibmvnic_adapter { struct completion fw_done; int fw_done_rc; + struct completion reset_done; + int reset_done_rc; + bool wait_for_reset; + /* partner capabilities */ u64 min_tx_queues; u64 min_rx_queues; @@ -1056,4 +1073,9 @@ struct ibmvnic_adapter { struct work_struct ibmvnic_reset; bool resetting; bool napi_enabled, from_passive_init; + + bool mac_change_pending; + + struct ibmvnic_tunables desired; + struct ibmvnic_tunables fallback; }; From 2a1bf51111975846f412f47449edefdf6fa17ee4 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 26 Oct 2017 16:24:15 -0500 Subject: [PATCH 1521/2177] ibmvnic: Fix failover error path for non-fatal resets 
For all non-fatal reset conditions, the hypervisor will send a failover when we attempt to initialize the crq and the vnic client is expected to handle that failover instead of the existing non-fatal reset. To handle this, we need to return from init with a return code that indicates that we have hit this case. Signed-off-by: John Allen Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- drivers/net/ethernet/ibm/ibmvnic.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 3d0280196fdc..d0cff2807d0b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1507,7 +1507,7 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = ibmvnic_init(adapter); if (rc) - return 0; + return IBMVNIC_INIT_FAILED; /* If the adapter was in PROBE state prior to the reset, * exit here. @@ -1610,7 +1610,7 @@ static void __ibmvnic_reset(struct work_struct *work) while (rwi) { rc = do_reset(adapter, rwi, reset_state); kfree(rwi); - if (rc) + if (rc && rc != IBMVNIC_INIT_FAILED) break; rwi = get_next_rwi(adapter); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 27107f33755b..4670af80d612 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -30,6 +30,8 @@ #define IBMVNIC_DRIVER_VERSION "1.0.1" #define IBMVNIC_INVALID_MAP -1 #define IBMVNIC_STATS_TIMEOUT 1 +#define IBMVNIC_INIT_FAILED 2 + /* basic structures plus 100 2k buffers */ #define IBMVNIC_IO_ENTITLEMENT_DEFAULT 610305 From c859e21a35ce5604dde0b618169680aa3c7e3bdb Mon Sep 17 00:00:00 2001 From: Intiyaz Basha Date: Thu, 26 Oct 2017 16:18:20 -0700 Subject: [PATCH 1522/2177] liquidio: xmit_more support Defer ringing the Tx doorbell if skb->xmit_more is set unless the Tx queue is full or stopped. To keep latency low, use a deferral limit of 8 packets. We chose 8 because Octeon can fetch at most 8 packets in a single PCI read, and our tests show that 8 results in low latency. Signed-off-by: Intiyaz Basha Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: David S. 
Miller --- .../net/ethernet/cavium/liquidio/lio_core.c | 6 ++++-- .../net/ethernet/cavium/liquidio/lio_main.c | 18 ++++++++++++------ .../net/ethernet/cavium/liquidio/lio_vf_main.c | 17 ++++++++++++----- .../ethernet/cavium/liquidio/octeon_config.h | 2 ++ .../net/ethernet/cavium/liquidio/octeon_iq.h | 3 +++ .../net/ethernet/cavium/liquidio/octeon_main.h | 2 +- .../net/ethernet/cavium/liquidio/octeon_nic.c | 5 +++-- .../net/ethernet/cavium/liquidio/octeon_nic.h | 3 ++- .../ethernet/cavium/liquidio/request_manager.c | 18 ++++++++++++++++-- 9 files changed, 55 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 23f6b60030c5..b891d858e416 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -91,7 +91,7 @@ void octeon_update_tx_completion_counters(void *buf, int reqtype, *bytes_compl += skb->len; } -void octeon_report_sent_bytes_to_bql(void *buf, int reqtype) +int octeon_report_sent_bytes_to_bql(void *buf, int reqtype) { struct octnet_buf_free_info *finfo; struct sk_buff *skb; @@ -112,11 +112,13 @@ void octeon_report_sent_bytes_to_bql(void *buf, int reqtype) break; default: - return; + return 0; } txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb)); netdev_tx_sent_queue(txq, skb->len); + + return netif_xmit_stopped(txq); } void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index accd038f3f34..8ea24d68e824 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2594,7 +2594,8 @@ static void handle_timestamp(struct octeon_device *oct, */ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, - struct octnet_buf_free_info *finfo) + struct octnet_buf_free_info *finfo, + int xmit_more) { int retval; struct octeon_soft_command *sc; @@ -2629,7 +2630,7 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, len = (u32)((struct octeon_instr_ih2 *) (&sc->cmd.cmd2.ih2))->dlengsz; - ring_doorbell = 1; + ring_doorbell = !xmit_more; retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd, sc, len, ndata->reqtype); @@ -2663,7 +2664,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) union tx_info *tx_info; int status = 0; int q_idx = 0, iq_no = 0; - int j; + int j, xmit_more = 0; u64 dptr = 0; u32 tag = 0; @@ -2868,17 +2869,19 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) irh->vlan = skb_vlan_tag_get(skb) & 0xfff; } + xmit_more = skb->xmit_more; + if (unlikely(cmdsetup.s.timestamp)) - status = send_nic_timestamp_pkt(oct, &ndata, finfo); + status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more); else - status = octnet_send_nic_data_pkt(oct, &ndata); + status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more); if (status == IQ_SEND_FAILED) goto lio_xmit_failed; netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n"); if (status == IQ_SEND_STOP) - stop_q(lio->netdev, q_idx); + stop_q(netdev, q_idx); netif_trans_update(netdev); @@ -2897,6 +2900,9 @@ lio_xmit_failed: if (dptr) dma_unmap_single(&oct->pci_dev->dev, dptr, ndata.datasize, DMA_TO_DEVICE); + + octeon_ring_doorbell_locked(oct, iq_no); + tx_buffer_free(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 4c3b5688529b..00c19306ecee 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1690,7 +1690,8 @@ static void handle_timestamp(struct octeon_device *oct, u32 status, void *buf) */ static int send_nic_timestamp_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, - struct octnet_buf_free_info *finfo) + struct octnet_buf_free_info *finfo, + int xmit_more) { struct octeon_soft_command *sc; int ring_doorbell; @@ -1720,7 +1721,7 @@ static int send_nic_timestamp_pkt(struct octeon_device *oct, len = (u32)((struct octeon_instr_ih3 *)(&sc->cmd.cmd3.ih3))->dlengsz; - ring_doorbell = 1; + ring_doorbell = !xmit_more; retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd, sc, len, ndata->reqtype); @@ -1752,6 +1753,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) struct octeon_device *oct; int q_idx = 0, iq_no = 0; union tx_info *tx_info; + int xmit_more = 0; struct lio *lio; int status = 0; u64 dptr = 0; @@ -1940,10 +1942,12 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) irh->vlan = skb_vlan_tag_get(skb) & VLAN_VID_MASK; } + xmit_more = skb->xmit_more; + if (unlikely(cmdsetup.s.timestamp)) - status = send_nic_timestamp_pkt(oct, &ndata, finfo); + status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more); else - status = octnet_send_nic_data_pkt(oct, &ndata); + status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more); if (status == IQ_SEND_FAILED) goto lio_xmit_failed; @@ -1952,7 +1956,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) if (status == IQ_SEND_STOP) { dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n", iq_no); - stop_q(lio->netdev, q_idx); + stop_q(netdev, q_idx); } netif_trans_update(netdev); @@ -1972,6 +1976,9 @@ lio_xmit_failed: if (dptr) dma_unmap_single(&oct->pci_dev->dev, dptr, ndata.datasize, DMA_TO_DEVICE); + + octeon_ring_doorbell_locked(oct, iq_no); + tx_buffer_free(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h index 63bd9c94e547..ceac74388e09 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h @@ -37,6 +37,8 @@ #define MAX_OCTEON_LINKS MAX_OCTEON_NICIF #define MAX_OCTEON_MULTICAST_ADDR 32 +#define MAX_OCTEON_FILL_COUNT 8 + /* CN6xxx IQ configuration macros */ #define CN6XXX_MAX_INPUT_QUEUES 32 #define CN6XXX_MAX_IQ_DESCRIPTORS 2048 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index 5c3c8da976f7..81c987682941 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -343,6 +343,9 @@ int octeon_delete_instr_queue(struct octeon_device *octeon_dev, u32 iq_no); int lio_wait_for_instr_fetch(struct octeon_device *oct); +void +octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no); + int octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype, void (*fn)(void *)); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index 32ef3a7d88d8..c846eec11a45 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -63,7 +63,7 @@ struct octnet_buf_free_info { }; /* BQL-related 
functions */ -void octeon_report_sent_bytes_to_bql(void *buf, int reqtype); +int octeon_report_sent_bytes_to_bql(void *buf, int reqtype); void octeon_update_tx_completion_counters(void *buf, int reqtype, unsigned int *pkts_compl, unsigned int *bytes_compl); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index b457cf23fce6..150609bd8849 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -82,9 +82,10 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, } int octnet_send_nic_data_pkt(struct octeon_device *oct, - struct octnic_data_pkt *ndata) + struct octnic_data_pkt *ndata, + int xmit_more) { - int ring_doorbell = 1; + int ring_doorbell = !xmit_more; return octeon_send_command(oct, ndata->q_no, ring_doorbell, &ndata->cmd, ndata->buf, ndata->datasize, diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h index 6480ef863441..de4130d26a98 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h @@ -279,7 +279,8 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, * queue should be stopped, and IQ_SEND_OK if it sent okay. */ int octnet_send_nic_data_pkt(struct octeon_device *oct, - struct octnic_data_pkt *ndata); + struct octnic_data_pkt *ndata, + int xmit_more); /** Send a NIC control packet to the device * @param oct - octeon device pointer diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 1e0fbce86d60..a10459742ae4 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -278,6 +278,18 @@ ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq) } } +void +octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no) +{ + struct octeon_instr_queue *iq; + + iq = oct->instr_queue[iq_no]; + spin_lock(&iq->post_lock); + if (iq->fill_cnt) + ring_doorbell(oct, iq); + spin_unlock(&iq->post_lock); +} + static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq, u8 *cmd) { @@ -543,6 +555,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, u32 force_db, void *cmd, void *buf, u32 datasize, u32 reqtype) { + int xmit_stopped; struct iq_post_status st; struct octeon_instr_queue *iq = oct->instr_queue[iq_no]; @@ -554,12 +567,13 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, st = __post_command2(iq, cmd); if (st.status != IQ_SEND_FAILED) { - octeon_report_sent_bytes_to_bql(buf, reqtype); + xmit_stopped = octeon_report_sent_bytes_to_bql(buf, reqtype); __add_to_request_list(iq, st.index, buf, reqtype); INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize); INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1); - if (force_db) + if (iq->fill_cnt >= MAX_OCTEON_FILL_COUNT || force_db || + xmit_stopped || st.status == IQ_SEND_STOP) ring_doorbell(oct, iq); } else { INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1); From 26aa0459fad28725aa0bc12a3615cc9a0bd7118f Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Mon, 16 Oct 2017 18:01:23 -0700 Subject: [PATCH 1523/2177] net/sched: Check for null dev_queue on create flow In qdisc_alloc() the dev_queue pointer was used without any checks being performed. If qdisc_create() gets a null dev_queue pointer, it just passes it along to qdisc_alloc(), leading to a crash. 
That happens if a root qdisc implements select_queue() and returns a null dev_queue pointer for an "invalid handle", for example, or if the dev_queue associated with the parent qdisc is null. This patch is in preparation for the next in this series, where select_queue() is being added to mqprio and as it may return a null dev_queue. Signed-off-by: Jesus Sanchez-Palencia Tested-by: Henrik Austad Signed-off-by: Jeff Kirsher --- net/sched/sch_generic.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6ced7c89c141..aa74aa42b5d7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -603,8 +603,14 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc *sch; unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; int err = -ENOBUFS; - struct net_device *dev = dev_queue->dev; + struct net_device *dev; + if (!dev_queue) { + err = -EINVAL; + goto errout; + } + + dev = dev_queue->dev; p = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue)); From ce8a75f60b75831947569229616cf1bc94f2d965 Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Mon, 16 Oct 2017 18:01:24 -0700 Subject: [PATCH 1524/2177] net/sched: Change behavior of mq select_queue() Currently, the class_ops select_queue() implementation on sch_mq returns a pointer to netdev_queue #0 when it receives and invalid qdisc id. That can be misleading since all of mq's inner qdiscs are attached to a valid netdev_queue. Here we fix that by returning NULL when a qdisc id is invalid. This is aligned with how select_queue() is implemented for sch_mqprio in the next patch on this series, keeping a consistent behavior between these two qdiscs. Signed-off-by: Jesus Sanchez-Palencia Tested-by: Henrik Austad Signed-off-by: Jeff Kirsher --- net/sched/sch_mq.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3a3e507422b..213b586a06a0 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -130,15 +130,7 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl) static struct netdev_queue *mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm) { - unsigned int ntx = TC_H_MIN(tcm->tcm_parent); - struct netdev_queue *dev_queue = mq_queue_get(sch, ntx); - - if (!dev_queue) { - struct net_device *dev = qdisc_dev(sch); - - return netdev_get_tx_queue(dev, 0); - } - return dev_queue; + return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); } static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, From 0f7787b4133fb26b6dc0779e4867408e07711d8e Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Mon, 16 Oct 2017 18:01:25 -0700 Subject: [PATCH 1525/2177] net/sched: Add select_queue() class_ops for mqprio When replacing a child qdisc from mqprio, tc_modify_qdisc() must fetch the netdev_queue pointer that the current child qdisc is associated with before creating the new qdisc. Currently, when using mqprio as root qdisc, the kernel will end up getting the queue #0 pointer from the mqprio (root qdisc), which leaves any new child qdisc with a possibly wrong netdev_queue pointer. Implementing the Qdisc_class_ops select_queue() on mqprio fixes this issue and avoid an inconsistent state when child qdiscs are replaced. 
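As a minimal sketch (not the actual tc_modify_qdisc()/qdisc_create() logic), the replace path can now ask the parent qdisc for the right netdev_queue before allocating the child; the helper name below is hypothetical and only illustrates the dispatch:

static struct netdev_queue *pick_child_dev_queue(struct net_device *dev,
						 struct Qdisc *parent,
						 struct tcmsg *tcm)
{
	const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

	/* mq and mqprio now return the inner per-band queue, or NULL for
	 * an invalid minor handle, instead of always netdev queue #0.
	 */
	if (cops && cops->select_queue)
		return cops->select_queue(parent, tcm);

	return netdev_get_tx_queue(dev, 0);
}

Together with the qdisc_alloc() check added earlier in this series, a NULL dev_queue is rejected with -EINVAL instead of being dereferenced.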
Signed-off-by: Jesus Sanchez-Palencia Tested-by: Henrik Austad Signed-off-by: Jeff Kirsher --- net/sched/sch_mqprio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 51c2b289c69b..4d5ed45123f0 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -575,6 +575,12 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } +static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch, + struct tcmsg *tcm) +{ + return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); +} + static const struct Qdisc_class_ops mqprio_class_ops = { .graft = mqprio_graft, .leaf = mqprio_leaf, @@ -582,6 +588,7 @@ static const struct Qdisc_class_ops mqprio_class_ops = { .walk = mqprio_walk, .dump = mqprio_dump_class, .dump_stats = mqprio_dump_class_stats, + .select_queue = mqprio_select_queue, }; static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { From 585d763af09cc21daf48ecc873604ccdb70f6014 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 16 Oct 2017 18:01:26 -0700 Subject: [PATCH 1526/2177] net/sched: Introduce Credit Based Shaper (CBS) qdisc This queueing discipline implements the shaper algorithm defined by the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L. It's primary usage is to apply some bandwidth reservation to user defined traffic classes, which are mapped to different queues via the mqprio qdisc. Only a simple software implementation is added for now. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Jesus Sanchez-Palencia Tested-by: Henrik Austad Signed-off-by: Jeff Kirsher --- include/uapi/linux/pkt_sched.h | 19 +++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/sch_cbs.c | 293 +++++++++++++++++++++++++++++++++ 4 files changed, 324 insertions(+) create mode 100644 net/sched/sch_cbs.c diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index e7cc3d3c7421..0e88cc262ca0 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -904,4 +904,23 @@ struct tc_pie_xstats { __u32 maxq; /* maximum queue size */ __u32 ecn_mark; /* packets marked with ecn*/ }; + +/* CBS */ +struct tc_cbs_qopt { + __u8 offload; + __u8 _pad[3]; + __s32 hicredit; + __s32 locredit; + __s32 idleslope; + __s32 sendslope; +}; + +enum { + TCA_CBS_UNSPEC, + TCA_CBS_PARMS, + __TCA_CBS_MAX, +}; + +#define TCA_CBS_MAX (__TCA_CBS_MAX - 1) + #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e70ed26485a2..c03d86a7775e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -172,6 +172,17 @@ config NET_SCH_TBF To compile this code as a module, choose M here: the module will be called sch_tbf. +config NET_SCH_CBS + tristate "Credit Based Shaper (CBS)" + ---help--- + Say Y here if you want to use the Credit Based Shaper (CBS) packet + scheduling algorithm. + + See the top of for more details. + + To compile this code as a module, choose M here: the + module will be called sch_cbs. 
+ config NET_SCH_GRED tristate "Generic Random Early Detection (GRED)" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 7b915d226de7..80c8f92d162d 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o +obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c new file mode 100644 index 000000000000..0e85133c5653 --- /dev/null +++ b/net/sched/sch_cbs.c @@ -0,0 +1,293 @@ +/* + * net/sched/sch_cbs.c Credit Based Shaper + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Vinicius Costa Gomes + * + */ + +/* Credit Based Shaper (CBS) + * ========================= + * + * This is a simple rate-limiting shaper aimed at TSN applications on + * systems with known traffic workloads. + * + * Its algorithm is defined by the IEEE 802.1Q-2014 Specification, + * Section 8.6.8.2, and explained in more detail in the Annex L of the + * same specification. + * + * There are four tunables to be considered: + * + * 'idleslope': Idleslope is the rate of credits that is + * accumulated (in kilobits per second) when there is at least + * one packet waiting for transmission. Packets are transmitted + * when the current value of credits is equal or greater than + * zero. When there is no packet to be transmitted the amount of + * credits is set to zero. This is the main tunable of the CBS + * algorithm. + * + * 'sendslope': + * Sendslope is the rate of credits that is depleted (it should be a + * negative number of kilobits per second) when a transmission is + * ocurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section + * 8.6.8.2 item g): + * + * sendslope = idleslope - port_transmit_rate + * + * 'hicredit': Hicredit defines the maximum amount of credits (in + * bytes) that can be accumulated. Hicredit depends on the + * characteristics of interfering traffic, + * 'max_interference_size' is the maximum size of any burst of + * traffic that can delay the transmission of a frame that is + * available for transmission for this traffic class, (IEEE + * 802.1Q-2014 Annex L, Equation L-3): + * + * hicredit = max_interference_size * (idleslope / port_transmit_rate) + * + * 'locredit': Locredit is the minimum amount of credits that can + * be reached. 
It is a function of the traffic flowing through + * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2): + * + * locredit = max_frame_size * (sendslope / port_transmit_rate) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BYTES_PER_KBIT (1000LL / 8) + +struct cbs_sched_data { + s64 port_rate; /* in bytes/s */ + s64 last; /* timestamp in ns */ + s64 credits; /* in bytes */ + s32 locredit; /* in bytes */ + s32 hicredit; /* in bytes */ + s64 sendslope; /* in bytes/s */ + s64 idleslope; /* in bytes/s */ + struct qdisc_watchdog watchdog; + int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch); + struct sk_buff *(*dequeue)(struct Qdisc *sch); +}; + +static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + if (sch->q.qlen == 0 && q->credits > 0) { + /* We need to stop accumulating credits when there's + * no enqueued packets and q->credits is positive. + */ + q->credits = 0; + q->last = ktime_get_ns(); + } + + return qdisc_enqueue_tail(skb, sch); +} + +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + return q->enqueue(skb, sch); +} + +/* timediff is in ns, slope is in bytes/s */ +static s64 timediff_to_credits(s64 timediff, s64 slope) +{ + return div64_s64(timediff * slope, NSEC_PER_SEC); +} + +static s64 delay_from_credits(s64 credits, s64 slope) +{ + if (unlikely(slope == 0)) + return S64_MAX; + + return div64_s64(-credits * NSEC_PER_SEC, slope); +} + +static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) +{ + if (unlikely(port_rate == 0)) + return S64_MAX; + + return div64_s64(len * slope, port_rate); +} + +static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + s64 now = ktime_get_ns(); + struct sk_buff *skb; + s64 credits; + int len; + + if (q->credits < 0) { + credits = timediff_to_credits(now - q->last, q->idleslope); + + credits = q->credits + credits; + q->credits = min_t(s64, credits, q->hicredit); + + if (q->credits < 0) { + s64 delay; + + delay = delay_from_credits(q->credits, q->idleslope); + qdisc_watchdog_schedule_ns(&q->watchdog, now + delay); + + q->last = now; + + return NULL; + } + } + + skb = qdisc_dequeue_head(sch); + if (!skb) + return NULL; + + len = qdisc_pkt_len(skb); + + /* As sendslope is a negative number, this will decrease the + * amount of q->credits. 
+ */ + credits = credits_from_len(len, q->sendslope, q->port_rate); + credits += q->credits; + + q->credits = max_t(s64, credits, q->locredit); + q->last = now; + + return skb; +} + +static struct sk_buff *cbs_dequeue(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + return q->dequeue(sch); +} + +static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { + [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, +}; + +static int cbs_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct nlattr *tb[TCA_CBS_MAX + 1]; + struct ethtool_link_ksettings ecmd; + struct tc_cbs_qopt *qopt; + s64 link_speed; + int err; + + err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL); + if (err < 0) + return err; + + if (!tb[TCA_CBS_PARMS]) + return -EINVAL; + + qopt = nla_data(tb[TCA_CBS_PARMS]); + + if (qopt->offload) + return -EOPNOTSUPP; + + if (!__ethtool_get_link_ksettings(dev, &ecmd)) + link_speed = ecmd.base.speed; + else + link_speed = SPEED_1000; + + q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; + + q->enqueue = cbs_enqueue_soft; + q->dequeue = cbs_dequeue_soft; + + q->hicredit = qopt->hicredit; + q->locredit = qopt->locredit; + q->idleslope = qopt->idleslope * BYTES_PER_KBIT; + q->sendslope = qopt->sendslope * BYTES_PER_KBIT; + + return 0; +} + +static int cbs_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + if (!opt) + return -EINVAL; + + qdisc_watchdog_init(&q->watchdog, sch); + + return cbs_change(sch, opt); +} + +static void cbs_destroy(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + qdisc_watchdog_cancel(&q->watchdog); +} + +static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct tc_cbs_qopt opt = { }; + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + opt.hicredit = q->hicredit; + opt.locredit = q->locredit; + opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); + opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT); + opt.offload = 0; + + if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { + .id = "cbs", + .priv_size = sizeof(struct cbs_sched_data), + .enqueue = cbs_enqueue, + .dequeue = cbs_dequeue, + .peek = qdisc_peek_dequeued, + .init = cbs_init, + .reset = qdisc_reset_queue, + .destroy = cbs_destroy, + .change = cbs_change, + .dump = cbs_dump, + .owner = THIS_MODULE, +}; + +static int __init cbs_module_init(void) +{ + return register_qdisc(&cbs_qdisc_ops); +} + +static void __exit cbs_module_exit(void) +{ + unregister_qdisc(&cbs_qdisc_ops); +} +module_init(cbs_module_init) +module_exit(cbs_module_exit) +MODULE_LICENSE("GPL"); From 3d0bd028ffb4a4915cb64cfa0d2cee1578cc0321 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 16 Oct 2017 18:01:27 -0700 Subject: [PATCH 1527/2177] net/sched: Add support for HW offloading for CBS This adds support for offloading the CBS algorithm to the controller, if supported. Drivers wanting to support CBS offload must implement the .ndo_setup_tc callback and handle the TC_SETUP_CBS (introduced here) type. 
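As a sketch of the driver-side contract, assuming a hypothetical foo driver (see the igb patch later in this series for a real implementation), the handler only needs to recognise TC_SETUP_CBS and hand the tc_cbs_qopt_offload fields to its device-specific code:

static int foo_hw_program_cbs(struct net_device *dev, bool enable, int queue,
			      s32 idleslope, s32 sendslope,
			      s32 hicredit, s32 locredit)
{
	/* Device specific: program the credit-based shaper on hardware
	 * queue @queue, or fall back to strict priority when @enable is
	 * false.  hicredit/locredit are in bytes, idleslope/sendslope in
	 * kilobits per second, exactly as configured on the cbs qdisc.
	 */
	return 0;
}

static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			void *type_data)
{
	struct tc_cbs_qopt_offload *qopt = type_data;

	if (type != TC_SETUP_CBS)
		return -EOPNOTSUPP;

	return foo_hw_program_cbs(dev, qopt->enable, qopt->queue,
				  qopt->idleslope, qopt->sendslope,
				  qopt->hicredit, qopt->locredit);
}

If a driver does not implement .ndo_setup_tc at all, cbs_enable_offload() fails with -EOPNOTSUPP and the qdisc refuses the offload request.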
Signed-off-by: Vinicius Costa Gomes Tested-by: Henrik Austad Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 1 + include/net/pkt_sched.h | 9 ++++ net/sched/sch_cbs.c | 104 +++++++++++++++++++++++++++++++++----- 3 files changed, 102 insertions(+), 12 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6c7960c8338a..5e02f79b2110 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -776,6 +776,7 @@ enum tc_setup_type { TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, TC_SETUP_BLOCK, + TC_SETUP_CBS, }; /* These structures hold the attributes of xdp state that are being passed diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index b8ecafce4ba1..02f2db26e30c 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -140,4 +140,13 @@ static inline struct net *qdisc_net(struct Qdisc *q) return dev_net(q->dev_queue->dev); } +struct tc_cbs_qopt_offload { + u8 enable; + s32 queue; + s32 hicredit; + s32 locredit; + s32 idleslope; + s32 sendslope; +}; + #endif diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 0e85133c5653..bdb533b7fb8c 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -68,6 +68,8 @@ #define BYTES_PER_KBIT (1000LL / 8) struct cbs_sched_data { + bool offload; + int queue; s64 port_rate; /* in bytes/s */ s64 last; /* timestamp in ns */ s64 credits; /* in bytes */ @@ -80,6 +82,11 @@ struct cbs_sched_data { struct sk_buff *(*dequeue)(struct Qdisc *sch); }; +static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch) +{ + return qdisc_enqueue_tail(skb, sch); +} + static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); @@ -169,6 +176,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) return skb; } +static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch) +{ + return qdisc_dequeue_head(sch); +} + static struct sk_buff *cbs_dequeue(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); @@ -180,14 +192,66 @@ static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, }; +static void cbs_disable_offload(struct net_device *dev, + struct cbs_sched_data *q) +{ + struct tc_cbs_qopt_offload cbs = { }; + const struct net_device_ops *ops; + int err; + + if (!q->offload) + return; + + q->enqueue = cbs_enqueue_soft; + q->dequeue = cbs_dequeue_soft; + + ops = dev->netdev_ops; + if (!ops->ndo_setup_tc) + return; + + cbs.queue = q->queue; + cbs.enable = 0; + + err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs); + if (err < 0) + pr_warn("Couldn't disable CBS offload for queue %d\n", + cbs.queue); +} + +static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, + const struct tc_cbs_qopt *opt) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_cbs_qopt_offload cbs = { }; + int err; + + if (!ops->ndo_setup_tc) + return -EOPNOTSUPP; + + cbs.queue = q->queue; + + cbs.enable = 1; + cbs.hicredit = opt->hicredit; + cbs.locredit = opt->locredit; + cbs.idleslope = opt->idleslope; + cbs.sendslope = opt->sendslope; + + err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs); + if (err < 0) + return err; + + q->enqueue = cbs_enqueue_offload; + q->dequeue = cbs_dequeue_offload; + + return 0; +} + static int cbs_change(struct Qdisc *sch, struct nlattr *opt) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct nlattr *tb[TCA_CBS_MAX + 1]; - struct ethtool_link_ksettings ecmd; struct tc_cbs_qopt 
*qopt; - s64 link_speed; int err; err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL); @@ -199,23 +263,30 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt) qopt = nla_data(tb[TCA_CBS_PARMS]); - if (qopt->offload) - return -EOPNOTSUPP; + if (!qopt->offload) { + struct ethtool_link_ksettings ecmd; + s64 link_speed; - if (!__ethtool_get_link_ksettings(dev, &ecmd)) - link_speed = ecmd.base.speed; - else - link_speed = SPEED_1000; + if (!__ethtool_get_link_ksettings(dev, &ecmd)) + link_speed = ecmd.base.speed; + else + link_speed = SPEED_1000; - q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; + q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; - q->enqueue = cbs_enqueue_soft; - q->dequeue = cbs_dequeue_soft; + cbs_disable_offload(dev, q); + } else { + err = cbs_enable_offload(dev, q, qopt); + if (err < 0) + return err; + } + /* Everything went OK, save the parameters used. */ q->hicredit = qopt->hicredit; q->locredit = qopt->locredit; q->idleslope = qopt->idleslope * BYTES_PER_KBIT; q->sendslope = qopt->sendslope * BYTES_PER_KBIT; + q->offload = qopt->offload; return 0; } @@ -223,10 +294,16 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt) static int cbs_init(struct Qdisc *sch, struct nlattr *opt) { struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); if (!opt) return -EINVAL; + q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); + + q->enqueue = cbs_enqueue_soft; + q->dequeue = cbs_dequeue_soft; + qdisc_watchdog_init(&q->watchdog, sch); return cbs_change(sch, opt); @@ -235,8 +312,11 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt) static void cbs_destroy(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); qdisc_watchdog_cancel(&q->watchdog); + + cbs_disable_offload(dev, q); } static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -253,7 +333,7 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) opt.locredit = q->locredit; opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT); - opt.offload = 0; + opt.offload = q->offload; if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) goto nla_put_failure; From 05f9d3e1ae6eaf7507e3bd95b0eef2acd4b84ea8 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Mon, 16 Oct 2017 18:01:28 -0700 Subject: [PATCH 1528/2177] igb: Add support for CBS offload This patch adds support for Credit-Based Shaper (CBS) qdisc offload from Traffic Control system. This support enable us to leverage the Forwarding and Queuing for Time-Sensitive Streams (FQTSS) features from Intel i210 Ethernet Controller. FQTSS is the former 802.1Qav standard which was merged into 802.1Q in 2014. It enables traffic prioritization and bandwidth reservation via the Credit-Based Shaper which is implemented in hardware by i210 controller. The patch introduces the igb_setup_tc() function which implements the support for CBS qdisc hardware offload in the IGB driver. CBS offload is the only traffic control offload supported by the driver at the moment. FQTSS transmission mode from i210 controller is automatically enabled by the IGB driver when the CBS is enabled for the first hardware queue. Likewise, FQTSS mode is automatically disabled when CBS is disabled for the last hardware queue. Changing FQTSS mode requires NIC reset. FQTSS feature is supported by i210 controller only. 
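As a quick sanity check of equation (E6) derived in igb_configure_cbs() in the diff below, a hypothetical helper with one worked value:

/* Mirrors equation (E6): TQAVCC.idleSlope = idleslope * 61034 / 1000000,
 * rounded up, with idleslope in kbit/s.  For a 20 Mbit/s reservation
 * (idleslope = 20000) this gives DIV_ROUND_UP_ULL(20000 * 61034ULL,
 * 1000000) = 1221, roughly 2% of the full-scale 0x7735 * 2 value, which
 * matches 20 Mbit/s out of a 1 Gbit/s link.
 */
static u16 idleslope_kbps_to_tqavcc(int idleslope)
{
	return DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
}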
Signed-off-by: Andre Guedes Tested-by: Aaron Brown Tested-by: Henrik Austad Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/igb/e1000_defines.h | 23 ++ drivers/net/ethernet/intel/igb/e1000_regs.h | 8 + drivers/net/ethernet/intel/igb/igb.h | 6 + drivers/net/ethernet/intel/igb/igb_main.c | 347 ++++++++++++++++++ 4 files changed, 384 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 1de82f247312..83cabff1e0ab 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -353,7 +353,18 @@ #define E1000_RXPBS_CFG_TS_EN 0x80000000 #define I210_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ +#define I210_RXPBSIZE_MASK 0x0000003F +#define I210_RXPBSIZE_PB_32KB 0x00000020 #define I210_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ +#define I210_TXPBSIZE_MASK 0xC0FFFFFF +#define I210_TXPBSIZE_PB0_8KB (8 << 0) +#define I210_TXPBSIZE_PB1_8KB (8 << 6) +#define I210_TXPBSIZE_PB2_4KB (4 << 12) +#define I210_TXPBSIZE_PB3_4KB (4 << 18) + +#define I210_DTXMXPKTSZ_DEFAULT 0x00000098 + +#define I210_SR_QUEUES_NUM 2 /* SerDes Control */ #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 @@ -1051,4 +1062,16 @@ #define E1000_VLAPQF_P_VALID(_n) (0x1 << (3 + (_n) * 4)) #define E1000_VLAPQF_QUEUE_MASK 0x03 +/* TX Qav Control fields */ +#define E1000_TQAVCTRL_XMIT_MODE BIT(0) +#define E1000_TQAVCTRL_DATAFETCHARB BIT(4) +#define E1000_TQAVCTRL_DATATRANARB BIT(8) + +/* TX Qav Credit Control fields */ +#define E1000_TQAVCC_IDLESLOPE_MASK 0xFFFF +#define E1000_TQAVCC_QUEUEMODE BIT(31) + +/* Transmit Descriptor Control fields */ +#define E1000_TXDCTL_PRIORITY BIT(27) + #endif diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 58adbf234e07..8eee081d395f 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -421,6 +421,14 @@ do { \ #define E1000_I210_FLA 0x1201C +#define E1000_I210_DTXMXPKTSZ 0x355C + +#define E1000_I210_TXDCTL(_n) (0x0E028 + ((_n) * 0x40)) + +#define E1000_I210_TQAVCTRL 0x3570 +#define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40)) +#define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40)) + #define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) #define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 06ffb2bc713e..92845692087a 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -281,6 +281,11 @@ struct igb_ring { u16 count; /* number of desc. 
in the ring */ u8 queue_index; /* logical index of the ring*/ u8 reg_idx; /* physical index of the ring */ + bool cbs_enable; /* indicates if CBS is enabled */ + s32 idleslope; /* idleSlope in kbps */ + s32 sendslope; /* sendSlope in kbps */ + s32 hicredit; /* hiCredit in bytes */ + s32 locredit; /* loCredit in bytes */ /* everything past this point are written often */ u16 next_to_clean; @@ -621,6 +626,7 @@ struct igb_adapter { #define IGB_FLAG_EEE BIT(14) #define IGB_FLAG_VLAN_PROMISC BIT(15) #define IGB_FLAG_RX_LEGACY BIT(16) +#define IGB_FLAG_FQTSS BIT(17) /* Media Auto Sense */ #define IGB_MAS_ENABLE_0 0X0001 diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 58d01a211367..b3d730f4d695 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,17 @@ #define BUILD 0 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" + +enum queue_mode { + QUEUE_MODE_STRICT_PRIORITY, + QUEUE_MODE_STREAM_RESERVATION, +}; + +enum tx_queue_prio { + TX_QUEUE_PRIO_HIGH, + TX_QUEUE_PRIO_LOW, +}; + char igb_driver_name[] = "igb"; char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = @@ -1271,6 +1283,12 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; + ring->cbs_enable = false; + ring->idleslope = 0; + ring->sendslope = 0; + ring->hicredit = 0; + ring->locredit = 0; + u64_stats_init(&ring->tx_syncp); u64_stats_init(&ring->tx_syncp2); @@ -1598,6 +1616,284 @@ static void igb_get_hw_control(struct igb_adapter *adapter) ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } +static void enable_fqtss(struct igb_adapter *adapter, bool enable) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + + WARN_ON(hw->mac.type != e1000_i210); + + if (enable) + adapter->flags |= IGB_FLAG_FQTSS; + else + adapter->flags &= ~IGB_FLAG_FQTSS; + + if (netif_running(netdev)) + schedule_work(&adapter->reset_task); +} + +static bool is_fqtss_enabled(struct igb_adapter *adapter) +{ + return (adapter->flags & IGB_FLAG_FQTSS) ? true : false; +} + +static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue, + enum tx_queue_prio prio) +{ + u32 val; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 4); + + val = rd32(E1000_I210_TXDCTL(queue)); + + if (prio == TX_QUEUE_PRIO_HIGH) + val |= E1000_TXDCTL_PRIORITY; + else + val &= ~E1000_TXDCTL_PRIORITY; + + wr32(E1000_I210_TXDCTL(queue), val); +} + +static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode) +{ + u32 val; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 1); + + val = rd32(E1000_I210_TQAVCC(queue)); + + if (mode == QUEUE_MODE_STREAM_RESERVATION) + val |= E1000_TQAVCC_QUEUEMODE; + else + val &= ~E1000_TQAVCC_QUEUEMODE; + + wr32(E1000_I210_TQAVCC(queue), val); +} + +/** + * igb_configure_cbs - Configure Credit-Based Shaper (CBS) + * @adapter: pointer to adapter struct + * @queue: queue number + * @enable: true = enable CBS, false = disable CBS + * @idleslope: idleSlope in kbps + * @sendslope: sendSlope in kbps + * @hicredit: hiCredit in bytes + * @locredit: loCredit in bytes + * + * Configure CBS for a given hardware queue. When disabling, idleslope, + * sendslope, hicredit, locredit arguments are ignored. Returns 0 if + * success. Negative otherwise. 
+ **/ +static void igb_configure_cbs(struct igb_adapter *adapter, int queue, + bool enable, int idleslope, int sendslope, + int hicredit, int locredit) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + u32 tqavcc; + u16 value; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 1); + + if (enable) { + set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH); + set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION); + + /* According to i210 datasheet section 7.2.7.7, we should set + * the 'idleSlope' field from TQAVCC register following the + * equation: + * + * For 100 Mbps link speed: + * + * value = BW * 0x7735 * 0.2 (E1) + * + * For 1000Mbps link speed: + * + * value = BW * 0x7735 * 2 (E2) + * + * E1 and E2 can be merged into one equation as shown below. + * Note that 'link-speed' is in Mbps. + * + * value = BW * 0x7735 * 2 * link-speed + * -------------- (E3) + * 1000 + * + * 'BW' is the percentage bandwidth out of full link speed + * which can be found with the following equation. Note that + * idleSlope here is the parameter from this function which + * is in kbps. + * + * BW = idleSlope + * ----------------- (E4) + * link-speed * 1000 + * + * That said, we can come up with a generic equation to + * calculate the value we should set it TQAVCC register by + * replacing 'BW' in E3 by E4. The resulting equation is: + * + * value = idleSlope * 0x7735 * 2 * link-speed + * ----------------- -------------- (E5) + * link-speed * 1000 1000 + * + * 'link-speed' is present in both sides of the fraction so + * it is canceled out. The final equation is the following: + * + * value = idleSlope * 61034 + * ----------------- (E6) + * 1000000 + */ + value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000); + + tqavcc = rd32(E1000_I210_TQAVCC(queue)); + tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK; + tqavcc |= value; + wr32(E1000_I210_TQAVCC(queue), tqavcc); + + wr32(E1000_I210_TQAVHC(queue), 0x80000000 + hicredit * 0x7735); + } else { + set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW); + set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY); + + /* Set idleSlope to zero. */ + tqavcc = rd32(E1000_I210_TQAVCC(queue)); + tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK; + wr32(E1000_I210_TQAVCC(queue), tqavcc); + + /* Set hiCredit to zero. */ + wr32(E1000_I210_TQAVHC(queue), 0); + } + + /* XXX: In i210 controller the sendSlope and loCredit parameters from + * CBS are not configurable by software so we don't do any 'controller + * configuration' in respect to these parameters. + */ + + netdev_dbg(netdev, "CBS %s: queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n", + (enable) ? 
"enabled" : "disabled", queue, + idleslope, sendslope, hicredit, locredit); +} + +static int igb_save_cbs_params(struct igb_adapter *adapter, int queue, + bool enable, int idleslope, int sendslope, + int hicredit, int locredit) +{ + struct igb_ring *ring; + + if (queue < 0 || queue > adapter->num_tx_queues) + return -EINVAL; + + ring = adapter->tx_ring[queue]; + + ring->cbs_enable = enable; + ring->idleslope = idleslope; + ring->sendslope = sendslope; + ring->hicredit = hicredit; + ring->locredit = locredit; + + return 0; +} + +static bool is_any_cbs_enabled(struct igb_adapter *adapter) +{ + struct igb_ring *ring; + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) { + ring = adapter->tx_ring[i]; + + if (ring->cbs_enable) + return true; + } + + return false; +} + +static void igb_setup_tx_mode(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + u32 val; + + /* Only i210 controller supports changing the transmission mode. */ + if (hw->mac.type != e1000_i210) + return; + + if (is_fqtss_enabled(adapter)) { + int i, max_queue; + + /* Configure TQAVCTRL register: set transmit mode to 'Qav', + * set data fetch arbitration to 'round robin' and set data + * transfer arbitration to 'credit shaper algorithm. + */ + val = rd32(E1000_I210_TQAVCTRL); + val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_DATATRANARB; + val &= ~E1000_TQAVCTRL_DATAFETCHARB; + wr32(E1000_I210_TQAVCTRL, val); + + /* Configure Tx and Rx packet buffers sizes as described in + * i210 datasheet section 7.2.7.7. + */ + val = rd32(E1000_TXPBS); + val &= ~I210_TXPBSIZE_MASK; + val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB | + I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB; + wr32(E1000_TXPBS, val); + + val = rd32(E1000_RXPBS); + val &= ~I210_RXPBSIZE_MASK; + val |= I210_RXPBSIZE_PB_32KB; + wr32(E1000_RXPBS, val); + + /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ + * register should not exceed the buffer size programmed in + * TXPBS. The smallest buffer size programmed in TXPBS is 4kB + * so according to the datasheet we should set MAX_TPKT_SIZE to + * 4kB / 64. + * + * However, when we do so, no frame from queue 2 and 3 are + * transmitted. It seems the MAX_TPKT_SIZE should not be great + * or _equal_ to the buffer size programmed in TXPBS. For this + * reason, we set set MAX_ TPKT_SIZE to (4kB - 1) / 64. + */ + val = (4096 - 1) / 64; + wr32(E1000_I210_DTXMXPKTSZ, val); + + /* Since FQTSS mode is enabled, apply any CBS configuration + * previously set. If no previous CBS configuration has been + * done, then the initial configuration is applied, which means + * CBS is disabled. + */ + max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ? + adapter->num_tx_queues : I210_SR_QUEUES_NUM; + + for (i = 0; i < max_queue; i++) { + struct igb_ring *ring = adapter->tx_ring[i]; + + igb_configure_cbs(adapter, i, ring->cbs_enable, + ring->idleslope, ring->sendslope, + ring->hicredit, ring->locredit); + } + } else { + wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); + wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); + wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT); + + val = rd32(E1000_I210_TQAVCTRL); + /* According to Section 8.12.21, the other flags we've set when + * enabling FQTSS are not relevant when disabling FQTSS so we + * don't set they here. + */ + val &= ~E1000_TQAVCTRL_XMIT_MODE; + wr32(E1000_I210_TQAVCTRL, val); + } + + netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ? 
+ "enabled" : "disabled"); +} + /** * igb_configure - configure the hardware for RX and TX * @adapter: private board structure @@ -1609,6 +1905,7 @@ static void igb_configure(struct igb_adapter *adapter) igb_get_hw_control(adapter); igb_set_rx_mode(netdev); + igb_setup_tx_mode(adapter); igb_restore_vlan(adapter); @@ -2150,6 +2447,55 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev, return features; } +static int igb_offload_cbs(struct igb_adapter *adapter, + struct tc_cbs_qopt_offload *qopt) +{ + struct e1000_hw *hw = &adapter->hw; + int err; + + /* CBS offloading is only supported by i210 controller. */ + if (hw->mac.type != e1000_i210) + return -EOPNOTSUPP; + + /* CBS offloading is only supported by queue 0 and queue 1. */ + if (qopt->queue < 0 || qopt->queue > 1) + return -EINVAL; + + err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable, + qopt->idleslope, qopt->sendslope, + qopt->hicredit, qopt->locredit); + if (err) + return err; + + if (is_fqtss_enabled(adapter)) { + igb_configure_cbs(adapter, qopt->queue, qopt->enable, + qopt->idleslope, qopt->sendslope, + qopt->hicredit, qopt->locredit); + + if (!is_any_cbs_enabled(adapter)) + enable_fqtss(adapter, false); + + } else { + enable_fqtss(adapter, true); + } + + return 0; +} + +static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + switch (type) { + case TC_SETUP_CBS: + return igb_offload_cbs(adapter, type_data); + + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, @@ -2175,6 +2521,7 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_set_features = igb_set_features, .ndo_fdb_add = igb_ndo_fdb_add, .ndo_features_check = igb_features_check, + .ndo_setup_tc = igb_setup_tc, }; /** From 952c5719aac6587f1e0add97dca79f9e73887f9b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 28 Oct 2017 01:56:10 -0400 Subject: [PATCH 1529/2177] bnxt_en: Fix randconfig build errors. Fix undefined symbols when CONFIG_VLAN_8021Q or CONFIG_INET is not set. Fixes: 8c95f773b4a3 ("bnxt_en: add support for Flower based vxlan encap/decap offload") Reported-by: Jakub Kicinski Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 798d13964274..d5031f436f83 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -904,6 +904,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, struct bnxt_tc_l2_key *l2_info, struct net_device *real_dst_dev) { +#ifdef CONFIG_INET struct flowi4 flow = { {0} }; struct net_device *dst_dev; struct neighbour *nbr; @@ -925,6 +926,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, */ dst_dev = rt->dst.dev; if (is_vlan_dev(dst_dev)) { +#if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev); if (vlan->real_dev != real_dst_dev) { @@ -938,6 +940,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, l2_info->inner_vlan_tci = htons(vlan->vlan_id); l2_info->inner_vlan_tpid = vlan->vlan_proto; l2_info->num_vlans = 1; +#endif } else if (dst_dev != real_dst_dev) { netdev_info(bp->dev, "dst_dev(%s) for %pI4b is not PF-if(%s)", @@ -966,6 +969,9 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, put_rt: ip_rt_put(rt); return rc; +#else + return -EOPNOTSUPP; +#endif } static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, From 57ab1ca215971702df534ae93cd76c15ca084c77 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Oct 2017 10:50:07 -0400 Subject: [PATCH 1530/2177] net: dsa: move fixed link registration helpers The new bindings (dsa2.c) and the old bindings (legacy.c) share two helpers dsa_cpu_dsa_setup and dsa_cpu_dsa_destroy, used to register or deregister a fixed PHY if a given port has a corresponding device node. Unclutter the code by moving them into two new port.c helpers, dsa_port_fixed_link_register_of and dsa_port_fixed_link_(un)register_of. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 39 -------------------------------------- net/dsa/dsa2.c | 8 ++++---- net/dsa/dsa_priv.h | 5 +++-- net/dsa/legacy.c | 4 ++-- net/dsa/port.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 47 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index fe0081730305..b8f2d9f7c3ed 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -68,37 +68,6 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { [DSA_TAG_PROTO_NONE] = &none_ops, }; -int dsa_cpu_dsa_setup(struct dsa_port *port) -{ - struct device_node *port_dn = port->dn; - struct dsa_switch *ds = port->ds; - struct phy_device *phydev; - int ret, mode; - - if (of_phy_is_fixed_link(port_dn)) { - ret = of_phy_register_fixed_link(port_dn); - if (ret) { - dev_err(ds->dev, "failed to register fixed PHY\n"); - return ret; - } - phydev = of_phy_find_device(port_dn); - - mode = of_get_phy_mode(port_dn); - if (mode < 0) - mode = PHY_INTERFACE_MODE_NA; - phydev->interface = mode; - - genphy_config_init(phydev); - genphy_read_status(phydev); - if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port->index, phydev); - - put_device(&phydev->mdio.dev); - } - - return 0; -} - const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) { const struct dsa_device_ops *ops; @@ -113,14 +82,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) return ops; } -void dsa_cpu_dsa_destroy(struct dsa_port *port) -{ - struct device_node *port_dn = port->dn; - - if (of_phy_is_fixed_link(port_dn)) - of_phy_deregister_fixed_link(port_dn); -} - static int dev_is_class(struct device *dev, void *class) { if (dev->class != NULL && !strcmp(dev->class->name, class)) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index ec58654a71cd..de91c48b6806 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -219,7 +219,7 @@ static int dsa_dsa_port_apply(struct dsa_port *port) struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(port); + err = dsa_port_fixed_link_register_of(port); if (err) { dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", port->index, err); @@ -235,7 +235,7 @@ static int dsa_dsa_port_apply(struct dsa_port *port) static void dsa_dsa_port_unapply(struct dsa_port *port) { devlink_port_unregister(&port->devlink_port); - dsa_cpu_dsa_destroy(port); + dsa_port_fixed_link_unregister_of(port); } static int dsa_cpu_port_apply(struct dsa_port *port) @@ -243,7 +243,7 @@ static int dsa_cpu_port_apply(struct dsa_port *port) struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(port); + err = dsa_port_fixed_link_register_of(port); if (err) { dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", port->index, err); @@ -259,7 +259,7 @@ static int dsa_cpu_port_apply(struct dsa_port *port) static void dsa_cpu_port_unapply(struct dsa_port *port) { devlink_port_unregister(&port->devlink_port); - dsa_cpu_dsa_destroy(port); + dsa_port_fixed_link_unregister_of(port); } static int dsa_user_port_apply(struct dsa_port *port) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 1e9914062d0b..1e65afd6989e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -93,8 +93,6 @@ struct dsa_slave_priv { }; /* dsa.c */ -int dsa_cpu_dsa_setup(struct dsa_port *port); -void dsa_cpu_dsa_destroy(struct dsa_port *dport); const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); bool dsa_schedule_work(struct work_struct *work); @@ -159,6 +157,9 @@ int dsa_port_vlan_add(struct dsa_port *dp, struct switchdev_trans *trans); int 
dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); +int dsa_port_fixed_link_register_of(struct dsa_port *dp); +void dsa_port_fixed_link_unregister_of(struct dsa_port *dp); + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 93e1b116ef83..ed7aae342fca 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds) if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - ret = dsa_cpu_dsa_setup(&ds->ports[port]); + ret = dsa_port_fixed_link_register_of(&ds->ports[port]); if (ret) return ret; } @@ -274,7 +274,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) for (port = 0; port < ds->num_ports; port++) { if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - dsa_cpu_dsa_destroy(&ds->ports[port]); + dsa_port_fixed_link_unregister_of(&ds->ports[port]); } if (ds->slave_mii_bus && ds->ops->phy_read) diff --git a/net/dsa/port.c b/net/dsa/port.c index 72c8dbd3d3f2..bb30b1a7de3a 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -12,6 +12,8 @@ #include #include +#include +#include #include "dsa_priv.h" @@ -264,3 +266,48 @@ int dsa_port_vlan_del(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); } + +int dsa_port_fixed_link_register_of(struct dsa_port *dp) +{ + struct device_node *dn = dp->dn; + struct dsa_switch *ds = dp->ds; + struct phy_device *phydev; + int port = dp->index; + int mode; + int err; + + if (of_phy_is_fixed_link(dn)) { + err = of_phy_register_fixed_link(dn); + if (err) { + dev_err(ds->dev, + "failed to register the fixed PHY of port %d\n", + port); + return err; + } + + phydev = of_phy_find_device(dn); + + mode = of_get_phy_mode(dn); + if (mode < 0) + mode = PHY_INTERFACE_MODE_NA; + phydev->interface = mode; + + genphy_config_init(phydev); + genphy_read_status(phydev); + + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); + + put_device(&phydev->mdio.dev); + } + + return 0; +} + +void dsa_port_fixed_link_unregister_of(struct dsa_port *dp) +{ + struct device_node *dn = dp->dn; + + if (of_phy_is_fixed_link(dn)) + of_phy_deregister_fixed_link(dn); +} From aa28667cfbe4ff6f14454dda210b1f2e485f99b5 Mon Sep 17 00:00:00 2001 From: Felix Manlunas Date: Thu, 26 Oct 2017 16:46:36 -0700 Subject: [PATCH 1531/2177] liquidio: fix kernel panic in VF driver Doing ifconfig down on VF driver in the middle of receiving line rate traffic causes a kernel panic: LiquidIO_VF 0000:02:00.3: should not come here should not get rx when poll mode = 0 for vf BUG: unable to handle kernel NULL pointer dereference at (null) . . . Call Trace: ? 
tasklet_action+0x102/0x120 __do_softirq+0x91/0x292 irq_exit+0xb6/0xc0 do_IRQ+0x4f/0xd0 common_interrupt+0x93/0x93 RIP: 0010:cpuidle_enter_state+0x142/0x2f0 RSP: 0018:ffffffffa6403e20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff59 RAX: 0000000000000000 RBX: 0000000000000003 RCX: 000000000000001f RDX: 0000000000000000 RSI: 000000002ab7519f RDI: 0000000000000000 RBP: ffffffffa6403e58 R08: 0000000000000084 R09: 0000000000000018 R10: ffffffffa6403df0 R11: 00000000000003c7 R12: 0000000000000003 R13: ffffd27ebd806800 R14: ffffffffa64d40d8 R15: 0000007be072823f cpuidle_enter+0x17/0x20 call_cpuidle+0x23/0x40 do_idle+0x18c/0x1f0 cpu_startup_entry+0x64/0x70 rest_init+0xa5/0xb0 start_kernel+0x45e/0x46b x86_64_start_reservations+0x24/0x26 x86_64_start_kernel+0x6f/0x72 secondary_startup_64+0xa5/0xa5 Code: Bad RIP value. RIP: (null) RSP: ffff9246ed003f28 CR2: 0000000000000000 ---[ end trace 92731e80f31b7d7d ]--- Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x24000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt Reason is: in the function assigned to net_device_ops->ndo_stop, the steps for bringing down the interface are done in the wrong order. The step that notifies the NIC firmware to stop forwarding packets to host is done too late. Fix it by moving that step to the beginning. Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 00c19306ecee..fd70a4844e2d 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1288,6 +1288,9 @@ static int liquidio_stop(struct net_device *netdev) struct octeon_device *oct = lio->oct_dev; struct napi_struct *napi, *n; + /* tell Octeon to stop forwarding packets to host */ + send_rx_ctrl_cmd(lio, 0); + if (oct->props[lio->ifidx].napi_enabled) { list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) napi_disable(napi); @@ -1305,9 +1308,6 @@ static int liquidio_stop(struct net_device *netdev) netif_carrier_off(netdev); lio->link_changes++; - /* tell Octeon to stop forwarding packets to host */ - send_rx_ctrl_cmd(lio, 0); - ifstate_reset(lio, LIO_IFSTATE_RUNNING); txqs_stop(netdev); From a267eaebfcaeb27ad3b83303b6c9f8f739d757aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Casc=C3=B3n?= Date: Thu, 26 Oct 2017 17:35:38 -0700 Subject: [PATCH 1532/2177] nfp: inform the VF driver needs to be restarted after changing the MAC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add message to inform the VF MAC was changed and the need to restart the VF driver for the changes to be effective. Signed-off-by: Pablo Cascón Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c index e6d2e06b050c..8b1b962cf1d1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c @@ -112,7 +112,13 @@ int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) writew(get_unaligned_be16(mac + 4), app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO); - return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + err = nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + if (!err) + nfp_info(app->pf->cpp, + "MAC %pM set on VF %d, reload the VF driver to make this change effective.\n", + mac, vf); + + return err; } int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, From a830405ee452ddc4101c3c9334e6fedd42c6b357 Mon Sep 17 00:00:00 2001 From: Bhadram Varka Date: Fri, 27 Oct 2017 08:22:02 +0530 Subject: [PATCH 1533/2177] stmmac: copy unicast mac address to MAC registers Currently stmmac driver not copying the valid ethernet MAC address to MAC registers. This patch takes care of updating the MAC register with MAC address. Signed-off-by: Bhadram Varka Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c7a894ead274..ff4fb5eae1af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3749,6 +3749,20 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return ret; } +static int stmmac_set_mac_address(struct net_device *ndev, void *addr) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + int ret = 0; + + ret = eth_mac_addr(ndev, addr); + if (ret) + return ret; + + priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0); + + return ret; +} + #ifdef CONFIG_DEBUG_FS static struct dentry *stmmac_fs_dir; @@ -3976,7 +3990,7 @@ static const struct net_device_ops stmmac_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = stmmac_set_mac_address, }; /** From 74b6551b9f4106a3e69ab46e60b52a8947dcf60c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 26 Oct 2017 23:10:35 -0500 Subject: [PATCH 1534/2177] ipv6: exthdrs: use swap macro in ipv6_dest_hao make use of the swap macro and remove unnecessary variable tmp_addr. This makes the code easier to read and maintain. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
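For reference, the swap() used here is the generic helper from include/linux/kernel.h, which is roughly:

/* Two-value exchange; the hidden temporary is why tmp_addr can go away. */
#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

so the three-statement exchange of ipv6h->saddr and hao->addr in the hunk below collapses to a single swap(ipv6h->saddr, hao->addr).
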
Miller --- net/ipv6/exthdrs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 7835dea930b4..9f918a770f87 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -187,7 +187,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - struct in6_addr tmp_addr; int ret; if (opt->dsthao) { @@ -229,9 +228,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; - tmp_addr = ipv6h->saddr; - ipv6h->saddr = hao->addr; - hao->addr = tmp_addr; + swap(ipv6h->saddr, hao->addr); if (skb->tstamp == 0) __net_timestamp(skb); From c63144e4dda7967f2419fa3c2cc5db1228a7fccf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:54:25 -0700 Subject: [PATCH 1535/2177] drivers/net: 8390: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/axnet_cs.c | 10 +++++----- drivers/net/ethernet/8390/pcnet_cs.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 3da1fc539ef9..7bddb8efb6d5 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -85,7 +85,7 @@ static struct net_device_stats *get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static void axnet_tx_timeout(struct net_device *dev); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); -static void ei_watchdog(u_long arg); +static void ei_watchdog(struct timer_list *t); static void axnet_reset_8390(struct net_device *dev); static int mdio_read(unsigned int addr, int phy_id, int loc); @@ -483,7 +483,7 @@ static int axnet_open(struct net_device *dev) link->open++; info->link_status = 0x00; - setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + timer_setup(&info->watchdog, ei_watchdog, 0); mod_timer(&info->watchdog, jiffies + HZ); return ax_open(dev); @@ -547,10 +547,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) return ax_interrupt(irq, dev_id); } -static void ei_watchdog(u_long arg) +static void ei_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)(arg); - struct axnet_dev *info = PRIV(dev); + struct axnet_dev *info = from_timer(info, t, watchdog); + struct net_device *dev = info->p_dev->priv; unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + AXNET_MII_EEP; u_short link; diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index bd0a2a14b649..eae9827035dc 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -99,7 +99,7 @@ static int pcnet_open(struct net_device *dev); static int pcnet_close(struct net_device *dev); static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); -static void ei_watchdog(u_long arg); +static void ei_watchdog(struct timer_list *t); static void pcnet_reset_8390(struct net_device *dev); static int set_config(struct net_device *dev, struct ifmap *map); static int 
setup_shmem_window(struct pcmcia_device *link, int start_pg, @@ -917,7 +917,7 @@ static int pcnet_open(struct net_device *dev) info->phy_id = info->eth_phy; info->link_status = 0x00; - setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + timer_setup(&info->watchdog, ei_watchdog, 0); mod_timer(&info->watchdog, jiffies + HZ); return ei_open(dev); @@ -1006,10 +1006,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) return ret; } -static void ei_watchdog(u_long arg) +static void ei_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; - struct pcnet_dev *info = PRIV(dev); + struct pcnet_dev *info = from_timer(info, t, watchdog); + struct net_device *dev = info->p_dev->priv; unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + DLINK_GPIO; u_short link; From c6c52ba1514120db3ad2d36391ed37bafcfc43d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:54:38 -0700 Subject: [PATCH 1536/2177] drivers/net: amd: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Tom Lendacky Cc: "David S. Miller" Cc: Allen Pais Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/a2065.c | 13 ++++++++++--- drivers/net/ethernet/amd/am79c961a.c | 9 +++++---- drivers/net/ethernet/amd/am79c961a.h | 1 + drivers/net/ethernet/amd/declance.c | 10 ++++++---- drivers/net/ethernet/amd/pcnet32.c | 10 +++++----- drivers/net/ethernet/amd/sunlance.c | 8 ++++---- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 14 ++++++-------- 7 files changed, 37 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 998d30e050a6..212fe72a190b 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -123,6 +123,7 @@ struct lance_private { int burst_sizes; /* ledma SBus burst sizes */ #endif struct timer_list multicast_timer; + struct net_device *dev; }; #define LANCE_ADDR(x) ((int)(x) & ~0xff000000) @@ -638,6 +639,13 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } +static void lance_set_multicast_retry(struct timer_list *t) +{ + struct lance_private *lp = from_timer(lp, t, multicast_timer); + + lance_set_multicast(lp->dev); +} + static int a2065_init_one(struct zorro_dev *z, const struct zorro_device_id *ent); static void a2065_remove_one(struct zorro_dev *z); @@ -728,14 +736,13 @@ static int a2065_init_one(struct zorro_dev *z, priv->lance_log_tx_bufs = LANCE_LOG_TX_BUFFERS; priv->rx_ring_mod_mask = RX_RING_MOD_MASK; priv->tx_ring_mod_mask = TX_RING_MOD_MASK; + priv->dev = dev; dev->netdev_ops = &lance_netdev_ops; dev->watchdog_timeo = 5*HZ; dev->dma = 0; - setup_timer(&priv->multicast_timer, - (void(*)(unsigned long))lance_set_multicast, - (unsigned long)dev); + timer_setup(&priv->multicast_timer, lance_set_multicast_retry, 0); err = register_netdev(dev); if (err) { diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index 0612dbee00d2..01d132c02ff9 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -302,10 +302,10 @@ am79c961_init_for_open(struct net_device *dev) write_rreg (dev->base_addr, CSR0, CSR0_IENA|CSR0_STRT); } -static void am79c961_timer(unsigned long data) +static void am79c961_timer(struct timer_list *t) { - struct 
net_device *dev = (struct net_device *)data; - struct dev_priv *priv = netdev_priv(dev); + struct dev_priv *priv = from_timer(priv, t, timer); + struct net_device *dev = priv->dev; unsigned int lnkstat, carrier; unsigned long flags; @@ -728,7 +728,8 @@ static int am79c961_probe(struct platform_device *pdev) am79c961_banner(); spin_lock_init(&priv->chip_lock); - setup_timer(&priv->timer, am79c961_timer, (unsigned long)dev); + priv->dev = dev; + timer_setup(&priv->timer, am79c961_timer, 0); if (am79c961_hw_init(dev)) goto release; diff --git a/drivers/net/ethernet/amd/am79c961a.h b/drivers/net/ethernet/amd/am79c961a.h index 9f384b79507b..fc5088c70731 100644 --- a/drivers/net/ethernet/amd/am79c961a.h +++ b/drivers/net/ethernet/amd/am79c961a.h @@ -140,6 +140,7 @@ struct dev_priv { unsigned long txhdr; spinlock_t chip_lock; struct timer_list timer; + struct net_device *dev; }; #endif diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 9bdf81c2cd00..116997a8b593 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -260,6 +260,7 @@ struct lance_private { unsigned short busmaster_regval; struct timer_list multicast_timer; + struct net_device *dev; /* Pointers to the ring buffers as seen from the CPU */ char *rx_buf_ptr_cpu[RX_RING_SIZE]; @@ -1000,9 +1001,10 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } -static void lance_set_multicast_retry(unsigned long _opaque) +static void lance_set_multicast_retry(struct timer_list *t) { - struct net_device *dev = (struct net_device *) _opaque; + struct lance_private *lp = from_timer(lp, t, multicast_timer); + struct net_device *dev = lp->dev; lance_set_multicast(dev); } @@ -1246,8 +1248,8 @@ static int dec_lance_probe(struct device *bdev, const int type) * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - setup_timer(&lp->multicast_timer, lance_set_multicast_retry, - (unsigned long)dev); + lp->dev = dev; + timer_setup(&lp->multicast_timer, lance_set_multicast_retry, 0); ret = register_netdev(dev); diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index e46153654016..a561705f232c 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -321,7 +321,7 @@ static struct net_device_stats *pcnet32_get_stats(struct net_device *); static void pcnet32_load_multicast(struct net_device *dev); static void pcnet32_set_multicast_list(struct net_device *); static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); -static void pcnet32_watchdog(struct net_device *); +static void pcnet32_watchdog(struct timer_list *); static int mdio_read(struct net_device *dev, int phy_id, int reg_num); static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val); @@ -1970,8 +1970,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) lp->options |= PCNET32_PORT_MII; } - setup_timer(&lp->watchdog_timer, (void *)&pcnet32_watchdog, - (unsigned long)dev); + timer_setup(&lp->watchdog_timer, pcnet32_watchdog, 0); /* The PCNET32-specific entries in the device structure. */ dev->netdev_ops = &pcnet32_netdev_ops; @@ -2901,9 +2900,10 @@ static void pcnet32_check_media(struct net_device *dev, int verbose) * Could possibly be changed to use mii_check_media instead. 
*/ -static void pcnet32_watchdog(struct net_device *dev) +static void pcnet32_watchdog(struct timer_list *t) { - struct pcnet32_private *lp = netdev_priv(dev); + struct pcnet32_private *lp = from_timer(lp, t, watchdog_timer); + struct net_device *dev = lp->dev; unsigned long flags; /* Print the link status if it has changed */ diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 0183ffb9d3ba..cdd7a611479b 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1248,9 +1248,10 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } -static void lance_set_multicast_retry(unsigned long _opaque) +static void lance_set_multicast_retry(struct timer_list *t) { - struct net_device *dev = (struct net_device *) _opaque; + struct lance_private *lp = from_timer(lp, t, multicast_timer); + struct net_device *dev = lp->dev; lance_set_multicast(dev); } @@ -1459,8 +1460,7 @@ no_link_test: * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - setup_timer(&lp->multicast_timer, lance_set_multicast_retry, - (unsigned long)dev); + timer_setup(&lp->multicast_timer, lance_set_multicast_retry, 0); if (register_netdev(dev)) { printk(KERN_ERR "SunLance: Cannot register device.\n"); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 608693d11bd7..3d53153ce751 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -642,9 +642,9 @@ static irqreturn_t xgbe_dma_isr(int irq, void *data) return IRQ_HANDLED; } -static void xgbe_tx_timer(unsigned long data) +static void xgbe_tx_timer(struct timer_list *t) { - struct xgbe_channel *channel = (struct xgbe_channel *)data; + struct xgbe_channel *channel = from_timer(channel, t, tx_timer); struct xgbe_prv_data *pdata = channel->pdata; struct napi_struct *napi; @@ -680,9 +680,9 @@ static void xgbe_service(struct work_struct *work) pdata->phy_if.phy_status(pdata); } -static void xgbe_service_timer(unsigned long data) +static void xgbe_service_timer(struct timer_list *t) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer); queue_work(pdata->dev_workqueue, &pdata->service_work); @@ -694,16 +694,14 @@ static void xgbe_init_timers(struct xgbe_prv_data *pdata) struct xgbe_channel *channel; unsigned int i; - setup_timer(&pdata->service_timer, xgbe_service_timer, - (unsigned long)pdata); + timer_setup(&pdata->service_timer, xgbe_service_timer, 0); for (i = 0; i < pdata->channel_count; i++) { channel = pdata->channel[i]; if (!channel->tx_ring) break; - setup_timer(&channel->tx_timer, xgbe_tx_timer, - (unsigned long)channel); + timer_setup(&channel->tx_timer, xgbe_tx_timer, 0); } } From 70a42ac1c2487edf06a2dcf06812622ffd825cbc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:54:45 -0700 Subject: [PATCH 1537/2177] drivers/net: appletalk/cops: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Allen Pais Cc: "David S. Miller" Cc: David Howells Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
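The conversion in this and the surrounding timer patches is mechanical, so a minimal sketch of the before/after shape may be useful (hypothetical foo driver, not code from any of these patches). The callback now receives the timer_list pointer itself and recovers its enclosing private structure with from_timer() when the timer is embedded in it; drivers whose timer is a file-scope variable, like cops below, instead keep a separate pointer to the net_device.

#include <linux/timer.h>
#include <linux/netdevice.h>

struct foo_priv {
	struct net_device *dev;		/* back-pointer for callbacks that still need the netdev */
	struct timer_list watchdog;	/* embedded, so from_timer() can locate foo_priv */
};

/* Old prototype: static void foo_watchdog(unsigned long data), with a cast from 'data'. */
static void foo_watchdog(struct timer_list *t)
{
	struct foo_priv *priv = from_timer(priv, t, watchdog);
	struct net_device *dev = priv->dev;

	if (!netif_running(dev))
		return;
	/* ... periodic link or housekeeping work ... */
	mod_timer(&priv->watchdog, jiffies + HZ);
}

static void foo_start_watchdog(struct foo_priv *priv)
{
	/* Old call: setup_timer(&priv->watchdog, foo_watchdog, (unsigned long)priv->dev); */
	timer_setup(&priv->watchdog, foo_watchdog, 0);
	mod_timer(&priv->watchdog, jiffies + HZ);
}
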
Miller --- drivers/net/appletalk/cops.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index caf04284711a..bb49f6e40a19 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -155,6 +155,7 @@ static int cops_irqlist[] = { }; static struct timer_list cops_timer; +static struct net_device *cops_timer_dev; /* use 0 for production, 1 for verification, 2 for debug, 3 for verbose debug */ #ifndef COPS_DEBUG @@ -187,7 +188,7 @@ static void cops_load (struct net_device *dev); static int cops_nodeid (struct net_device *dev, int nodeid); static irqreturn_t cops_interrupt (int irq, void *dev_id); -static void cops_poll (unsigned long ltdev); +static void cops_poll(struct timer_list *t); static void cops_timeout(struct net_device *dev); static void cops_rx (struct net_device *dev); static netdev_tx_t cops_send_packet (struct sk_buff *skb, @@ -424,7 +425,8 @@ static int cops_open(struct net_device *dev) */ if(lp->board==TANGENT) /* Poll 20 times per second */ { - setup_timer(&cops_timer, cops_poll, (unsigned long)dev); + cops_timer_dev = dev; + timer_setup(&cops_timer, cops_poll, 0); cops_timer.expires = jiffies + HZ/20; add_timer(&cops_timer); } @@ -671,12 +673,11 @@ static int cops_nodeid (struct net_device *dev, int nodeid) * Poll the Tangent type cards to see if we have work. */ -static void cops_poll(unsigned long ltdev) +static void cops_poll(struct timer_list *unused) { int ioaddr, status; int boguscount = 0; - - struct net_device *dev = (struct net_device *)ltdev; + struct net_device *dev = cops_timer_dev; del_timer(&cops_timer); From 0e23daeb640773adf5528e5e08e7cb81fc12775d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:54:53 -0700 Subject: [PATCH 1538/2177] drivers/net: chelsio/cxgb*: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Santosh Raspatur Cc: Ganesh Goudar Cc: Casey Leedom Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb3/sge.c | 12 ++++++------ drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 7 +++---- drivers/net/ethernet/chelsio/cxgb4/sge.c | 12 ++++++------ drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 12 ++++++------ 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index e2d342647b19..e3d28ae75360 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2853,9 +2853,9 @@ void t3_sge_err_intr_handler(struct adapter *adapter) * bother cleaning them up here. * */ -static void sge_timer_tx(unsigned long data) +static void sge_timer_tx(struct timer_list *t) { - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer); struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0}; @@ -2893,10 +2893,10 @@ static void sge_timer_tx(unsigned long data) * starved. 
* */ -static void sge_timer_rx(unsigned long data) +static void sge_timer_rx(struct timer_list *t) { spinlock_t *lock; - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer); struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; u32 status; @@ -2976,8 +2976,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, struct sge_qset *q = &adapter->sge.qs[id]; init_qset_cntxt(q, id); - setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q); - setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q); + timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0); + timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0); q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, sizeof(struct rx_desc), diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 9b6aabe4f963..614db014ef18 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -741,9 +741,9 @@ err: return ret; } -static void ch_flower_stats_cb(unsigned long data) +static void ch_flower_stats_cb(struct timer_list *t) { - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, flower_stats_timer); struct ch_tc_flower_entry *flower_entry; struct ch_tc_flower_stats *ofld_stats; unsigned int i; @@ -815,8 +815,7 @@ err: void cxgb4_init_tc_flower(struct adapter *adap) { hash_init(adap->flower_anymatch_tbl); - setup_timer(&adap->flower_stats_timer, ch_flower_stats_cb, - (unsigned long)adap); + timer_setup(&adap->flower_stats_timer, ch_flower_stats_cb, 0); mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 4ef68f69b58c..486b01fe23bd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2583,11 +2583,11 @@ irq_handler_t t4_intr_handler(struct adapter *adap) return t4_intr_intx; } -static void sge_rx_timer_cb(unsigned long data) +static void sge_rx_timer_cb(struct timer_list *t) { unsigned long m; unsigned int i; - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, sge.rx_timer); struct sge *s = &adap->sge; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) @@ -2620,11 +2620,11 @@ done: mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } -static void sge_tx_timer_cb(unsigned long data) +static void sge_tx_timer_cb(struct timer_list *t) { unsigned long m; unsigned int i, budget; - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, sge.tx_timer); struct sge *s = &adap->sge; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) @@ -3458,8 +3458,8 @@ int t4_sge_init(struct adapter *adap) /* Set up timers used for recuring callbacks to process RX and TX * administrative tasks. 
*/ - setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); - setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); + timer_setup(&s->rx_timer, sge_rx_timer_cb, 0); + timer_setup(&s->tx_timer, sge_tx_timer_cb, 0); spin_lock_init(&s->intrq_lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 05498e7f2840..14d7e673c656 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2058,9 +2058,9 @@ irq_handler_t t4vf_intr_handler(struct adapter *adapter) * when out of memory a queue can become empty. We schedule NAPI to do * the actual refill. */ -static void sge_rx_timer_cb(unsigned long data) +static void sge_rx_timer_cb(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; + struct adapter *adapter = from_timer(adapter, t, sge.rx_timer); struct sge *s = &adapter->sge; unsigned int i; @@ -2117,9 +2117,9 @@ static void sge_rx_timer_cb(unsigned long data) * when no new packets are being submitted. This is essential for pktgen, * at least. */ -static void sge_tx_timer_cb(unsigned long data) +static void sge_tx_timer_cb(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; + struct adapter *adapter = from_timer(adapter, t, sge.tx_timer); struct sge *s = &adapter->sge; unsigned int i, budget; @@ -2676,8 +2676,8 @@ int t4vf_sge_init(struct adapter *adapter) /* * Set up tasklet timers. */ - setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adapter); - setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adapter); + timer_setup(&s->rx_timer, sge_rx_timer_cb, 0); + timer_setup(&s->tx_timer, sge_tx_timer_cb, 0); /* * Initialize Forwarded Interrupt Queue lock. From 9cb618c295016966b8781a57616b07d8b4d9cb21 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:54:59 -0700 Subject: [PATCH 1539/2177] drivers/net: dlink: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Denis Kirjanov Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Denis Kirjanov Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/sundance.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 6ca9e981ad57..1a27176381fb 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -431,7 +431,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int mdio_wait_link(struct net_device *dev, int wait); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -913,7 +913,7 @@ static int netdev_open(struct net_device *dev) ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); /* Set the timer to check for link beat. 
*/ - setup_timer(&np->timer, netdev_timer, (unsigned long)dev); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = jiffies + 3*HZ; add_timer(&np->timer); @@ -951,10 +951,10 @@ static void check_duplex(struct net_device *dev) } } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii_if.dev; void __iomem *ioaddr = np->base; int next_tick = 10*HZ; From 8b3718dc2c3cf3043f474e067c374546ba17c403 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:55:07 -0700 Subject: [PATCH 1540/2177] drivers/net: fealnx: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: "yuval.shaia@oracle.com" Cc: Allen Pais Cc: Stephen Hemminger Cc: Philippe Reynes Cc: Johannes Berg Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/fealnx.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c8982313d850..23053919ebf5 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -426,8 +426,8 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int netdev_open(struct net_device *dev); static void getlinktype(struct net_device *dev); static void getlinkstatus(struct net_device *dev); -static void netdev_timer(unsigned long data); -static void reset_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); +static void reset_timer(struct timer_list *t); static void fealnx_tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -909,13 +909,13 @@ static int netdev_open(struct net_device *dev) printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name); /* Set the timer to check for link beat. 
*/ - setup_timer(&np->timer, netdev_timer, (unsigned long)dev); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = RUN_AT(3 * HZ); /* timer handler */ add_timer(&np->timer); - setup_timer(&np->reset_timer, reset_timer, (unsigned long)dev); + timer_setup(&np->reset_timer, reset_timer, 0); np->reset_timer_armed = 0; return rc; } @@ -1078,10 +1078,10 @@ static void allocate_rx_buffers(struct net_device *dev) } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii.dev; void __iomem *ioaddr = np->mem; int old_crvalue = np->crvalue; unsigned int old_linkok = np->linkok; @@ -1167,10 +1167,10 @@ static void enable_rxtx(struct net_device *dev) } -static void reset_timer(unsigned long data) +static void reset_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, reset_timer); + struct net_device *dev = np->mii.dev; unsigned long flags; printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name); From 34309b36e4f1ab53fcc696275c1fa2849bc80709 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:55:13 -0700 Subject: [PATCH 1541/2177] drivers/net: korina: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Roman Yeryomin Cc: Florian Fainelli Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 7cecd9dbc111..ae195f8adff5 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -653,10 +653,10 @@ static void korina_check_media(struct net_device *dev, unsigned int init_media) &lp->eth_regs->ethmac2); } -static void korina_poll_media(unsigned long data) +static void korina_poll_media(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct korina_private *lp = netdev_priv(dev); + struct korina_private *lp = from_timer(lp, t, media_check_timer); + struct net_device *dev = lp->dev; korina_check_media(dev, 0); mod_timer(&lp->media_check_timer, jiffies + HZ); @@ -1103,7 +1103,7 @@ static int korina_probe(struct platform_device *pdev) ": cannot register net device: %d\n", rc); goto probe_err_register; } - setup_timer(&lp->media_check_timer, korina_poll_media, (unsigned long) dev); + timer_setup(&lp->media_check_timer, korina_poll_media, 0); INIT_WORK(&lp->restart_task, korina_restart_task); From 0365b047dea70ae931f99594bf8e5976ffec7fae Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:55:20 -0700 Subject: [PATCH 1542/2177] drivers/net: mellanox: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Saeed Mahameed Cc: Matan Barak Cc: Leon Romanovsky Cc: netdev@vger.kernel.org Cc: linux-rdma@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. 
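Worth noting for the hunk below: from_timer() is container_of() with the arguments reordered, so it also resolves timers nested more than one structure deep, as in from_timer(dev, t, priv.health.timer). Its definition in include/linux/timer.h is essentially:

#define from_timer(var, callback_timer, timer_fieldname) \
	container_of(callback_timer, typeof(*var), timer_fieldname)
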
Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a89a68ce53ad..185dcac0abe7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -285,9 +285,9 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&health->wq_lock, flags); } -static void poll_health(unsigned long data) +static void poll_health(struct timer_list *t) { - struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; + struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; u32 count; @@ -320,7 +320,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - setup_timer(&health->timer, poll_health, (unsigned long)dev); + timer_setup(&health->timer, poll_health, 0); health->sick = 0; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); From 15735c9d8a36cadb23ac5e9e29ea083f517767e4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:55:27 -0700 Subject: [PATCH 1543/2177] drivers/net: natsemi: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Allen Pais Cc: Eric Dumazet Cc: Philippe Reynes Cc: Wei Yongjun Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/natsemi.c | 10 +++++----- drivers/net/ethernet/natsemi/ns83820.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index dedeacd0bbca..b9a1a9f999ea 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -610,7 +610,7 @@ static int netdev_open(struct net_device *dev); static void do_cable_magic(struct net_device *dev); static void undo_cable_magic(struct net_device *dev); static void check_link(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void dump_ring(struct net_device *dev); static void ns_tx_timeout(struct net_device *dev); static int alloc_ring(struct net_device *dev); @@ -1571,7 +1571,7 @@ static int netdev_open(struct net_device *dev) dev->name, (int)readl(ioaddr + ChipCmd)); /* Set the timer to check for link beat. */ - setup_timer(&np->timer, netdev_timer, (unsigned long)dev); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ); add_timer(&np->timer); @@ -1787,10 +1787,10 @@ static void init_registers(struct net_device *dev) * this check via dspcfg_workaround sysfs option. 
* 3) check of death of the RX path due to OOM */ -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->dev; void __iomem * ioaddr = ns_ioaddr(dev); int next_tick = NATSEMI_TIMER_FREQ; const int irq = np->pci_dev->irq; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 99d3c7884a4a..958fced4dacf 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1600,10 +1600,10 @@ static void ns83820_tx_timeout(struct net_device *ndev) spin_unlock_irqrestore(&dev->tx_lock, flags); } -static void ns83820_tx_watch(unsigned long data) +static void ns83820_tx_watch(struct timer_list *t) { - struct net_device *ndev = (void *)data; - struct ns83820 *dev = PRIV(ndev); + struct ns83820 *dev = from_timer(dev, t, tx_watchdog); + struct net_device *ndev = dev->ndev; #if defined(DEBUG) printk("ns83820_tx_watch: %u %u %d\n", @@ -1652,7 +1652,7 @@ static int ns83820_open(struct net_device *ndev) writel(0, dev->base + TXDP_HI); writel(desc, dev->base + TXDP); - setup_timer(&dev->tx_watchdog, ns83820_tx_watch, (unsigned long)ndev); + timer_setup(&dev->tx_watchdog, ns83820_tx_watch, 0); mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); netif_start_queue(ndev); /* FIXME: wait for phy to come up */ From 8089c6f4777f394407d63e217314f934b85a7947 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:55:34 -0700 Subject: [PATCH 1544/2177] drivers/net: packetengines: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Allen Pais Cc: yuan linyu Cc: Philippe Reynes Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/packetengines/hamachi.c | 14 +++++++------- drivers/net/ethernet/packetengines/yellowfin.c | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 77bc7cca8980..c9529c29a0a7 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -413,13 +413,13 @@ that case. /* The rest of these values should never change. 
*/ -static void hamachi_timer(unsigned long data); +static void hamachi_timer(struct timer_list *t); enum capability_flags {CanHaveMII=1, }; static const struct chip_info { u16 vendor_id, device_id, device_id_mask, pad; const char *name; - void (*media_timer)(unsigned long data); + void (*media_timer)(struct timer_list *t); int flags; } chip_tbl[] = { {0x1318, 0x0911, 0xffff, 0, "Hamachi GNIC-II", hamachi_timer, 0}, @@ -547,7 +547,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int hamachi_open(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static void hamachi_timer(unsigned long data); +static void hamachi_timer(struct timer_list *t); static void hamachi_tx_timeout(struct net_device *dev); static void hamachi_init_ring(struct net_device *dev); static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, @@ -979,7 +979,7 @@ static int hamachi_open(struct net_device *dev) dev->name, readw(ioaddr + RxStatus), readw(ioaddr + TxStatus)); } /* Set the timer to check for link beat. */ - setup_timer(&hmp->timer, hamachi_timer, (unsigned long)dev); + timer_setup(&hmp->timer, hamachi_timer, 0); hmp->timer.expires = RUN_AT((24*HZ)/10); /* 2.4 sec. */ add_timer(&hmp->timer); @@ -1017,10 +1017,10 @@ static inline int hamachi_tx(struct net_device *dev) return 0; } -static void hamachi_timer(unsigned long data) +static void hamachi_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct hamachi_private *hmp = netdev_priv(dev); + struct hamachi_private *hmp = from_timer(hmp, t, timer); + struct net_device *dev = hmp->mii_if.dev; void __iomem *ioaddr = hmp->base; int next_tick = 10*HZ; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 33c241f52a71..54224d1822e3 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -343,7 +343,7 @@ static int mdio_read(void __iomem *ioaddr, int phy_id, int location); static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int yellowfin_open(struct net_device *dev); -static void yellowfin_timer(unsigned long data); +static void yellowfin_timer(struct timer_list *t); static void yellowfin_tx_timeout(struct net_device *dev); static int yellowfin_init_ring(struct net_device *dev); static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, @@ -632,7 +632,7 @@ static int yellowfin_open(struct net_device *dev) } /* Set the timer to check for link beat. 
*/ - setup_timer(&yp->timer, yellowfin_timer, (unsigned long)dev); + timer_setup(&yp->timer, yellowfin_timer, 0); yp->timer.expires = jiffies + 3*HZ; add_timer(&yp->timer); out: @@ -643,10 +643,10 @@ err_free_irq: goto out; } -static void yellowfin_timer(unsigned long data) +static void yellowfin_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct yellowfin_private *yp = netdev_priv(dev); + struct yellowfin_private *yp = from_timer(yp, t, timer); + struct net_device *dev = pci_get_drvdata(yp->pci_dev); void __iomem *ioaddr = yp->base; int next_tick = 60*HZ; From 267146d44718771aa0b375e78b33c81d137db09d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Oct 2017 22:55:42 -0700 Subject: [PATCH 1545/2177] drivers/net: smsc: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: "yuval.shaia@oracle.com" Cc: Eric Dumazet Cc: Philippe Reynes Cc: Allen Pais Cc: Tobias Klauser Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/epic100.c | 10 +++++----- drivers/net/ethernet/smsc/smc91c92_cs.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 2a9724898fcf..949aaef390b6 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -290,7 +290,7 @@ static int read_eeprom(struct epic_private *, int); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int loc, int val); static void epic_restart(struct net_device *dev); -static void epic_timer(unsigned long data); +static void epic_timer(struct timer_list *t); static void epic_tx_timeout(struct net_device *dev); static void epic_init_ring(struct net_device *dev); static netdev_tx_t epic_start_xmit(struct sk_buff *skb, @@ -739,7 +739,7 @@ static int epic_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. 
*/ - setup_timer(&ep->timer, epic_timer, (unsigned long)dev); + timer_setup(&ep->timer, epic_timer, 0); ep->timer.expires = jiffies + 3*HZ; add_timer(&ep->timer); @@ -843,10 +843,10 @@ static void check_media(struct net_device *dev) } } -static void epic_timer(unsigned long data) +static void epic_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct epic_private *ep = netdev_priv(dev); + struct epic_private *ep = from_timer(ep, t, timer); + struct net_device *dev = ep->mii.dev; void __iomem *ioaddr = ep->ioaddr; int next_tick = 5*HZ; diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 92c927aec66d..a55f430f6a7b 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -280,7 +280,7 @@ static void set_rx_mode(struct net_device *dev); static int s9k_config(struct net_device *dev, struct ifmap *map); static void smc_set_xcvr(struct net_device *dev, int if_port); static void smc_reset(struct net_device *dev); -static void media_check(u_long arg); +static void media_check(struct timer_list *t); static void mdio_sync(unsigned int addr); static int mdio_read(struct net_device *dev, int phy_id, int loc); static void mdio_write(struct net_device *dev, int phy_id, int loc, int value); @@ -1070,7 +1070,7 @@ static int smc_open(struct net_device *dev) smc->packets_waiting = 0; smc_reset(dev); - setup_timer(&smc->media, media_check, (u_long)dev); + timer_setup(&smc->media, media_check, 0); mod_timer(&smc->media, jiffies + HZ); return 0; @@ -1708,10 +1708,10 @@ static void smc_reset(struct net_device *dev) ======================================================================*/ -static void media_check(u_long arg) +static void media_check(struct timer_list *t) { - struct net_device *dev = (struct net_device *) arg; - struct smc_private *smc = netdev_priv(dev); + struct smc_private *smc = from_timer(smc, t, media); + struct net_device *dev = smc->mii_if.dev; unsigned int ioaddr = dev->base_addr; u_short i, media, saved_bank; u_short link; From ec36e416f06f6a8659281053fdc46ce484ad2211 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:21 -0700 Subject: [PATCH 1546/2177] tcp: Namespace-ify sysctl_tcp_nometrics_save Signed-off-by: Eric Dumazet Signed-off-by: David S. 
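This and the following sysctl patches apply the same three-step conversion: the knob gains a field in struct netns_ipv4, its ctl_table entry moves from the global ipv4_table[] into the per-namespace ipv4_net_table[] (with .data pointing at the init_net copy), and readers switch from the global variable to the per-namespace field. A sketch of the reader side only (tcp_metrics_allowed() is a hypothetical helper, not part of the patch):

#include <net/sock.h>

static bool tcp_metrics_allowed(const struct sock *sk)
{
	/* Before: return !sysctl_tcp_nometrics_save;  (file-scope global) */
	return !sock_net(sk)->ipv4.sysctl_tcp_nometrics_save;
}
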
Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_metrics.c | 4 +--- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f4622e28db3a..9606e2ea1f14 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -142,6 +142,7 @@ struct netns_ipv4 { int sysctl_tcp_app_win; int sysctl_tcp_adv_win_scale; int sysctl_tcp_frto; + int sysctl_tcp_nometrics_save; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 18f047501f53..6ab7fa4154b2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f1bcb9b7e082..b742a5e26a9d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -451,13 +451,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_no_metrics_save", - .data = &sysctl_tcp_nometrics_save, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_moderate_rcvbuf", .data = &sysctl_tcp_moderate_rcvbuf, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_no_metrics_save", + .data = &init_net.ipv4.sysctl_tcp_nometrics_save, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0ab78abc811b..0507b56b6d4b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -20,8 +20,6 @@ #include #include -int sysctl_tcp_nometrics_save __read_mostly; - static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, const struct inetpeer_addr *daddr, struct net *net, unsigned int hash); @@ -330,7 +328,7 @@ void tcp_update_metrics(struct sock *sk) int m; sk_dst_confirm(sk); - if (sysctl_tcp_nometrics_save || !dst) + if (net->ipv4.sysctl_tcp_nometrics_save || !dst) return; rcu_read_lock(); From 4540c0cf98b8892a642d2453eec20ae3eb5696fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:22 -0700 Subject: [PATCH 1547/2177] tcp: Namespace-ify sysctl_tcp_moderate_rcvbuf Signed-off-by: Eric Dumazet Signed-off-by: David S. 
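One extra step applies when the old global carried a non-zero default, as sysctl_tcp_moderate_rcvbuf did (__read_mostly = 1): the default moves into the per-namespace init hook, which the tcp_ipv4.c hunk below does. Roughly (only the relevant line shown, the rest of tcp_sk_init() is unchanged):

static int __net_init tcp_sk_init(struct net *net)
{
	/* ... existing per-netns TCP defaults ... */
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;	/* was: int sysctl_..._rcvbuf __read_mostly = 1; */
	/* ... */
	return 0;
}
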
Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9606e2ea1f14..4458a54fe3f4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -143,6 +143,7 @@ struct netns_ipv4 { int sysctl_tcp_adv_win_scale; int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; + int sysctl_tcp_moderate_rcvbuf; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 6ab7fa4154b2..f954e74578ff 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b742a5e26a9d..2ebe87fd1169 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -451,13 +451,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_moderate_rcvbuf", - .data = &sysctl_tcp_moderate_rcvbuf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_tso_win_divisor", .data = &sysctl_tcp_tso_win_divisor, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_moderate_rcvbuf", + .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 90d76f1c8f96..ce481325115f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -84,7 +84,6 @@ int sysctl_tcp_challenge_ack_limit = 1000; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; -int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. 
*/ @@ -411,7 +410,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency * Allow enough cushion so that sender is not limited by our window */ - if (sysctl_tcp_moderate_rcvbuf) + if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) rcvmem <<= 2; if (sk->sk_rcvbuf < rcvmem) @@ -602,7 +601,7 @@ void tcp_rcv_space_adjust(struct sock *sk) * */ - if (sysctl_tcp_moderate_rcvbuf && + if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvwin, rcvmem, rcvbuf; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 49757c758211..27f376b90913 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2493,6 +2493,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_adv_win_scale = 1; net->ipv4.sysctl_tcp_frto = 2; + net->ipv4.sysctl_tcp_moderate_rcvbuf = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From d06a99045837d3f4d5431793c4c390b0daf2a08d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:23 -0700 Subject: [PATCH 1548/2177] tcp: Namespace-ify sysctl_tcp_tso_win_divisor Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 5 +++++ net/ipv4/tcp_output.c | 8 +------- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 4458a54fe3f4..60bccda046db 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -144,6 +144,7 @@ struct netns_ipv4 { int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; int sysctl_tcp_moderate_rcvbuf; + int sysctl_tcp_tso_win_divisor; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index f954e74578ff..ed0828dc82f1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2ebe87fd1169..a053cacb8290 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -451,13 +451,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_tso_win_divisor", - .data = &sysctl_tcp_tso_win_divisor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_congestion_control", .mode = 0644, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_tso_win_divisor", + .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 27f376b90913..284ff16148df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2494,6 +2494,11 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_adv_win_scale = 1; net->ipv4.sysctl_tcp_frto = 2; net->ipv4.sysctl_tcp_moderate_rcvbuf = 1; + /* This limits the percentage of the congestion window 
which we + * will allow a single TSO frame to consume. Building TSO frames + * which are too large can cause TCP streams to be bursty. + */ + net->ipv4.sysctl_tcp_tso_win_divisor = 3; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 55a0aa4b96df..60df3ab52166 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -53,12 +53,6 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; /* Default TSQ limit of four TSO segments */ int sysctl_tcp_limit_output_bytes __read_mostly = 262144; -/* This limits the percentage of the congestion window which we - * will allow a single TSO frame to consume. Building TSO frames - * which are too large can cause TCP streams to be bursty. - */ -int sysctl_tcp_tso_win_divisor __read_mostly = 3; - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -1988,7 +1982,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; - win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); + win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor); if (win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); From ceef9ab6be7234f9e49f79769e0da88d1dccfcc7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:24 -0700 Subject: [PATCH 1549/2177] tcp: Namespace-ify sysctl_tcp_workaround_signed_windows Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 4 ++-- net/ipv4/syncookies.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 14 +++++--------- net/ipv6/syncookies.c | 2 +- 7 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 60bccda046db..e74c7c1b0d18 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -145,6 +145,7 @@ struct netns_ipv4 { int sysctl_tcp_nometrics_save; int sysctl_tcp_moderate_rcvbuf; int sysctl_tcp_tso_win_divisor; + int sysctl_tcp_workaround_signed_windows; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index ed0828dc82f1..e338e16178dd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_workaround_signed_windows; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ @@ -1302,7 +1301,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) } /* Determine a window scaling and initial window to offer. */ -void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, +void tcp_select_initial_window(const struct sock *sk, int __space, + __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 77cf32a80952..fda37f2862c9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? 
:dst_metric(&rt->dst, RTAX_WINDOW); - tcp_select_initial_window(tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a053cacb8290..3ae9012a4979 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -457,13 +457,6 @@ static struct ctl_table ipv4_table[] = { .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, }, - { - .procname = "tcp_workaround_signed_windows", - .data = &sysctl_tcp_workaround_signed_windows, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_limit_output_bytes", .data = &sysctl_tcp_limit_output_bytes, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_workaround_signed_windows", + .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3270ab8416ce..3c65c1a3f944 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -369,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req, full_space = rcv_wnd * mss; /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(full_space, + tcp_select_initial_window(sk_listener, full_space, mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rsk_rcv_wnd, &req->rsk_window_clamp, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 60df3ab52166..5bbed67c27e9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -45,11 +45,6 @@ #include -/* People can turn this on to work with those rare, broken TCPs that - * interpret the window field as a signed quantity. - */ -int sysctl_tcp_workaround_signed_windows __read_mostly = 0; - /* Default TSQ limit of four TSO segments */ int sysctl_tcp_limit_output_bytes __read_mostly = 262144; @@ -196,7 +191,7 @@ u32 tcp_default_init_rwnd(u32 mss) * be a multiple of mss if possible. We assume here that mss >= 1. * This MUST be enforced by all callers. */ -void tcp_select_initial_window(int __space, __u32 mss, +void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd) @@ -220,7 +215,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * which we interpret as a sign the remote TCP is not * misinterpreting the window field as a signed quantity. */ - if (sysctl_tcp_workaround_signed_windows) + if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) (*rcv_wnd) = min(space, MAX_TCP_WINDOW); else (*rcv_wnd) = space; @@ -280,7 +275,8 @@ static u16 tcp_select_window(struct sock *sk) /* Make sure we do not exceed the maximum possible * scaled window. 
*/ - if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows) + if (!tp->rx_opt.rcv_wscale && + sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); @@ -3349,7 +3345,7 @@ static void tcp_connect_init(struct sock *sk) if (rcv_wnd == 0) rcv_wnd = dst_metric(dst, RTAX_INITRWND); - tcp_select_initial_window(tcp_full_space(sk), + tcp_select_initial_window(sk, tcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 4e7817abc0b9..e7a3a6b6cf56 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); - tcp_select_initial_window(tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); From 9184d8bb448a3d2c2d9f90f1e2f5de625292e769 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:25 -0700 Subject: [PATCH 1550/2177] tcp: Namespace-ify sysctl_tcp_limit_output_bytes Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_output.c | 6 ++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e74c7c1b0d18..e98f473bab13 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -146,6 +146,7 @@ struct netns_ipv4 { int sysctl_tcp_moderate_rcvbuf; int sysctl_tcp_tso_win_divisor; int sysctl_tcp_workaround_signed_windows; + int sysctl_tcp_limit_output_bytes; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index e338e16178dd..33f9d30a6905 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3ae9012a4979..6caf5c40730f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -457,13 +457,6 @@ static struct ctl_table ipv4_table[] = { .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, }, - { - .procname = "tcp_limit_output_bytes", - .data = &sysctl_tcp_limit_output_bytes, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_challenge_ack_limit", .data = &sysctl_tcp_challenge_ack_limit, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_limit_output_bytes", + .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 284ff16148df..713b80261e4f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2499,6 +2499,8 @@ static int 
__net_init tcp_sk_init(struct net *net) * which are too large can cause TCP streams to be bursty. */ net->ipv4.sysctl_tcp_tso_win_divisor = 3; + /* Default TSQ limit of four TSO segments */ + net->ipv4.sysctl_tcp_limit_output_bytes = 262144; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5bbed67c27e9..f018892c6a98 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -45,9 +45,6 @@ #include -/* Default TSQ limit of four TSO segments */ -int sysctl_tcp_limit_output_bytes __read_mostly = 262144; - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -2215,7 +2212,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, unsigned int limit; limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); - limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); + limit = min_t(u32, limit, + sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); limit <<= factor; if (refcount_read(&sk->sk_wmem_alloc) > limit) { From b530b68148301d73775cd27cc136ce4dd5738ae8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:26 -0700 Subject: [PATCH 1551/2177] tcp: Namespace-ify sysctl_tcp_challenge_ack_limit Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 14 ++++++-------- net/ipv4/tcp_ipv4.c | 2 ++ 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e98f473bab13..e9895d40868e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -147,6 +147,7 @@ struct netns_ipv4 { int sysctl_tcp_tso_win_divisor; int sysctl_tcp_workaround_signed_windows; int sysctl_tcp_limit_output_bytes; + int sysctl_tcp_challenge_ack_limit; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 33f9d30a6905..afc23596e9aa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_challenge_ack_limit; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6caf5c40730f..e28b3b7a7bbc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -457,13 +457,6 @@ static struct ctl_table ipv4_table[] = { .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, }, - { - .procname = "tcp_challenge_ack_limit", - .data = &sysctl_tcp_challenge_ack_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, #ifdef CONFIG_NETLABEL { .procname = "cipso_cache_enable", @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_challenge_ack_limit", + .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ce481325115f..928048a4e2c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,9 +79,6 @@ #include #include -/* rfc5961 challenge ack rate limiting */ -int 
sysctl_tcp_challenge_ack_limit = 1000; - int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; @@ -3443,10 +3440,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) static u32 challenge_timestamp; static unsigned int challenge_count; struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); u32 count, now; /* First check our per-socket dupack rate limit. */ - if (__tcp_oow_rate_limited(sock_net(sk), + if (__tcp_oow_rate_limited(net, LINUX_MIB_TCPACKSKIPPEDCHALLENGE, &tp->last_oow_ack_time)) return; @@ -3454,16 +3452,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; if (now != challenge_timestamp) { - u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; + u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; + u32 half = (ack_limit + 1) >> 1; challenge_timestamp = now; - WRITE_ONCE(challenge_count, half + - prandom_u32_max(sysctl_tcp_challenge_ack_limit)); + WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); } count = READ_ONCE(challenge_count); if (count > 0) { WRITE_ONCE(challenge_count, count - 1); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 713b80261e4f..50ab3a3eced3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2501,6 +2501,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tso_win_divisor = 3; /* Default TSQ limit of four TSO segments */ net->ipv4.sysctl_tcp_limit_output_bytes = 262144; + /* rfc5961 challenge ack rate limiting */ + net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From 26e9596e5b8f11025b57b12e7265df649129ab00 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:27 -0700 Subject: [PATCH 1552/2177] tcp: Namespace-ify sysctl_tcp_min_tso_segs Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv4/tcp.c | 2 -- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 3 ++- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e9895d40868e..a2da3e19a977 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -148,6 +148,7 @@ struct netns_ipv4 { int sysctl_tcp_workaround_signed_windows; int sysctl_tcp_limit_output_bytes; int sysctl_tcp_challenge_ack_limit; + int sysctl_tcp_min_tso_segs; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index afc23596e9aa..0735303a6575 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e28b3b7a7bbc..00b4aea3705b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -499,15 +499,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_allowed_congestion_control, }, - { - .procname = "tcp_min_tso_segs", - .data = &sysctl_tcp_min_tso_segs, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &gso_max_segs, - }, { .procname = "tcp_pacing_ss_ratio", .data = &sysctl_tcp_pacing_ss_ratio, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_min_tso_segs", + .data = &init_net.ipv4.sysctl_tcp_min_tso_segs, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &gso_max_segs, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c7c983f0f817..a01c97708d83 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -285,8 +285,6 @@ #include -int sysctl_tcp_min_tso_segs __read_mostly = 2; - int sysctl_tcp_autocorking __read_mostly = 1; struct percpu_counter tcp_orphan_count; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 50ab3a3eced3..6192f26145d3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2503,6 +2503,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_limit_output_bytes = 262144; /* rfc5961 challenge ack rate limiting */ net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; + net->ipv4.sysctl_tcp_min_tso_segs = 2; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f018892c6a98..aab6e7145013 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1758,7 +1758,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; return tso_segs ? 
: - tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); } /* Returns the portion of skb which can be sent right away */ From bd239704295c66196e6b77c5717ec4aec076ddd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:28 -0700 Subject: [PATCH 1553/2177] tcp: Namespace-ify sysctl_tcp_min_rtt_wlen Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a2da3e19a977..1a66af8a0d32 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -149,6 +149,7 @@ struct netns_ipv4 { int sysctl_tcp_limit_output_bytes; int sysctl_tcp_challenge_ack_limit; int sysctl_tcp_min_tso_segs; + int sysctl_tcp_min_rtt_wlen; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0735303a6575..56f50c9a3e6a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 00b4aea3705b..029692d2e4ae 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one, }, - { - .procname = "tcp_min_rtt_wlen", - .data = &sysctl_tcp_min_rtt_wlen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &one, .extra2 = &gso_max_segs, }, + { + .procname = "tcp_min_rtt_wlen", + .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 928048a4e2c5..da1ef666d1f9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -80,7 +80,6 @@ #include int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ @@ -2915,8 +2914,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) { + u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; struct tcp_sock *tp = tcp_sk(sk); - u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, rtt_us ? 
: jiffies_to_usecs(1)); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6192f26145d3..ced35af5737a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2504,6 +2504,7 @@ static int __net_init tcp_sk_init(struct net *net) /* rfc5961 challenge ack rate limiting */ net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; net->ipv4.sysctl_tcp_min_tso_segs = 2; + net->ipv4.sysctl_tcp_min_rtt_wlen = 300; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From 790f00e19f65673c3c169dfc137c09a9236847d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:29 -0700 Subject: [PATCH 1554/2177] tcp: Namespace-ify sysctl_tcp_autocorking Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv4/tcp.c | 4 +--- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1a66af8a0d32..537830882149 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -150,6 +150,7 @@ struct netns_ipv4 { int sysctl_tcp_challenge_ack_limit; int sysctl_tcp_min_tso_segs; int sysctl_tcp_min_rtt_wlen; + int sysctl_tcp_autocorking; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 56f50c9a3e6a..0268f1025d9d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 029692d2e4ae..43a18a317053 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -510,15 +510,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &zero, .extra2 = &thousand, }, - { - .procname = "tcp_autocorking", - .data = &sysctl_tcp_autocorking, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, - }, { .procname = "tcp_invalid_ratelimit", .data = &sysctl_tcp_invalid_ratelimit, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_autocorking", + .data = &init_net.ipv4.sysctl_tcp_autocorking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a01c97708d83..a7a0f316eb86 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -285,8 +285,6 @@ #include -int sysctl_tcp_autocorking __read_mostly = 1; - struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -697,7 +695,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && - sysctl_tcp_autocorking && + sock_net(sk)->ipv4.sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ced35af5737a..351e3497c8f3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2505,6 +2505,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_challenge_ack_limit 
= 1000; net->ipv4.sysctl_tcp_min_tso_segs = 2; net->ipv4.sysctl_tcp_min_rtt_wlen = 300; + net->ipv4.sysctl_tcp_autocorking = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From 4170ba6b589ced82da56c7e4f71cc84b2be036d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:30 -0700 Subject: [PATCH 1555/2177] tcp: Namespace-ify sysctl_tcp_invalid_ratelimit Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 537830882149..e52c2124b32e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -151,6 +151,7 @@ struct netns_ipv4 { int sysctl_tcp_min_tso_segs; int sysctl_tcp_min_rtt_wlen; int sysctl_tcp_autocorking; + int sysctl_tcp_invalid_ratelimit; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0268f1025d9d..5869a822ecb1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 43a18a317053..6a9349c27f00 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -510,13 +510,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &zero, .extra2 = &thousand, }, - { - .procname = "tcp_invalid_ratelimit", - .data = &sysctl_tcp_invalid_ratelimit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, { .procname = "tcp_available_ulp", .maxlen = TCP_ULP_BUF_MAX, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "tcp_invalid_ratelimit", + .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index da1ef666d1f9..db4d458d0205 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -80,7 +80,6 @@ #include int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -3403,7 +3402,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); - if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { + if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! 
*/ } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 351e3497c8f3..6617aae18ba2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2506,6 +2506,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_min_tso_segs = 2; net->ipv4.sysctl_tcp_min_rtt_wlen = 300; net->ipv4.sysctl_tcp_autocorking = 1; + net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From 23a7102a2d1068508fa2a0ce593a0df7f8fdc0ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:31 -0700 Subject: [PATCH 1556/2177] tcp: Namespace-ify sysctl_tcp_pacing_ss_ratio Also remove an obsolete comment about TCP pacing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv4/tcp_input.c | 9 +-------- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 12 insertions(+), 18 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e52c2124b32e..eb2dcf1cbe61 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -152,6 +152,7 @@ struct netns_ipv4 { int sysctl_tcp_min_rtt_wlen; int sysctl_tcp_autocorking; int sysctl_tcp_invalid_ratelimit; + int sysctl_tcp_pacing_ss_ratio; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5869a822ecb1..2a5f8261ca03 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,7 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; extern atomic_long_t tcp_memory_allocated; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6a9349c27f00..7f0dba852d47 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -492,15 +492,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_allowed_congestion_control, }, - { - .procname = "tcp_pacing_ss_ratio", - .data = &sysctl_tcp_pacing_ss_ratio, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &thousand, - }, { .procname = "tcp_pacing_ca_ratio", .data = &sysctl_tcp_pacing_ca_ratio, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, + { + .procname = "tcp_pacing_ss_ratio", + .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &thousand, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index db4d458d0205..29539d39e61a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -767,13 +767,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->srtt_us = max(1U, srtt); } -/* Set the sk_pacing_rate to allow proper sizing of TSO packets. - * Note: TCP stack does not yet implement pacing. 
- * FQ packet scheduler can be used to implement cheap but effective - * TCP pacing, to smooth the burst on large writes when packets - * in flight is significantly lower than cwnd (or rwin) - */ -int sysctl_tcp_pacing_ss_ratio __read_mostly = 200; int sysctl_tcp_pacing_ca_ratio __read_mostly = 120; static void tcp_update_pacing_rate(struct sock *sk) @@ -793,7 +786,7 @@ static void tcp_update_pacing_rate(struct sock *sk) * end of slow start and should slow down. */ if (tp->snd_cwnd < tp->snd_ssthresh / 2) - rate *= sysctl_tcp_pacing_ss_ratio; + rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; else rate *= sysctl_tcp_pacing_ca_ratio; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6617aae18ba2..1d8fc663af51 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2507,6 +2507,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_min_rtt_wlen = 300; net->ipv4.sysctl_tcp_autocorking = 1; net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; + net->ipv4.sysctl_tcp_pacing_ss_ratio = 200; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From c26e91f8b9b8e1fd252e07c1f60e50220cd7ebab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Oct 2017 07:47:32 -0700 Subject: [PATCH 1557/2177] tcp: Namespace-ify sysctl_tcp_pacing_ca_ratio Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 2 -- net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv4/tcp_input.c | 4 +--- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index eb2dcf1cbe61..141ba82b5efb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -153,6 +153,7 @@ struct netns_ipv4 { int sysctl_tcp_autocorking; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; + int sysctl_tcp_pacing_ca_ratio; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 2a5f8261ca03..092d606fcc16 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -250,8 +250,6 @@ extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_pacing_ca_ratio; - extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern unsigned long tcp_memory_pressure; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7f0dba852d47..4602af6d5358 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -492,15 +492,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_allowed_congestion_control, }, - { - .procname = "tcp_pacing_ca_ratio", - .data = &sysctl_tcp_pacing_ca_ratio, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &thousand, - }, { .procname = "tcp_available_ulp", .maxlen = TCP_ULP_BUF_MAX, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &zero, .extra2 = &thousand, }, + { + .procname = "tcp_pacing_ca_ratio", + .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &thousand, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 29539d39e61a..21c358c0cf2e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ 
-767,8 +767,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->srtt_us = max(1U, srtt); } -int sysctl_tcp_pacing_ca_ratio __read_mostly = 120; - static void tcp_update_pacing_rate(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -788,7 +786,7 @@ static void tcp_update_pacing_rate(struct sock *sk) if (tp->snd_cwnd < tp->snd_ssthresh / 2) rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; else - rate *= sysctl_tcp_pacing_ca_ratio; + rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; rate *= max(tp->snd_cwnd, tp->packets_out); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1d8fc663af51..7c1dae6493c3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2508,6 +2508,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_autocorking = 1; net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; net->ipv4.sysctl_tcp_pacing_ss_ratio = 200; + net->ipv4.sysctl_tcp_pacing_ca_ratio = 120; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); From 5b52a4c3acf5f4b4854d1c3ddc8be8770330a79c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 27 Oct 2017 10:01:39 -0700 Subject: [PATCH 1558/2177] tcp: remove unnecessary include two extra #include are not necessary in tcp.h Remove them. Fixes: 40304b2a1567 ("bpf: BPF support for sock_ops") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/tcp.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 092d606fcc16..aa1cc90fdc02 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -45,9 +45,6 @@ #include #include - -#include -#include #include extern struct inet_hashinfo tcp_hashinfo; From e19b42a1a0669ed5b8009930c5269a5a87cc363c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 27 Oct 2017 13:19:36 +0300 Subject: [PATCH 1559/2177] bridge: netlink: make setlink/dellink notifications more accurate Before this patch we had cases that either sent notifications when there were in fact no changes (e.g. non-existent vlan delete) or didn't send notifications when there were changes (e.g. vlan add range with an error in the middle, port flags change + vlan update error). This patch sends down a boolean to the functions setlink/dellink use and if there is even a single configuration change (port flag, vlan add/del, port state) then we always send a notification. This is all done to keep backwards compatibility with the opportunistic vlan delete, where one could specify a vlan range that has missing vlans inside and still everything in that range will be cleared, this is mostly used to clear the whole vlan config with a single call, i.e. range 1-4094. Signed-off-by: Nikolay Aleksandrov Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 44 ++++++++++++++++++++-------------- net/bridge/br_netlink_tunnel.c | 14 +++++++---- net/bridge/br_private_tunnel.h | 3 ++- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fb61b6c79235..d0290ede9342 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -506,7 +506,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, } static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, - int cmd, struct bridge_vlan_info *vinfo) + int cmd, struct bridge_vlan_info *vinfo, bool *changed) { int err = 0; @@ -517,21 +517,24 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, * per-VLAN entry as well */ err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); - if (err) - break; } else { vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; err = br_vlan_add(br, vinfo->vid, vinfo->flags); } + if (!err) + *changed = true; break; case RTM_DELLINK: if (p) { - nbp_vlan_delete(p, vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - br_vlan_delete(p->br, vinfo->vid); - } else { - br_vlan_delete(br, vinfo->vid); + if (!nbp_vlan_delete(p, vinfo->vid)) + *changed = true; + + if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) && + !br_vlan_delete(p->br, vinfo->vid)) + *changed = true; + } else if (!br_vlan_delete(br, vinfo->vid)) { + *changed = true; } break; } @@ -542,7 +545,8 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, static int br_process_vlan_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct bridge_vlan_info *vinfo_curr, - struct bridge_vlan_info **vinfo_last) + struct bridge_vlan_info **vinfo_last, + bool *changed) { if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK) return -EINVAL; @@ -572,7 +576,7 @@ static int br_process_vlan_info(struct net_bridge *br, sizeof(struct bridge_vlan_info)); for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) { tmp_vinfo.vid = v; - err = br_vlan_info(br, p, cmd, &tmp_vinfo); + err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed); if (err) break; } @@ -581,13 +585,13 @@ static int br_process_vlan_info(struct net_bridge *br, return err; } - return br_vlan_info(br, p, cmd, vinfo_curr); + return br_vlan_info(br, p, cmd, vinfo_curr, changed); } static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, - int cmd) + int cmd, bool *changed) { struct bridge_vlan_info *vinfo_curr = NULL; struct bridge_vlan_info *vinfo_last = NULL; @@ -607,7 +611,8 @@ static int br_afspec(struct net_bridge *br, return err; err = br_process_vlan_tunnel_info(br, p, cmd, &tinfo_curr, - &tinfo_last); + &tinfo_last, + changed); if (err) return err; break; @@ -616,7 +621,7 @@ static int br_afspec(struct net_bridge *br, return -EINVAL; vinfo_curr = nla_data(attr); err = br_process_vlan_info(br, p, cmd, vinfo_curr, - &vinfo_last); + &vinfo_last, changed); if (err) return err; break; @@ -804,6 +809,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; + bool changed = false; int err = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); @@ -839,14 +845,15 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) } if (err) goto out; + changed = true; } if (afspec) { err = br_afspec((struct net_bridge *)netdev_priv(dev), p, - afspec, RTM_SETLINK); + afspec, RTM_SETLINK, &changed); } - if (err == 0) + if (changed) 
br_ifinfo_notify(RTM_NEWLINK, p); out: return err; @@ -857,6 +864,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; + bool changed = false; int err = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -869,8 +877,8 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) return -EINVAL; err = br_afspec((struct net_bridge *)netdev_priv(dev), p, - afspec, RTM_DELLINK); - if (err == 0) + afspec, RTM_DELLINK, &changed); + if (changed) /* Send RTM_NEWLINK because userspace * expects RTM_NEWLINK for vlan dels */ diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index 3712c7f0e00c..da8cb99fd259 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -198,7 +198,7 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + }; static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, - u16 vid, u32 tun_id) + u16 vid, u32 tun_id, bool *changed) { int err = 0; @@ -208,9 +208,12 @@ static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, switch (cmd) { case RTM_SETLINK: err = nbp_vlan_tunnel_info_add(p, vid, tun_id); + if (!err) + *changed = true; break; case RTM_DELLINK: - nbp_vlan_tunnel_info_delete(p, vid); + if (!nbp_vlan_tunnel_info_delete(p, vid)) + *changed = true; break; } @@ -254,7 +257,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, int br_process_vlan_tunnel_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, - struct vtunnel_info *tinfo_last) + struct vtunnel_info *tinfo_last, + bool *changed) { int err; @@ -272,7 +276,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, return -EINVAL; t = tinfo_last->tunid; for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { - err = br_vlan_tunnel_info(p, cmd, v, t); + err = br_vlan_tunnel_info(p, cmd, v, t, changed); if (err) return err; t++; @@ -283,7 +287,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, if (tinfo_last->flags) return -EINVAL; err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, - tinfo_curr->tunid); + tinfo_curr->tunid, changed); if (err) return err; memset(tinfo_last, 0, sizeof(struct vtunnel_info)); diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h index 4a447a378ab3..a259471bfd78 100644 --- a/net/bridge/br_private_tunnel.h +++ b/net/bridge/br_private_tunnel.h @@ -26,7 +26,8 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, - struct vtunnel_info *tinfo_last); + struct vtunnel_info *tinfo_last, + bool *changed); int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg); int br_fill_vlan_tunnel_info(struct sk_buff *skb, struct net_bridge_vlan_group *vg); From f418af6343fbfaaa3306e7cf15906be4f20c69ff Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 27 Oct 2017 13:19:37 +0300 Subject: [PATCH 1560/2177] bridge: vlan: signal if anything changed on vlan add Before this patch there was no way to tell if the vlan add operation actually changed anything, thus we would always generate a notification on adds. Let's make the notifications more precise and generate them only if anything changed, so use the new bool parameter to signal that the vlan was updated. We cannot return an error because there are valid use cases that will be broken (e.g. 
overlapping range add) and also we can't risk masking errors due to calls into drivers for vlan add which can potentially return anything. Signed-off-by: Nikolay Aleksandrov Reviewed-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 9 +++-- net/bridge/br_private.h | 14 +++++--- net/bridge/br_vlan.c | 78 ++++++++++++++++++++++++++++++----------- 3 files changed, 73 insertions(+), 28 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index d0290ede9342..e732403669c6 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -508,6 +508,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct bridge_vlan_info *vinfo, bool *changed) { + bool curr_change; int err = 0; switch (cmd) { @@ -516,12 +517,14 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, /* if the MASTER flag is set this will act on the global * per-VLAN entry as well */ - err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags, + &curr_change); } else { vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; - err = br_vlan_add(br, vinfo->vid, vinfo->flags); + err = br_vlan_add(br, vinfo->vid, vinfo->flags, + &curr_change); } - if (!err) + if (curr_change) *changed = true; break; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fa0039f44818..860e4afaf71a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -803,7 +803,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb); -int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, + bool *changed); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); @@ -816,7 +817,8 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); -int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); int nbp_vlan_init(struct net_bridge_port *port); @@ -903,8 +905,10 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, return skb; } -static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, + bool *changed) { + *changed = false; return -EOPNOTSUPP; } @@ -926,8 +930,10 @@ static inline int br_vlan_init(struct net_bridge *br) return 0; } -static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed) { + *changed = false; return -EOPNOTSUPP; } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 233a30040c91..51935270c651 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -32,27 +32,34 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid) return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params); } -static void 
__vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) +static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) { if (vg->pvid == vid) - return; + return false; smp_wmb(); vg->pvid = vid; + + return true; } -static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) +static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) { if (vg->pvid != vid) - return; + return false; smp_wmb(); vg->pvid = 0; + + return true; } -static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) +/* return true if anything changed, false otherwise */ +static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) { struct net_bridge_vlan_group *vg; + u16 old_flags = v->flags; + bool ret; if (br_vlan_is_master(v)) vg = br_vlan_group(v->br); @@ -60,14 +67,16 @@ static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) vg = nbp_vlan_group(v->port); if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(vg, v->vid); + ret = __vlan_add_pvid(vg, v->vid); else - __vlan_delete_pvid(vg, v->vid); + ret = __vlan_delete_pvid(vg, v->vid); if (flags & BRIDGE_VLAN_INFO_UNTAGGED) v->flags |= BRIDGE_VLAN_INFO_UNTAGGED; else v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; + + return ret || !!(old_flags ^ v->flags); } static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, @@ -151,8 +160,10 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid vg = br_vlan_group(br); masterv = br_vlan_find(vg, vid); if (!masterv) { + bool changed; + /* missing global ctx, create it now */ - if (br_vlan_add(br, vid, 0)) + if (br_vlan_add(br, vid, 0, &changed)) return NULL; masterv = br_vlan_find(vg, vid); if (WARN_ON(!masterv)) @@ -232,8 +243,11 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags) /* need to work on the master vlan too */ if (flags & BRIDGE_VLAN_INFO_MASTER) { - err = br_vlan_add(br, v->vid, flags | - BRIDGE_VLAN_INFO_BRENTRY); + bool changed; + + err = br_vlan_add(br, v->vid, + flags | BRIDGE_VLAN_INFO_BRENTRY, + &changed); if (err) goto out_filt; } @@ -550,8 +564,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. 
+ * changed must be true only if the vlan was created or updated */ -int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *vlan; @@ -559,6 +574,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) ASSERT_RTNL(); + *changed = false; vg = br_vlan_group(br); vlan = br_vlan_find(vg, vid); if (vlan) { @@ -576,8 +592,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) refcount_inc(&vlan->refcnt); vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; vg->num_vlans++; + *changed = true; } - __vlan_add_flags(vlan, flags); + if (__vlan_add_flags(vlan, flags)) + *changed = true; + return 0; } @@ -600,6 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) if (ret) { free_percpu(vlan->stats); kfree(vlan); + } else { + *changed = true; } return ret; @@ -824,9 +845,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) const struct net_bridge_vlan *pvent; struct net_bridge_vlan_group *vg; struct net_bridge_port *p; + unsigned long *changed; + bool vlchange; u16 old_pvid; int err = 0; - unsigned long *changed; if (!pvid) { br_vlan_disable_default_pvid(br); @@ -850,7 +872,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) err = br_vlan_add(br, pvid, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, + &vlchange); if (err) goto out; br_vlan_delete(br, old_pvid); @@ -869,7 +892,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) err = nbp_vlan_add(p, pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &vlchange); if (err) goto err_port; nbp_vlan_delete(p, old_pvid); @@ -890,7 +914,8 @@ err_port: if (old_pvid) nbp_vlan_add(p, old_pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &vlchange); nbp_vlan_delete(p, pvid); } @@ -899,7 +924,8 @@ err_port: br_vlan_add(br, old_pvid, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, + &vlchange); br_vlan_delete(br, pvid); } goto out; @@ -931,6 +957,7 @@ int br_vlan_init(struct net_bridge *br) { struct net_bridge_vlan_group *vg; int ret = -ENOMEM; + bool changed; vg = kzalloc(sizeof(*vg), GFP_KERNEL); if (!vg) @@ -947,7 +974,7 @@ int br_vlan_init(struct net_bridge *br) rcu_assign_pointer(br->vlgrp, vg); ret = br_vlan_add(br, 1, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, &changed); if (ret) goto err_vlan_add; @@ -992,9 +1019,12 @@ int nbp_vlan_init(struct net_bridge_port *p) INIT_LIST_HEAD(&vg->vlan_list); rcu_assign_pointer(p->vlgrp, vg); if (p->br->default_pvid) { + bool changed; + ret = nbp_vlan_add(p, p->br->default_pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &changed); if (ret) goto err_vlan_add; } @@ -1016,8 +1046,10 @@ err_vlan_enabled: /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. 
+ * changed must be true only if the vlan was created or updated */ -int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed) { struct switchdev_obj_port_vlan v = { .obj.orig_dev = port->dev, @@ -1031,13 +1063,15 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) ASSERT_RTNL(); + *changed = false; vlan = br_vlan_find(nbp_vlan_group(port), vid); if (vlan) { /* Pass the flags to the hardware bridge */ ret = switchdev_port_obj_add(port->dev, &v.obj); if (ret && ret != -EOPNOTSUPP) return ret; - __vlan_add_flags(vlan, flags); + *changed = __vlan_add_flags(vlan, flags); + return 0; } @@ -1050,6 +1084,8 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) ret = __vlan_add(vlan, flags); if (ret) kfree(vlan); + else + *changed = true; return ret; } From 509708310cf917a05fbceb41ad67da1416b81bd0 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Fri, 27 Oct 2017 13:24:49 +0300 Subject: [PATCH 1561/2177] r8169: Add support for interrupt coalesce tuning (ethtool -C) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kirr: In particular with ethtool -C rx-usecs 0 rx-frames 0 it is now possible to disable RX delays when NIC usage requires low latency. See this thread for context: https://www.spinics.net/lists/netdev/msg217665.html My specific case is this: we have many computers with gigabit Realtek NICs. For 2 such computers connected to a gigabit store-and-forward switch the minimum round-trip time for small pings (`ping -i 0 -w 3 -s 56 -q peer`) is ~ 30μs. However it turned out that when Ethernet frame length transitions 127 -> 128 bytes (`ping -i 0 -w 3 -s {81 -> 82} -q peer`) the lowest RTT transitions step-wise to ~ 270μs. As David Laight said, this is RX interrupt mitigation done by the NIC, which creates the latency. For workloads where low latency is required, with e.g. Intel, BCM etc. NIC drivers one just uses `ethtool -C rx-usecs ...` to reduce the time the NIC delays before interrupting the CPU, but it turned out `ethtool -C` is not supported by the r8169 driver. Like Stéphane ANCELOT, I've traced the problem down to IntrMitigate being hardcoded to != 0 for our chips (we have 8168 based NICs): https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/ethernet/realtek/r8169.c#n5460 static void rtl_hw_start_8169(struct net_device *dev) { ... /* * Undocumented corner. Supposedly: * (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets */ RTL_W16(IntrMitigate, 0x0000); https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/ethernet/realtek/r8169.c#n6346 static void rtl_hw_start_8168(struct net_device *dev) { ... RTL_W16(IntrMitigate, 0x5151); and then I've also found https://www.spinics.net/lists/netdev/msg217665.html and original Francois' patch: https://www.spinics.net/lists/netdev/msg217984.html https://www.spinics.net/lists/netdev/msg218207.html So could we please finally get support for tuning r8169 interrupt coalescing in tree? (so that the next poor soul who hits the problem does not need to dig wildly into driver sources and the internet and finally patch locally -RTL_W16(IntrMitigate, 0x5151); +RTL_W16(IntrMitigate, 0x5100); guessing whether it is right or not, and also having to care to deploy the patch everywhere it needs to be used, etc...).
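For illustration, the "undocumented corner" layout quoted above, (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets, treats IntrMitigate (0xe2) as four 4-bit fields in one 16-bit register. A tiny standalone sketch of that packing is below; the layout is only "supposedly" correct per the driver comment, and the function and parameter names here are illustrative, not the driver's:

/* Sketch of the supposed IntrMitigate (0xe2) packing; not driver code. */
#include <stdint.h>

static uint16_t intr_mitigate_pack(uint8_t tx_timer, uint8_t tx_frames,
                                   uint8_t rx_timer, uint8_t rx_frames)
{
    /* each field is 4 bits wide, so keep only the low nibble */
    return ((tx_timer  & 0x0f) << 12) |
           ((tx_frames & 0x0f) <<  8) |
           ((rx_timer  & 0x0f) <<  4) |
            (rx_frames & 0x0f);
}

Read this way, the 0x5151 hardcoded for the 8168 family is intr_mitigate_pack(5, 1, 5, 1), the 0x0000 written for the 8169 leaves mitigation off, and the 0x5100 local workaround above simply zeroes the two RX fields. Exposing these fields through `ethtool -C` instead of hardcoding them is what is being asked for here.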
To do so I've took original Francois's patch from 2012 and reworked it a bit: - updated to latest net-next.git; - adjusted scaling setup based on feedback from Hayes to pick up scaling vector depending not only on link speed but also on CPlusCmd[0:1] and to adjust CPlusCmd[0:1] correspondingly when setting timings; - improved a bit (I think so) error handling. I've tested the patch on "RTL8168d/8111d" (XID 083000c0) and with it and `ethtool -C rx-usecs 0 rx-frames 0` on both ends it improves: - minimum RTT latency: ~270μs -> ~30μs (small packet), ~330μs -> ~110μs (full 1.5K ethernet frame) - average RTT latency: ~480μs -> ~50μs (small packet), ~560μs -> ~125μs (full 1.5K ethernet frame) ( before: root@neo1:# ping -i 0 -w 3 -s 82 -q neo2 PING neo2.kirr.nexedi.com (192.168.102.21) 82(110) bytes of data. --- neo2.kirr.nexedi.com ping statistics --- 5906 packets transmitted, 5905 received, 0% packet loss, time 2999ms rtt min/avg/max/mdev = 0.274/0.485/0.607/0.026 ms, ipg/ewma 0.508/0.489 ms root@neo1:# ping -i 0 -w 3 -s 1472 -q neo2 PING neo2.kirr.nexedi.com (192.168.102.21) 1472(1500) bytes of data. --- neo2.kirr.nexedi.com ping statistics --- 5073 packets transmitted, 5073 received, 0% packet loss, time 2999ms rtt min/avg/max/mdev = 0.330/0.566/0.710/0.028 ms, ipg/ewma 0.591/0.544 ms after: root@neo1# ping -i 0 -w 3 -s 82 -q neo2 PING neo2.kirr.nexedi.com (192.168.102.21) 82(110) bytes of data. --- neo2.kirr.nexedi.com ping statistics --- 45815 packets transmitted, 45815 received, 0% packet loss, time 3000ms rtt min/avg/max/mdev = 0.036/0.051/0.368/0.010 ms, ipg/ewma 0.065/0.053 ms root@neo1:# ping -i 0 -w 3 -s 1472 -q neo2 PING neo2.kirr.nexedi.com (192.168.102.21) 1472(1500) bytes of data. --- neo2.kirr.nexedi.com ping statistics --- 21250 packets transmitted, 21250 received, 0% packet loss, time 3000ms rtt min/avg/max/mdev = 0.112/0.125/0.390/0.007 ms, ipg/ewma 0.141/0.125 ms the small -> 1.5K latency growth is understandable as it takes ~15μs to transmit 1.5K on 1Gbps on the wire and with 2 hosts and 1 switch and ICMP ECHO + ECHO reply the packet has to travel 4 ethernet segments which is already 60μs; probably something a bit else is also there as e.g. on Linux, even with `cpupower frequency-set -g performance`, on some computers I've noticed the kernel can be spending more time in software-only mode when incoming packets go in less frequently. E.g. this program can demonstrate the effect for ICMP ECHO processing: https://lab.nexedi.com/kirr/bcc/blob/43cfc13b/tools/pinglat.py (later this was found to be partly due to C-states exit latencies) ) We have this patch running in our testing setup for 1 months already without any issues observed. It remains to be clarified whether RX and TX timers use the same base. For now I've set them equally, but Francois's original patch version suggests it could be not the same. I've got no feedback at all to my original posting of this patch and questions https://www.spinics.net/lists/netdev/msg457173.html neither from Francois, nor from any people from Realtek during one month. So I suggest we simply apply it to net-next.git now. Cc: Francois Romieu Cc: Hayes Wang Cc: Realtek linux nic maintainers Cc: David Laight Cc: Stéphane ANCELOT Cc: Eric Dumazet Signed-off-by: Kirill Smelkov Tested-by: Holger Hoffstätte Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169.c | 231 +++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 7dc4b6de31e6..fd218fd9ef3c 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -399,6 +399,12 @@ enum rtl_registers { RxMaxSize = 0xda, CPlusCmd = 0xe0, IntrMitigate = 0xe2, + +#define RTL_COALESCE_MASK 0x0f +#define RTL_COALESCE_SHIFT 4 +#define RTL_COALESCE_T_MAX (RTL_COALESCE_MASK) +#define RTL_COALESCE_FRAME_MAX (RTL_COALESCE_MASK << 2) + RxDescAddrLow = 0xe4, RxDescAddrHigh = 0xe8, EarlyTxThres = 0xec, /* 8169. Unit of 32 bytes. */ @@ -795,6 +801,7 @@ struct rtl8169_private { u16 cp_cmd; u16 event_slow; + const struct rtl_coalesce_info *coalesce_info; struct mdio_ops { void (*write)(struct rtl8169_private *, int, int); @@ -2363,10 +2370,229 @@ static int rtl8169_nway_reset(struct net_device *dev) return mii_nway_restart(&tp->mii); } +/* + * Interrupt coalescing + * + * > 1 - the availability of the IntrMitigate (0xe2) register through the + * > 8169, 8168 and 810x line of chipsets + * + * 8169, 8168, and 8136(810x) serial chipsets support it. + * + * > 2 - the Tx timer unit at gigabit speed + * + * The unit of the timer depends on both the speed and the setting of CPlusCmd + * (0xe0) bit 1 and bit 0. + * + * For 8169 + * bit[1:0] \ speed 1000M 100M 10M + * 0 0 320ns 2.56us 40.96us + * 0 1 2.56us 20.48us 327.7us + * 1 0 5.12us 40.96us 655.4us + * 1 1 10.24us 81.92us 1.31ms + * + * For the other + * bit[1:0] \ speed 1000M 100M 10M + * 0 0 5us 2.56us 40.96us + * 0 1 40us 20.48us 327.7us + * 1 0 80us 40.96us 655.4us + * 1 1 160us 81.92us 1.31ms + */ + +/* rx/tx scale factors for one particular CPlusCmd[0:1] value */ +struct rtl_coalesce_scale { + /* Rx / Tx */ + u32 nsecs[2]; +}; + +/* rx/tx scale factors for all CPlusCmd[0:1] cases */ +struct rtl_coalesce_info { + u32 speed; + struct rtl_coalesce_scale scalev[4]; /* each CPlusCmd[0:1] case */ +}; + +/* produce (r,t) pairs with each being in series of *1, *8, *8*2, *8*2*2 */ +#define rxtx_x1822(r, t) { \ + {{(r), (t)}}, \ + {{(r)*8, (t)*8}}, \ + {{(r)*8*2, (t)*8*2}}, \ + {{(r)*8*2*2, (t)*8*2*2}}, \ +} +static const struct rtl_coalesce_info rtl_coalesce_info_8169[] = { + /* speed delays: rx00 tx00 */ + { SPEED_10, rxtx_x1822(40960, 40960) }, + { SPEED_100, rxtx_x1822( 2560, 2560) }, + { SPEED_1000, rxtx_x1822( 320, 320) }, + { 0 }, +}; + +static const struct rtl_coalesce_info rtl_coalesce_info_8168_8136[] = { + /* speed delays: rx00 tx00 */ + { SPEED_10, rxtx_x1822(40960, 40960) }, + { SPEED_100, rxtx_x1822( 2560, 2560) }, + { SPEED_1000, rxtx_x1822( 5000, 5000) }, + { 0 }, +}; +#undef rxtx_x1822 + +/* get rx/tx scale vector corresponding to current speed */ +static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev) +{ + struct rtl8169_private *tp = netdev_priv(dev); + struct ethtool_link_ksettings ecmd; + const struct rtl_coalesce_info *ci; + int rc; + + rc = rtl8169_get_link_ksettings(dev, &ecmd); + if (rc < 0) + return ERR_PTR(rc); + + for (ci = tp->coalesce_info; ci->speed != 0; ci++) { + if (ecmd.base.speed == ci->speed) { + return ci; + } + } + + return ERR_PTR(-ELNRNG); +} + +static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) +{ + struct rtl8169_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + const struct rtl_coalesce_info *ci; + const struct rtl_coalesce_scale *scale; + struct { + u32 *max_frames; + u32 
*usecs; + } coal_settings [] = { + { &ec->rx_max_coalesced_frames, &ec->rx_coalesce_usecs }, + { &ec->tx_max_coalesced_frames, &ec->tx_coalesce_usecs } + }, *p = coal_settings; + int i; + u16 w; + + memset(ec, 0, sizeof(*ec)); + + /* get rx/tx scale corresponding to current speed and CPlusCmd[0:1] */ + ci = rtl_coalesce_info(dev); + if (IS_ERR(ci)) + return PTR_ERR(ci); + + scale = &ci->scalev[RTL_R16(CPlusCmd) & 3]; + + /* read IntrMitigate and adjust according to scale */ + for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) { + *p->max_frames = (w & RTL_COALESCE_MASK) << 2; + w >>= RTL_COALESCE_SHIFT; + *p->usecs = w & RTL_COALESCE_MASK; + } + + for (i = 0; i < 2; i++) { + p = coal_settings + i; + *p->usecs = (*p->usecs * scale->nsecs[i]) / 1000; + + /* + * ethtool_coalesce says it is illegal to set both usecs and + * max_frames to 0. + */ + if (!*p->usecs && !*p->max_frames) + *p->max_frames = 1; + } + + return 0; +} + +/* choose appropriate scale factor and CPlusCmd[0:1] for (speed, nsec) */ +static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale( + struct net_device *dev, u32 nsec, u16 *cp01) +{ + const struct rtl_coalesce_info *ci; + u16 i; + + ci = rtl_coalesce_info(dev); + if (IS_ERR(ci)) + return ERR_CAST(ci); + + for (i = 0; i < 4; i++) { + u32 rxtx_maxscale = max(ci->scalev[i].nsecs[0], + ci->scalev[i].nsecs[1]); + if (nsec <= rxtx_maxscale * RTL_COALESCE_T_MAX) { + *cp01 = i; + return &ci->scalev[i]; + } + } + + return ERR_PTR(-EINVAL); +} + +static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) +{ + struct rtl8169_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + const struct rtl_coalesce_scale *scale; + struct { + u32 frames; + u32 usecs; + } coal_settings [] = { + { ec->rx_max_coalesced_frames, ec->rx_coalesce_usecs }, + { ec->tx_max_coalesced_frames, ec->tx_coalesce_usecs } + }, *p = coal_settings; + u16 w = 0, cp01; + int i; + + scale = rtl_coalesce_choose_scale(dev, + max(p[0].usecs, p[1].usecs) * 1000, &cp01); + if (IS_ERR(scale)) + return PTR_ERR(scale); + + for (i = 0; i < 2; i++, p++) { + u32 units; + + /* + * accept max_frames=1 we returned in rtl_get_coalesce. + * accept it not only when usecs=0 because of e.g. the following scenario: + * + * - both rx_usecs=0 & rx_frames=0 in hardware (no delay on RX) + * - rtl_get_coalesce returns rx_usecs=0, rx_frames=1 + * - then user does `ethtool -C eth0 rx-usecs 100` + * + * since ethtool sends to kernel whole ethtool_coalesce + * settings, if we do not handle rx_usecs=!0, rx_frames=1 + * we'll reject it below in `frames % 4 != 0`. 
+ */ + if (p->frames == 1) { + p->frames = 0; + } + + units = p->usecs * 1000 / scale->nsecs[i]; + if (p->frames > RTL_COALESCE_FRAME_MAX || p->frames % 4) + return -EINVAL; + + w <<= RTL_COALESCE_SHIFT; + w |= units; + w <<= RTL_COALESCE_SHIFT; + w |= p->frames >> 2; + } + + rtl_lock_work(tp); + + RTL_W16(IntrMitigate, swab16(w)); + + tp->cp_cmd = (tp->cp_cmd & ~3) | cp01; + RTL_W16(CPlusCmd, tp->cp_cmd); + RTL_R16(CPlusCmd); + + rtl_unlock_work(tp); + + return 0; +} + static const struct ethtool_ops rtl8169_ethtool_ops = { .get_drvinfo = rtl8169_get_drvinfo, .get_regs_len = rtl8169_get_regs_len, .get_link = ethtool_op_get_link, + .get_coalesce = rtl_get_coalesce, + .set_coalesce = rtl_set_coalesce, .set_settings = rtl8169_set_settings, .get_msglevel = rtl8169_get_msglevel, .set_msglevel = rtl8169_set_msglevel, @@ -8061,6 +8287,7 @@ static const struct rtl_cfg_info { unsigned int align; u16 event_slow; unsigned features; + const struct rtl_coalesce_info *coalesce_info; u8 default_ver; } rtl_cfg_infos [] = { [RTL_CFG_0] = { @@ -8069,6 +8296,7 @@ static const struct rtl_cfg_info { .align = 0, .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver, .features = RTL_FEATURE_GMII, + .coalesce_info = rtl_coalesce_info_8169, .default_ver = RTL_GIGA_MAC_VER_01, }, [RTL_CFG_1] = { @@ -8077,6 +8305,7 @@ static const struct rtl_cfg_info { .align = 8, .event_slow = SYSErr | LinkChg | RxOverflow, .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI, + .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_11, }, [RTL_CFG_2] = { @@ -8086,6 +8315,7 @@ static const struct rtl_cfg_info { .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver | PCSTimeout, .features = RTL_FEATURE_MSI, + .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_13, } }; @@ -8449,6 +8679,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->hw_start = cfg->hw_start; tp->event_slow = cfg->event_slow; + tp->coalesce_info = cfg->coalesce_info; tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ? ~(RxBOVF | RxFOVF) : ~0; From c69fe407803d4b554b7397fad9598a04717ac255 Mon Sep 17 00:00:00 2001 From: Arjun Vynipadath Date: Fri, 27 Oct 2017 18:08:21 +0530 Subject: [PATCH 1562/2177] cxgb3: Check and handle the dma mapping errors This patch adds checks at approprate places whether *dma_map*() call has succeeded or not. Original Work by: Santosh Rastapur Signed-off-by: Arjun Vynipadath Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb3/sge.c | 118 ++++++++++++++++++----- 1 file changed, 92 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index e3d28ae75360..e988caa797cb 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -455,6 +455,11 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q, q->pg_chunk.offset = 0; mapping = pci_map_page(adapter->pdev, q->pg_chunk.page, 0, q->alloc_size, PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) { + __free_pages(q->pg_chunk.page, order); + q->pg_chunk.page = NULL; + return -EIO; + } q->pg_chunk.mapping = mapping; } sd->pg_chunk = q->pg_chunk; @@ -949,40 +954,78 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb) return flits_to_desc(flits); } +/* map_skb - map a packet main body and its page fragments + * @pdev: the PCI device + * @skb: the packet + * @addr: placeholder to save the mapped addresses + * + * map the main body of an sk_buff and its page fragments, if any. + */ +static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb, + dma_addr_t *addr) +{ + const skb_frag_t *fp, *end; + const struct skb_shared_info *si; + + if (skb_headlen(skb)) { + *addr = pci_map_single(pdev, skb->data, skb_headlen(skb), + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(pdev, *addr)) + goto out_err; + addr++; + } + + si = skb_shinfo(skb); + end = &si->frags[si->nr_frags]; + + for (fp = si->frags; fp < end; fp++) { + *addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp), + DMA_TO_DEVICE); + if (pci_dma_mapping_error(pdev, *addr)) + goto unwind; + addr++; + } + return 0; + +unwind: + while (fp-- > si->frags) + dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp), + DMA_TO_DEVICE); + + pci_unmap_single(pdev, addr[-1], skb_headlen(skb), PCI_DMA_TODEVICE); +out_err: + return -ENOMEM; +} + /** - * make_sgl - populate a scatter/gather list for a packet + * write_sgl - populate a scatter/gather list for a packet * @skb: the packet * @sgp: the SGL to populate * @start: start address of skb main body data to include in the SGL * @len: length of skb main body data to include in the SGL - * @pdev: the PCI device + * @addr: the list of the mapped addresses * - * Generates a scatter/gather list for the buffers that make up a packet + * Copies the scatter/gather list for the buffers that make up a packet * and returns the SGL size in 8-byte words. The caller must size the SGL * appropriately. 
*/ -static inline unsigned int make_sgl(const struct sk_buff *skb, - struct sg_ent *sgp, unsigned char *start, - unsigned int len, struct pci_dev *pdev) +static inline unsigned int write_sgl(const struct sk_buff *skb, + struct sg_ent *sgp, unsigned char *start, + unsigned int len, const dma_addr_t *addr) { - dma_addr_t mapping; - unsigned int i, j = 0, nfrags; + unsigned int i, j = 0, k = 0, nfrags; if (len) { - mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE); sgp->len[0] = cpu_to_be32(len); - sgp->addr[0] = cpu_to_be64(mapping); - j = 1; + sgp->addr[j++] = cpu_to_be64(addr[k++]); } nfrags = skb_shinfo(skb)->nr_frags; for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - mapping = skb_frag_dma_map(&pdev->dev, frag, 0, skb_frag_size(frag), - DMA_TO_DEVICE); sgp->len[j] = cpu_to_be32(skb_frag_size(frag)); - sgp->addr[j] = cpu_to_be64(mapping); + sgp->addr[j] = cpu_to_be64(addr[k++]); j ^= 1; if (j == 0) ++sgp; @@ -1138,7 +1181,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, const struct port_info *pi, unsigned int pidx, unsigned int gen, struct sge_txq *q, unsigned int ndesc, - unsigned int compl) + unsigned int compl, const dma_addr_t *addr) { unsigned int flits, sgl_flits, cntrl, tso_info; struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1]; @@ -1196,7 +1239,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, } sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; - sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev); + sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr); write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen, htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl), @@ -1227,6 +1270,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) struct netdev_queue *txq; struct sge_qset *qs; struct sge_txq *q; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* * The chip min packet length is 9 octets but play safe and reject @@ -1255,6 +1299,14 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } + /* Check if ethernet packet can't be sent as immediate data */ + if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) { + if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + } + q->in_use += ndesc; if (unlikely(credits - ndesc < q->stop_thres)) { t3_stop_tx_queue(txq, qs, q); @@ -1312,7 +1364,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(!skb_shared(skb))) skb_orphan(skb); - write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl); + write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr); check_ring_tx_db(adap, q); return NETDEV_TX_OK; } @@ -1577,7 +1629,8 @@ static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev, */ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, struct sge_txq *q, unsigned int pidx, - unsigned int gen, unsigned int ndesc) + unsigned int gen, unsigned int ndesc, + const dma_addr_t *addr) { unsigned int sgl_flits, flits; struct work_request_hdr *from; @@ -1598,10 +1651,9 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, flits = skb_transport_offset(skb) / 8; sgp = ndesc == 1 ? 
(struct sg_ent *)&d->flit[flits] : sgl; - sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb), - skb_tail_pointer(skb) - - skb_transport_header(skb), - adap->pdev); + sgl_flits = write_sgl(skb, sgp, skb_transport_header(skb), + skb_tail_pointer(skb) - skb_transport_header(skb), + addr); if (need_skb_unmap()) { setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); skb->destructor = deferred_unmap_destructor; @@ -1659,6 +1711,12 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); goto again; } + if (!immediate(skb) && + map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) { + spin_unlock(&q->lock); + return NET_XMIT_SUCCESS; + } + gen = q->gen; q->in_use += ndesc; pidx = q->pidx; @@ -1669,7 +1727,7 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); } spin_unlock(&q->lock); - write_ofld_wr(adap, skb, q, pidx, gen, ndesc); + write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head); check_ring_tx_db(adap, q); return NET_XMIT_SUCCESS; } @@ -1687,6 +1745,7 @@ static void restart_offloadq(unsigned long data) struct sge_txq *q = &qs->txq[TXQ_OFLD]; const struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; + unsigned int written = 0; spin_lock(&q->lock); again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); @@ -1706,10 +1765,15 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); break; } + if (!immediate(skb) && + map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) + break; + gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; + written += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; @@ -1717,7 +1781,8 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); __skb_unlink(skb, &q->sendq); spin_unlock(&q->lock); - write_ofld_wr(adap, skb, q, pidx, gen, ndesc); + write_ofld_wr(adap, skb, q, pidx, gen, ndesc, + (dma_addr_t *)skb->head); spin_lock(&q->lock); } spin_unlock(&q->lock); @@ -1727,8 +1792,9 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); set_bit(TXQ_LAST_PKT_DB, &q->flags); #endif wmb(); - t3_write_reg(adap, A_SG_KDOORBELL, - F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); + if (likely(written)) + t3_write_reg(adap, A_SG_KDOORBELL, + F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /** From 949cf8b1dd39f6d306a681503b6031299fd7ced8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 29 Oct 2017 11:14:08 +0900 Subject: [PATCH 1563/2177] tcp: Remove "linux/unaligned/access_ok.h" include. This causes build failures: In file included from net/ipv4/tcp_input.c:79:0: ./include/linux/unaligned/access_ok.h:7:28: error: redefinition of 'get_unaligned_le16' In file included from ./include/asm-generic/unaligned.h:17:0, from ./arch/arm/include/generated/asm/unaligned.h:1, from net/ipv4/tcp_input.c:76: ./include/linux/unaligned/le_struct.h:6:19: note: previous definition of 'get_unaligned_le16' was here In file included from net/ipv4/tcp_input.c:79:0: ./include/linux/unaligned/access_ok.h:12:28: error: redefinition of 'get_unaligned_le32' Plain "asm/access_ok.h", which is already included, is sufficient. Fixes: 60e2a7780793 ("tcp: TCP experimental option for SMC") Reported-by: Egil Hjelmeland Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 21c358c0cf2e..b62a7d1707ae 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -76,7 +76,6 @@ #include #include #include -#include #include int sysctl_tcp_max_orphans __read_mostly = NR_FILE; From 3953ae7b218df4d1e544b98a393666f9ae58a78c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 27 Oct 2017 16:51:50 +0200 Subject: [PATCH 1564/2177] l2tp: don't register sessions in l2tp_session_create() Sessions created by l2tp_session_create() aren't fully initialised: some pseudo-wire specific operations need to be done before making the session usable. Therefore the PPP and Ethernet pseudo-wires continue working on the returned l2tp session while it's already been exposed to the rest of the system. This can lead to various issues. In particular, the session may enter the deletion process before having been fully initialised, which will confuse the session removal code. This patch moves session registration out of l2tp_session_create(), so that callers can control when the session is exposed to the rest of the system. This is done by the new l2tp_session_register() function. Only pppol2tp_session_create() can be easily converted to avoid modifying its session after registration (the debug message is dropped in order to avoid the need for holding a reference on the session). For pppol2tp_connect() and l2tp_eth_create()), more work is needed. That'll be done in followup patches. For now, let's just register the session right after its creation, like it was done before. The only difference is that we can easily take a reference on the session before registering it, so, at least, we're sure it's not going to be freed while we're working on it. Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 21 +++++++-------------- net/l2tp/l2tp_core.h | 3 +++ net/l2tp/l2tp_eth.c | 9 +++++++++ net/l2tp/l2tp_ppp.c | 23 +++++++++++++++++------ 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index af22aa8ae35b..a1d56e143fcd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -322,8 +322,8 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); -static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, - struct l2tp_session *session) +int l2tp_session_register(struct l2tp_session *session, + struct l2tp_tunnel *tunnel) { struct l2tp_session *session_walk; struct hlist_head *g_head; @@ -371,6 +371,10 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, hlist_add_head(&session->hlist, head); write_unlock_bh(&tunnel->hlist_lock); + /* Ignore management session in session count value */ + if (session->session_id != 0) + atomic_inc(&l2tp_session_count); + return 0; err_tlock_pnlock: @@ -380,6 +384,7 @@ err_tlock: return err; } +EXPORT_SYMBOL_GPL(l2tp_session_register); /* Lookup a tunnel by id */ @@ -1788,7 +1793,6 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; - int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1846,17 +1850,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn refcount_set(&session->ref_count, 1); - err = l2tp_session_add_to_tunnel(tunnel, session); - if (err) { - kfree(session); - - return ERR_PTR(err); - } - - /* Ignore management session in session count value */ - if (session->session_id != 0) - atomic_inc(&l2tp_session_count); - return session; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 67c79d9b5c6c..77caa5966736 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -263,6 +263,9 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +int l2tp_session_register(struct l2tp_session *session, + struct l2tp_tunnel *tunnel); + void __l2tp_session_unhash(struct l2tp_session *session); int l2tp_session_delete(struct l2tp_session *session); void l2tp_session_free(struct l2tp_session *session); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 014a7bc2a872..a7d76f5f31ff 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -271,6 +271,13 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, goto out; } + l2tp_session_inc_refcount(session); + rc = l2tp_session_register(session, tunnel); + if (rc < 0) { + kfree(session); + goto out; + } + dev = alloc_netdev(sizeof(*priv), name, name_assign_type, l2tp_eth_dev_setup); if (!dev) { @@ -304,6 +311,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, __module_get(THIS_MODULE); /* Must be done after register_netdev() */ strlcpy(session->ifname, dev->name, IFNAMSIZ); + l2tp_session_dec_refcount(session); dev_hold(dev); @@ -314,6 +322,7 @@ out_del_dev: spriv->dev = NULL; out_del_session: l2tp_session_delete(session); + l2tp_session_dec_refcount(session); out: return rc; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f50452b919d5..40cf7a78e331 100644 --- a/net/l2tp/l2tp_ppp.c 
+++ b/net/l2tp/l2tp_ppp.c @@ -715,6 +715,14 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, error = PTR_ERR(session); goto end; } + + l2tp_session_inc_refcount(session); + error = l2tp_session_register(session, tunnel); + if (error < 0) { + kfree(session); + goto end; + } + drop_refcnt = true; } /* Associate session with its PPPoL2TP socket */ @@ -800,7 +808,7 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, /* Error if tunnel socket is not prepped */ if (!tunnel->sock) { error = -ENOENT; - goto out; + goto err; } /* Default MTU values. */ @@ -815,18 +823,21 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, peer_session_id, cfg); if (IS_ERR(session)) { error = PTR_ERR(session); - goto out; + goto err; } ps = l2tp_session_priv(session); ps->tunnel_sock = tunnel->sock; - l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", - session->name); + error = l2tp_session_register(session, tunnel); + if (error < 0) + goto err_sess; - error = 0; + return 0; -out: +err_sess: + kfree(session); +err: return error; } From ee28de6bbd78c2e18111a0aef43ea746f28d2073 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 27 Oct 2017 16:51:51 +0200 Subject: [PATCH 1565/2177] l2tp: initialise l2tp_eth sessions before registering them Sessions must be initialised before being made externally visible by l2tp_session_register(). Otherwise the session may be concurrently deleted before being initialised, which can confuse the deletion path and eventually lead to kernel oops. Therefore, we need to move l2tp_session_register() down in l2tp_eth_create(), but also handle the intermediate step where only the session or the netdevice has been registered. We can't just call l2tp_session_register() in ->ndo_init() because we'd have no way to properly undo this operation in ->ndo_uninit(). Instead, let's register the session and the netdevice in two different steps and protect the session's device pointer with RCU. And now that we allow the session's .dev field to be NULL, we don't need to prevent the netdevice from being removed anymore. So we can drop the dev_hold() and dev_put() calls in l2tp_eth_create() and l2tp_eth_dev_uninit(). Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 106 +++++++++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 31 deletions(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index a7d76f5f31ff..d29bfee291cb 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -54,7 +54,7 @@ struct l2tp_eth { /* via l2tp_session_priv() */ struct l2tp_eth_sess { - struct net_device *dev; + struct net_device __rcu *dev; }; @@ -72,7 +72,14 @@ static int l2tp_eth_dev_init(struct net_device *dev) static void l2tp_eth_dev_uninit(struct net_device *dev) { - dev_put(dev); + struct l2tp_eth *priv = netdev_priv(dev); + struct l2tp_eth_sess *spriv; + + spriv = l2tp_session_priv(priv->session); + RCU_INIT_POINTER(spriv->dev, NULL); + /* No need for synchronize_net() here. We're called by + * unregister_netdev*(), which does the synchronisation for us. 
+ */ } static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) @@ -130,8 +137,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev) static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) { struct l2tp_eth_sess *spriv = l2tp_session_priv(session); - struct net_device *dev = spriv->dev; - struct l2tp_eth *priv = netdev_priv(dev); + struct net_device *dev; + struct l2tp_eth *priv; if (session->debug & L2TP_MSG_DATA) { unsigned int length; @@ -155,16 +162,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, skb_dst_drop(skb); nf_reset(skb); + rcu_read_lock(); + dev = rcu_dereference(spriv->dev); + if (!dev) + goto error_rcu; + + priv = netdev_priv(dev); if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { atomic_long_inc(&priv->rx_packets); atomic_long_add(data_len, &priv->rx_bytes); } else { atomic_long_inc(&priv->rx_errors); } + rcu_read_unlock(); + return; +error_rcu: + rcu_read_unlock(); error: - atomic_long_inc(&priv->rx_errors); kfree_skb(skb); } @@ -175,11 +191,15 @@ static void l2tp_eth_delete(struct l2tp_session *session) if (session) { spriv = l2tp_session_priv(session); - dev = spriv->dev; + + rtnl_lock(); + dev = rtnl_dereference(spriv->dev); if (dev) { - unregister_netdev(dev); - spriv->dev = NULL; + unregister_netdevice(dev); + rtnl_unlock(); module_put(THIS_MODULE); + } else { + rtnl_unlock(); } } } @@ -189,9 +209,20 @@ static void l2tp_eth_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; struct l2tp_eth_sess *spriv = l2tp_session_priv(session); - struct net_device *dev = spriv->dev; + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(spriv->dev); + if (!dev) { + rcu_read_unlock(); + return; + } + dev_hold(dev); + rcu_read_unlock(); seq_printf(m, " interface %s\n", dev->name); + + dev_put(dev); } #endif @@ -268,21 +299,14 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, peer_session_id, cfg); if (IS_ERR(session)) { rc = PTR_ERR(session); - goto out; - } - - l2tp_session_inc_refcount(session); - rc = l2tp_session_register(session, tunnel); - if (rc < 0) { - kfree(session); - goto out; + goto err; } dev = alloc_netdev(sizeof(*priv), name, name_assign_type, l2tp_eth_dev_setup); if (!dev) { rc = -ENOMEM; - goto out_del_session; + goto err_sess; } dev_net_set(dev, net); @@ -302,28 +326,48 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, #endif spriv = l2tp_session_priv(session); - spriv->dev = dev; - rc = register_netdev(dev); - if (rc < 0) - goto out_del_dev; + l2tp_session_inc_refcount(session); + + rtnl_lock(); + + /* Register both device and session while holding the rtnl lock. This + * ensures that l2tp_eth_delete() will see that there's a device to + * unregister, even if it happened to run before we assign spriv->dev. 
+ */ + rc = l2tp_session_register(session, tunnel); + if (rc < 0) { + rtnl_unlock(); + goto err_sess_dev; + } + + rc = register_netdevice(dev); + if (rc < 0) { + rtnl_unlock(); + l2tp_session_delete(session); + l2tp_session_dec_refcount(session); + free_netdev(dev); + + return rc; + } - __module_get(THIS_MODULE); - /* Must be done after register_netdev() */ strlcpy(session->ifname, dev->name, IFNAMSIZ); + rcu_assign_pointer(spriv->dev, dev); + + rtnl_unlock(); + l2tp_session_dec_refcount(session); - dev_hold(dev); + __module_get(THIS_MODULE); return 0; -out_del_dev: - free_netdev(dev); - spriv->dev = NULL; -out_del_session: - l2tp_session_delete(session); +err_sess_dev: l2tp_session_dec_refcount(session); -out: + free_netdev(dev); +err_sess: + kfree(session); +err: return rc; } From ee40fb2e1eb5bc0ddd3f2f83c6e39a454ef5a741 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 27 Oct 2017 16:51:52 +0200 Subject: [PATCH 1566/2177] l2tp: protect sock pointer of struct pppol2tp_session with RCU pppol2tp_session_create() registers sessions that can't have their corresponding socket initialised. This socket has to be created by userspace, then connected to the session by pppol2tp_connect(). Therefore, we need to protect the pppol2tp socket pointer of L2TP sessions, so that it can safely be updated when userspace is connecting or closing the socket. This will eventually allow pppol2tp_connect() to avoid generating transient states while initialising its parts of the session. To this end, this patch protects the pppol2tp socket pointer using RCU. The pppol2tp socket pointer is still set in pppol2tp_connect(), but only once we know the function isn't going to fail. It's eventually reset by pppol2tp_release(), which now has to wait for a grace period to elapse before it can drop the last reference on the socket. This ensures that pppol2tp_session_get_sock() can safely grab a reference on the socket, even after ps->sk is reset to NULL but before this operation actually gets visible from pppol2tp_session_get_sock(). The rest is standard RCU conversion: pppol2tp_recv(), which already runs in atomic context, is simply enclosed by rcu_read_lock() and rcu_read_unlock(), while other functions are converted to use pppol2tp_session_get_sock() followed by sock_put(). pppol2tp_session_setsockopt() is a special case. It used to retrieve the pppol2tp socket from the L2TP session, which itself was retrieved from the pppol2tp socket. Therefore we can just avoid dereferencing ps->sk and directly use the original socket pointer instead. With all users of ps->sk now handling NULL and concurrent updates, the L2TP ->ref() and ->deref() callbacks aren't needed anymore. Therefore, rather than converting pppol2tp_session_sock_hold() and pppol2tp_session_sock_put(), we can just drop them. Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_ppp.c | 154 +++++++++++++++++++++++++++++--------------- 1 file changed, 101 insertions(+), 53 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 40cf7a78e331..d40d5492c148 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -122,8 +122,11 @@ struct pppol2tp_session { int owner; /* pid that opened the socket */ - struct sock *sock; /* Pointer to the session + struct mutex sk_lock; /* Protects .sk */ + struct sock __rcu *sk; /* Pointer to the session * PPPoX socket */ + struct sock *__sk; /* Copy of .sk, for cleanup */ + struct rcu_head rcu; /* For asynchronous release */ struct sock *tunnel_sock; /* Pointer to the tunnel UDP * socket */ int flags; /* accessed by PPPIOCGFLAGS. @@ -138,6 +141,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = { static const struct proto_ops pppol2tp_ops; +/* Retrieves the pppol2tp socket associated to a session. + * A reference is held on the returned socket, so this function must be paired + * with sock_put(). + */ +static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session) +{ + struct pppol2tp_session *ps = l2tp_session_priv(session); + struct sock *sk; + + rcu_read_lock(); + sk = rcu_dereference(ps->sk); + if (sk) + sock_hold(sk); + rcu_read_unlock(); + + return sk; +} + /* Helpers to obtain tunnel/session contexts from sockets. */ static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk) @@ -224,7 +245,8 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int /* If the socket is bound, send it in to PPP's input queue. Otherwise * queue it on the session socket. */ - sk = ps->sock; + rcu_read_lock(); + sk = rcu_dereference(ps->sk); if (sk == NULL) goto no_sock; @@ -247,30 +269,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int kfree_skb(skb); } } + rcu_read_unlock(); return; no_sock: + rcu_read_unlock(); l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name); kfree_skb(skb); } -static void pppol2tp_session_sock_hold(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock) - sock_hold(ps->sock); -} - -static void pppol2tp_session_sock_put(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock) - sock_put(ps->sock); -} - /************************************************************************ * Transmit handling ***********************************************************************/ @@ -431,14 +439,16 @@ abort: */ static void pppol2tp_session_close(struct l2tp_session *session) { - struct pppol2tp_session *ps = l2tp_session_priv(session); - struct sock *sk = ps->sock; - struct socket *sock = sk->sk_socket; + struct sock *sk; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (sock) - inet_shutdown(sock, SEND_SHUTDOWN); + sk = pppol2tp_session_get_sock(session); + if (sk) { + if (sk->sk_socket) + inet_shutdown(sk->sk_socket, SEND_SHUTDOWN); + sock_put(sk); + } /* Don't let the session go away before our socket does */ l2tp_session_inc_refcount(session); @@ -461,6 +471,14 @@ static void pppol2tp_session_destruct(struct sock *sk) } } +static void pppol2tp_put_sk(struct rcu_head *head) +{ + struct pppol2tp_session *ps; + + ps = container_of(head, typeof(*ps), rcu); + sock_put(ps->__sk); +} + /* Called when the PPPoX socket (session) is closed. 
*/ static int pppol2tp_release(struct socket *sock) @@ -486,11 +504,24 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); - /* Purge any queued data */ if (session != NULL) { + struct pppol2tp_session *ps; + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); - sock_put(sk); + + ps = l2tp_session_priv(session); + mutex_lock(&ps->sk_lock); + ps->__sk = rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock)); + RCU_INIT_POINTER(ps->sk, NULL); + mutex_unlock(&ps->sk_lock); + call_rcu(&ps->rcu, pppol2tp_put_sk); + + /* Rely on the sock_put() call at the end of the function for + * dropping the reference held by pppol2tp_sock_to_session(). + * The last reference will be dropped by pppol2tp_put_sk(). + */ } release_sock(sk); @@ -557,12 +588,14 @@ out: static void pppol2tp_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; - struct pppol2tp_session *ps = l2tp_session_priv(session); + struct sock *sk; - if (ps) { - struct pppox_sock *po = pppox_sk(ps->sock); - if (po) - seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + sk = pppol2tp_session_get_sock(session); + if (sk) { + struct pppox_sock *po = pppox_sk(sk); + + seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + sock_put(sk); } } #endif @@ -693,13 +726,17 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, /* Using a pre-existing session is fine as long as it hasn't * been connected yet. */ - if (ps->sock) { + mutex_lock(&ps->sk_lock); + if (rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock))) { + mutex_unlock(&ps->sk_lock); error = -EEXIST; goto end; } /* consistency checks */ if (ps->tunnel_sock != tunnel->sock) { + mutex_unlock(&ps->sk_lock); error = -EEXIST; goto end; } @@ -716,19 +753,21 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, goto end; } + ps = l2tp_session_priv(session); + mutex_init(&ps->sk_lock); l2tp_session_inc_refcount(session); + + mutex_lock(&ps->sk_lock); error = l2tp_session_register(session, tunnel); if (error < 0) { + mutex_unlock(&ps->sk_lock); kfree(session); goto end; } drop_refcnt = true; } - /* Associate session with its PPPoL2TP socket */ - ps = l2tp_session_priv(session); ps->owner = current->pid; - ps->sock = sk; ps->tunnel_sock = tunnel->sock; session->recv_skb = pppol2tp_recv; @@ -737,12 +776,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->show = pppol2tp_show; #endif - /* We need to know each time a skb is dropped from the reorder - * queue. - */ - session->ref = pppol2tp_session_sock_hold; - session->deref = pppol2tp_session_sock_put; - /* If PMTU discovery was enabled, use the MTU that was discovered */ dst = sk_dst_get(tunnel->sock); if (dst != NULL) { @@ -776,12 +809,17 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.mtu = session->mtu; error = ppp_register_net_channel(sock_net(sk), &po->chan); - if (error) + if (error) { + mutex_unlock(&ps->sk_lock); goto end; + } out_no_ppp: /* This is how we get the session context from the socket. 
*/ sk->sk_user_data = session; + rcu_assign_pointer(ps->sk, sk); + mutex_unlock(&ps->sk_lock); + sk->sk_state = PPPOX_CONNECTED; l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", session->name); @@ -827,6 +865,7 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, } ps = l2tp_session_priv(session); + mutex_init(&ps->sk_lock); ps->tunnel_sock = tunnel->sock; error = l2tp_session_register(session, tunnel); @@ -998,12 +1037,10 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session, "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", session->name, cmd, arg); - sk = ps->sock; + sk = pppol2tp_session_get_sock(session); if (!sk) return -EBADR; - sock_hold(sk); - switch (cmd) { case SIOCGIFMTU: err = -ENXIO; @@ -1279,7 +1316,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk, int optname, int val) { int err = 0; - struct pppol2tp_session *ps = l2tp_session_priv(session); switch (optname) { case PPPOL2TP_SO_RECVSEQ: @@ -1300,8 +1336,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk, } session->send_seq = !!val; { - struct sock *ssk = ps->sock; - struct pppox_sock *po = pppox_sk(ssk); + struct pppox_sock *po = pppox_sk(sk); + po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } @@ -1640,8 +1676,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) { struct l2tp_session *session = v; struct l2tp_tunnel *tunnel = session->tunnel; - struct pppol2tp_session *ps = l2tp_session_priv(session); - struct pppox_sock *po = pppox_sk(ps->sock); + unsigned char state; + char user_data_ok; + struct sock *sk; u32 ip = 0; u16 port = 0; @@ -1651,6 +1688,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) port = ntohs(inet->inet_sport); } + sk = pppol2tp_session_get_sock(session); + if (sk) { + state = sk->sk_state; + user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N'; + } else { + state = 0; + user_data_ok = 'N'; + } + seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " "%04X/%04X %d %c\n", session->name, ip, port, @@ -1658,9 +1704,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->session_id, tunnel->peer_tunnel_id, session->peer_session_id, - ps->sock->sk_state, - (session == ps->sock->sk_user_data) ? - 'Y' : 'N'); + state, user_data_ok); seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n", session->mtu, session->mru, session->recv_seq ? 'R' : '-', @@ -1677,8 +1721,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) atomic_long_read(&session->stats.rx_bytes), atomic_long_read(&session->stats.rx_errors)); - if (po) + if (sk) { + struct pppox_sock *po = pppox_sk(sk); + seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + sock_put(sk); + } } static int pppol2tp_seq_show(struct seq_file *m, void *v) From f98be6c6359e7e4a61aaefb9964c1db31cb9ec0c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 27 Oct 2017 16:51:52 +0200 Subject: [PATCH 1567/2177] l2tp: initialise PPP sessions before registering them pppol2tp_connect() initialises L2TP sessions after they've been exposed to the rest of the system by l2tp_session_register(). This puts sessions into transient states that are the source of several races, in particular with session's deletion path. This patch centralises the initialisation code into pppol2tp_session_init(), which is called before the registration phase. The only field that can't be set before session registration is the pppol2tp socket pointer, which has already been converted to RCU. 
So pppol2tp_connect() should now be race-free. The session's .session_close() callback is now set before registration. Therefore, it's always called when l2tp_core deletes the session, even if it was created by pppol2tp_session_create() and hasn't been plugged to a pppol2tp socket yet. That'd prevent session free because the extra reference taken by pppol2tp_session_close() wouldn't be dropped by the socket's ->sk_destruct() callback (pppol2tp_session_destruct()). We could set .session_close() only while connecting a session to its pppol2tp socket, or teach pppol2tp_session_close() to avoid grabbing a reference when the session isn't connected, but that'd require adding some form of synchronisation to be race free. Instead of that, we can just let the pppol2tp socket hold a reference on the session as soon as it starts depending on it (that is, in pppol2tp_connect()). Then we don't need to utilise pppol2tp_session_close() to hold a reference at the last moment to prevent l2tp_core from dropping it. When releasing the socket, pppol2tp_release() now deletes the session using the standard l2tp_session_delete() function, instead of merely removing it from hash tables. l2tp_session_delete() drops the reference the sessions holds on itself, but also makes sure it doesn't remove a session twice. So it can safely be called, even if l2tp_core already tried, or is concurrently trying, to remove the session. Finally, pppol2tp_session_destruct() drops the reference held by the socket. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 69 +++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d40d5492c148..845aba543dce 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -449,9 +449,6 @@ static void pppol2tp_session_close(struct l2tp_session *session) inet_shutdown(sk->sk_socket, SEND_SHUTDOWN); sock_put(sk); } - - /* Don't let the session go away before our socket does */ - l2tp_session_inc_refcount(session); } /* Really kill the session socket. (Called from sock_put() if @@ -507,8 +504,7 @@ static int pppol2tp_release(struct socket *sock) if (session != NULL) { struct pppol2tp_session *ps; - __l2tp_session_unhash(session); - l2tp_session_queue_purge(session); + l2tp_session_delete(session); ps = l2tp_session_priv(session); mutex_lock(&ps->sk_lock); @@ -600,6 +596,35 @@ static void pppol2tp_show(struct seq_file *m, void *arg) } #endif +static void pppol2tp_session_init(struct l2tp_session *session) +{ + struct pppol2tp_session *ps; + struct dst_entry *dst; + + session->recv_skb = pppol2tp_recv; + session->session_close = pppol2tp_session_close; +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) + session->show = pppol2tp_show; +#endif + + ps = l2tp_session_priv(session); + mutex_init(&ps->sk_lock); + ps->tunnel_sock = session->tunnel->sock; + ps->owner = current->pid; + + /* If PMTU discovery was enabled, use the MTU that was discovered */ + dst = sk_dst_get(session->tunnel->sock); + if (dst) { + u32 pmtu = dst_mtu(dst); + + if (pmtu) { + session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD; + session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD; + } + dst_release(dst); + } +} + /* connect() handler. 
Attach a PPPoX socket to a tunnel UDP socket */ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, @@ -611,7 +636,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, struct l2tp_session *session = NULL; struct l2tp_tunnel *tunnel; struct pppol2tp_session *ps; - struct dst_entry *dst; struct l2tp_session_cfg cfg = { 0, }; int error = 0; u32 tunnel_id, peer_tunnel_id; @@ -753,8 +777,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, goto end; } + pppol2tp_session_init(session); ps = l2tp_session_priv(session); - mutex_init(&ps->sk_lock); l2tp_session_inc_refcount(session); mutex_lock(&ps->sk_lock); @@ -767,26 +791,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, drop_refcnt = true; } - ps->owner = current->pid; - ps->tunnel_sock = tunnel->sock; - - session->recv_skb = pppol2tp_recv; - session->session_close = pppol2tp_session_close; -#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) - session->show = pppol2tp_show; -#endif - - /* If PMTU discovery was enabled, use the MTU that was discovered */ - dst = sk_dst_get(tunnel->sock); - if (dst != NULL) { - u32 pmtu = dst_mtu(dst); - - if (pmtu != 0) - session->mtu = session->mru = pmtu - - PPPOL2TP_HEADER_OVERHEAD; - dst_release(dst); - } - /* Special case: if source & dest session_id == 0x0000, this * socket is being created to manage the tunnel. Just set up * the internal context for use by ioctl() and sockopt() @@ -820,6 +824,12 @@ out_no_ppp: rcu_assign_pointer(ps->sk, sk); mutex_unlock(&ps->sk_lock); + /* Keep the reference we've grabbed on the session: sk doesn't expect + * the session to disappear. pppol2tp_session_destruct() is responsible + * for dropping it. + */ + drop_refcnt = false; + sk->sk_state = PPPOX_CONNECTED; l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", session->name); @@ -841,7 +851,6 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, { int error; struct l2tp_session *session; - struct pppol2tp_session *ps; /* Error if tunnel socket is not prepped */ if (!tunnel->sock) { @@ -864,9 +873,7 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, goto err; } - ps = l2tp_session_priv(session); - mutex_init(&ps->sk_lock); - ps->tunnel_sock = tunnel->sock; + pppol2tp_session_init(session); error = l2tp_session_register(session, tunnel); if (error < 0) From 90e229ef61fad240554f5899eb122fbe44990f78 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 27 Oct 2017 20:08:23 +0200 Subject: [PATCH 1568/2177] ppp: allow usage in namespaces Check for CAP_NET_ADMIN with ns_capable() instead of capable() to allow usage of ppp in user namespace other than the init one. Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 6566107cef84..af7f93ed1487 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -390,7 +390,7 @@ static int ppp_open(struct inode *inode, struct file *file) /* * This could (should?) be enforced by the permissions on /dev/ppp. 
*/ - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(file->f_cred->user_ns, CAP_NET_ADMIN)) return -EPERM; return 0; } From a6fb6aa3cfa9047b62653dbcfc9bcde6e2272b41 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 27 Oct 2017 12:36:38 -0700 Subject: [PATCH 1569/2177] hv_netvsc: Set tx_table to equal weight after subchannels open In some cases, like internal vSwitch, the host doesn't provide send indirection table updates. This patch sets the table to be equal weight after subchannels are all open. Otherwise, all workload will be on one TX channel. As tested, this patch has largely increased the throughput over internal vSwitch. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index addf9f69c58c..0648eebda829 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1114,6 +1114,9 @@ void rndis_set_subchannel(struct work_struct *w) netif_set_real_num_tx_queues(ndev, nvdev->num_chn); netif_set_real_num_rx_queues(ndev, nvdev->num_chn); + for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) + ndev_ctx->tx_table[i] = i % nvdev->num_chn; + rtnl_unlock(); return; From 641da8ed3d8c54a5357d85a0f18d1d537c4205b9 Mon Sep 17 00:00:00 2001 From: Felix Manlunas Date: Fri, 27 Oct 2017 14:37:03 -0700 Subject: [PATCH 1570/2177] liquidio: get rid of false alarm "Unknown cmd 27" in dmesg Creating a macvtap interface with the liquidio VF driver as lower device causes this alarming message to show up in dmesg: liquidio_link_ctrl_cmd_completion Unknown cmd 27 That's actually a false alarm because cmd 27 is the value of the macro OCTNET_CMD_SET_UC_LIST which is known. It's a control command sent from host to NIC firmware to set the unicast MAC address list of the macvtap lower device. Make the false alarm go away by adding a case for OCTNET_CMD_SET_UC_LIST in liquidio_link_ctrl_cmd_completion(). Signed-off-by: Felix Manlunas Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index b891d858e416..89b7820d59ce 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -143,6 +143,7 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) switch (nctrl->ncmd.s.cmd) { case OCTNET_CMD_CHANGE_DEVFLAGS: case OCTNET_CMD_SET_MULTI_LIST: + case OCTNET_CMD_SET_UC_LIST: break; case OCTNET_CMD_CHANGE_MACADDR: From 5c1a6eaf0d4eb51e96f193a603af59b55660cb08 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 27 Oct 2017 15:56:01 -0700 Subject: [PATCH 1571/2177] net: dsa: b53: Export b53_configure_vlan() bcm_sf2 and b53 replicate the same operations: clear all VLANs and set their ports to the default VLAN tag (1 for these devices) so export the b53 function doing just that. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 6 +++-- drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/bcm_sf2.c | 41 +------------------------------- 3 files changed, 6 insertions(+), 42 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c74a50112551..fa37f501f10b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -613,8 +613,9 @@ static void b53_enable_mib(struct b53_device *dev) b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc); } -static int b53_configure_vlan(struct b53_device *dev) +int b53_configure_vlan(struct dsa_switch *ds) { + struct b53_device *dev = ds->priv; struct b53_vlan vl = { 0 }; int i; @@ -637,6 +638,7 @@ static int b53_configure_vlan(struct b53_device *dev) return 0; } +EXPORT_SYMBOL(b53_configure_vlan); static void b53_switch_reset_gpio(struct b53_device *dev) { @@ -751,7 +753,7 @@ static int b53_apply_config(struct b53_device *priv) /* disable switching */ b53_set_forwarding(priv, 0); - b53_configure_vlan(priv); + b53_configure_vlan(priv->ds); /* enable switching */ b53_set_forwarding(priv, 1); diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 603c66d240d8..daaaa1ecb996 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -285,6 +285,7 @@ static inline int b53_switch_get_reset_gpio(struct b53_device *dev) /* Exported functions towards other drivers */ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port); +int b53_configure_vlan(struct dsa_switch *ds); void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data); void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_get_sset_count(struct dsa_switch *ds); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 7f47400e557e..2d6867f4008c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -738,45 +738,6 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, return p->ethtool_ops->set_wol(p, wol); } -static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv) -{ - unsigned int timeout = 10; - u32 reg; - - do { - reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL); - if (!(reg & ARLA_VTBL_STDN)) - return 0; - - usleep_range(1000, 2000); - } while (timeout--); - - return -ETIMEDOUT; -} - -static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op) -{ - core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL); - - return bcm_sf2_vlan_op_wait(priv); -} - -static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - unsigned int port; - - /* Clear all VLANs */ - bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_CLEAR); - - for (port = 0; port < priv->hw_params.num_ports; port++) { - if (!dsa_is_user_port(ds, port)) - continue; - - core_writel(priv, 1, CORE_DEFAULT_1Q_TAG_P(port)); - } -} - static int bcm_sf2_sw_setup(struct dsa_switch *ds) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); @@ -793,7 +754,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) bcm_sf2_port_disable(ds, port, NULL); } - bcm_sf2_sw_configure_vlan(ds); + b53_configure_vlan(ds); bcm_sf2_enable_acb(ds); return 0; From 6dfca831c03ef654b1f7bff1b8d487d330e9f76b Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 27 Oct 2017 16:12:30 -0700 Subject: [PATCH 1572/2177] samples/bpf: adjust rlimit RLIMIT_MEMLOCK for xdp1 Default rlimit RLIMIT_MEMLOCK is 64KB, causes bpf map failure. e.g. 
[root@lab bpf]#./xdp1 -N $( Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/xdp1_user.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 2431c0321b71..fdaefe91801d 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "bpf_load.h" #include "bpf_util.h" @@ -69,6 +70,7 @@ static void usage(const char *prog) int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; const char *optstr = "SN"; char filename[256]; int opt; @@ -91,6 +93,12 @@ int main(int argc, char **argv) usage(basename(argv[0])); return 1; } + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + ifindex = strtoul(argv[optind], NULL, 0); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); From 21d72af7dcf0c9f78f4fbdb93315568731014e66 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 27 Oct 2017 17:28:22 -0700 Subject: [PATCH 1573/2177] samples/bpf: adjust rlimit RLIMIT_MEMLOCK for xdp_redirect_map Default rlimit RLIMIT_MEMLOCK is 64KB, causes bpf map failure. e.g. [root@labbpf]# ./xdp_redirect_map $( $( Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/xdp_redirect_map_user.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index d4d86a273fba..978a532f0748 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "bpf_load.h" #include "bpf_util.h" @@ -74,6 +75,7 @@ static void usage(const char *prog) int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; const char *optstr = "SN"; char filename[256]; int ret, opt, key = 0; @@ -97,6 +99,11 @@ int main(int argc, char **argv) return 1; } + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + ifindex_in = strtoul(argv[optind], NULL, 0); ifindex_out = strtoul(argv[optind + 1], NULL, 0); printf("input: %d output: %d\n", ifindex_in, ifindex_out); From 2ea2352ede9d97585164a7e19224955f4e4ca8db Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 27 Oct 2017 17:30:12 -0700 Subject: [PATCH 1574/2177] ipv6: prevent user from adding cached routes Cached routes should only be created by the system when receiving pmtu discovery or ip redirect msg. Users should not be allowed to create cached routes. Furthermore, after the patch series to move cached routes into exception table, user added cached routes will trigger the following warning in fib6_add(): WARNING: CPU: 0 PID: 2985 at net/ipv6/ip6_fib.c:1137 fib6_add+0x20d9/0x2c10 net/ipv6/ip6_fib.c:1137 Kernel panic - not syncing: panic_on_warn set ... 
CPU: 0 PID: 2985 Comm: syzkaller320388 Not tainted 4.14.0-rc3+ #74 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x417 kernel/panic.c:181 __warn+0x1c4/0x1d9 kernel/panic.c:542 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178 do_trap_no_signal arch/x86/kernel/traps.c:212 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:261 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311 invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 RIP: 0010:fib6_add+0x20d9/0x2c10 net/ipv6/ip6_fib.c:1137 RSP: 0018:ffff8801cf09f6a0 EFLAGS: 00010297 RAX: ffff8801ce45e340 RBX: 1ffff10039e13eec RCX: ffff8801d749c814 RDX: 0000000000000000 RSI: ffff8801d749c700 RDI: ffff8801d749c780 RBP: ffff8801cf09fa08 R08: 0000000000000000 R09: ffff8801cf09f360 R10: ffff8801cf09f2d8 R11: 1ffff10039c8befb R12: 0000000000000001 R13: dffffc0000000000 R14: ffff8801d749c700 R15: ffffffff860655c0 __ip6_ins_rt+0x6c/0x90 net/ipv6/route.c:1011 ip6_route_add+0x148/0x1a0 net/ipv6/route.c:2782 ipv6_route_ioctl+0x4d5/0x690 net/ipv6/route.c:3291 inet6_ioctl+0xef/0x1e0 net/ipv6/af_inet6.c:521 sock_do_ioctl+0x65/0xb0 net/socket.c:961 sock_ioctl+0x2c2/0x440 net/socket.c:1058 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1530 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe So we fix this by failing the attemp to add cached routes from userspace with returning EINVAL error. Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache") Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/uapi/linux/ipv6_route.h | 2 +- net/ipv6/route.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index d496c02e14bc..c15d8054905c 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -28,7 +28,7 @@ #define RTF_ROUTEINFO 0x00800000 /* route information - RA */ -#define RTF_CACHE 0x01000000 /* cache entry */ +#define RTF_CACHE 0x01000000 /* read-only: can not be set by user */ #define RTF_FLOW 0x02000000 /* flow significant route */ #define RTF_POLICY 0x04000000 /* policy route */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 605e5dc1c010..70d9659fc1e9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2478,6 +2478,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } + /* RTF_CACHE is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_CACHE) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; From 2660d226d9901c2c82c81f0b3dc5e6737eed2dfe Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 28 Oct 2017 05:03:38 +0000 Subject: [PATCH 1575/2177] net: aquantia: Make local functions static Fixes the following sparse warnings: drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c:224:5: warning: symbol 'aq_ethtool_get_coalesce' was not declared. Should it be static? drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c:245:5: warning: symbol 'aq_ethtool_set_coalesce' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index d5e99b468870..70efb7467bf3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -221,8 +221,8 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev, return err; } -int aq_ethtool_get_coalesce(struct net_device *ndev, - struct ethtool_coalesce *coal) +static int aq_ethtool_get_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) { struct aq_nic_s *aq_nic = netdev_priv(ndev); struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); @@ -242,8 +242,8 @@ int aq_ethtool_get_coalesce(struct net_device *ndev, return 0; } -int aq_ethtool_set_coalesce(struct net_device *ndev, - struct ethtool_coalesce *coal) +static int aq_ethtool_set_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) { struct aq_nic_s *aq_nic = netdev_priv(ndev); struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); From 995231c820e3bd3633cb38bf4ea6f2541e1da331 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 26 Oct 2017 14:16:05 -0700 Subject: [PATCH 1576/2177] tools: bpftool: add bash completion for bpftool Add a completion file for bash. The completion function runs bpftool when needed, making it smart enough to help users complete ids or tags for eBPF programs and maps currently on the system. Update Makefile to install completion file to /usr/share/bash-completion/completions when running `make install`. Emacs file mode and (at the end) Vim modeline have been added, to keep the style in use for most existing bash completion files. In this, it differs from tools/perf/perf-completion.sh, which seems to be the only other completion file among the kernel sources repository. This is also valid for indent style: 4-space indents, as in other completion files. Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/bpf/bpftool/Makefile | 3 + tools/bpf/bpftool/bash-completion/bpftool | 354 ++++++++++++++++++++++ 2 files changed, 357 insertions(+) create mode 100644 tools/bpf/bpftool/bash-completion/bpftool diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 4f339824ca57..813826c50936 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -46,6 +46,7 @@ $(LIBBPF)-clean: $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null prefix = /usr +bash_compdir ?= $(prefix)/share/bash-completion/completions CC = gcc @@ -76,6 +77,8 @@ clean: $(LIBBPF)-clean install: install $(OUTPUT)bpftool $(prefix)/sbin/bpftool + install -m 0755 -d $(bash_compdir) + install -m 0644 bash-completion/bpftool $(bash_compdir) doc: $(Q)$(MAKE) -C Documentation/ diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool new file mode 100644 index 000000000000..7febee05c8e7 --- /dev/null +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -0,0 +1,354 @@ +# bpftool(8) bash completion -*- shell-script -*- +# +# Copyright (C) 2017 Netronome Systems, Inc. +# +# This software is dual licensed under the GNU General License +# Version 2, June 1991 as shown in the file COPYING in the top-level +# directory of this source tree or the BSD 2-Clause License provided +# below. You have the option to license this software under the +# complete terms of either license. 
+# +# The BSD 2-Clause License: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# 1. Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# 2. Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Author: Quentin Monnet + +# Takes a list of words in argument; each one of them is added to COMPREPLY if +# it is not already present on the command line. Returns no value. +_bpftool_once_attr() +{ + local w idx found + for w in $*; do + found=0 + for (( idx=3; idx < ${#words[@]}-1; idx++ )); do + if [[ $w == ${words[idx]} ]]; then + found=1 + break + fi + done + [[ $found -eq 0 ]] && \ + COMPREPLY+=( $( compgen -W "$w" -- "$cur" ) ) + done +} + +# Takes a list of words in argument; adds them all to COMPREPLY if none of them +# is already present on the command line. Returns no value. +_bpftool_one_of_list() +{ + local w idx + for w in $*; do + for (( idx=3; idx < ${#words[@]}-1; idx++ )); do + [[ $w == ${words[idx]} ]] && return 1 + done + done + COMPREPLY+=( $( compgen -W "$*" -- "$cur" ) ) +} + +_bpftool_get_map_ids() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ + command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) +} + +_bpftool_get_prog_ids() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ + command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) +} + +_bpftool_get_prog_tags() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ + command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) ) +} + +# For bpftool map update: retrieve type of the map to update. +_bpftool_map_update_map_type() +{ + local keyword ref + for (( idx=3; idx < ${#words[@]}-1; idx++ )); do + if [[ ${words[$((idx-2))]} == "update" ]]; then + keyword=${words[$((idx-1))]} + ref=${words[$((idx))]} + fi + done + [[ -z $ref ]] && return 0 + + local type + type=$(bpftool -jp map show $keyword $ref | \ + command sed -n 's/.*"type": "\(.*\)",$/\1/p') + printf $type +} + +_bpftool_map_update_get_id() +{ + # Is it the map to update, or a map to insert into the map to update? + # Search for "value" keyword. + local idx value + for (( idx=7; idx < ${#words[@]}-1; idx++ )); do + if [[ ${words[idx]} == "value" ]]; then + value=1 + break + fi + done + [[ $value -eq 0 ]] && _bpftool_get_map_ids && return 0 + + # Id to complete is for a value. It can be either prog id or map id. This + # depends on the type of the map to update. 
+ local type=$(_bpftool_map_update_map_type) + case $type in + array_of_maps|hash_of_maps) + _bpftool_get_map_ids + return 0 + ;; + prog_array) + _bpftool_get_prog_ids + return 0 + ;; + *) + return 0 + ;; + esac +} + +_bpftool() +{ + local cur prev words objword + _init_completion || return + + # Deal with simplest keywords + case $prev in + help|key|opcodes) + return 0 + ;; + tag) + _bpftool_get_prog_tags + return 0 + ;; + file|pinned) + _filedir + return 0 + ;; + batch) + COMPREPLY=( $( compgen -W 'file' -- "$cur" ) ) + return 0 + ;; + esac + + # Search for object and command + local object command cmdword + for (( cmdword=1; cmdword < ${#words[@]}-1; cmdword++ )); do + [[ -n $object ]] && command=${words[cmdword]} && break + [[ ${words[cmdword]} != -* ]] && object=${words[cmdword]} + done + + if [[ -z $object ]]; then + case $cur in + -*) + local c='--version --json --pretty' + COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$( bpftool help 2>&1 | \ + command sed \ + -e '/OBJECT := /!d' \ + -e 's/.*{//' \ + -e 's/}.*//' \ + -e 's/|//g' )" -- "$cur" ) ) + COMPREPLY+=( $( compgen -W 'batch help' -- "$cur" ) ) + return 0 + ;; + esac + fi + + [[ $command == help ]] && return 0 + + # Completion depends on object and command in use + case $object in + prog) + case $prev in + id) + _bpftool_get_prog_ids + return 0 + ;; + esac + + local PROG_TYPE='id pinned tag' + case $command in + show) + [[ $prev != "$command" ]] && return 0 + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + dump) + case $prev in + $command) + COMPREPLY+=( $( compgen -W "xlated jited" -- \ + "$cur" ) ) + return 0 + ;; + xlated|jited) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ + "$cur" ) ) + return 0 + ;; + *) + _bpftool_once_attr 'file' + COMPREPLY+=( $( compgen -W 'opcodes' -- \ + "$cur" ) ) + return 0 + ;; + esac + ;; + pin) + if [[ $prev == "$command" ]]; then + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + else + _filedir + fi + return 0 + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'dump help pin show' -- \ + "$cur" ) ) + ;; + esac + ;; + map) + local MAP_TYPE='id pinned' + case $command in + show|dump) + case $prev in + $command) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + id) + _bpftool_get_map_ids + return 0 + ;; + *) + return 0 + ;; + esac + ;; + lookup|getnext|delete) + case $prev in + $command) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + id) + _bpftool_get_map_ids + return 0 + ;; + key) + return 0 + ;; + *) + _bpftool_once_attr 'key' + return 0 + ;; + esac + ;; + update) + case $prev in + $command) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + id) + _bpftool_map_update_get_id + return 0 + ;; + key) + return 0 + ;; + value) + # We can have bytes, or references to a prog or a + # map, depending on the type of the map to update. + case $(_bpftool_map_update_map_type) in + array_of_maps|hash_of_maps) + local MAP_TYPE='id pinned' + COMPREPLY+=( $( compgen -W "$MAP_TYPE" \ + -- "$cur" ) ) + return 0 + ;; + prog_array) + local PROG_TYPE='id pinned tag' + COMPREPLY+=( $( compgen -W "$PROG_TYPE" \ + -- "$cur" ) ) + return 0 + ;; + *) + return 0 + ;; + esac + return 0 + ;; + *) + _bpftool_once_attr 'key' + local UPDATE_FLAGS='any exist noexist' + for (( idx=3; idx < ${#words[@]}-1; idx++ )); do + if [[ ${words[idx]} == 'value' ]]; then + # 'value' is present, but is not the last + # word i.e. we can now have UPDATE_FLAGS. 
+ _bpftool_one_of_list "$UPDATE_FLAGS" + return 0 + fi + done + for (( idx=3; idx < ${#words[@]}-1; idx++ )); do + if [[ ${words[idx]} == 'key' ]]; then + # 'key' is present, but is not the last + # word i.e. we can now have 'value'. + _bpftool_once_attr 'value' + return 0 + fi + done + return 0 + ;; + esac + ;; + pin) + if [[ $prev == "$command" ]]; then + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + else + _filedir + fi + return 0 + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'delete dump getnext help \ + lookup pin show update' -- "$cur" ) ) + ;; + esac + ;; + esac +} && +complete -F _bpftool bpftool + +# ex: ts=4 sw=4 et filetype=sh From a190d04db93710ae166749055b6985397c6d13f5 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Thu, 26 Oct 2017 15:09:21 -0700 Subject: [PATCH 1577/2177] ipvlan: introduce 'private' attribute for all existing modes. IPvlan has always operated in bridge mode. However there are scenarios where each slave should be able to talk through the master device but not necessarily across each other. Think of an environment where each of a namespace is a private and independant customer. In this scenario the machine which is hosting these namespaces neither want to tell who their neighbor is nor the individual namespaces care to talk to neighbor on short-circuited network path. This patch implements the mode that is very similar to the 'private' mode in macvlan where individual slaves can send and receive traffic through the master device, just that they can not talk among slave devices. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- Documentation/networking/ipvlan.txt | 30 ++++++++++++++++++++--- drivers/net/ipvlan/ipvlan.h | 16 ++++++++++++ drivers/net/ipvlan/ipvlan_core.c | 15 +++++++++--- drivers/net/ipvlan/ipvlan_main.c | 38 +++++++++++++++++++++++++++-- include/uapi/linux/if_link.h | 3 +++ 5 files changed, 94 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index 1fe42a874aae..bfa91c77a4c9 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -22,9 +22,19 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module There are no module parameters for this driver and it can be configured using IProute2/ip utility. - ip link add link name type ipvlan mode { l2 | l3 | l3s } + ip link add link name type ipvlan [ mode MODE ] [ FLAGS ] + where + MODE: l3 (default) | l3s | l2 + FLAGS: bridge (default) | private - e.g. ip link add link eth0 name ipvl0 type ipvlan mode l2 + e.g. + (a) Following will create IPvlan link with eth0 as master in + L3 bridge mode + bash# ip link add link eth0 name ipvl0 type ipvlan + (b) This command will create IPvlan link in L2 bridge mode. + bash# ip link add link eth0 name ipvl0 type ipvlan mode l2 bridge + (c) This command will create an IPvlan device in L2 private mode. + bash# ip link add link eth0 name ipvlan type ipvlan mode l2 private 4. Operating modes: @@ -54,7 +64,21 @@ works in this mode and hence it is L3-symmetric (L3s). This will have slightly l performance but that shouldn't matter since you are choosing this mode over plain-L3 mode to make conn-tracking work. -5. What to choose (macvlan vs. ipvlan)? +5. Mode flags: + At this time following mode flags are available + +5.1 bridge: + This is the default option. To configure the IPvlan port in this mode, +user can choose to either add this option on the command-line or don't specify +anything. 
This is the traditional mode where slaves can cross-talk among +themseleves apart from talking through the master device. + +5.2 private: + If this option is added to the command-line, the port is set in private +mode. i.e. port wont allow cross communication between slaves. + + +6. What to choose (macvlan vs. ipvlan)? These two devices are very similar in many regards and the specific use case could very well define which device to choose. if one of the following situations defines your use case then you can choose to use ipvlan - diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index ba8173a0b62e..9941851bcc13 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -96,6 +96,7 @@ struct ipvl_port { struct hlist_head hlhead[IPVLAN_HASH_SIZE]; struct list_head ipvlans; u16 mode; + u16 flags; u16 dev_id_start; struct work_struct wq; struct sk_buff_head backlog; @@ -123,6 +124,21 @@ static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) return rtnl_dereference(d->rx_handler_data); } +static inline bool ipvlan_is_private(const struct ipvl_port *port) +{ + return !!(port->flags & IPVLAN_F_PRIVATE); +} + +static inline void ipvlan_mark_private(struct ipvl_port *port) +{ + port->flags |= IPVLAN_F_PRIVATE; +} + +static inline void ipvlan_clear_private(struct ipvl_port *port) +{ + port->flags &= ~IPVLAN_F_PRIVATE; +} + void ipvlan_init_secret(void); unsigned int ipvlan_mac_hash(const unsigned char *addr); rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1f3295e274d0..72fd56de9c00 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -515,9 +515,13 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) goto out; addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); - if (addr) + if (addr) { + if (ipvlan_is_private(ipvlan->port)) { + consume_skb(skb); + return NET_XMIT_DROP; + } return ipvlan_rcv_frame(addr, &skb, true); - + } out: ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev); return ipvlan_process_outbound(skb); @@ -535,8 +539,13 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); if (lyr3h) { addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); - if (addr) + if (addr) { + if (ipvlan_is_private(ipvlan->port)) { + consume_skb(skb); + return NET_XMIT_DROP; + } return ipvlan_rcv_frame(addr, &skb, true); + } } skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index f0ab55df57f1..4368afb1934c 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -462,11 +462,24 @@ static int ipvlan_nl_changelink(struct net_device *dev, struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); int err = 0; - if (data && data[IFLA_IPVLAN_MODE]) { + if (!data) + return 0; + + if (data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); err = ipvlan_set_port_mode(port, nmode); } + + if (!err && data[IFLA_IPVLAN_FLAGS]) { + u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); + + if (flags & IPVLAN_F_PRIVATE) + ipvlan_mark_private(port); + else + ipvlan_clear_private(port); + } + return err; } @@ -474,18 +487,30 @@ static size_t ipvlan_nl_getsize(const struct net_device *dev) { return (0 + nla_total_size(2) /* IFLA_IPVLAN_MODE */ + + nla_total_size(2) /* IFLA_IPVLAN_FLAGS 
*/ ); } static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - if (data && data[IFLA_IPVLAN_MODE]) { + if (!data) + return 0; + + if (data[IFLA_IPVLAN_MODE]) { u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX) return -EINVAL; } + if (data[IFLA_IPVLAN_FLAGS]) { + u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); + + /* Only one bit is used at this moment. */ + if (flags & ~IPVLAN_F_PRIVATE) + return -EINVAL; + } + return 0; } @@ -502,6 +527,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb, ret = -EMSGSIZE; if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode)) goto err; + if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags)) + goto err; return 0; @@ -549,6 +576,12 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); + /* Flags are per port and latest update overrides. User has + * to be consistent in setting it just like the mode attribute. + */ + if (data && data[IFLA_IPVLAN_FLAGS]) + ipvlan->port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); + /* If the port-id base is at the MAX value, then wrap it around and * begin from 0x1 again. This may be due to a busy system where lots * of slaves are getting created and deleted. @@ -644,6 +677,7 @@ EXPORT_SYMBOL_GPL(ipvlan_link_setup); static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = { [IFLA_IPVLAN_MODE] = { .type = NLA_U16 }, + [IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 }, }; static struct rtnl_link_ops ipvlan_link_ops = { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b037e0ab1975..052e32cd584c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -465,6 +465,7 @@ enum macsec_validation_type { enum { IFLA_IPVLAN_UNSPEC, IFLA_IPVLAN_MODE, + IFLA_IPVLAN_FLAGS, __IFLA_IPVLAN_MAX }; @@ -477,6 +478,8 @@ enum ipvlan_mode { IPVLAN_MODE_MAX }; +#define IPVLAN_F_PRIVATE 0x01 + /* VXLAN section */ enum { IFLA_VXLAN_UNSPEC, From fe89aa6b250c1011ccf425fbb7998e96bd54263f Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Thu, 26 Oct 2017 15:09:25 -0700 Subject: [PATCH 1578/2177] ipvlan: implement VEPA mode This is very similar to the Macvlan VEPA mode, however, there is some difference. IPvlan uses the mac-address of the lower device, so the VEPA mode has implications of ICMP-redirects for packets destined for its immediate neighbors sharing same master since the packets will have same source and dest mac. The external switch/router will send redirect msg. Having said that, this will be useful tool in terms of debugging since IPvlan will not switch packets within its slaves and rely completely on the external entity as intended in 802.1Qbg. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- Documentation/networking/ipvlan.txt | 12 +++++++++++- drivers/net/ipvlan/ipvlan.h | 15 +++++++++++++++ drivers/net/ipvlan/ipvlan_core.c | 17 ++++++++++------- drivers/net/ipvlan/ipvlan_main.c | 13 +++++++++++-- include/uapi/linux/if_link.h | 1 + 5 files changed, 48 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index bfa91c77a4c9..812ef003e0a8 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -25,7 +25,7 @@ using IProute2/ip utility. 
ip link add link name type ipvlan [ mode MODE ] [ FLAGS ] where MODE: l3 (default) | l3s | l2 - FLAGS: bridge (default) | private + FLAGS: bridge (default) | private | vepa e.g. (a) Following will create IPvlan link with eth0 as master in @@ -35,6 +35,8 @@ using IProute2/ip utility. bash# ip link add link eth0 name ipvl0 type ipvlan mode l2 bridge (c) This command will create an IPvlan device in L2 private mode. bash# ip link add link eth0 name ipvlan type ipvlan mode l2 private + (d) This command will create an IPvlan device in L2 vepa mode. + bash# ip link add link eth0 name ipvlan type ipvlan mode l2 vepa 4. Operating modes: @@ -77,6 +79,14 @@ themseleves apart from talking through the master device. If this option is added to the command-line, the port is set in private mode. i.e. port wont allow cross communication between slaves. +5.3 vepa: + If this is added to the command-line, the port is set in VEPA mode. +i.e. port will offload switching functionality to the external entity as +described in 802.1Qbg +Note: VEPA mode in IPvlan has limitations. IPvlan uses the mac-address of the +master-device, so the packets which are emitted in this mode for the adjacent +neighbor will have source and destination mac same. This will make the switch / +router send the redirect message. 6. What to choose (macvlan vs. ipvlan)? These two devices are very similar in many regards and the specific use diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 9941851bcc13..5166575a164d 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -139,6 +139,21 @@ static inline void ipvlan_clear_private(struct ipvl_port *port) port->flags &= ~IPVLAN_F_PRIVATE; } +static inline bool ipvlan_is_vepa(const struct ipvl_port *port) +{ + return !!(port->flags & IPVLAN_F_VEPA); +} + +static inline void ipvlan_mark_vepa(struct ipvl_port *port) +{ + port->flags |= IPVLAN_F_VEPA; +} + +static inline void ipvlan_clear_vepa(struct ipvl_port *port) +{ + port->flags &= ~IPVLAN_F_VEPA; +} + void ipvlan_init_secret(void); unsigned int ipvlan_mac_hash(const unsigned char *addr); rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 72fd56de9c00..034ae4c57196 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -514,13 +514,15 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) if (!lyr3h) goto out; - addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); - if (addr) { - if (ipvlan_is_private(ipvlan->port)) { - consume_skb(skb); - return NET_XMIT_DROP; + if (!ipvlan_is_vepa(ipvlan->port)) { + addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); + if (addr) { + if (ipvlan_is_private(ipvlan->port)) { + consume_skb(skb); + return NET_XMIT_DROP; + } + return ipvlan_rcv_frame(addr, &skb, true); } - return ipvlan_rcv_frame(addr, &skb, true); } out: ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev); @@ -535,7 +537,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) void *lyr3h; int addr_type; - if (ether_addr_equal(eth->h_dest, eth->h_source)) { + if (!ipvlan_is_vepa(ipvlan->port) && + ether_addr_equal(eth->h_dest, eth->h_source)) { lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); if (lyr3h) { addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 4368afb1934c..a266aa435d4d 100644 --- 
a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -478,6 +478,11 @@ static int ipvlan_nl_changelink(struct net_device *dev, ipvlan_mark_private(port); else ipvlan_clear_private(port); + + if (flags & IPVLAN_F_VEPA) + ipvlan_mark_vepa(port); + else + ipvlan_clear_vepa(port); } return err; @@ -506,8 +511,12 @@ static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_IPVLAN_FLAGS]) { u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); - /* Only one bit is used at this moment. */ - if (flags & ~IPVLAN_F_PRIVATE) + /* Only two bits are used at this moment. */ + if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) + return -EINVAL; + /* Also both flags can't be active at the same time. */ + if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) == + (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) return -EINVAL; } diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 052e32cd584c..81f26473d728 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -479,6 +479,7 @@ enum ipvlan_mode { }; #define IPVLAN_F_PRIVATE 0x01 +#define IPVLAN_F_VEPA 0x02 /* VXLAN section */ enum { From 67d2f8781b9f00d1089aafcfa3dc09fcd0f343e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ronald=20Tschal=C3=A4r?= Date: Wed, 25 Oct 2017 22:14:53 -0700 Subject: [PATCH 1579/2177] Bluetooth: hci_ldisc: Allow sleeping while proto locks are held. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit dec2c92880cc5435381d50e3045ef018a762a917 ("Bluetooth: hci_ldisc: Use rwlocking to avoid closing proto races") introduced locks in hci_ldisc that are held while calling the proto functions. These locks are rwlock's, and hence do not allow sleeping while they are held. However, the proto functions that hci_bcm registers use mutexes and hence need to be able to sleep. In more detail: hci_uart_tty_receive() and hci_uart_dequeue() both acquire the rwlock, after which they call proto->recv() and proto->dequeue(), respectively. In the case of hci_bcm these point to bcm_recv() and bcm_dequeue(). The latter both acquire the bcm_device_lock, which is a mutex, so doing so results in a call to might_sleep(). But since we're holding a rwlock in hci_ldisc, that results in the following BUG (this for the dequeue case - a similar one for the receive case is omitted for brevity): BUG: sleeping function called from invalid context at kernel/locking/mutex.c in_atomic(): 1, irqs_disabled(): 0, pid: 7303, name: kworker/7:3 INFO: lockdep is turned off. CPU: 7 PID: 7303 Comm: kworker/7:3 Tainted: G W OE 4.13.2+ #17 Hardware name: Apple Inc. MacBookPro13,3/Mac-A5C67F76ED83108C, BIOS MBP133.8 Workqueue: events hci_uart_write_work [hci_uart] Call Trace: dump_stack+0x8e/0xd6 ___might_sleep+0x164/0x250 __might_sleep+0x4a/0x80 __mutex_lock+0x59/0xa00 ? lock_acquire+0xa3/0x1f0 ? lock_acquire+0xa3/0x1f0 ? hci_uart_write_work+0xd3/0x160 [hci_uart] mutex_lock_nested+0x1b/0x20 ? mutex_lock_nested+0x1b/0x20 bcm_dequeue+0x21/0xc0 [hci_uart] hci_uart_write_work+0xe6/0x160 [hci_uart] process_one_work+0x253/0x6a0 worker_thread+0x4d/0x3b0 kthread+0x133/0x150 We can't replace the mutex in hci_bcm, because there are other calls there that might sleep. Therefore this replaces the rwlock's in hci_ldisc with rw_semaphore's (which allow sleeping). This is a safer approach anyway as it reduces the restrictions on the proto callbacks. Also, because acquiring write-lock is very rare compared to acquiring the read-lock, the percpu variant of rw_semaphore is used. 
Lastly, because hci_uart_tx_wakeup() may be called from an IRQ context, we can't block (sleep) while trying acquire the read lock there, so we use the trylock variant. Signed-off-by: Ronald Tschalär Reviewed-by: Lukas Wunner Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_ldisc.c | 38 ++++++++++++++++++++--------------- drivers/bluetooth/hci_uart.h | 2 +- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index eec95019f15c..31def781a562 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -115,12 +115,12 @@ static inline struct sk_buff *hci_uart_dequeue(struct hci_uart *hu) struct sk_buff *skb = hu->tx_skb; if (!skb) { - read_lock(&hu->proto_lock); + percpu_down_read(&hu->proto_lock); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) skb = hu->proto->dequeue(hu); - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); } else { hu->tx_skb = NULL; } @@ -130,7 +130,14 @@ static inline struct sk_buff *hci_uart_dequeue(struct hci_uart *hu) int hci_uart_tx_wakeup(struct hci_uart *hu) { - read_lock(&hu->proto_lock); + /* This may be called in an IRQ context, so we can't sleep. Therefore + * we try to acquire the lock only, and if that fails we assume the + * tty is being closed because that is the only time the write lock is + * acquired. If, however, at some point in the future the write lock + * is also acquired in other situations, then this must be revisited. + */ + if (!percpu_down_read_trylock(&hu->proto_lock)) + return 0; if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) goto no_schedule; @@ -145,7 +152,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu) schedule_work(&hu->write_work); no_schedule: - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); return 0; } @@ -247,12 +254,12 @@ static int hci_uart_flush(struct hci_dev *hdev) tty_ldisc_flush(tty); tty_driver_flush_buffer(tty); - read_lock(&hu->proto_lock); + percpu_down_read(&hu->proto_lock); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) hu->proto->flush(hu); - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); return 0; } @@ -275,15 +282,15 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s: type %d len %d", hdev->name, hci_skb_pkt_type(skb), skb->len); - read_lock(&hu->proto_lock); + percpu_down_read(&hu->proto_lock); if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) { - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); return -EUNATCH; } hu->proto->enqueue(hu, skb); - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); hci_uart_tx_wakeup(hu); @@ -486,7 +493,7 @@ static int hci_uart_tty_open(struct tty_struct *tty) INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); - rwlock_init(&hu->proto_lock); + percpu_init_rwsem(&hu->proto_lock); /* Flush any pending characters in the driver */ tty_driver_flush_buffer(tty); @@ -503,7 +510,6 @@ static void hci_uart_tty_close(struct tty_struct *tty) { struct hci_uart *hu = tty->disc_data; struct hci_dev *hdev; - unsigned long flags; BT_DBG("tty %p", tty); @@ -520,9 +526,9 @@ static void hci_uart_tty_close(struct tty_struct *tty) cancel_work_sync(&hu->write_work); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) { - write_lock_irqsave(&hu->proto_lock, flags); + percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); - write_unlock_irqrestore(&hu->proto_lock, flags); + percpu_up_write(&hu->proto_lock); if (hdev) { if 
(test_bit(HCI_UART_REGISTERED, &hu->flags)) @@ -582,10 +588,10 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, if (!hu || tty != hu->tty) return; - read_lock(&hu->proto_lock); + percpu_down_read(&hu->proto_lock); if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) { - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); return; } @@ -593,7 +599,7 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, * tty caller */ hu->proto->recv(hu, data, count); - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); if (hu->hdev) hu->hdev->stat.byte_rx += count; diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index d9cd95d81149..66e8c68e4607 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -87,7 +87,7 @@ struct hci_uart { struct work_struct write_work; const struct hci_uart_proto *proto; - rwlock_t proto_lock; /* Stop work for proto close */ + struct percpu_rw_semaphore proto_lock; /* Stop work for proto close */ void *priv; struct sk_buff *tx_skb; From 858ff38af77fc660092e82474ecc6ac135ed29fe Mon Sep 17 00:00:00 2001 From: Bartosz Chronowski Date: Thu, 26 Oct 2017 10:22:43 +0200 Subject: [PATCH 1580/2177] Bluetooth: btusb: Add new NFA344A entry. This change allows proper low power mode entry in suspend. /sys/kernel/debug/usb/devices entry: T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=03 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e09f Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Bartosz Chronowski Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c054d7bce490..b8eb39436ef2 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -267,6 +267,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0xe301), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x0489, 0xe09f), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0489, 0xe0a2), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x04ca, 0x3016), .driver_info = BTUSB_QCA_ROME }, From f17d858ed0a48270db4368d8cf370e3839ee6f4f Mon Sep 17 
00:00:00 2001 From: Jaganath Kanakkassery Date: Wed, 25 Oct 2017 10:58:48 +0530 Subject: [PATCH 1581/2177] Bluetooth: Fix potential memory leak If command is added to req then it should be freed in case if hdev is down or HCI_ADVERTISING flag is set. This introduces a helper in hci_request to purge the cmd_q to make cmd_q internal to hci_request which is used to fix the leak. This also replace accessing of cmd_q in hci_conn with the new helper. Signed-off-by: Jaganath Kanakkassery Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_request.c | 5 +++++ net/bluetooth/hci_request.h | 1 + net/bluetooth/mgmt.c | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index dc59eae54717..746adcb62259 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -907,7 +907,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE) { - skb_queue_purge(&req.cmd_q); + hci_req_purge(&req); hci_conn_del(conn); return ERR_PTR(-EBUSY); } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b73ac149de34..7f28d17dc792 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -41,6 +41,11 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev) req->err = 0; } +void hci_req_purge(struct hci_request *req) +{ + skb_queue_purge(&req->cmd_q); +} + static int req_run(struct hci_request *req, hci_req_complete_t complete, hci_req_complete_skb_t complete_skb) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index dde77bd59f91..702beb140d9f 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -36,6 +36,7 @@ struct hci_request { }; void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +void hci_req_purge(struct hci_request *req); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1fba2a03f8ae..07a3cc29f426 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6383,6 +6383,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, if (skb_queue_empty(&req.cmd_q) || !hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + hci_req_purge(&req); rp.instance = cp->instance; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, From 13df5000d39a1570a88daa44855ebe0b0f966b14 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 19 Oct 2017 14:29:10 +0200 Subject: [PATCH 1582/2177] Bluetooth: hci_ath: Add ath_vendor_cmd helper Introduce ath_vendor_cmd function which can be used to configure 'tags' and patch the firmware. ATH vendor command has the following format: | OPCODE (u8) | INDEX (LE16) | DLEN (U8) | DATA (U8 * DLEN) | BD address configuration tag is at index 0x0001. 
Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_ath.c | 45 ++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index 0ccf6bf01ed4..5f17ec339fb8 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -50,6 +50,17 @@ struct ath_struct { struct work_struct ctxtsw; }; +#define OP_WRITE_TAG 0x01 + +#define INDEX_BDADDR 0x01 + +struct ath_vendor_cmd { + __u8 opcode; + __le16 index; + __u8 len; + __u8 data[251]; +} __packed; + static int ath_wakeup_ar3k(struct tty_struct *tty) { int status = tty->driver->ops->tiocmget(tty); @@ -144,30 +155,34 @@ static int ath_flush(struct hci_uart *hu) return 0; } -static int ath_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) +static int ath_vendor_cmd(struct hci_dev *hdev, uint8_t opcode, uint16_t index, + const void *data, size_t dlen) { struct sk_buff *skb; - u8 buf[10]; - int err; + struct ath_vendor_cmd cmd; - buf[0] = 0x01; - buf[1] = 0x01; - buf[2] = 0x00; - buf[3] = sizeof(bdaddr_t); - memcpy(buf + 4, bdaddr, sizeof(bdaddr_t)); + if (dlen > sizeof(cmd.data)) + return -EINVAL; - skb = __hci_cmd_sync(hdev, 0xfc0b, sizeof(buf), buf, HCI_INIT_TIMEOUT); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - BT_ERR("%s: Change address command failed (%d)", - hdev->name, err); - return err; - } + cmd.opcode = opcode; + cmd.index = cpu_to_le16(index); + cmd.len = dlen; + memcpy(cmd.data, data, dlen); + + skb = __hci_cmd_sync(hdev, 0xfc0b, dlen + 4, &cmd, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) + return PTR_ERR(skb); kfree_skb(skb); return 0; } +static int ath_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) +{ + return ath_vendor_cmd(hdev, OP_WRITE_TAG, INDEX_BDADDR, bdaddr, + sizeof(*bdaddr)); +} + static int ath_setup(struct hci_uart *hu) { BT_DBG("hu %p", hu); From 1f01d8be0e6a04bd682a55f6d50c14c1679e7571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Konrad=20Zapa=C5=82owicz?= Date: Tue, 17 Oct 2017 15:53:49 +0200 Subject: [PATCH 1583/2177] Bluetooth: increase timeout for le auto connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch increases the connection timeout for LE connections that are triggered by the advertising report to 4 seconds. It has been observed that devices equipped with wifi+bt combo SoC fail to create a connection with BLE devices due to their coexistence issues. Increasing this timeout gives them enough time to complete the connection with success. 
Signed-off-by: Konrad Zapałowicz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fe98f0a5bef0..1668211297a9 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -273,7 +273,7 @@ enum { #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ #define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ -#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 From a9ee77af751f435675054f5a7e2d2e69cbfe9e33 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Sep 2017 15:51:23 +0200 Subject: [PATCH 1584/2177] Bluetooth: avoid recursive locking in hci_send_to_channel() Mart reported a deadlock in -RT in the call path: hci_send_monitor_ctrl_event() -> hci_send_to_channel() because both functions acquire the same read lock hci_sk_list.lock. This is also a mainline issue because the qrwlock implementation is writer fair (the traditional rwlock implementation is reader biased). To avoid the deadlock there is now __hci_send_to_channel() which expects the readlock to be held. Fixes: 38ceaa00d02d ("Bluetooth: Add support for sending MGMT commands and events to monitor") Reported-by: Mart van de Wege Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 65d734c165bd..923e9a271872 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) } /* Send frame to sockets with specific channel */ -void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, - int flag, struct sock *skip_sk) +static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb, + int flag, struct sock *skip_sk) { struct sock *sk; BT_DBG("channel %u len %d", channel, skb->len); - read_lock(&hci_sk_list.lock); - sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; @@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, kfree_skb(nskb); } +} + +void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, + int flag, struct sock *skip_sk) +{ + read_lock(&hci_sk_list.lock); + __hci_send_to_channel(channel, skb, flag, skip_sk); read_unlock(&hci_sk_list.lock); } @@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, hdr->index = index; hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); - hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, - HCI_SOCK_TRUSTED, NULL); + __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } From 2064ee332e4c1b7495cf68b84355c213d8fe71fd Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 30 Oct 2017 10:42:59 +0100 Subject: [PATCH 1585/2177] Bluetooth: Use bt_dev_err and bt_dev_info when possible In case of using BT_ERR and BT_INFO, convert to bt_dev_err and bt_dev_info when possible. This allows for controller specific reporting. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- drivers/bluetooth/bpa10x.c | 15 ++- drivers/bluetooth/bt3c_cs.c | 2 +- drivers/bluetooth/btbcm.c | 103 ++++++++++---------- drivers/bluetooth/btintel.c | 42 ++++---- drivers/bluetooth/btqca.c | 6 +- drivers/bluetooth/btrtl.c | 21 ++-- drivers/bluetooth/btusb.c | 155 +++++++++++++++--------------- drivers/bluetooth/hci_ath.c | 2 +- drivers/bluetooth/hci_h4.c | 2 +- drivers/bluetooth/hci_qca.c | 12 +-- drivers/bluetooth/hci_serdev.c | 9 +- include/net/bluetooth/bluetooth.h | 3 + net/bluetooth/a2mp.c | 2 +- net/bluetooth/amp.c | 4 +- net/bluetooth/hci_conn.c | 4 +- net/bluetooth/hci_core.c | 35 ++++--- net/bluetooth/hci_event.c | 46 ++++----- net/bluetooth/hci_request.c | 16 +-- net/bluetooth/hci_sysfs.c | 2 +- net/bluetooth/mgmt.c | 56 ++++++----- net/bluetooth/smp.c | 22 ++--- 21 files changed, 279 insertions(+), 280 deletions(-) diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 48d10cb5c9a1..7971bfbd4321 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -117,7 +117,7 @@ static void bpa10x_rx_complete(struct urb *urb) bpa10x_recv_pkts, ARRAY_SIZE(bpa10x_recv_pkts)); if (IS_ERR(data->rx_skb[idx])) { - BT_ERR("%s corrupted event packet", hdev->name); + bt_dev_err(hdev, "corrupted event packet"); hdev->stat.err_rx++; data->rx_skb[idx] = NULL; } @@ -127,8 +127,7 @@ static void bpa10x_rx_complete(struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { - BT_ERR("%s urb %p failed to resubmit (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p failed to resubmit (%d)", urb, -err); usb_unanchor_urb(urb); } } @@ -164,8 +163,7 @@ static inline int bpa10x_submit_intr_urb(struct hci_dev *hdev) err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) { - BT_ERR("%s urb %p submission failed (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); usb_unanchor_urb(urb); } @@ -205,8 +203,7 @@ static inline int bpa10x_submit_bulk_urb(struct hci_dev *hdev) err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) { - BT_ERR("%s urb %p submission failed (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); usb_unanchor_urb(urb); } @@ -272,7 +269,7 @@ static int bpa10x_setup(struct hci_dev *hdev) if (IS_ERR(skb)) return PTR_ERR(skb); - BT_INFO("%s: %s", hdev->name, (char *)(skb->data + 1)); + bt_dev_info(hdev, "%s", (char *)(skb->data + 1)); hci_set_fw_info(hdev, "%s", skb->data + 1); @@ -348,7 +345,7 @@ static int bpa10x_send_frame(struct hci_dev *hdev, struct sk_buff *skb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { - BT_ERR("%s urb %p submission failed", hdev->name, urb); + bt_dev_err(hdev, "urb %p submission failed", urb); kfree(urb->setup_packet); usb_unanchor_urb(urb); } diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index 194788739a83..25b0cf952b91 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -355,7 +355,7 @@ static irqreturn_t bt3c_interrupt(int irq, void *dev_inst) } else if ((stat & 0xff) != 0xff) { if (stat & 0x0020) { int status = bt3c_read(iobase, 0x7002) & 0x10; - BT_INFO("%s: Antenna %s", info->hdev->name, + bt_dev_info(info->hdev, "Antenna %s", status ? 
"out" : "in"); } if (stat & 0x0001) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index ae1fa390f508..afa4cb3b16e3 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -45,13 +45,12 @@ int btbcm_check_bdaddr(struct hci_dev *hdev) HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { int err = PTR_ERR(skb); - BT_ERR("%s: BCM: Reading device address failed (%d)", - hdev->name, err); + bt_dev_err(hdev, "BCM: Reading device address failed (%d)", err); return err; } if (skb->len != sizeof(*bda)) { - BT_ERR("%s: BCM: Device address length mismatch", hdev->name); + bt_dev_err(hdev, "BCM: Device address length mismatch"); kfree_skb(skb); return -EIO; } @@ -74,8 +73,8 @@ int btbcm_check_bdaddr(struct hci_dev *hdev) if (!bacmp(&bda->bdaddr, BDADDR_BCM20702A0) || !bacmp(&bda->bdaddr, BDADDR_BCM4324B3) || !bacmp(&bda->bdaddr, BDADDR_BCM4330B1)) { - BT_INFO("%s: BCM: Using default device address (%pMR)", - hdev->name, &bda->bdaddr); + bt_dev_info(hdev, "BCM: Using default device address (%pMR)", + &bda->bdaddr); set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); } @@ -93,8 +92,7 @@ int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) skb = __hci_cmd_sync(hdev, 0xfc01, 6, bdaddr, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); - BT_ERR("%s: BCM: Change address command failed (%d)", - hdev->name, err); + bt_dev_err(hdev, "BCM: Change address command failed (%d)", err); return err; } kfree_skb(skb); @@ -116,8 +114,8 @@ int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) skb = __hci_cmd_sync(hdev, 0xfc2e, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); - BT_ERR("%s: BCM: Download Minidrv command failed (%d)", - hdev->name, err); + bt_dev_err(hdev, "BCM: Download Minidrv command failed (%d)", + err); goto done; } kfree_skb(skb); @@ -136,7 +134,7 @@ int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) fw_size -= sizeof(*cmd); if (fw_size < cmd->plen) { - BT_ERR("%s: BCM: Patch is corrupted", hdev->name); + bt_dev_err(hdev, "BCM: Patch is corrupted"); err = -EINVAL; goto done; } @@ -151,8 +149,8 @@ int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); - BT_ERR("%s: BCM: Patch command %04x failed (%d)", - hdev->name, opcode, err); + bt_dev_err(hdev, "BCM: Patch command %04x failed (%d)", + opcode, err); goto done; } kfree_skb(skb); @@ -173,7 +171,7 @@ static int btbcm_reset(struct hci_dev *hdev) skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { int err = PTR_ERR(skb); - BT_ERR("%s: BCM: Reset failed (%d)", hdev->name, err); + bt_dev_err(hdev, "BCM: Reset failed (%d)", err); return err; } kfree_skb(skb); @@ -191,13 +189,13 @@ static struct sk_buff *btbcm_read_local_name(struct hci_dev *hdev) skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { - BT_ERR("%s: BCM: Reading local name failed (%ld)", - hdev->name, PTR_ERR(skb)); + bt_dev_err(hdev, "BCM: Reading local name failed (%ld)", + PTR_ERR(skb)); return skb; } if (skb->len != sizeof(struct hci_rp_read_local_name)) { - BT_ERR("%s: BCM: Local name length mismatch", hdev->name); + bt_dev_err(hdev, "BCM: Local name length mismatch"); kfree_skb(skb); return ERR_PTR(-EIO); } @@ -212,13 +210,13 @@ static struct sk_buff *btbcm_read_local_version(struct hci_dev *hdev) skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { - BT_ERR("%s: BCM: Reading local version info failed 
(%ld)", - hdev->name, PTR_ERR(skb)); + bt_dev_err(hdev, "BCM: Reading local version info failed (%ld)", + PTR_ERR(skb)); return skb; } if (skb->len != sizeof(struct hci_rp_read_local_version)) { - BT_ERR("%s: BCM: Local version length mismatch", hdev->name); + bt_dev_err(hdev, "BCM: Local version length mismatch"); kfree_skb(skb); return ERR_PTR(-EIO); } @@ -232,13 +230,13 @@ static struct sk_buff *btbcm_read_verbose_config(struct hci_dev *hdev) skb = __hci_cmd_sync(hdev, 0xfc79, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { - BT_ERR("%s: BCM: Read verbose config info failed (%ld)", - hdev->name, PTR_ERR(skb)); + bt_dev_err(hdev, "BCM: Read verbose config info failed (%ld)", + PTR_ERR(skb)); return skb; } if (skb->len != 7) { - BT_ERR("%s: BCM: Verbose config length mismatch", hdev->name); + bt_dev_err(hdev, "BCM: Verbose config length mismatch"); kfree_skb(skb); return ERR_PTR(-EIO); } @@ -252,14 +250,13 @@ static struct sk_buff *btbcm_read_controller_features(struct hci_dev *hdev) skb = __hci_cmd_sync(hdev, 0xfc6e, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { - BT_ERR("%s: BCM: Read controller features failed (%ld)", - hdev->name, PTR_ERR(skb)); + bt_dev_err(hdev, "BCM: Read controller features failed (%ld)", + PTR_ERR(skb)); return skb; } if (skb->len != 9) { - BT_ERR("%s: BCM: Controller features length mismatch", - hdev->name); + bt_dev_err(hdev, "BCM: Controller features length mismatch"); kfree_skb(skb); return ERR_PTR(-EIO); } @@ -273,13 +270,13 @@ static struct sk_buff *btbcm_read_usb_product(struct hci_dev *hdev) skb = __hci_cmd_sync(hdev, 0xfc5a, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { - BT_ERR("%s: BCM: Read USB product info failed (%ld)", - hdev->name, PTR_ERR(skb)); + bt_dev_err(hdev, "BCM: Read USB product info failed (%ld)", + PTR_ERR(skb)); return skb; } if (skb->len != 5) { - BT_ERR("%s: BCM: USB product length mismatch", hdev->name); + bt_dev_err(hdev, "BCM: USB product length mismatch"); kfree_skb(skb); return ERR_PTR(-EIO); } @@ -296,7 +293,7 @@ static int btbcm_read_info(struct hci_dev *hdev) if (IS_ERR(skb)) return PTR_ERR(skb); - BT_INFO("%s: BCM: chip id %u", hdev->name, skb->data[1]); + bt_dev_info(hdev, "BCM: chip id %u", skb->data[1]); kfree_skb(skb); /* Read Controller Features */ @@ -304,7 +301,7 @@ static int btbcm_read_info(struct hci_dev *hdev) if (IS_ERR(skb)) return PTR_ERR(skb); - BT_INFO("%s: BCM: features 0x%2.2x", hdev->name, skb->data[1]); + bt_dev_info(hdev, "BCM: features 0x%2.2x", skb->data[1]); kfree_skb(skb); /* Read Local Name */ @@ -312,7 +309,7 @@ static int btbcm_read_info(struct hci_dev *hdev) if (IS_ERR(skb)) return PTR_ERR(skb); - BT_INFO("%s: %s", hdev->name, (char *)(skb->data + 1)); + bt_dev_info(hdev, "%s", (char *)(skb->data + 1)); kfree_skb(skb); return 0; @@ -378,9 +375,9 @@ int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len) return 0; } - BT_INFO("%s: %s (%3.3u.%3.3u.%3.3u) build %4.4u", hdev->name, - hw_name ? : "BCM", (subver & 0xe000) >> 13, - (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff); + bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u", + hw_name ? 
: "BCM", (subver & 0xe000) >> 13, + (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff); return 0; } @@ -408,9 +405,9 @@ int btbcm_finalize(struct hci_dev *hdev) subver = le16_to_cpu(ver->lmp_subver); kfree_skb(skb); - BT_INFO("%s: BCM (%3.3u.%3.3u.%3.3u) build %4.4u", hdev->name, - (subver & 0xe000) >> 13, (subver & 0x1f00) >> 8, - (subver & 0x00ff), rev & 0x0fff); + bt_dev_info(hdev, "BCM (%3.3u.%3.3u.%3.3u) build %4.4u", + (subver & 0xe000) >> 13, (subver & 0x1f00) >> 8, + (subver & 0x00ff), rev & 0x0fff); btbcm_check_bdaddr(hdev); @@ -505,13 +502,13 @@ int btbcm_setup_patchram(struct hci_dev *hdev) return 0; } - BT_INFO("%s: %s (%3.3u.%3.3u.%3.3u) build %4.4u", hdev->name, - hw_name ? : "BCM", (subver & 0xe000) >> 13, - (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff); + bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u", + hw_name ? : "BCM", (subver & 0xe000) >> 13, + (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff); err = request_firmware(&fw, fw_name, &hdev->dev); if (err < 0) { - BT_INFO("%s: BCM: Patch %s not found", hdev->name, fw_name); + bt_dev_info(hdev, "BCM: Patch %s not found", fw_name); goto done; } @@ -534,16 +531,16 @@ int btbcm_setup_patchram(struct hci_dev *hdev) subver = le16_to_cpu(ver->lmp_subver); kfree_skb(skb); - BT_INFO("%s: %s (%3.3u.%3.3u.%3.3u) build %4.4u", hdev->name, - hw_name ? : "BCM", (subver & 0xe000) >> 13, - (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff); + bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u", + hw_name ? : "BCM", (subver & 0xe000) >> 13, + (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff); /* Read Local Name */ skb = btbcm_read_local_name(hdev); if (IS_ERR(skb)) return PTR_ERR(skb); - BT_INFO("%s: %s", hdev->name, (char *)(skb->data + 1)); + bt_dev_info(hdev, "%s", (char *)(skb->data + 1)); kfree_skb(skb); done: @@ -568,31 +565,31 @@ int btbcm_setup_apple(struct hci_dev *hdev) /* Read Verbose Config Version Info */ skb = btbcm_read_verbose_config(hdev); if (!IS_ERR(skb)) { - BT_INFO("%s: BCM: chip id %u build %4.4u", hdev->name, - skb->data[1], get_unaligned_le16(skb->data + 5)); + bt_dev_info(hdev, "BCM: chip id %u build %4.4u", + skb->data[1], get_unaligned_le16(skb->data + 5)); kfree_skb(skb); } /* Read USB Product Info */ skb = btbcm_read_usb_product(hdev); if (!IS_ERR(skb)) { - BT_INFO("%s: BCM: product %4.4x:%4.4x", hdev->name, - get_unaligned_le16(skb->data + 1), - get_unaligned_le16(skb->data + 3)); + bt_dev_info(hdev, "BCM: product %4.4x:%4.4x", + get_unaligned_le16(skb->data + 1), + get_unaligned_le16(skb->data + 3)); kfree_skb(skb); } /* Read Controller Features */ skb = btbcm_read_controller_features(hdev); if (!IS_ERR(skb)) { - BT_INFO("%s: BCM: features 0x%2.2x", hdev->name, skb->data[1]); + bt_dev_info(hdev, "BCM: features 0x%2.2x", skb->data[1]); kfree_skb(skb); } /* Read Local Name */ skb = btbcm_read_local_name(hdev); if (!IS_ERR(skb)) { - BT_INFO("%s: %s", hdev->name, (char *)(skb->data + 1)); + bt_dev_info(hdev, "%s", (char *)(skb->data + 1)); kfree_skb(skb); } diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index d32e109bd5cb..4459555c9d88 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -43,13 +43,13 @@ int btintel_check_bdaddr(struct hci_dev *hdev) HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { int err = PTR_ERR(skb); - BT_ERR("%s: Reading Intel device address failed (%d)", - hdev->name, err); + bt_dev_err(hdev, "Reading Intel device address failed (%d)", + err); return err; } if (skb->len != sizeof(*bda)) { - BT_ERR("%s: 
Intel device address length mismatch", hdev->name); + bt_dev_err(hdev, "Intel device address length mismatch"); kfree_skb(skb); return -EIO; } @@ -62,8 +62,8 @@ int btintel_check_bdaddr(struct hci_dev *hdev) * and that in turn can cause problems with Bluetooth operation. */ if (!bacmp(&bda->bdaddr, BDADDR_INTEL)) { - BT_ERR("%s: Found Intel default device address (%pMR)", - hdev->name, &bda->bdaddr); + bt_dev_err(hdev, "Found Intel default device address (%pMR)", + &bda->bdaddr); set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); } @@ -123,8 +123,8 @@ int btintel_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) skb = __hci_cmd_sync(hdev, 0xfc31, 6, bdaddr, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); - BT_ERR("%s: Changing Intel device address failed (%d)", - hdev->name, err); + bt_dev_err(hdev, "Changing Intel device address failed (%d)", + err); return err; } kfree_skb(skb); @@ -154,8 +154,8 @@ int btintel_set_diag(struct hci_dev *hdev, bool enable) err = PTR_ERR(skb); if (err == -ENODATA) goto done; - BT_ERR("%s: Changing Intel diagnostic mode failed (%d)", - hdev->name, err); + bt_dev_err(hdev, "Changing Intel diagnostic mode failed (%d)", + err); return err; } kfree_skb(skb); @@ -189,30 +189,30 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) struct sk_buff *skb; u8 type = 0x00; - BT_ERR("%s: Hardware error 0x%2.2x", hdev->name, code); + bt_dev_err(hdev, "Hardware error 0x%2.2x", code); skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { - BT_ERR("%s: Reset after hardware error failed (%ld)", - hdev->name, PTR_ERR(skb)); + bt_dev_err(hdev, "Reset after hardware error failed (%ld)", + PTR_ERR(skb)); return; } kfree_skb(skb); skb = __hci_cmd_sync(hdev, 0xfc22, 1, &type, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { - BT_ERR("%s: Retrieving Intel exception info failed (%ld)", - hdev->name, PTR_ERR(skb)); + bt_dev_err(hdev, "Retrieving Intel exception info failed (%ld)", + PTR_ERR(skb)); return; } if (skb->len != 13) { - BT_ERR("%s: Exception info size mismatch", hdev->name); + bt_dev_err(hdev, "Exception info size mismatch"); kfree_skb(skb); return; } - BT_ERR("%s: Exception info %s", hdev->name, (char *)(skb->data + 1)); + bt_dev_err(hdev, "Exception info %s", (char *)(skb->data + 1)); kfree_skb(skb); } @@ -233,9 +233,10 @@ void btintel_version_info(struct hci_dev *hdev, struct intel_version *ver) return; } - BT_INFO("%s: %s revision %u.%u build %u week %u %u", hdev->name, - variant, ver->fw_revision >> 4, ver->fw_revision & 0x0f, - ver->fw_build_num, ver->fw_build_ww, 2000 + ver->fw_build_yy); + bt_dev_info(hdev, "%s revision %u.%u build %u week %u %u", + variant, ver->fw_revision >> 4, ver->fw_revision & 0x0f, + ver->fw_build_num, ver->fw_build_ww, + 2000 + ver->fw_build_yy); } EXPORT_SYMBOL_GPL(btintel_version_info); @@ -321,8 +322,7 @@ int btintel_set_event_mask(struct hci_dev *hdev, bool debug) skb = __hci_cmd_sync(hdev, 0xfc52, 8, mask, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); - BT_ERR("%s: Setting Intel event mask failed (%d)", - hdev->name, err); + bt_dev_err(hdev, "Setting Intel event mask failed (%d)", err); return err; } kfree_skb(skb); diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 0bbdfcef2aa8..2793d4180d2f 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -287,7 +287,7 @@ static int rome_download_firmware(struct hci_dev *hdev, const struct firmware *fw; int ret; - BT_INFO("%s: ROME Downloading %s", hdev->name, config->fwname); + bt_dev_info(hdev, "ROME 
Downloading %s", config->fwname); ret = request_firmware(&fw, config->fwname, &hdev->dev); if (ret) { @@ -351,7 +351,7 @@ int qca_uart_setup_rome(struct hci_dev *hdev, uint8_t baudrate) return err; } - BT_INFO("%s: ROME controller version 0x%08x", hdev->name, rome_ver); + bt_dev_info(hdev, "ROME controller version 0x%08x", rome_ver); /* Download rampatch file */ config.type = TLV_TYPE_PATCH; @@ -380,7 +380,7 @@ int qca_uart_setup_rome(struct hci_dev *hdev, uint8_t baudrate) return err; } - BT_INFO("%s: ROME setup on UART is completed", hdev->name); + bt_dev_info(hdev, "ROME setup on UART is completed"); return 0; } diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index d9a99b4302ea..6e2ad748abba 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -55,8 +55,8 @@ static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version) } rom_version = (struct rtl_rom_version_evt *)skb->data; - BT_INFO("%s: rom_version status=%x version=%x", - hdev->name, rom_version->status, rom_version->version); + bt_dev_info(hdev, "rom_version status=%x version=%x", + rom_version->status, rom_version->version); *version = rom_version->version; @@ -273,7 +273,7 @@ static int rtl_load_config(struct hci_dev *hdev, const char *name, u8 **buff) const struct firmware *fw; int ret; - BT_INFO("%s: rtl: loading %s", hdev->name, name); + bt_dev_info(hdev, "rtl: loading %s", name); ret = request_firmware(&fw, name, &hdev->dev); if (ret < 0) return ret; @@ -292,7 +292,7 @@ static int btrtl_setup_rtl8723a(struct hci_dev *hdev) const struct firmware *fw; int ret; - BT_INFO("%s: rtl: loading rtl_bt/rtl8723a_fw.bin", hdev->name); + bt_dev_info(hdev, "rtl: loading rtl_bt/rtl8723a_fw.bin"); ret = request_firmware(&fw, "rtl_bt/rtl8723a_fw.bin", &hdev->dev); if (ret < 0) { BT_ERR("%s: Failed to load rtl_bt/rtl8723a_fw.bin", hdev->name); @@ -363,7 +363,7 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, } else cfg_sz = 0; - BT_INFO("%s: rtl: loading %s", hdev->name, fw_name); + bt_dev_info(hdev, "rtl: loading %s", fw_name); ret = request_firmware(&fw, fw_name, &hdev->dev); if (ret < 0) { BT_ERR("%s: Failed to load %s", hdev->name, fw_name); @@ -390,7 +390,7 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, fw_data = tbuff; } - BT_INFO("cfg_sz %d, total size %d", cfg_sz, ret); + bt_dev_info(hdev, "cfg_sz %d, total size %d", cfg_sz, ret); ret = rtl_download_firmware(hdev, fw_data, ret); @@ -436,9 +436,10 @@ int btrtl_setup_realtek(struct hci_dev *hdev) return -PTR_ERR(skb); resp = (struct hci_rp_read_local_version *)skb->data; - BT_INFO("%s: rtl: examining hci_ver=%02x hci_rev=%04x lmp_ver=%02x " - "lmp_subver=%04x", hdev->name, resp->hci_ver, resp->hci_rev, - resp->lmp_ver, resp->lmp_subver); + bt_dev_info(hdev, "rtl: examining hci_ver=%02x hci_rev=%04x " + "lmp_ver=%02x lmp_subver=%04x", + resp->hci_ver, resp->hci_rev, + resp->lmp_ver, resp->lmp_subver); lmp_subver = le16_to_cpu(resp->lmp_subver); kfree_skb(skb); @@ -466,7 +467,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev) return btrtl_setup_rtl8723b(hdev, lmp_subver, "rtl_bt/rtl8822b_fw.bin"); default: - BT_INFO("rtl: assuming no firmware upload needed."); + bt_dev_info(hdev, "rtl: assuming no firmware upload needed"); return 0; } } diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b8eb39436ef2..8701140e11f9 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -643,7 +643,7 @@ static void btusb_intr_complete(struct urb *urb) if 
(btusb_recv_intr(data, urb->transfer_buffer, urb->actual_length) < 0) { - BT_ERR("%s corrupted event packet", hdev->name); + bt_dev_err(hdev, "corrupted event packet"); hdev->stat.err_rx++; } } else if (urb->status == -ENOENT) { @@ -663,8 +663,8 @@ static void btusb_intr_complete(struct urb *urb) * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) - BT_ERR("%s urb %p failed to resubmit (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p failed to resubmit (%d)", + urb, -err); usb_unanchor_urb(urb); } } @@ -706,8 +706,8 @@ static int btusb_submit_intr_urb(struct hci_dev *hdev, gfp_t mem_flags) err = usb_submit_urb(urb, mem_flags); if (err < 0) { if (err != -EPERM && err != -ENODEV) - BT_ERR("%s urb %p submission failed (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p submission failed (%d)", + urb, -err); usb_unanchor_urb(urb); } @@ -733,7 +733,7 @@ static void btusb_bulk_complete(struct urb *urb) if (data->recv_bulk(data, urb->transfer_buffer, urb->actual_length) < 0) { - BT_ERR("%s corrupted ACL packet", hdev->name); + bt_dev_err(hdev, "corrupted ACL packet"); hdev->stat.err_rx++; } } else if (urb->status == -ENOENT) { @@ -753,8 +753,8 @@ static void btusb_bulk_complete(struct urb *urb) * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) - BT_ERR("%s urb %p failed to resubmit (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p failed to resubmit (%d)", + urb, -err); usb_unanchor_urb(urb); } } @@ -795,8 +795,8 @@ static int btusb_submit_bulk_urb(struct hci_dev *hdev, gfp_t mem_flags) err = usb_submit_urb(urb, mem_flags); if (err < 0) { if (err != -EPERM && err != -ENODEV) - BT_ERR("%s urb %p submission failed (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p submission failed (%d)", + urb, -err); usb_unanchor_urb(urb); } @@ -829,7 +829,7 @@ static void btusb_isoc_complete(struct urb *urb) if (btusb_recv_isoc(data, urb->transfer_buffer + offset, length) < 0) { - BT_ERR("%s corrupted SCO packet", hdev->name); + bt_dev_err(hdev, "corrupted SCO packet"); hdev->stat.err_rx++; } } @@ -849,8 +849,8 @@ static void btusb_isoc_complete(struct urb *urb) * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) - BT_ERR("%s urb %p failed to resubmit (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p failed to resubmit (%d)", + urb, -err); usb_unanchor_urb(urb); } } @@ -917,8 +917,8 @@ static int btusb_submit_isoc_urb(struct hci_dev *hdev, gfp_t mem_flags) err = usb_submit_urb(urb, mem_flags); if (err < 0) { if (err != -EPERM && err != -ENODEV) - BT_ERR("%s urb %p submission failed (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p submission failed (%d)", + urb, -err); usb_unanchor_urb(urb); } @@ -962,8 +962,8 @@ static void btusb_diag_complete(struct urb *urb) * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) - BT_ERR("%s urb %p failed to resubmit (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p failed to resubmit (%d)", + urb, -err); usb_unanchor_urb(urb); } } @@ -1004,8 +1004,8 @@ static int btusb_submit_diag_urb(struct hci_dev *hdev, gfp_t mem_flags) err = usb_submit_urb(urb, mem_flags); if (err < 0) { if (err != -EPERM && err != -ENODEV) - BT_ERR("%s urb %p submission failed (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p submission failed (%d)", + urb, -err); usb_unanchor_urb(urb); } @@ -1266,8 +1266,8 @@ static int submit_tx_urb(struct hci_dev *hdev, struct urb *urb) err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) { if (err != 
-EPERM && err != -ENODEV) - BT_ERR("%s urb %p submission failed (%d)", - hdev->name, urb, -err); + bt_dev_err(hdev, "urb %p submission failed (%d)", + urb, -err); kfree(urb->setup_packet); usb_unanchor_urb(urb); } else { @@ -1362,7 +1362,7 @@ static inline int __set_isoc_interface(struct hci_dev *hdev, int altsetting) err = usb_set_interface(data->udev, 1, altsetting); if (err < 0) { - BT_ERR("%s setting interface failed (%d)", hdev->name, -err); + bt_dev_err(hdev, "setting interface failed (%d)", -err); return err; } @@ -1386,7 +1386,7 @@ static inline int __set_isoc_interface(struct hci_dev *hdev, int altsetting) } if (!data->isoc_tx_ep || !data->isoc_rx_ep) { - BT_ERR("%s invalid SCO descriptors", hdev->name); + bt_dev_err(hdev, "invalid SCO descriptors"); return -ENODEV; } @@ -1481,7 +1481,7 @@ static int btusb_setup_bcm92035(struct hci_dev *hdev) skb = __hci_cmd_sync(hdev, 0xfc3b, 1, &val, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) - BT_ERR("BCM92035 command failed (%ld)", -PTR_ERR(skb)); + bt_dev_err(hdev, "BCM92035 command failed (%ld)", PTR_ERR(skb)); else kfree_skb(skb); @@ -1499,12 +1499,12 @@ static int btusb_setup_csr(struct hci_dev *hdev) HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { int err = PTR_ERR(skb); - BT_ERR("%s: CSR: Local version failed (%d)", hdev->name, err); + bt_dev_err(hdev, "CSR: Local version failed (%d)", err); return err; } if (skb->len != sizeof(struct hci_rp_read_local_version)) { - BT_ERR("%s: CSR: Local version length mismatch", hdev->name); + bt_dev_err(hdev, "CSR: Local version length mismatch"); kfree_skb(skb); return -EIO; } @@ -1566,7 +1566,7 @@ static const struct firmware *btusb_setup_intel_get_fw(struct hci_dev *hdev, } } - BT_INFO("%s: Intel Bluetooth firmware file: %s", hdev->name, fwname); + bt_dev_info(hdev, "Intel Bluetooth firmware file: %s", fwname); return fw; } @@ -1722,18 +1722,18 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (err) return err; - BT_INFO("%s: read Intel version: %02x%02x%02x%02x%02x%02x%02x%02x%02x", - hdev->name, ver.hw_platform, ver.hw_variant, ver.hw_revision, - ver.fw_variant, ver.fw_revision, ver.fw_build_num, - ver.fw_build_ww, ver.fw_build_yy, ver.fw_patch_num); + bt_dev_info(hdev, "read Intel version: %02x%02x%02x%02x%02x%02x%02x%02x%02x", + ver.hw_platform, ver.hw_variant, ver.hw_revision, + ver.fw_variant, ver.fw_revision, ver.fw_build_num, + ver.fw_build_ww, ver.fw_build_yy, ver.fw_patch_num); /* fw_patch_num indicates the version of patch the device currently * have. If there is no patch data in the device, it is always 0x00. * So, if it is other than 0x00, no need to patch the device again. */ if (ver.fw_patch_num) { - BT_INFO("%s: Intel device is already patched. patch num: %02x", - hdev->name, ver.fw_patch_num); + bt_dev_info(hdev, "Intel device is already patched. 
" + "patch num: %02x", ver.fw_patch_num); goto complete; } @@ -1801,8 +1801,7 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (err) return err; - BT_INFO("%s: Intel Bluetooth firmware patch completed and activated", - hdev->name); + bt_dev_info(hdev, "Intel firmware patch completed and activated"); goto complete; @@ -1812,7 +1811,7 @@ exit_mfg_disable: if (err) return err; - BT_INFO("%s: Intel Bluetooth firmware patch completed", hdev->name); + bt_dev_info(hdev, "Intel firmware patch completed"); goto complete; @@ -1826,8 +1825,7 @@ exit_mfg_deactivate: if (err) return err; - BT_INFO("%s: Intel Bluetooth firmware patch completed and deactivated", - hdev->name); + bt_dev_info(hdev, "Intel firmware patch completed and deactivated"); complete: /* Set the event mask for Intel specific vendor events. This enables @@ -2108,24 +2106,24 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) params = (struct intel_boot_params *)skb->data; - BT_INFO("%s: Device revision is %u", hdev->name, - le16_to_cpu(params->dev_revid)); + bt_dev_info(hdev, "Device revision is %u", + le16_to_cpu(params->dev_revid)); - BT_INFO("%s: Secure boot is %s", hdev->name, - params->secure_boot ? "enabled" : "disabled"); + bt_dev_info(hdev, "Secure boot is %s", + params->secure_boot ? "enabled" : "disabled"); - BT_INFO("%s: OTP lock is %s", hdev->name, - params->otp_lock ? "enabled" : "disabled"); + bt_dev_info(hdev, "OTP lock is %s", + params->otp_lock ? "enabled" : "disabled"); - BT_INFO("%s: API lock is %s", hdev->name, - params->api_lock ? "enabled" : "disabled"); + bt_dev_info(hdev, "API lock is %s", + params->api_lock ? "enabled" : "disabled"); - BT_INFO("%s: Debug lock is %s", hdev->name, - params->debug_lock ? "enabled" : "disabled"); + bt_dev_info(hdev, "Debug lock is %s", + params->debug_lock ? "enabled" : "disabled"); - BT_INFO("%s: Minimum firmware build %u week %u %u", hdev->name, - params->min_fw_build_nn, params->min_fw_build_cw, - 2000 + params->min_fw_build_yy); + bt_dev_info(hdev, "Minimum firmware build %u week %u %u", + params->min_fw_build_nn, params->min_fw_build_cw, + 2000 + params->min_fw_build_yy); /* It is required that every single firmware fragment is acknowledged * with a command complete event. If the boot parameters indicate @@ -2142,7 +2140,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) * also be no valid address for the operational firmware. */ if (!bacmp(¶ms->otp_bdaddr, BDADDR_ANY)) { - BT_INFO("%s: No device address configured", hdev->name); + bt_dev_info(hdev, "No device address configured"); set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); } @@ -2169,7 +2167,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) return err; } - BT_INFO("%s: Found device firmware: %s", hdev->name, fwname); + bt_dev_info(hdev, "Found device firmware: %s", fwname); /* Save the DDC file name for later use to apply once the firmware * downloading is done. 
@@ -2250,7 +2248,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - BT_INFO("%s: Waiting for firmware download to complete", hdev->name); + bt_dev_info(hdev, "Waiting for firmware download to complete"); /* Before switching the device into operational mode and with that * booting the loaded firmware, wait for the bootloader notification @@ -2287,7 +2285,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - BT_INFO("%s: Firmware loaded in %llu usecs", hdev->name, duration); + bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration); done: release_firmware(fw); @@ -2313,7 +2311,7 @@ done: * 1 second. However if that happens, then just fail the setup * since something went wrong. */ - BT_INFO("%s: Waiting for device to boot", hdev->name); + bt_dev_info(hdev, "Waiting for device to boot"); err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING, TASK_INTERRUPTIBLE, @@ -2333,7 +2331,7 @@ done: delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - BT_INFO("%s: Device booted in %llu usecs", hdev->name, duration); + bt_dev_info(hdev, "Device booted in %llu usecs", duration); clear_bit(BTUSB_BOOTLOADER, &data->flags); @@ -2436,8 +2434,8 @@ static int btusb_set_bdaddr_marvell(struct hci_dev *hdev, skb = __hci_cmd_sync(hdev, 0xfc22, sizeof(buf), buf, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { ret = PTR_ERR(skb); - BT_ERR("%s: changing Marvell device address failed (%ld)", - hdev->name, ret); + bt_dev_err(hdev, "changing Marvell device address failed (%ld)", + ret); return ret; } kfree_skb(skb); @@ -2461,8 +2459,7 @@ static int btusb_set_bdaddr_ath3012(struct hci_dev *hdev, skb = __hci_cmd_sync(hdev, 0xfc0b, sizeof(buf), buf, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { ret = PTR_ERR(skb); - BT_ERR("%s: Change address command failed (%ld)", - hdev->name, ret); + bt_dev_err(hdev, "Change address command failed (%ld)", ret); return ret; } kfree_skb(skb); @@ -2528,7 +2525,7 @@ static int btusb_qca_send_vendor_req(struct hci_dev *hdev, u8 request, err = usb_control_msg(udev, pipe, request, USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, buf, size, USB_CTRL_SET_TIMEOUT); if (err < 0) { - BT_ERR("%s: Failed to access otp area (%d)", hdev->name, err); + bt_dev_err(hdev, "Failed to access otp area (%d)", err); goto done; } @@ -2568,7 +2565,7 @@ static int btusb_setup_qca_download_fw(struct hci_dev *hdev, err = usb_control_msg(udev, pipe, QCA_DFU_DOWNLOAD, USB_TYPE_VENDOR, 0, 0, buf, size, USB_CTRL_SET_TIMEOUT); if (err < 0) { - BT_ERR("%s: Failed to send headers (%d)", hdev->name, err); + bt_dev_err(hdev, "Failed to send headers (%d)", err); goto done; } @@ -2584,13 +2581,13 @@ static int btusb_setup_qca_download_fw(struct hci_dev *hdev, err = usb_bulk_msg(udev, pipe, buf, size, &len, QCA_DFU_TIMEOUT); if (err < 0) { - BT_ERR("%s: Failed to send body at %zd of %zd (%d)", - hdev->name, sent, firmware->size, err); + bt_dev_err(hdev, "Failed to send body at %zd of %zd (%d)", + sent, firmware->size, err); break; } if (size != len) { - BT_ERR("%s: Failed to get bulk buffer", hdev->name); + bt_dev_err(hdev, "Failed to get bulk buffer"); err = -EILSEQ; break; } @@ -2622,24 +2619,23 @@ static int btusb_setup_qca_load_rampatch(struct hci_dev *hdev, err = request_firmware(&fw, fwname, &hdev->dev); if (err) { - BT_ERR("%s: failed to request rampatch file: %s (%d)", - hdev->name, fwname, err); + bt_dev_err(hdev, "failed to request rampatch 
file: %s (%d)", + fwname, err); return err; } - BT_INFO("%s: using rampatch file: %s", hdev->name, fwname); + bt_dev_info(hdev, "using rampatch file: %s", fwname); rver = (struct qca_rampatch_version *)(fw->data + info->ver_offset); rver_rom = le16_to_cpu(rver->rom_version); rver_patch = le16_to_cpu(rver->patch_version); - BT_INFO("%s: QCA: patch rome 0x%x build 0x%x, firmware rome 0x%x " - "build 0x%x", hdev->name, rver_rom, rver_patch, ver_rom, - ver_patch); + bt_dev_info(hdev, "QCA: patch rome 0x%x build 0x%x, " + "firmware rome 0x%x build 0x%x", + rver_rom, rver_patch, ver_rom, ver_patch); if (rver_rom != ver_rom || rver_patch <= ver_patch) { - BT_ERR("%s: rampatch file version did not match with firmware", - hdev->name); + bt_dev_err(hdev, "rampatch file version did not match with firmware"); err = -EINVAL; goto done; } @@ -2665,12 +2661,12 @@ static int btusb_setup_qca_load_nvm(struct hci_dev *hdev, err = request_firmware(&fw, fwname, &hdev->dev); if (err) { - BT_ERR("%s: failed to request NVM file: %s (%d)", - hdev->name, fwname, err); + bt_dev_err(hdev, "failed to request NVM file: %s (%d)", + fwname, err); return err; } - BT_INFO("%s: using NVM file: %s", hdev->name, fwname); + bt_dev_info(hdev, "using NVM file: %s", fwname); err = btusb_setup_qca_download_fw(hdev, fw, info->nvm_hdr); @@ -2698,8 +2694,7 @@ static int btusb_setup_qca(struct hci_dev *hdev) info = &qca_devices_table[i]; } if (!info) { - BT_ERR("%s: don't support firmware rome 0x%x", hdev->name, - ver_rom); + bt_dev_err(hdev, "don't support firmware rome 0x%x", ver_rom); return -ENODEV; } @@ -2753,7 +2748,7 @@ static inline int __set_diag_interface(struct hci_dev *hdev) } if (!data->diag_tx_ep || !data->diag_rx_ep) { - BT_ERR("%s invalid diagnostic descriptors", hdev->name); + bt_dev_err(hdev, "invalid diagnostic descriptors"); return -ENODEV; } diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index 5f17ec339fb8..14ae7ee88acb 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -206,7 +206,7 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count) ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts)); if (IS_ERR(ath->rx_skb)) { int err = PTR_ERR(ath->rx_skb); - BT_ERR("%s: Frame reassembly failed (%d)", hu->hdev->name, err); + bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); ath->rx_skb = NULL; return err; } diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 3b82a87224a9..fb97a3bf069b 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -132,7 +132,7 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { int err = PTR_ERR(h4->rx_skb); - BT_ERR("%s: Frame reassembly failed (%d)", hu->hdev->name, err); + bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); h4->rx_skb = NULL; return err; } diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4a949bb60394..bbd7db7384e6 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -800,7 +800,7 @@ static int qca_recv(struct hci_uart *hu, const void *data, int count) qca_recv_pkts, ARRAY_SIZE(qca_recv_pkts)); if (IS_ERR(qca->rx_skb)) { int err = PTR_ERR(qca->rx_skb); - BT_ERR("%s: Frame reassembly failed (%d)", hu->hdev->name, err); + bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); qca->rx_skb = NULL; return err; } @@ -863,7 +863,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) skb = 
bt_skb_alloc(sizeof(cmd), GFP_ATOMIC); if (!skb) { - BT_ERR("Failed to allocate memory for baudrate packet"); + bt_dev_err(hdev, "Failed to allocate baudrate packet"); return -ENOMEM; } @@ -892,7 +892,7 @@ static int qca_setup(struct hci_uart *hu) unsigned int speed, qca_baudrate = QCA_BAUDRATE_115200; int ret; - BT_INFO("%s: ROME setup", hdev->name); + bt_dev_info(hdev, "ROME setup"); /* Patch downloading has to be done without IBS mode */ clear_bit(STATE_IN_BAND_SLEEP_ENABLED, &qca->flags); @@ -917,11 +917,11 @@ static int qca_setup(struct hci_uart *hu) if (speed) { qca_baudrate = qca_get_baudrate_value(speed); - BT_INFO("%s: Set UART speed to %d", hdev->name, speed); + bt_dev_info(hdev, "Set UART speed to %d", speed); ret = qca_set_baudrate(hdev, qca_baudrate); if (ret) { - BT_ERR("%s: Failed to change the baud rate (%d)", - hdev->name, ret); + bt_dev_err(hdev, "Failed to change the baud rate (%d)", + ret); return ret; } hci_uart_set_baudrate(hu, speed); diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index b725ac4f7ff6..71664b22ec9d 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -185,7 +185,7 @@ static int hci_uart_setup(struct hci_dev *hdev) if (hu->proto->set_baudrate && speed) { err = hu->proto->set_baudrate(hu, speed); if (err) - BT_ERR("%s: failed to set baudrate", hdev->name); + bt_dev_err(hdev, "Failed to set baudrate"); else serdev_device_set_baudrate(hu->serdev, speed); } @@ -199,14 +199,13 @@ static int hci_uart_setup(struct hci_dev *hdev) skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { - BT_ERR("%s: Reading local version information failed (%ld)", - hdev->name, PTR_ERR(skb)); + bt_dev_err(hdev, "Reading local version info failed (%ld)", + PTR_ERR(skb)); return 0; } if (skb->len != sizeof(*ver)) { - BT_ERR("%s: Event length mismatch for version information", - hdev->name); + bt_dev_err(hdev, "Event length mismatch for version info"); } kfree_skb(skb); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 020142bb9735..e89cff0c4c23 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -147,6 +147,9 @@ void bt_err_ratelimited(const char *fmt, ...); #define bt_dev_dbg(hdev, fmt, ...) \ BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) +#define bt_dev_err_ratelimited(hdev, fmt, ...) 
\ + BT_ERR_RATELIMITED("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + /* Connection and socket states */ enum { BT_CONNECTED = 1, /* Equal to TCP_ESTABLISHED to make net code happy */ diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index aad994edd3bb..51c2cf2d8923 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -573,7 +573,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb, hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, &mgr->l2cap_conn->hcon->dst); if (!hcon) { - BT_ERR("No phys link exist"); + bt_dev_err(hdev, "no phys link exist"); rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS; goto clean; } diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index ebcab5bbadd7..78bec8df8525 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -187,7 +187,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type) /* Legacy key */ if (conn->key_type < 3) { - BT_ERR("Legacy key type %d", conn->key_type); + bt_dev_err(hdev, "legacy key type %d", conn->key_type); return -EACCES; } @@ -207,7 +207,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type) /* Derive Generic AMP Link Key (gamp) */ err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key); if (err) { - BT_ERR("Could not derive Generic AMP Key: err %d", err); + bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err); return err; } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 746adcb62259..a9682534c377 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -729,8 +729,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) goto done; } - BT_ERR("HCI request failed to create LE connection: status 0x%2.2x", - status); + bt_dev_err(hdev, "request failed to create LE connection: " + "status 0x%2.2x", status); if (!conn) goto done; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6bc679cd3481..40d260f2bea5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -267,7 +267,7 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt) amp_init1(req); break; default: - BT_ERR("Unknown device type %d", hdev->dev_type); + bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type); break; } @@ -2150,8 +2150,7 @@ static void hci_error_reset(struct work_struct *work) if (hdev->hw_error) hdev->hw_error(hdev, hdev->hw_error_code); else - BT_ERR("%s hardware error 0x%2.2x", hdev->name, - hdev->hw_error_code); + bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); if (hci_dev_do_close(hdev)) return; @@ -2524,9 +2523,9 @@ static void hci_cmd_timeout(struct work_struct *work) struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; u16 opcode = __le16_to_cpu(sent->opcode); - BT_ERR("%s command 0x%4.4x tx timeout", hdev->name, opcode); + bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); } else { - BT_ERR("%s command tx timeout", hdev->name); + bt_dev_err(hdev, "command tx timeout"); } atomic_set(&hdev->cmd_cnt, 1); @@ -2858,7 +2857,7 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) { - BT_ERR("Out of memory"); + bt_dev_err(hdev, "out of memory"); return NULL; } @@ -3393,7 +3392,7 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) err = hdev->send(hdev, skb); if (err < 0) { - BT_ERR("%s sending frame failed (%d)", hdev->name, err); + bt_dev_err(hdev, "sending frame failed (%d)", err); kfree_skb(skb); } } 
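For reference, the bt_dev_err()/bt_dev_info() helpers used throughout this conversion follow the same pattern as the bt_dev_dbg() and bt_dev_err_ratelimited() macros in the include/net/bluetooth/bluetooth.h hunk above: they prepend the controller name automatically, which is why every hunk can drop the explicit "%s: " prefix and hdev->name argument without changing the log output. A rough sketch of these wrappers, assuming the BT_ERR()/BT_INFO() printk helpers already provided by bluetooth.h (the exact definitions in the header may differ slightly):

        /* Illustrative only - mirrors the bt_dev_dbg() pattern shown above */
        #define bt_dev_info(hdev, fmt, ...) \
                BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
        #define bt_dev_err(hdev, fmt, ...) \
                BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__)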
@@ -3408,7 +3407,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { - BT_ERR("%s no memory for command", hdev->name); + bt_dev_err(hdev, "no memory for command"); return -ENOMEM; } @@ -3493,7 +3492,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, hci_add_acl_hdr(skb, chan->handle, flags); break; default: - BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type); + bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); return; } @@ -3618,7 +3617,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, break; default: cnt = 0; - BT_ERR("Unknown link type"); + bt_dev_err(hdev, "unknown link type %d", conn->type); } q = cnt / num; @@ -3635,15 +3634,15 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; - BT_ERR("%s link tx timeout", hdev->name); + bt_dev_err(hdev, "link tx timeout"); rcu_read_lock(); /* Kill stalled connections */ list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && c->sent) { - BT_ERR("%s killing stalled connection %pMR", - hdev->name, &c->dst); + bt_dev_err(hdev, "killing stalled connection %pMR", + &c->dst); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); } } @@ -3724,7 +3723,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, break; default: cnt = 0; - BT_ERR("Unknown link type"); + bt_dev_err(hdev, "unknown link type %d", chan->conn->type); } q = cnt / num; @@ -4066,8 +4065,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) l2cap_recv_acldata(conn, skb, flags); return; } else { - BT_ERR("%s ACL packet for unknown connection handle %d", - hdev->name, handle); + bt_dev_err(hdev, "ACL packet for unknown connection handle %d", + handle); } kfree_skb(skb); @@ -4097,8 +4096,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) sco_recv_scodata(conn, skb); return; } else { - BT_ERR("%s SCO packet for unknown connection handle %d", - hdev->name, handle); + bt_dev_err(hdev, "SCO packet for unknown connection handle %d", + handle); } kfree_skb(skb); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0b4dba08a14e..cd3bbb766c24 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1188,7 +1188,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, break; default: - BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable); + bt_dev_err(hdev, "use of reserved LE_Scan_Enable param %d", + cp->enable); break; } @@ -1485,7 +1486,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr, HCI_ROLE_MASTER); if (!conn) - BT_ERR("No memory for new connection"); + bt_dev_err(hdev, "no memory for new connection"); } } @@ -2269,7 +2270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, HCI_ROLE_SLAVE); if (!conn) { - BT_ERR("No memory for new connection"); + bt_dev_err(hdev, "no memory for new connection"); hci_dev_unlock(hdev); return; } @@ -2431,7 +2432,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!hci_conn_ssp_enabled(conn) && test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) { - BT_INFO("re-auth of legacy device is not possible."); + bt_dev_info(hdev, "re-auth of legacy device is not possible."); } else { set_bit(HCI_CONN_AUTH, &conn->flags); conn->sec_level = 
conn->pending_sec_level; @@ -2535,8 +2536,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status, BT_DBG("%s status 0x%02x", hdev->name, status); if (!skb || skb->len < sizeof(*rp)) { - BT_ERR("%s invalid HCI Read Encryption Key Size response", - hdev->name); + bt_dev_err(hdev, "invalid read key size response"); return; } @@ -2554,8 +2554,8 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status, * supported. */ if (rp->status) { - BT_ERR("%s failed to read key size for handle %u", hdev->name, - handle); + bt_dev_err(hdev, "failed to read key size for handle %u", + handle); conn->enc_key_size = HCI_LINK_KEY_SIZE; } else { conn->enc_key_size = rp->key_size; @@ -2664,7 +2664,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp); if (hci_req_run_skb(&req, read_enc_key_size_complete)) { - BT_ERR("Sending HCI Read Encryption Key Size failed"); + bt_dev_err(hdev, "sending read key size failed"); conn->enc_key_size = HCI_LINK_KEY_SIZE; goto notify; } @@ -3197,7 +3197,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) int i; if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { - BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode); + bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); return; } @@ -3249,7 +3249,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) break; default: - BT_ERR("Unknown type %d conn %p", conn->type, conn); + bt_dev_err(hdev, "unknown type %d conn %p", + conn->type, conn); break; } } @@ -3271,7 +3272,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev, return chan->conn; break; default: - BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type); + bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); break; } @@ -3284,7 +3285,7 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb) int i; if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) { - BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode); + bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); return; } @@ -3320,7 +3321,8 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb) break; default: - BT_ERR("Unknown type %d conn %p", conn->type, conn); + bt_dev_err(hdev, "unknown type %d conn %p", + conn->type, conn); break; } } @@ -4479,7 +4481,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!conn) { conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role); if (!conn) { - BT_ERR("No memory for new connection"); + bt_dev_err(hdev, "no memory for new connection"); goto unlock; } @@ -4749,8 +4751,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, case LE_ADV_SCAN_RSP: break; default: - BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x", - type); + bt_dev_err_ratelimited(hdev, "unknown advertising packet " + "type: 0x%02x", type); return; } @@ -4769,8 +4771,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, /* Adjust for actual length */ if (len != real_len) { - BT_ERR_RATELIMITED("%s advertising data length corrected", - hdev->name); + bt_dev_err_ratelimited(hdev, "advertising data len corrected"); len = real_len; } @@ -5192,7 +5193,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return false; if (skb->len < sizeof(*hdr)) { - BT_ERR("Too short HCI event"); + 
bt_dev_err(hdev, "too short HCI event"); return false; } @@ -5206,12 +5207,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, } if (hdr->evt != HCI_EV_CMD_COMPLETE) { - BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); + bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)", + hdr->evt); return false; } if (skb->len < sizeof(*ev)) { - BT_ERR("Too short cmd_complete event"); + bt_dev_err(hdev, "too short cmd_complete event"); return false; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7f28d17dc792..abc0f3224dd1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -336,8 +336,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { - BT_ERR("%s no memory for command (opcode 0x%4.4x)", - hdev->name, opcode); + bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", + opcode); req->err = -ENOMEM; return; } @@ -1426,7 +1426,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); + bt_dev_err(hdev, "failed to generate new RPA"); return err; } @@ -1788,7 +1788,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) err = hci_req_run(&req, abort_conn_complete); if (err && err != -ENODATA) { - BT_ERR("Failed to run HCI request: err %d", err); + bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err); return err; } @@ -1872,7 +1872,8 @@ static void le_scan_disable_work(struct work_struct *work) hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); if (status) { - BT_ERR("Failed to disable LE scan: status 0x%02x", status); + bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x", + status); return; } @@ -1903,7 +1904,7 @@ static void le_scan_disable_work(struct work_struct *work) hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN, HCI_CMD_TIMEOUT, &status); if (status) { - BT_ERR("Inquiry failed: status 0x%02x", status); + bt_dev_err(hdev, "inquiry failed: status 0x%02x", status); goto discov_stopped; } @@ -1945,7 +1946,8 @@ static void le_scan_restart_work(struct work_struct *work) hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); if (status) { - BT_ERR("Failed to restart LE scan: status %d", status); + bt_dev_err(hdev, "failed to restart LE scan: status %d", + status); return; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index aa300f3a0d51..34aaa2340ac8 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -50,7 +50,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); if (device_add(&conn->dev) < 0) { - BT_ERR("Failed to register connection device"); + bt_dev_err(hdev, "failed to register connection device"); return; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 07a3cc29f426..6e9fc86d8daf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2159,8 +2159,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { - BT_ERR("load_link_keys: too big key_count value %u", - key_count); + bt_dev_err(hdev, "load_link_keys: too big key_count value %u", + key_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); } @@ -2168,8 +2168,8 @@ static int 
load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_link_key_info); if (expected_len != len) { - BT_ERR("load_link_keys: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes", + expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); } @@ -2561,7 +2561,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr)); - BT_ERR("PIN code is not 16 bytes long"); + bt_dev_err(hdev, "PIN code is not 16 bytes long"); err = send_pin_code_neg_reply(sk, hdev, &ncp); if (err >= 0) @@ -3391,7 +3391,8 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, MGMT_OP_ADD_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); } else { - BT_ERR("add_remote_oob_data: invalid length of %u bytes", len); + bt_dev_err(hdev, "add_remote_oob_data: invalid len of %u bytes", + len); err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, MGMT_STATUS_INVALID_PARAMS); } @@ -3604,8 +3605,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, uuid_count = __le16_to_cpu(cp->uuid_count); if (uuid_count > max_uuid_count) { - BT_ERR("service_discovery: too big uuid_count value %u", - uuid_count); + bt_dev_err(hdev, "service_discovery: too big uuid_count value %u", + uuid_count); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_INVALID_PARAMS, &cp->type, @@ -3615,8 +3616,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, expected_len = sizeof(*cp) + uuid_count * 16; if (expected_len != len) { - BT_ERR("service_discovery: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "service_discovery: expected %u bytes, got %u bytes", + expected_len, len); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_INVALID_PARAMS, &cp->type, @@ -3943,7 +3944,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, err = hci_req_run(&req, enable_advertising_instance); if (err) - BT_ERR("Failed to re-configure advertising"); + bt_dev_err(hdev, "failed to re-configure advertising"); unlock: hci_dev_unlock(hdev); @@ -4664,15 +4665,16 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, irk_count = __le16_to_cpu(cp->irk_count); if (irk_count > max_irk_count) { - BT_ERR("load_irks: too big irk_count value %u", irk_count); + bt_dev_err(hdev, "load_irks: too big irk_count value %u", + irk_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info); if (expected_len != len) { - BT_ERR("load_irks: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes", + expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, MGMT_STATUS_INVALID_PARAMS); } @@ -4745,7 +4747,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { - BT_ERR("load_ltks: too big key_count value %u", key_count); + bt_dev_err(hdev, "load_ltks: too big key_count value %u", + key_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, MGMT_STATUS_INVALID_PARAMS); } @@ -4753,8 +4756,8 @@ static int load_long_term_keys(struct sock *sk, 
struct hci_dev *hdev, expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_ltk_info); if (expected_len != len) { - BT_ERR("load_keys: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes", + expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, MGMT_STATUS_INVALID_PARAMS); } @@ -4873,14 +4876,15 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, } if (!cp) { - BT_ERR("invalid sent_cmd in conn_info response"); + bt_dev_err(hdev, "invalid sent_cmd in conn_info response"); goto unlock; } handle = __le16_to_cpu(cp->handle); conn = hci_conn_hash_lookup_handle(hdev, handle); if (!conn) { - BT_ERR("unknown handle (%d) in conn_info response", handle); + bt_dev_err(hdev, "unknown handle (%d) in conn_info response", + handle); goto unlock; } @@ -5477,8 +5481,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, param_count = __le16_to_cpu(cp->param_count); if (param_count > max_param_count) { - BT_ERR("load_conn_param: too big param_count value %u", - param_count); + bt_dev_err(hdev, "load_conn_param: too big param_count value %u", + param_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, MGMT_STATUS_INVALID_PARAMS); } @@ -5486,8 +5490,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, expected_len = sizeof(*cp) + param_count * sizeof(struct mgmt_conn_param); if (expected_len != len) { - BT_ERR("load_conn_param: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes", + expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, MGMT_STATUS_INVALID_PARAMS); } @@ -5512,7 +5516,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, } else if (param->addr.type == BDADDR_LE_RANDOM) { addr_type = ADDR_LE_DEV_RANDOM; } else { - BT_ERR("Ignoring invalid connection parameters"); + bt_dev_err(hdev, "ignoring invalid connection parameters"); continue; } @@ -5525,14 +5529,14 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, min, max, latency, timeout); if (hci_check_conn_params(min, max, latency, timeout) < 0) { - BT_ERR("Ignoring invalid connection parameters"); + bt_dev_err(hdev, "ignoring invalid connection parameters"); continue; } hci_param = hci_conn_params_add(hdev, ¶m->addr.bdaddr, addr_type); if (!hci_param) { - BT_ERR("Failed to add connection parameters"); + bt_dev_err(hdev, "failed to add connection parameters"); continue; } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index d41449b9e9d6..01117ae84f1d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -996,7 +996,8 @@ static u8 smp_random(struct smp_chan *smp) return SMP_UNSPECIFIED; if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) { - BT_ERR("Pairing failed (confirmation values mismatch)"); + bt_dev_err(hcon->hdev, "pairing failed " + "(confirmation values mismatch)"); return SMP_CONFIRM_FAILED; } @@ -1210,7 +1211,7 @@ static void sc_generate_ltk(struct smp_chan *smp) key = hci_find_link_key(hdev, &hcon->dst); if (!key) { - BT_ERR("%s No Link Key found to generate LTK", hdev->name); + bt_dev_err(hdev, "no Link Key found to generate LTK"); return; } @@ -2067,11 +2068,11 @@ static int fixup_sc_false_positive(struct smp_chan *smp) return SMP_UNSPECIFIED; if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { - BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode"); + 
bt_dev_err(hdev, "refusing legacy fallback in SC-only mode"); return SMP_UNSPECIFIED; } - BT_ERR("Trying to fall back to legacy SMP"); + bt_dev_err(hdev, "trying to fall back to legacy SMP"); req = (void *) &smp->preq[1]; rsp = (void *) &smp->prsp[1]; @@ -2082,7 +2083,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp) auth = req->auth_req & AUTH_REQ_MASK(hdev); if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) { - BT_ERR("Failed to fall back to legacy SMP"); + bt_dev_err(hdev, "failed to fall back to legacy SMP"); return SMP_UNSPECIFIED; } @@ -2355,7 +2356,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) chan = conn->smp; if (!chan) { - BT_ERR("SMP security requested but not available"); + bt_dev_err(hcon->hdev, "security requested but not available"); return 1; } @@ -2548,7 +2549,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, */ if (!bacmp(&info->bdaddr, BDADDR_ANY) || !hci_is_identity_address(&info->bdaddr, info->addr_type)) { - BT_ERR("Ignoring IRK with no identity address"); + bt_dev_err(hcon->hdev, "ignoring IRK with no identity address"); goto distribute; } @@ -2953,8 +2954,8 @@ done: return err; drop: - BT_ERR("%s unexpected SMP command 0x%02x from %pMR", hcon->hdev->name, - code, &hcon->dst); + bt_dev_err(hcon->hdev, "unexpected SMP command 0x%02x from %pMR", + code, &hcon->dst); kfree_skb(skb); return 0; } @@ -3021,8 +3022,7 @@ static void bredr_pairing(struct l2cap_chan *chan) smp = smp_chan_create(conn); if (!smp) { - BT_ERR("%s unable to create SMP context for BR/EDR", - hdev->name); + bt_dev_err(hdev, "unable to create SMP context for BR/EDR"); return; } From af3715e5cef8859571cabecc71e1cd4cc5869d6a Mon Sep 17 00:00:00 2001 From: Jaya P G Date: Mon, 30 Oct 2017 11:01:22 +0100 Subject: [PATCH 1586/2177] Bluetooth: btusb: Update firmware filename for Intel 9x60 and later The format of Intel Bluetooth firmware for bootloader product is ibt--.sfi and .ddc. But for the SKU's 9x60, there a 3 variants of FW, which cannot be differentiated just with hw_variant and devision_revision_id. So to pick the appropriate FW file for 9x60 SKU's, it will be differentiated using hw_variant, hw_revision and fw_revision rather than hw_variant and device_revision_id only. Format will be like this: ibt---.sfi and .ddc Signed-off-by: Jaya P G Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- drivers/bluetooth/btusb.c | 54 ++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 8701140e11f9..d56905fed79d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2145,19 +2145,44 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) } /* With this Intel bootloader only the hardware variant and device - * revision information are used to select the right firmware. + * revision information are used to select the right firmware for SfP + * and WsP. * * The firmware filename is ibt--.sfi. * * Currently the supported hardware variants are: * 11 (0x0b) for iBT3.0 (LnP/SfP) * 12 (0x0c) for iBT3.5 (WsP) + * + * For ThP/JfP and for future SKU's, the FW name varies based on HW + * variant, HW revision and FW revision, as these are dependent on CNVi + * and RF Combination. + * * 17 (0x11) for iBT3.5 (JfP) * 18 (0x12) for iBT3.5 (ThP) + * + * The firmware file name for these will be + * ibt---.sfi. 
+ * */ - snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi", - le16_to_cpu(ver.hw_variant), - le16_to_cpu(params->dev_revid)); + switch (ver.hw_variant) { + case 0x0b: /* SfP */ + case 0x0c: /* WsP */ + snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi", + le16_to_cpu(ver.hw_variant), + le16_to_cpu(params->dev_revid)); + break; + case 0x11: /* JfP */ + case 0x12: /* ThP */ + snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi", + le16_to_cpu(ver.hw_variant), + le16_to_cpu(ver.hw_revision), + le16_to_cpu(ver.fw_revision)); + break; + default: + BT_ERR("%s: Unsupported Intel firmware naming", hdev->name); + return -EINVAL; + } err = request_firmware(&fw, fwname, &hdev->dev); if (err < 0) { @@ -2172,9 +2197,24 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) /* Save the DDC file name for later use to apply once the firmware * downloading is done. */ - snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc", - le16_to_cpu(ver.hw_variant), - le16_to_cpu(params->dev_revid)); + switch (ver.hw_variant) { + case 0x0b: /* SfP */ + case 0x0c: /* WsP */ + snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc", + le16_to_cpu(ver.hw_variant), + le16_to_cpu(params->dev_revid)); + break; + case 0x11: /* JfP */ + case 0x12: /* ThP */ + snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc", + le16_to_cpu(ver.hw_variant), + le16_to_cpu(ver.hw_revision), + le16_to_cpu(ver.fw_revision)); + break; + default: + BT_ERR("%s: Unsupported Intel firmware naming", hdev->name); + return -EINVAL; + } kfree_skb(skb); From 459232fc0e2505d489e2dc3befc1ad01dcdccb47 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 24 Oct 2017 19:42:45 +0200 Subject: [PATCH 1587/2177] Bluetooth: btusb: Fix isochronous interface assignments The recent MacBook's with multi-function USB interfaces for HID and Bluetooth operation have the isochronous interface on number 3 instead of number 1. Store the interface number and use it. P: Vendor=05ac ProdID=8290 Rev= 1.40 S: Manufacturer=Broadcom Corp. S: Product=Bluetooth USB Host Controller C:* #Ifs= 6 Cfg#= 1 Atr=e0 MxPwr= 0mA A: FirstIf#= 2 IfCount= 4 Cls=ff(vend.) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=01 Driver=usbhid E: Ad=85(I) Atr=03(Int.) MxPS= 8 Ivl=10ms I:* If#= 1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=02 Driver=usbhid E: Ad=86(I) Atr=03(Int.) MxPS= 8 Ivl=10ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 3 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 3 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 3 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 3 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 3 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) 
Sub=ff Prot=ff Driver=btusb E: Ad=84(I) Atr=02(Bulk) MxPS= 32 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 32 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 0 Cls=fe(app. ) Sub=01 Prot=01 Driver=(none) Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- drivers/bluetooth/btusb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d56905fed79d..f7120c9eb9bd 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -397,6 +397,7 @@ struct btusb_data { struct usb_interface *intf; struct usb_interface *isoc; struct usb_interface *diag; + unsigned isoc_ifnum; unsigned long flags; @@ -1360,7 +1361,7 @@ static inline int __set_isoc_interface(struct hci_dev *hdev, int altsetting) if (!data->isoc) return -ENODEV; - err = usb_set_interface(data->udev, 1, altsetting); + err = usb_set_interface(data->udev, data->isoc_ifnum, altsetting); if (err < 0) { bt_dev_err(hdev, "setting interface failed (%d)", -err); return err; @@ -3142,6 +3143,7 @@ static int btusb_probe(struct usb_interface *intf, } else { /* Interface orders are hardcoded in the specification */ data->isoc = usb_ifnum_to_if(data->udev, ifnum_base + 1); + data->isoc_ifnum = ifnum_base + 1; } if (!reset) From 39a9cd5a0e4e64d02b1b201bb767fde22712405a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Oct 2017 21:37:45 +0200 Subject: [PATCH 1588/2177] libertas: don't write wdev->ssid/_len When joining an IBSS network, wdev->ssid/_len will already be set, so there's no need to write them. In any case, they are internal cfg80211 values, and have very little user-visible impact. Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/libertas/cfg.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 71ba2c8d09b5..bb76707881cd 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1698,9 +1698,6 @@ static void lbs_join_post(struct lbs_private *priv, 0, GFP_KERNEL); cfg80211_put_bss(priv->wdev->wiphy, bss); - memcpy(priv->wdev->ssid, params->ssid, params->ssid_len); - priv->wdev->ssid_len = params->ssid_len; - cfg80211_ibss_joined(priv->dev, bssid, params->chandef.chan, GFP_KERNEL); From c6c65a8411cf0842c9036e30ed1e7a6ccd7fdf41 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Fri, 27 Oct 2017 15:32:49 +0530 Subject: [PATCH 1589/2177] mwifiex: do not transmit in 11N rates when connected in TKIP security Driver is transmitting in 11N rates, when connected to an AP in TKIP security mode. Add a check to disable_11n to fix the issue. 
Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/fw.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 6b765f3f37fd..13cd58e963b3 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -225,7 +225,8 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define IS_11N_ENABLED(priv) ((priv->adapter->config_bands & BAND_GN || \ priv->adapter->config_bands & BAND_AN) && \ - priv->curr_bss_params.bss_descriptor.bcn_ht_cap) + priv->curr_bss_params.bss_descriptor.bcn_ht_cap && \ + !priv->curr_bss_params.bss_descriptor.disable_11n) #define INITIATOR_BIT(DelBAParamSet) (((DelBAParamSet) &\ BIT(DELBA_INITIATOR_POS)) >> DELBA_INITIATOR_POS) From f3ac4e7394a1aa89c5ca49f8a5344a98b56df046 Mon Sep 17 00:00:00 2001 From: Karun Eagalapati Date: Fri, 27 Oct 2017 16:55:55 +0530 Subject: [PATCH 1590/2177] rsi: sdio: add WOWLAN support for S3 suspend state WoWLAN is supported in RS9113 device through GPIO pin2. wowlan config frame is internally sent to firmware in mac80211 suspend handler. Also beacon miss threshold and keep-alive time values are increased to avoid un-necessary disconnection with AP. Signed-off-by: Karun Eagalapati Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_core.c | 6 + drivers/net/wireless/rsi/rsi_91x_mac80211.c | 118 ++++++++++++++++++++ drivers/net/wireless/rsi/rsi_91x_mgmt.c | 60 ++++++++-- drivers/net/wireless/rsi/rsi_91x_sdio.c | 7 ++ drivers/net/wireless/rsi/rsi_common.h | 1 + drivers/net/wireless/rsi/rsi_main.h | 8 +- drivers/net/wireless/rsi/rsi_mgmt.h | 31 ++++- 7 files changed, 220 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index bc18a191aef9..87e023d601c3 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -379,6 +379,12 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) rsi_dbg(ERR_ZONE, "%s: FSM state not open\n", __func__); goto xmit_fail; } + if (common->wow_flags & RSI_WOW_ENABLED) { + rsi_dbg(ERR_ZONE, + "%s: Blocking Tx_packets when WOWLAN is enabled\n", + __func__); + goto xmit_fail; + } info = IEEE80211_SKB_CB(skb); tx_params = (struct skb_info *)info->driver_data; diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 9427c9d7c9c7..c3bd3ca9cf83 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1746,6 +1746,119 @@ static int rsi_mac80211_cancel_roc(struct ieee80211_hw *hw) return 0; } +static const struct wiphy_wowlan_support rsi_wowlan_support = { + .flags = WIPHY_WOWLAN_ANY | + WIPHY_WOWLAN_MAGIC_PKT | + WIPHY_WOWLAN_DISCONNECT | + WIPHY_WOWLAN_GTK_REKEY_FAILURE | + WIPHY_WOWLAN_SUPPORTS_GTK_REKEY | + WIPHY_WOWLAN_EAP_IDENTITY_REQ | + WIPHY_WOWLAN_4WAY_HANDSHAKE, +}; + +static u16 rsi_wow_map_triggers(struct rsi_common *common, + struct cfg80211_wowlan *wowlan) +{ + u16 wow_triggers = 0; + + rsi_dbg(INFO_ZONE, "Mapping wowlan triggers\n"); + + if (wowlan->any) + wow_triggers |= RSI_WOW_ANY; + if (wowlan->magic_pkt) + wow_triggers |= RSI_WOW_MAGIC_PKT; + if (wowlan->disconnect) + wow_triggers |= RSI_WOW_DISCONNECT; + if (wowlan->gtk_rekey_failure || wowlan->eap_identity_req || + wowlan->four_way_handshake) + wow_triggers |= RSI_WOW_GTK_REKEY; + + return wow_triggers; 
+} + +int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan) +{ + struct rsi_common *common = adapter->priv; + u16 triggers = 0; + u16 rx_filter_word = 0; + struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf; + + rsi_dbg(INFO_ZONE, "Config WoWLAN to device\n"); + + if (WARN_ON(!wowlan)) { + rsi_dbg(ERR_ZONE, "WoW triggers not enabled\n"); + return -EINVAL; + } + + triggers = rsi_wow_map_triggers(common, wowlan); + if (!triggers) { + rsi_dbg(ERR_ZONE, "%s:No valid WoW triggers\n", __func__); + return -EINVAL; + } + if (!bss->assoc) { + rsi_dbg(ERR_ZONE, + "Cannot configure WoWLAN (Station not connected)\n"); + common->wow_flags |= RSI_WOW_NO_CONNECTION; + return 0; + } + rsi_dbg(INFO_ZONE, "TRIGGERS %x\n", triggers); + rsi_send_wowlan_request(common, triggers, 1); + + /** + * Increase the beacon_miss threshold & keep-alive timers in + * vap_update frame + */ + rsi_send_vap_dynamic_update(common); + + rx_filter_word = (ALLOW_DATA_ASSOC_PEER | DISALLOW_BEACONS); + rsi_send_rx_filter_frame(common, rx_filter_word); + common->wow_flags |= RSI_WOW_ENABLED; + + return 0; +} + +#ifdef CONFIG_PM +static int rsi_mac80211_suspend(struct ieee80211_hw *hw, + struct cfg80211_wowlan *wowlan) +{ + struct rsi_hw *adapter = hw->priv; + struct rsi_common *common = adapter->priv; + + rsi_dbg(INFO_ZONE, "%s: mac80211 suspend\n", __func__); + mutex_lock(&common->mutex); + if (rsi_config_wowlan(adapter, wowlan)) { + rsi_dbg(ERR_ZONE, "Failed to configure WoWLAN\n"); + mutex_unlock(&common->mutex); + return 1; + } + mutex_unlock(&common->mutex); + + return 0; +} + +static int rsi_mac80211_resume(struct ieee80211_hw *hw) +{ + u16 rx_filter_word = 0; + struct rsi_hw *adapter = hw->priv; + struct rsi_common *common = adapter->priv; + + common->wow_flags = 0; + + rsi_dbg(INFO_ZONE, "%s: mac80211 resume\n", __func__); + + mutex_lock(&common->mutex); + rsi_send_wowlan_request(common, 0, 0); + + rx_filter_word = (ALLOW_DATA_ASSOC_PEER | ALLOW_CTRL_ASSOC_PEER | + ALLOW_MGMT_ASSOC_PEER); + rsi_send_rx_filter_frame(common, rx_filter_word); + mutex_unlock(&common->mutex); + + return 0; +} + +#endif + static const struct ieee80211_ops mac80211_ops = { .tx = rsi_mac80211_tx, .start = rsi_mac80211_start, @@ -1767,6 +1880,10 @@ static const struct ieee80211_ops mac80211_ops = { .rfkill_poll = rsi_mac80211_rfkill_poll, .remain_on_channel = rsi_mac80211_roc, .cancel_remain_on_channel = rsi_mac80211_cancel_roc, +#ifdef CONFIG_PM + .suspend = rsi_mac80211_suspend, + .resume = rsi_mac80211_resume, +#endif }; /** @@ -1850,6 +1967,7 @@ int rsi_mac80211_attach(struct rsi_common *common) wiphy->features |= NL80211_FEATURE_INACTIVITY_TIMER; wiphy->reg_notifier = rsi_reg_notify; + wiphy->wowlan = &rsi_wowlan_support; wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); /* Wi-Fi direct parameters */ diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 4b94190c9797..1446ee3581d0 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1094,9 +1094,18 @@ int rsi_send_vap_dynamic_update(struct rsi_common *common) dynamic_frame->desc_dword0.frame_type = VAP_DYNAMIC_UPDATE; dynamic_frame->desc_dword2.pkt_info = cpu_to_le32(common->rts_threshold); - /* Beacon miss threshold */ - dynamic_frame->frame_body.keep_alive_period = + + if (common->wow_flags & RSI_WOW_ENABLED) { + /* Beacon miss threshold */ + dynamic_frame->desc_dword3.token = + cpu_to_le16(RSI_BCN_MISS_THRESHOLD); + 
dynamic_frame->frame_body.keep_alive_period = + cpu_to_le16(RSI_WOW_KEEPALIVE); + } else { + dynamic_frame->frame_body.keep_alive_period = cpu_to_le16(RSI_DEF_KEEPALIVE); + } + dynamic_frame->desc_dword3.sta_id = 0; /* vap id */ skb_put(skb, sizeof(struct rsi_dynamic_s)); @@ -1340,13 +1349,12 @@ void rsi_inform_bss_status(struct rsi_common *common, } else { if (opmode == RSI_OPMODE_STA) common->hw_data_qs_blocked = true; - rsi_hal_send_sta_notify_frame(common, - opmode, - STA_DISCONNECTED, - addr, - qos_enable, - aid, sta_id, - vif); + + if (!(common->wow_flags & RSI_WOW_ENABLED)) + rsi_hal_send_sta_notify_frame(common, opmode, + STA_DISCONNECTED, addr, + qos_enable, aid, sta_id, + vif); if (opmode == RSI_OPMODE_STA) rsi_send_block_unblock_frame(common, true); } @@ -1589,6 +1597,40 @@ static int rsi_send_beacon(struct rsi_common *common) return 0; } +int rsi_send_wowlan_request(struct rsi_common *common, u16 flags, + u16 sleep_status) +{ + struct rsi_wowlan_req *cmd_frame; + struct sk_buff *skb; + u8 length; + + rsi_dbg(ERR_ZONE, "%s: Sending wowlan request frame\n", __func__); + + length = sizeof(*cmd_frame); + skb = dev_alloc_skb(length); + if (!skb) + return -ENOMEM; + memset(skb->data, 0, length); + cmd_frame = (struct rsi_wowlan_req *)skb->data; + + rsi_set_len_qno(&cmd_frame->desc.desc_dword0.len_qno, + (length - FRAME_DESC_SZ), + RSI_WIFI_MGMT_Q); + cmd_frame->desc.desc_dword0.frame_type = WOWLAN_CONFIG_PARAMS; + cmd_frame->host_sleep_status = sleep_status; + if (common->secinfo.security_enable && + common->secinfo.gtk_cipher) + flags |= RSI_WOW_GTK_REKEY; + if (sleep_status) + cmd_frame->wow_flags = flags; + rsi_dbg(INFO_ZONE, "Host_Sleep_Status : %d Flags : %d\n", + cmd_frame->host_sleep_status, cmd_frame->wow_flags); + + skb_put(skb, length); + + return rsi_send_internal_mgmt_frame(common, skb); +} + /** * rsi_handle_ta_confirm_type() - This function handles the confirm frames. * @common: Pointer to the driver private structure. 
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index b3f8006c0e9b..fa6af7be61f4 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -1124,6 +1124,8 @@ static int rsi_sdio_enable_interrupts(struct sdio_func *pfunc) { u8 data; int ret; + struct rsi_hw *adapter = sdio_get_drvdata(pfunc); + struct rsi_common *common = adapter->priv; sdio_claim_host(pfunc); ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); @@ -1143,6 +1145,11 @@ static int rsi_sdio_enable_interrupts(struct sdio_func *pfunc) goto done; } + if ((common->wow_flags & RSI_WOW_ENABLED) && + (common->wow_flags & RSI_WOW_NO_CONNECTION)) + rsi_dbg(ERR_ZONE, + "##### Device can not wake up through WLAN\n"); + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); if (ret < 0) { rsi_dbg(ERR_ZONE, diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index 29acaea3636d..70b8b4b49d04 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -83,6 +83,7 @@ u16 rsi_get_connected_channel(struct ieee80211_vif *vif); struct rsi_hw *rsi_91x_init(void); void rsi_91x_deinit(struct rsi_hw *adapter); int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len); +int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan); struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr); struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac); void rsi_roc_timeout(struct timer_list *t); diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index a118b7aaeca0..44a199f2116f 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -66,6 +66,8 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...); #define FRAME_DESC_SZ 16 #define MIN_802_11_HDR_LEN 24 #define RSI_DEF_KEEPALIVE 90 +#define RSI_WOW_KEEPALIVE 5 +#define RSI_BCN_MISS_THRESHOLD 24 #define DATA_QUEUE_WATER_MARK 400 #define MIN_DATA_QUEUE_WATER_MARK 300 @@ -108,6 +110,10 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...); ((_q) == VI_Q) ? 
IEEE80211_AC_VI : \ IEEE80211_AC_VO) +/* WoWLAN flags */ +#define RSI_WOW_ENABLED BIT(0) +#define RSI_WOW_NO_CONNECTION BIT(1) + #define RSI_DEV_9113 1 struct version_info { @@ -266,7 +272,7 @@ struct rsi_common { u8 obm_ant_sel_val; int tx_power; u8 ant_in_use; - + u8 wow_flags; u16 beacon_interval; u8 dtim_cnt; diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index e21723013f8d..76337ce817e0 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -45,6 +45,17 @@ #define MAGIC_WORD 0x5A #define WLAN_EEPROM_RFTYPE_ADDR 424 +/*WOWLAN RESUME WAKEUP TYPES*/ +#define RSI_UNICAST_MAGIC_PKT BIT(0) +#define RSI_BROADCAST_MAGICPKT BIT(1) +#define RSI_EAPOL_PKT BIT(2) +#define RSI_DISCONNECT_PKT BIT(3) +#define RSI_HW_BMISS_PKT BIT(4) +#define RSI_INSERT_SEQ_IN_FW BIT(2) + +#define WOW_MAX_FILTERS_PER_LIST 16 +#define WOW_PATTERN_SIZE 256 + /* Receive Frame Types */ #define TA_CONFIRM_TYPE 0x01 #define RX_DOT11_MGMT 0x02 @@ -201,6 +212,13 @@ #define RSI_DATA_DESC_INSERT_TSF BIT(15) #define RSI_DATA_DESC_INSERT_SEQ_NO BIT(2) +#ifdef CONFIG_PM +#define RSI_WOW_ANY BIT(1) +#define RSI_WOW_GTK_REKEY BIT(3) +#define RSI_WOW_MAGIC_PKT BIT(4) +#define RSI_WOW_DISCONNECT BIT(5) +#endif + enum opmode { RSI_OPMODE_UNSUPPORTED = -1, RSI_OPMODE_AP = 0, @@ -262,7 +280,9 @@ enum cmd_frame_type { ANT_SEL_FRAME = 0x20, VAP_DYNAMIC_UPDATE = 0x27, COMMON_DEV_CONFIG = 0x28, - RADIO_PARAMS_UPDATE = 0x29 + RADIO_PARAMS_UPDATE = 0x29, + WOWLAN_CONFIG_PARAMS = 0x2B, + WOWLAN_WAKEUP_REASON = 0xc5 }; struct rsi_mac_frame { @@ -581,6 +601,13 @@ struct rsi_request_ps { __le16 ps_num_dtim_intervals; } __packed; +struct rsi_wowlan_req { + struct rsi_cmd_desc desc; + u8 sourceid[ETH_ALEN]; + u16 wow_flags; + u16 host_sleep_status; +} __packed; + static inline u32 rsi_get_queueno(u8 *addr, u16 offset) { return (le16_to_cpu(*(__le16 *)&addr[offset]) & 0x7000) >> 12; @@ -641,6 +668,8 @@ int rsi_band_check(struct rsi_common *common, struct ieee80211_channel *chan); int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word); int rsi_send_radio_params_update(struct rsi_common *common); int rsi_set_antenna(struct rsi_common *common, u8 antenna); +int rsi_send_wowlan_request(struct rsi_common *common, u16 flags, + u16 sleep_status); int rsi_send_ps_request(struct rsi_hw *adapter, bool enable, struct ieee80211_vif *vif); #endif From b6c8d06c8a6465c054befd416d8b067ad495fa06 Mon Sep 17 00:00:00 2001 From: Karun Eagalapati Date: Fri, 27 Oct 2017 16:55:56 +0530 Subject: [PATCH 1591/2177] rsi: sdio: Add WOWLAN support for S4 hibernate state We are disabling of interrupts from firmware in freeze handler. Also setting power management capability KEEP_MMC_POWER to make device wakeup for WoWLAN trigger. At restore, we observed a device reset on some platforms. Hence reloading of firmware and device initialization is performed. 
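As a point of reference, hibernation exercises a different set of dev_pm_ops callbacks than suspend-to-RAM; a minimal sketch of the slots involved is below (the example_* handlers are placeholders, only the callback names themselves are standard):

static int example_suspend(struct device *dev);	/* S3 entry */
static int example_resume(struct device *dev);	/* S3 exit */
static int example_freeze(struct device *dev);	/* S4: quiesce before the image snapshot */
static int example_thaw(struct device *dev);	/* S4: image written, old kernel resumes I/O */
static int example_restore(struct device *dev);	/* S4: boot kernel has read the image back */

static const struct dev_pm_ops example_pm_ops = {
	.suspend = example_suspend,
	.resume  = example_resume,
	.freeze  = example_freeze,
	.thaw    = example_thaw,
	.restore = example_restore,
};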
Signed-off-by: Karun Eagalapati Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_core.c | 2 + drivers/net/wireless/rsi/rsi_91x_mac80211.c | 9 ++ drivers/net/wireless/rsi/rsi_91x_main.c | 1 + drivers/net/wireless/rsi/rsi_91x_mgmt.c | 7 +- drivers/net/wireless/rsi/rsi_91x_sdio.c | 111 ++++++++++++++++++++ drivers/net/wireless/rsi/rsi_main.h | 4 + drivers/net/wireless/rsi/rsi_sdio.h | 1 + 7 files changed, 134 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index 87e023d601c3..d0d2201830e8 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -276,6 +276,8 @@ void rsi_core_qos_processor(struct rsi_common *common) rsi_dbg(DATA_TX_ZONE, "%s: No More Pkt\n", __func__); break; } + if (common->hibernate_resume) + break; mutex_lock(&common->tx_lock); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index c3bd3ca9cf83..95eb5e63999f 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -17,6 +17,7 @@ #include #include "rsi_debugfs.h" #include "rsi_mgmt.h" +#include "rsi_sdio.h" #include "rsi_common.h" #include "rsi_ps.h" @@ -325,6 +326,11 @@ static int rsi_mac80211_start(struct ieee80211_hw *hw) rsi_dbg(ERR_ZONE, "===> Interface UP <===\n"); mutex_lock(&common->mutex); + if (common->hibernate_resume) { + common->reinit_hw = true; + adapter->host_intf_ops->reinit_device(adapter); + wait_for_completion(&adapter->priv->wlan_init_completion); + } common->iface_down = false; wiphy_rfkill_start_polling(hw->wiphy); rsi_send_rx_filter_frame(common, 0); @@ -1846,6 +1852,9 @@ static int rsi_mac80211_resume(struct ieee80211_hw *hw) rsi_dbg(INFO_ZONE, "%s: mac80211 resume\n", __func__); + if (common->hibernate_resume) + return 0; + mutex_lock(&common->mutex); rsi_send_wowlan_request(common, 0, 0); diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index 2a1fbb7db6c4..0cb8e68bab58 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -263,6 +263,7 @@ struct rsi_hw *rsi_91x_init(void) rsi_default_ps_params(adapter); spin_lock_init(&adapter->ps_lock); timer_setup(&common->roc_timer, rsi_roc_timeout, 0); + init_completion(&common->wlan_init_completion); common->init_done = true; return adapter; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 1446ee3581d0..d38a09f15742 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1761,7 +1761,11 @@ static int rsi_handle_ta_confirm_type(struct rsi_common *common, common->bb_rf_prog_count--; if (!common->bb_rf_prog_count) { common->fsm_state = FSM_MAC_INIT_DONE; - return rsi_mac80211_attach(common); + if (common->reinit_hw) { + complete(&common->wlan_init_completion); + } else { + return rsi_mac80211_attach(common); + } } } else { rsi_dbg(INFO_ZONE, @@ -1839,6 +1843,7 @@ int rsi_mgmt_pkt_recv(struct rsi_common *common, u8 *msg) case TA_CONFIRM_TYPE: return rsi_handle_ta_confirm_type(common, msg); case CARD_READY_IND: + common->hibernate_resume = false; rsi_dbg(FSM_ZONE, "%s: Card ready indication received\n", __func__); return rsi_handle_card_ready(common, msg); diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index fa6af7be61f4..3f683d873472 100644 --- 
a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -880,6 +880,7 @@ static struct rsi_host_intf_ops sdio_host_intf_ops = { .master_reg_read = rsi_sdio_master_reg_read, .master_reg_write = rsi_sdio_master_reg_write, .load_data_master_write = rsi_sdio_load_data_master_write, + .reinit_device = rsi_sdio_reinit_device, }; /** @@ -936,6 +937,8 @@ static int rsi_probe(struct sdio_func *pfunction, return -EIO; } + adapter->priv->hibernate_resume = false; + adapter->priv->reinit_hw = false; return 0; fail: rsi_91x_deinit(adapter); @@ -1198,9 +1201,117 @@ static int rsi_resume(struct device *dev) return 0; } +static int rsi_freeze(struct device *dev) +{ + int ret; + struct sdio_func *pfunction = dev_to_sdio_func(dev); + struct rsi_hw *adapter = sdio_get_drvdata(pfunction); + struct rsi_common *common; + struct rsi_91x_sdiodev *sdev; + + rsi_dbg(INFO_ZONE, "SDIO Bus freeze ===>\n"); + + if (!adapter) { + rsi_dbg(ERR_ZONE, "Device is not ready\n"); + return -ENODEV; + } + common = adapter->priv; + sdev = (struct rsi_91x_sdiodev *)adapter->rsi_dev; + +#ifdef CONFIG_RSI_WOW + if ((common->wow_flags & RSI_WOW_ENABLED) && + (common->wow_flags & RSI_WOW_NO_CONNECTION)) + rsi_dbg(ERR_ZONE, + "##### Device can not wake up through WLAN\n"); +#endif + ret = rsi_sdio_disable_interrupts(pfunction); + + if (sdev->write_fail) + rsi_dbg(INFO_ZONE, "###### Device is not ready #######\n"); + + ret = rsi_set_sdio_pm_caps(adapter); + if (ret) + rsi_dbg(INFO_ZONE, "Setting power management caps failed\n"); + + rsi_dbg(INFO_ZONE, "***** RSI module freezed *****\n"); + + return 0; +} + +static int rsi_thaw(struct device *dev) +{ + struct sdio_func *pfunction = dev_to_sdio_func(dev); + struct rsi_hw *adapter = sdio_get_drvdata(pfunction); + struct rsi_common *common = adapter->priv; + + rsi_dbg(ERR_ZONE, "SDIO Bus thaw =====>\n"); + + common->hibernate_resume = true; + common->fsm_state = FSM_CARD_NOT_READY; + common->iface_down = true; + + rsi_sdio_enable_interrupts(pfunction); + + rsi_dbg(INFO_ZONE, "***** RSI module thaw done *****\n"); + + return 0; +} + +int rsi_sdio_reinit_device(struct rsi_hw *adapter) +{ + struct rsi_91x_sdiodev *sdev = adapter->rsi_dev; + struct sdio_func *pfunction = sdev->pfunction; + int ii; + + for (ii = 0; ii < NUM_SOFT_QUEUES; ii++) + skb_queue_purge(&adapter->priv->tx_queue[ii]); + + /* Initialize device again */ + sdio_claim_host(pfunction); + + sdio_release_irq(pfunction); + rsi_reset_card(pfunction); + + sdio_enable_func(pfunction); + rsi_setupcard(adapter); + rsi_init_sdio_slave_regs(adapter); + sdio_claim_irq(pfunction, rsi_handle_interrupt); + rsi_hal_device_init(adapter); + + sdio_release_host(pfunction); + + return 0; +} + +static int rsi_restore(struct device *dev) +{ + struct sdio_func *pfunction = dev_to_sdio_func(dev); + struct rsi_hw *adapter = sdio_get_drvdata(pfunction); + struct rsi_common *common = adapter->priv; + + rsi_dbg(INFO_ZONE, "SDIO Bus restore ======>\n"); + common->hibernate_resume = true; + common->fsm_state = FSM_FW_NOT_LOADED; + common->iface_down = true; + + adapter->sc_nvifs = 0; + ieee80211_restart_hw(adapter->hw); + +#ifdef CONFIG_RSI_WOW + common->wow_flags = 0; +#endif + common->iface_down = false; + + rsi_dbg(INFO_ZONE, "RSI module restored\n"); + + return 0; +} static const struct dev_pm_ops rsi_pm_ops = { .suspend = rsi_suspend, .resume = rsi_resume, + .freeze = rsi_freeze, + .thaw = rsi_thaw, + .restore = rsi_restore, }; #endif diff --git a/drivers/net/wireless/rsi/rsi_main.h 
b/drivers/net/wireless/rsi/rsi_main.h index 44a199f2116f..8cab630af4a5 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -214,6 +214,7 @@ struct rsi_common { struct rsi_thread tx_thread; struct sk_buff_head tx_queue[NUM_EDCA_QUEUES + 2]; + struct completion wlan_init_completion; /* Mutex declaration */ struct mutex mutex; /* Mutex used for tx thread */ @@ -272,6 +273,8 @@ struct rsi_common { u8 obm_ant_sel_val; int tx_power; u8 ant_in_use; + bool hibernate_resume; + bool reinit_hw; u8 wow_flags; u16 beacon_interval; u8 dtim_cnt; @@ -362,5 +365,6 @@ struct rsi_host_intf_ops { int (*load_data_master_write)(struct rsi_hw *adapter, u32 addr, u32 instructions_size, u16 block_size, u8 *fw); + int (*reinit_device)(struct rsi_hw *adapter); }; #endif diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h index 49c549ba6682..8fbf90eb7b42 100644 --- a/drivers/net/wireless/rsi/rsi_sdio.h +++ b/drivers/net/wireless/rsi/rsi_sdio.h @@ -131,4 +131,5 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word); void rsi_sdio_ack_intr(struct rsi_hw *adapter, u8 int_bit); int rsi_sdio_determine_event_timeout(struct rsi_hw *adapter); int rsi_sdio_check_buffer_status(struct rsi_hw *adapter, u8 q_num); +int rsi_sdio_reinit_device(struct rsi_hw *adapter); #endif From 063848c3e1558e40879522562aaf905fdcf0d7f1 Mon Sep 17 00:00:00 2001 From: Karun Eagalapati Date: Fri, 27 Oct 2017 16:55:57 +0530 Subject: [PATCH 1592/2177] rsi: sdio: Add WOWLAN support for S5 shutdown state Unlike other power states, WoWLAN configuration does not come from mac80211 for shutdown. Hence configuring the WoWLAN from shut down callback it self. Remaining steps of disabling SDIO interrupts, setting 'MMC_PM_KEEP_POWER' flag are same as other power states. 
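For context, a sketch of the generic SDIO calls behind "setting MMC_PM_KEEP_POWER": check the host capability first, then set the flag so the card stays powered across sleep and can raise the wake. rsi_set_sdio_pm_caps() is assumed to wrap the same two calls; example_keep_power is a placeholder name.

static int example_keep_power(struct sdio_func *func)
{
	mmc_pm_flag_t caps = sdio_get_host_pm_caps(func);

	/* The host controller must be able to keep the card powered. */
	if (!(caps & MMC_PM_KEEP_POWER))
		return -EOPNOTSUPP;

	/* Keep VDD across sleep so the WoWLAN wake can be delivered. */
	return sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER);
}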
Signed-off-by: Karun Eagalapati Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 1 + drivers/net/wireless/rsi/rsi_91x_sdio.c | 31 ++++++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 95eb5e63999f..36c63e953f84 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1822,6 +1822,7 @@ int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan) return 0; } +EXPORT_SYMBOL(rsi_config_wowlan); #ifdef CONFIG_PM static int rsi_mac80211_suspend(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 3f683d873472..3288fb6888b9 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -1218,12 +1218,11 @@ static int rsi_freeze(struct device *dev) common = adapter->priv; sdev = (struct rsi_91x_sdiodev *)adapter->rsi_dev; -#ifdef CONFIG_RSI_WOW if ((common->wow_flags & RSI_WOW_ENABLED) && (common->wow_flags & RSI_WOW_NO_CONNECTION)) rsi_dbg(ERR_ZONE, "##### Device can not wake up through WLAN\n"); -#endif + ret = rsi_sdio_disable_interrupts(pfunction); if (sdev->write_fail) @@ -1257,6 +1256,31 @@ static int rsi_thaw(struct device *dev) return 0; } +static void rsi_shutdown(struct device *dev) +{ + struct sdio_func *pfunction = dev_to_sdio_func(dev); + struct rsi_hw *adapter = sdio_get_drvdata(pfunction); + struct rsi_91x_sdiodev *sdev = + (struct rsi_91x_sdiodev *)adapter->rsi_dev; + struct ieee80211_hw *hw = adapter->hw; + struct cfg80211_wowlan *wowlan = hw->wiphy->wowlan_config; + + rsi_dbg(ERR_ZONE, "SDIO Bus shutdown =====>\n"); + + if (rsi_config_wowlan(adapter, wowlan)) + rsi_dbg(ERR_ZONE, "Failed to configure WoWLAN\n"); + + rsi_sdio_disable_interrupts(sdev->pfunction); + + if (sdev->write_fail) + rsi_dbg(INFO_ZONE, "###### Device is not ready #######\n"); + + if (rsi_set_sdio_pm_caps(adapter)) + rsi_dbg(INFO_ZONE, "Setting power management caps failed\n"); + + rsi_dbg(INFO_ZONE, "***** RSI module shut down *****\n"); +} + int rsi_sdio_reinit_device(struct rsi_hw *adapter) { struct rsi_91x_sdiodev *sdev = adapter->rsi_dev; @@ -1297,9 +1321,7 @@ static int rsi_restore(struct device *dev) adapter->sc_nvifs = 0; ieee80211_restart_hw(adapter->hw); -#ifdef CONFIG_RSI_WOW common->wow_flags = 0; -#endif common->iface_down = false; rsi_dbg(INFO_ZONE, "RSI module restored\n"); @@ -1331,6 +1353,7 @@ static struct sdio_driver rsi_driver = { #ifdef CONFIG_PM .drv = { .pm = &rsi_pm_ops, + .shutdown = rsi_shutdown, } #endif }; From e9931f984dd1e80adb3b5e095ef175fe383bc92d Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 30 Oct 2017 13:13:46 +0300 Subject: [PATCH 1593/2177] qtnfmac: modify full Tx queue error reporting Under heavy load it is normal that h/w Tx queue is almost full all the time and reclaim should be done before transmitting next packet. Warning still should be reported as well as s/w Tx queues should be stopped in the case when reclaim failed. 
Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c index 69131965a298..146e42a132e7 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c +++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c @@ -643,11 +643,11 @@ static int qtnf_tx_queue_ready(struct qtnf_pcie_bus_priv *priv) { if (!CIRC_SPACE(priv->tx_bd_w_index, priv->tx_bd_r_index, priv->tx_bd_num)) { - pr_err_ratelimited("reclaim full Tx queue\n"); qtnf_pcie_data_tx_reclaim(priv); if (!CIRC_SPACE(priv->tx_bd_w_index, priv->tx_bd_r_index, priv->tx_bd_num)) { + pr_warn_ratelimited("reclaim full Tx queue\n"); priv->tx_full_count++; return 0; } From 3dd06cecb1b84c07c231859c40b55d5ac0516349 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 30 Oct 2017 13:13:47 +0300 Subject: [PATCH 1594/2177] qtnfmac: enable registration of more mgmt frames Support registration for more mgmt frame types for debug and monitoring purposes. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 63a540d1216e..64db4082c9d2 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -73,7 +73,10 @@ qtnf_mgmt_stypes[NUM_NL80211_IFTYPES] = { [NL80211_IFTYPE_AP] = { .tx = BIT(IEEE80211_STYPE_ACTION >> 4), .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | - BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4), }, }; @@ -353,6 +356,13 @@ qtnf_mgmt_frame_register(struct wiphy *wiphy, struct wireless_dev *wdev, return; switch (frame_type & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_REASSOC_REQ: + case IEEE80211_STYPE_ASSOC_REQ: + qlink_frame_type = QLINK_MGMT_FRAME_ASSOC_REQ; + break; + case IEEE80211_STYPE_AUTH: + qlink_frame_type = QLINK_MGMT_FRAME_AUTH; + break; case IEEE80211_STYPE_PROBE_REQ: qlink_frame_type = QLINK_MGMT_FRAME_PROBE_REQ; break; From bf024645ac9df78292e65e939b60c9a58e7b9fbb Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 30 Oct 2017 13:13:48 +0300 Subject: [PATCH 1595/2177] qtnfmac: drop nonexistent function declaration Function qtnf_classify_skb_no_mbss has been used for debug during early stage of development. Drop its declaration. 
Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/core.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 44a2cbb19310..49ae700f66f0 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -153,9 +153,6 @@ int qtnf_cmd_send_get_phy_params(struct qtnf_wmac *mac); struct qtnf_wmac *qtnf_core_get_mac(const struct qtnf_bus *bus, u8 macid); struct net_device *qtnf_classify_skb(struct qtnf_bus *bus, struct sk_buff *skb); -struct net_device *qtnf_classify_skb_no_mbss(struct qtnf_bus *bus, - struct sk_buff *skb); - void qtnf_virtual_intf_cleanup(struct net_device *ndev); void qtnf_netdev_updown(struct net_device *ndev, bool up); From c35c0d54a77d0ff9c101cb3f4e975793f8cb7067 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 30 Oct 2017 13:13:49 +0300 Subject: [PATCH 1596/2177] qtnfmac: modify full Tx queue recovery Current recovery approach is to wake s/w Tx queues for skb->dev netdevice. However this approach doesn't cover the case when h/w queue is full of packets from a single wireless interface. Suppose xmit attempt from the second wireless interface fails due to failed reclaim. Then the second interface will not have a chance to recover even if subsequent reclaims succeed. Possible solution is to attempt to wake all the s/w queues belonging to driver interfaces. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/core.c | 27 +++++++++++++++++++ drivers/net/wireless/quantenna/qtnfmac/core.h | 1 + .../wireless/quantenna/qtnfmac/pearl/pcie.c | 13 +++++---- .../quantenna/qtnfmac/pearl/pcie_bus_priv.h | 1 + 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index aa7f146278a7..6a6e5ffb0348 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -618,6 +618,33 @@ out: } EXPORT_SYMBOL_GPL(qtnf_classify_skb); +void qtnf_wake_all_queues(struct net_device *ndev) +{ + struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev); + struct qtnf_wmac *mac; + struct qtnf_bus *bus; + int macid; + int i; + + if (unlikely(!vif || !vif->mac || !vif->mac->bus)) + return; + + bus = vif->mac->bus; + + for (macid = 0; macid < QTNF_MAX_MAC; macid++) { + if (!(bus->hw_info.mac_bitmap & BIT(macid))) + continue; + + mac = bus->mac[macid]; + for (i = 0; i < QTNF_MAX_INTF; i++) { + vif = &mac->iflist[i]; + if (vif->netdev && netif_queue_stopped(vif->netdev)) + netif_tx_wake_all_queues(vif->netdev); + } + } +} +EXPORT_SYMBOL_GPL(qtnf_wake_all_queues); + MODULE_AUTHOR("Quantenna Communications"); MODULE_DESCRIPTION("Quantenna 802.11 wireless LAN FullMAC driver."); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 49ae700f66f0..da2c24e2271d 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -153,6 +153,7 @@ int qtnf_cmd_send_get_phy_params(struct qtnf_wmac *mac); struct qtnf_wmac *qtnf_core_get_mac(const struct qtnf_bus *bus, u8 macid); struct net_device *qtnf_classify_skb(struct qtnf_bus *bus, struct sk_buff *skb); +void qtnf_wake_all_queues(struct net_device *ndev); void qtnf_virtual_intf_cleanup(struct net_device *ndev); void qtnf_netdev_updown(struct 
net_device *ndev, bool up); diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c index 146e42a132e7..7e487622d87d 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c +++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c @@ -617,9 +617,10 @@ static void qtnf_pcie_data_tx_reclaim(struct qtnf_pcie_bus_priv *priv) if (skb->dev) { skb->dev->stats.tx_packets++; skb->dev->stats.tx_bytes += skb->len; - - if (netif_queue_stopped(skb->dev)) - netif_wake_queue(skb->dev); + if (unlikely(priv->tx_stopped)) { + qtnf_wake_all_queues(skb->dev); + priv->tx_stopped = 0; + } } dev_kfree_skb_any(skb); @@ -669,8 +670,10 @@ static int qtnf_pcie_data_tx(struct qtnf_bus *bus, struct sk_buff *skb) spin_lock_irqsave(&priv->tx0_lock, flags); if (!qtnf_tx_queue_ready(priv)) { - if (skb->dev) - netif_stop_queue(skb->dev); + if (skb->dev) { + netif_tx_stop_all_queues(skb->dev); + priv->tx_stopped = 1; + } spin_unlock_irqrestore(&priv->tx0_lock, flags); return NETDEV_TX_BUSY; diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h index 86ac1ccedb52..397875a50fc2 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h +++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h @@ -37,6 +37,7 @@ struct qtnf_pcie_bus_priv { /* lock for tx0 operations */ spinlock_t tx0_lock; u8 msi_enabled; + u8 tx_stopped; int mps; struct workqueue_struct *workqueue; From db5c6d4a9b92f0bc64cedb667043af23a81dae7c Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 13:13:50 +0300 Subject: [PATCH 1597/2177] qtnfmac: advertise support of inactivity timeout Wireless device may implement a logic to kick-out STA due to inactivity for a certain period of time. This feature needs to be advertised to higher layers if supported. Timeout value is still taken from parameters to START_AP command, nothing changes here. 
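For reference, the timeout value itself arrives from cfg80211 together with the other start_ap parameters; a rough sketch of that flow follows (the cfg80211 field name is taken from mainline, and the forwarding into QLINK_CMD_START_AP is paraphrased, not the exact qtnfmac code):

static int example_start_ap(struct wiphy *wiphy, struct net_device *dev,
			    struct cfg80211_ap_settings *settings)
{
	/* Seconds of STA inactivity before the kick-out; 0 means disabled. */
	int inactivity_sec = settings->inactivity_timeout;

	/* ... pack inactivity_sec into the QLINK_CMD_START_AP payload ... */
	return 0;
}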
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 5 ++++- drivers/net/wireless/quantenna/qtnfmac/commands.c | 5 +++-- drivers/net/wireless/quantenna/qtnfmac/qlink.h | 11 ++++++++++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 64db4082c9d2..e70f5bd5e498 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -956,7 +956,10 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac) ether_addr_copy(wiphy->perm_addr, mac->macaddr); - if (hw_info->hw_capab & QLINK_HW_SUPPORTS_REG_UPDATE) { + if (hw_info->hw_capab & QLINK_HW_CAPAB_STA_INACT_TIMEOUT) + wiphy->features |= NL80211_FEATURE_INACTIVITY_TIMER; + + if (hw_info->hw_capab & QLINK_HW_CAPAB_REG_UPDATE) { wiphy->regulatory_flags |= REGULATORY_STRICT_REG | REGULATORY_CUSTOM_REG; wiphy->reg_notifier = qtnf_cfg80211_reg_notifier; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index babdc600c193..b81f81bd1411 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -975,10 +975,11 @@ qtnf_cmd_resp_proc_hw_info(struct qtnf_bus *bus, return -EINVAL; } - pr_info("fw_version=%d, MACs map %#x, alpha2=\"%c%c\", chains Tx=%u Rx=%u\n", + pr_info("fw_version=%d, MACs map %#x, alpha2=\"%c%c\", chains Tx=%u Rx=%u, capab=0x%x\n", hwinfo->fw_ver, hwinfo->mac_bitmap, hwinfo->rd->alpha2[0], hwinfo->rd->alpha2[1], - hwinfo->total_tx_chain, hwinfo->total_rx_chain); + hwinfo->total_tx_chain, hwinfo->total_rx_chain, + hwinfo->hw_capab); return 0; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 7b313d38c30b..0f582782682f 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -61,8 +61,17 @@ struct qlink_msg_header { /* Generic definitions of data and information carried in QLINK messages */ +/** + * enum qlink_hw_capab - device capabilities. + * + * @QLINK_HW_CAPAB_REG_UPDATE: device can update it's regulatory region. + * @QLINK_HW_CAPAB_STA_INACT_TIMEOUT: device implements a logic to kick-out + * associated STAs due to inactivity. Inactivity timeout period is taken + * from QLINK_CMD_START_AP parameters. + */ enum qlink_hw_capab { - QLINK_HW_SUPPORTS_REG_UPDATE = BIT(0), + QLINK_HW_CAPAB_REG_UPDATE = BIT(0), + QLINK_HW_CAPAB_STA_INACT_TIMEOUT = BIT(1), }; enum qlink_phy_mode { From 0338b1b393ec7910898e8f7b25b3bf31a7282e16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ronald=20Tschal=C3=A4r?= Date: Wed, 25 Oct 2017 22:15:19 -0700 Subject: [PATCH 1598/2177] Bluetooth: hci_ldisc: Fix another race when closing the tty. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following race condition still existed: P1 P2 cancel_work_sync() hci_uart_tx_wakeup() hci_uart_write_work() hci_uart_dequeue() clear_bit(HCI_UART_PROTO_READY) hci_unregister_dev(hdev) hci_free_dev(hdev) hu->proto->close(hu) kfree(hu) access to hdev and hu Cancelling the work after clearing the HCI_UART_PROTO_READY bit avoids this as any hci_uart_tx_wakeup() issued after the flag is cleared will detect that and not schedule further work. 
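A simplified sketch (not the exact upstream code) of why the reordering is sufficient: the wakeup path tests HCI_UART_PROTO_READY before queueing the work, so once the bit is cleared under the write side of proto_lock no new write_work can be scheduled, and the cancel_work_sync() that follows is final.

static int example_tx_wakeup(struct hci_uart *hu)
{
	/* In the real driver this runs under the read side of hu->proto_lock. */
	if (!test_bit(HCI_UART_PROTO_READY, &hu->flags))
		return 0;	/* ldisc is closing: never queue new write_work */

	schedule_work(&hu->write_work);
	return 0;
}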
Signed-off-by: Ronald Tschalär Reviewed-by: Lukas Wunner Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_ldisc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 31def781a562..c823914b3a80 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -523,13 +523,13 @@ static void hci_uart_tty_close(struct tty_struct *tty) if (hdev) hci_uart_close(hdev); - cancel_work_sync(&hu->write_work); - if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) { percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); percpu_up_write(&hu->proto_lock); + cancel_work_sync(&hu->write_work); + if (hdev) { if (test_bit(HCI_UART_REGISTERED, &hu->flags)) hci_unregister_dev(hdev); From aa2bc739ef4a181a7589eb009be96a870cc1788f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 30 Oct 2017 13:46:47 -0700 Subject: [PATCH 1599/2177] net: filter: remove unused variable and fix warning bpf_getsockopt bpf call sets the ret variable to zero and never changes it. What's worse in case CONFIG_INET is not selected the variable is completely unused generating a warning. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Lawrence Brakmo Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 721c30889033..a0112168d6f9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3288,7 +3288,6 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { struct sock *sk = bpf_sock->sk; - int ret = 0; if (!sk_fullsock(sk)) goto err_clear; @@ -3308,7 +3307,7 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, } else { goto err_clear; } - return ret; + return 0; #endif err_clear: memset(optval, 0, optlen); From 254d152a216750f508442cc3e502130e5f539ab4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Oct 2017 10:17:38 +0200 Subject: [PATCH 1600/2177] i40e: mark PM functions as __maybe_unused A cleanup of the PM code left an incorrect #ifdef in place, leading to a harmless build warning: drivers/net/ethernet/intel/i40e/i40e_main.c:12223:12: error: 'i40e_resume' defined but not used [-Werror=unused-function] drivers/net/ethernet/intel/i40e/i40e_main.c:12185:12: error: 'i40e_suspend' defined but not used [-Werror=unused-function] It's easier to use __maybe_unused attributes here, since you can't pick the wrong one. 
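The pattern in general terms, as a small sketch (the example_* names are placeholders, not i40e code):

/* When CONFIG_PM_SLEEP is disabled, SET_SYSTEM_SLEEP_PM_OPS() expands to
 * nothing, the callbacks become unreferenced, and __maybe_unused lets the
 * compiler drop them silently; no #ifdef around the functions is needed.
 */
static int __maybe_unused example_suspend(struct device *dev)
{
	return 0;	/* quiesce the device */
}

static int __maybe_unused example_resume(struct device *dev)
{
	return 0;	/* bring the device back up */
}

static const struct dev_pm_ops example_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(example_suspend, example_resume)
};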
Fixes: 0e5d3da40055 ("i40e: use newer generic PM support instead of legacy PM callbacks") Signed-off-by: Arnd Bergmann Tested-by: Andrew Bowers Acked-by: Jacob Keller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 54ff34faca37..2a087319c09b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9557,7 +9557,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) return 0; } -#ifdef CONFIG_PM /** * i40e_restore_interrupt_scheme - Restore the interrupt scheme * @pf: private board data structure @@ -9606,7 +9605,6 @@ err_unwind: return err; } -#endif /* CONFIG_PM */ /** * i40e_setup_misc_vector - Setup the misc vector to handle non queue events @@ -13285,12 +13283,11 @@ static void i40e_shutdown(struct pci_dev *pdev) } } -#ifdef CONFIG_PM /** * i40e_suspend - PM callback for moving to D3 * @dev: generic device information structure **/ -static int i40e_suspend(struct device *dev) +static int __maybe_unused i40e_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); @@ -13328,7 +13325,7 @@ static int i40e_suspend(struct device *dev) * i40e_resume - PM callback for waking up from D3 * @dev: generic device information structure **/ -static int i40e_resume(struct device *dev) +static int __maybe_unused i40e_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); @@ -13360,8 +13357,6 @@ static int i40e_resume(struct device *dev) return 0; } -#endif /* CONFIG_PM */ - static const struct pci_error_handlers i40e_err_handler = { .error_detected = i40e_pci_error_detected, .slot_reset = i40e_pci_error_slot_reset, @@ -13377,11 +13372,9 @@ static struct pci_driver i40e_driver = { .id_table = i40e_pci_tbl, .probe = i40e_probe, .remove = i40e_remove, -#ifdef CONFIG_PM .driver = { .pm = &i40e_pm_ops, }, -#endif /* CONFIG_PM */ .shutdown = i40e_shutdown, .err_handler = &i40e_err_handler, .sriov_configure = i40e_pci_sriov_configure, From 3e6b1cf7613393a7c8648133daf0e853c4e05220 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 10 Oct 2017 14:56:58 -0700 Subject: [PATCH 1601/2177] i40e: only redistribute MSI-X vectors when needed Whether or not there are vectors_left, we only need to redistribute our vectors if we didn't get as many as we requested. With the current check, the code will try to redistribute even if we did in fact get all the vectors we requested - this can happen when we have more CPUs than we do vectors. This restores an earlier check to be sure we only redistribute if we didn't get the full count we requested. 
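In outline, the corrected decision looks like this (pci_enable_msix_range() is the underlying allocator; the i40e wrapper and the bookkeeping around it are paraphrased):

v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
				 I40E_MIN_MSIX, v_budget);
if (v_actual < 0) {
	/* MSI-X not available: fall back to MSI or legacy interrupts */
} else if (v_actual != v_budget) {
	/* granted fewer vectors than requested: redistribute them */
} else {
	/* full allocation, even if vectors_left happens to be zero */
}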
Fixes: 4ce20abc645f (i40e: fix MSI-X vector redistribution if hw limit is reached) Signed-off-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2a087319c09b..1cf9ba2d9a41 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9346,7 +9346,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_qps = 1; pf->num_lan_msix = 1; - } else if (!vectors_left) { + } else if (v_actual != v_budget) { /* If we have limited resources, we will start with no vectors * for the special features and then allocate vectors to some * of these features based on the policy and at the end disable @@ -9355,7 +9355,8 @@ static int i40e_init_msix(struct i40e_pf *pf) int vec; dev_info(&pf->pdev->dev, - "MSI-X vector limit reached, attempting to redistribute vectors\n"); + "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n", + v_actual, v_budget); /* reserve the misc vector */ vec = v_actual - 1; From aa250f1186319f1e0b9b4a1d99022fe32251b8b6 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 21 Oct 2017 17:51:50 -0700 Subject: [PATCH 1602/2177] i40e/i40evf: Revert "i40e/i40evf: bump tail only in multiples of 8" This reverts commit 11f29003d6376fb123b7c3779dba49bb56fb0815. I am reverting this as I am fairly certain this can result in a memory leak when combined with the current page recycling scheme. Specifically we end up attempting to allocate fewer buffers than we recycled and this results in us rewinding the next to alloc pointer which leads to leaks when we overwrite the rx_buffer_info when processing the next frame. Fixes: 11f29003d637 ("i40e/i40evf: bump tail only in multiples of 8") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 9 --------- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 9 --------- 2 files changed, 18 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index c5cd233c8fee..d6d352a6e6ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1407,15 +1407,6 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; - /* Hardware only fetches new descriptors in cache lines of 8, - * essentially ignoring the lower 3 bits of the tail register. We want - * to ensure our tail writes are aligned to avoid unnecessary work. We - * can't simply round down the cleaned count, since we might fail to - * allocate some buffers. What we really want is to ensure that - * next_to_used + cleaned_count produces an aligned value. 
- */ - cleaned_count -= (ntu + cleaned_count) & 0x7; - /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) return false; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 6806ada11490..fe817e2b6fef 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -711,15 +711,6 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; - /* Hardware only fetches new descriptors in cache lines of 8, - * essentially ignoring the lower 3 bits of the tail register. We want - * to ensure our tail writes are aligned to avoid unnecessary work. We - * can't simply round down the cleaned count, since we might fail to - * allocate some buffers. What we really want is to ensure that - * next_to_used + cleaned_count produces an aligned value. - */ - cleaned_count -= (ntu + cleaned_count) & 0x7; - /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) return false; From 384c181e3780ddc45e70483e29d84495b484730d Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 27 Oct 2017 02:35:34 -0700 Subject: [PATCH 1603/2177] net: sched: Identify hardware traffic classes using classid This patch offloads the classid to hardware and uses the classid reserved in the range :ffe0 - :ffef to identify hardware traffic classes reported via dev->num_tc. tcf_result structure contains the class ID of the class to which the packet belongs and is offloaded to hardware via flower filter. A new helper function is introduced to represent HW traffic classes 0 through 15 using the reserved classid values :ffe0 - :ffef. Signed-off-by: Amritha Nambiar Acked-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/net/pkt_cls.h | 1 + include/net/sch_generic.h | 7 +++++++ net/sched/cls_flower.c | 2 ++ 3 files changed, 10 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bf73e1675519..37c5ef766655 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -666,6 +666,7 @@ struct tc_cls_flower_offload { struct fl_flow_key *mask; struct fl_flow_key *key; struct tcf_exts *exts; + u32 classid; }; enum tc_matchall_command { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 07c179dab478..c23e938f5b19 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -411,6 +411,13 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) return NULL; } +static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid) +{ + u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY; + + return (hwtc < netdev_get_num_tc(dev)) ? 
hwtc : -EINVAL; +} + int qdisc_class_hash_init(struct Qdisc_class_hash *); void qdisc_class_hash_insert(struct Qdisc_class_hash *, struct Qdisc_class_common *); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 35cb6d684e44..c99fa9e5be46 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -241,6 +241,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.mask = mask; cls_flower.key = &f->mkey; cls_flower.exts = &f->exts; + cls_flower.classid = f->res.classid; err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); @@ -266,6 +267,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.exts = &f->exts; + cls_flower.classid = f->res.classid; tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, false); From aa5cb02ae938d450be882adac4023d8116a5acd5 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 27 Oct 2017 02:35:40 -0700 Subject: [PATCH 1604/2177] i40e: Map TCs with the VSI seids Add mapping of TCs with the seids of the channel VSIs. TC0 will be mapped to the main VSI seid and all other TCs are mapped to the seid of the corresponding channel VSI. Signed-off-by: Amritha Nambiar Acked-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index eb017763646d..f3c501efddc8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -739,6 +739,7 @@ struct i40e_vsi { u16 next_base_queue; /* next queue to be used for channel setup */ struct list_head ch_list; + u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS]; void *priv; /* client driver data reference. */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1cf9ba2d9a41..2ff7384cb24b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6100,6 +6100,7 @@ static int i40e_configure_queue_channels(struct i40e_vsi *vsi) int ret = 0, i; /* Create app vsi with the TCs. Main VSI with TC0 is already set up */ + vsi->tc_seid_map[0] = vsi->seid; for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (vsi->tc_config.enabled_tc & BIT(i)) { ch = kzalloc(sizeof(*ch), GFP_KERNEL); @@ -6130,6 +6131,7 @@ static int i40e_configure_queue_channels(struct i40e_vsi *vsi) i, ch->num_queue_pairs); goto err_free; } + vsi->tc_seid_map[i] = ch->seid; } } return ret; From 5efe0c6c2cafa7f458d793c85a1298e713af50e4 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 27 Oct 2017 02:35:45 -0700 Subject: [PATCH 1605/2177] i40e: Cloud filter mode for set_switch_config command Add definitions for L4 filters and switch modes based on cloud filters modes and extend the set switch config command to include the additional cloud filter mode. 
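To make the mode-byte layout concrete, one hedged example of composing it from the defines added in this patch (whether a particular configuration wants exactly this combination is not implied; flags and valid_flags come from the surrounding switch-config code):

/* L4-port-only cloud filter mode, matching TCP destination port
 * (bit 6 left clear selects destination port).
 */
u8 mode = I40E_AQ_SET_SWITCH_BIT7_VALID |	/* bit 7: apply bits 6:0 */
	  I40E_AQ_SET_SWITCH_L4_TYPE_TCP |	/* bits 5:4: TCP */
	  I40E_AQ_SET_SWITCH_MODE_L4_PORT;	/* bits 3:0: L4 port only */

ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, mode, NULL);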
Signed-off-by: Amritha Nambiar Signed-off-by: Kiran Patil Acked-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 30 ++++++++++++++++++- drivers/net/ethernet/intel/i40e/i40e_common.c | 4 ++- .../net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- .../net/ethernet/intel/i40e/i40e_prototype.h | 2 +- drivers/net/ethernet/intel/i40e/i40e_type.h | 9 ++++++ 6 files changed, 44 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 6a5db1b33fa2..444447d1a93f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -790,7 +790,35 @@ struct i40e_aqc_set_switch_config { */ __le16 first_tag; __le16 second_tag; - u8 reserved[6]; + /* Next byte is split into following: + * Bit 7 : 0 : No action, 1: Switch to mode defined by bits 6:0 + * Bit 6 : 0 : Destination Port, 1: source port + * Bit 5..4 : L4 type + * 0: rsvd + * 1: TCP + * 2: UDP + * 3: Both TCP and UDP + * Bits 3:0 Mode + * 0: default mode + * 1: L4 port only mode + * 2: non-tunneled mode + * 3: tunneled mode + */ +#define I40E_AQ_SET_SWITCH_BIT7_VALID 0x80 + +#define I40E_AQ_SET_SWITCH_L4_SRC_PORT 0x40 + +#define I40E_AQ_SET_SWITCH_L4_TYPE_RSVD 0x00 +#define I40E_AQ_SET_SWITCH_L4_TYPE_TCP 0x10 +#define I40E_AQ_SET_SWITCH_L4_TYPE_UDP 0x20 +#define I40E_AQ_SET_SWITCH_L4_TYPE_BOTH 0x30 + +#define I40E_AQ_SET_SWITCH_MODE_DEFAULT 0x00 +#define I40E_AQ_SET_SWITCH_MODE_L4_PORT 0x01 +#define I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL 0x02 +#define I40E_AQ_SET_SWITCH_MODE_TUNNEL 0x03 + u8 mode; + u8 rsvd5[5]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 8d0ee006606b..a9460e0e9cb7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2407,13 +2407,14 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw, * @hw: pointer to the hardware structure * @flags: bit flag values to set * @valid_flags: which bit flags to set + * @mode: cloud filter mode * @cmd_details: pointer to command details structure or NULL * * Set switch configuration bits **/ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw, u16 flags, - u16 valid_flags, + u16 valid_flags, u8 mode, struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; @@ -2425,6 +2426,7 @@ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw, i40e_aqc_opc_set_switch_config); scfg->flags = cpu_to_le16(flags); scfg->valid_flags = cpu_to_le16(valid_flags); + scfg->mode = mode; if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) { scfg->switch_tag = cpu_to_le16(hw->switch_tag); scfg->first_tag = cpu_to_le16(hw->first_tag); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 9eb618799a30..dc9b8dcf4a1e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4343,7 +4343,7 @@ flags_complete: sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags, - NULL); + 0, NULL); if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) { dev_info(&pf->pdev->dev, "couldn't set switch config bits, err %s aq_err %s\n", diff --git 
a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2ff7384cb24b..128f2595fcd1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12166,7 +12166,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) u16 valid_flags; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; - ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, + ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0, NULL); if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) { dev_info(&pf->pdev->dev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 01502561035c..92869f57b52b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -190,7 +190,7 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw, u16 flags, - u16 valid_flags, + u16 valid_flags, u8 mode, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_request_resource(struct i40e_hw *hw, enum i40e_aq_resources_ids resource, diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 17a99b53acd9..e4e5a0c864b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -283,6 +283,15 @@ struct i40e_hw_capabilities { #define I40E_NVM_IMAGE_TYPE_CLOUD 0x2 #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD 0x3 + /* Cloud filter modes: + * Mode1: Filter on L4 port only + * Mode2: Filter for non-tunneled traffic + * Mode3: Filter for tunnel traffic + */ +#define I40E_CLOUD_FILTER_MODE1 0x6 +#define I40E_CLOUD_FILTER_MODE2 0x7 +#define I40E_CLOUD_FILTER_MODE3 0x8 + u32 management_mode; u32 mng_protocols_over_mctp; #define I40E_MNG_PROTOCOL_PLDM 0x2 From 2c0015238f7d357f179249f101d6ed0327bc642a Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 27 Oct 2017 02:35:51 -0700 Subject: [PATCH 1606/2177] i40e: Admin queue definitions for cloud filters Add new admin queue definitions and extended fields for cloud filter support. Define big buffer for extended general fields in Add/Remove Cloud filters command. 
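As an illustration of how the big buffer element is meant to be used, here is a hypothetical helper (not part of this patch) that fills a "dest IP + L4 port" filter the way a later patch in this series does; it assumes the i40e admin queue headers:

/* Hypothetical sketch: fill a big buffer cloud filter element for a
 * "dest IPv4 + L4 dest port" filter using the structures and defines
 * added by this patch.
 */
static void i40e_example_fill_bb_filter(struct i40e_aqc_cloud_filters_element_bb *bb,
					__be16 dst_port)
{
	memset(bb, 0, sizeof(*bb));

	/* the embedded element selects one of the new custom filter types */
	bb->element.flags = cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT |
					I40E_AQC_ADD_CLOUD_FLAGS_IPV4);

	/* the L4 destination port travels in the extended general fields */
	bb->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
		be16_to_cpu(dst_port);
}

The address or MAC being matched still goes into the embedded element (bb->element), and the command is sent with big_buffer_flag set to I40E_AQC_ADD_CLOUD_CMD_BB.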
Signed-off-by: Amritha Nambiar Signed-off-by: Kiran Patil Signed-off-by: Jingjing Wu Acked-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 107 +++++++++++++++++- .../ethernet/intel/i40evf/i40e_adminq_cmd.h | 107 +++++++++++++++++- 2 files changed, 210 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 444447d1a93f..9f1f5786dcc2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1371,14 +1371,16 @@ struct i40e_aqc_add_remove_cloud_filters { #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT 0 #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK (0x3FF << \ I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT) - u8 reserved2[4]; + u8 big_buffer_flag; +#define I40E_AQC_ADD_CLOUD_CMD_BB 1 + u8 reserved2[3]; __le32 addr_high; __le32 addr_low; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters); -struct i40e_aqc_add_remove_cloud_filters_element_data { +struct i40e_aqc_cloud_filters_element_data { u8 outer_mac[6]; u8 inner_mac[6]; __le16 inner_vlan; @@ -1408,6 +1410,10 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_FILTER_IMAC 0x000A #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC 0x000B #define I40E_AQC_ADD_CLOUD_FILTER_IIP 0x000C +/* 0x0010 to 0x0017 is for custom filters */ +#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT 0x0010 /* Dest IP + L4 Port */ +#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT 0x0011 /* Dest MAC + L4 Port */ +#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT 0x0012 /* Dest MAC + VLAN + L4 Port */ #define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE 0x0080 #define I40E_AQC_ADD_CLOUD_VNK_SHIFT 6 @@ -1442,6 +1448,49 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { u8 response_reserved[7]; }; +I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data); + +/* i40e_aqc_cloud_filters_element_bb is used when + * I40E_AQC_CLOUD_CMD_BB flag is set. 
+ */ +struct i40e_aqc_cloud_filters_element_bb { + struct i40e_aqc_cloud_filters_element_data element; + u16 general_fields[32]; +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0 0 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1 1 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD2 2 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD0 3 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD1 4 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2 5 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0 6 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1 7 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2 8 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0 9 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1 10 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2 11 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD0 12 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD1 13 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD2 14 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0 15 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD1 16 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD2 17 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD3 18 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD4 19 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD5 20 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD6 21 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD7 22 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD0 23 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD1 24 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD2 25 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD3 26 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD4 27 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD5 28 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD6 29 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7 30 +}; + +I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb); + struct i40e_aqc_remove_cloud_filters_completion { __le16 perfect_ovlan_used; __le16 perfect_ovlan_free; @@ -1453,6 +1502,60 @@ struct i40e_aqc_remove_cloud_filters_completion { I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion); +/* Replace filter Command 0x025F + * uses the i40e_aqc_replace_cloud_filters, + * and the generic indirect completion structure + */ +struct i40e_filter_data { + u8 filter_type; + u8 input[3]; +}; + +I40E_CHECK_STRUCT_LEN(4, i40e_filter_data); + +struct i40e_aqc_replace_cloud_filters_cmd { + u8 valid_flags; +#define I40E_AQC_REPLACE_L1_FILTER 0x0 +#define I40E_AQC_REPLACE_CLOUD_FILTER 0x1 +#define I40E_AQC_GET_CLOUD_FILTERS 0x2 +#define I40E_AQC_MIRROR_CLOUD_FILTER 0x4 +#define I40E_AQC_HIGH_PRIORITY_CLOUD_FILTER 0x8 + u8 old_filter_type; + u8 new_filter_type; + u8 tr_bit; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd); + +struct i40e_aqc_replace_cloud_filters_cmd_buf { + u8 data[32]; +/* Filter type INPUT codes*/ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_ENTRIES_MAX 3 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED BIT(7) + +/* Field Vector offsets */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_MAC_DA 0 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_ETH 6 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG 7 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_VLAN 8 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_OVLAN 9 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_IVLAN 10 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TUNNLE_KEY 11 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC 12 +/* big FLU */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IP_DA 14 +/* big FLU */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_OIP_DA 15 + +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_INNER_VLAN 37 + struct 
i40e_filter_data filters[8]; +}; + +I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf); + /* Add Mirror Rule (indirect or direct 0x0260) * Delete Mirror Rule (indirect or direct 0x0261) * note: some rule types (4,5) do not use an external buffer. diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 463e331a70a9..af82c303de7b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1339,14 +1339,16 @@ struct i40e_aqc_add_remove_cloud_filters { #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT 0 #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK (0x3FF << \ I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT) - u8 reserved2[4]; + u8 big_buffer_flag; +#define I40E_AQC_ADD_CLOUD_CMD_BB 1 + u8 reserved2[3]; __le32 addr_high; __le32 addr_low; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters); -struct i40e_aqc_add_remove_cloud_filters_element_data { +struct i40e_aqc_cloud_filters_element_data { u8 outer_mac[6]; u8 inner_mac[6]; __le16 inner_vlan; @@ -1376,6 +1378,10 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_FILTER_IMAC 0x000A #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC 0x000B #define I40E_AQC_ADD_CLOUD_FILTER_IIP 0x000C +/* 0x0010 to 0x0017 is for custom filters */ +#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT 0x0010 /* Dest IP + L4 Port */ +#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT 0x0011 /* Dest MAC + L4 Port */ +#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT 0x0012 /* Dest MAC + VLAN + L4 Port */ #define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE 0x0080 #define I40E_AQC_ADD_CLOUD_VNK_SHIFT 6 @@ -1410,6 +1416,49 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { u8 response_reserved[7]; }; +I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data); + +/* i40e_aqc_cloud_filters_element_bb is used when + * I40E_AQC_ADD_CLOUD_CMD_BB flag is set. 
+ */ +struct i40e_aqc_cloud_filters_element_bb { + struct i40e_aqc_cloud_filters_element_data element; + u16 general_fields[32]; +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0 0 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1 1 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD2 2 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD0 3 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD1 4 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2 5 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0 6 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1 7 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2 8 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0 9 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1 10 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2 11 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD0 12 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD1 13 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD2 14 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0 15 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD1 16 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD2 17 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD3 18 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD4 19 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD5 20 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD6 21 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD7 22 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD0 23 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD1 24 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD2 25 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD3 26 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD4 27 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD5 28 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD6 29 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7 30 +}; + +I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb); + struct i40e_aqc_remove_cloud_filters_completion { __le16 perfect_ovlan_used; __le16 perfect_ovlan_free; @@ -1421,6 +1470,60 @@ struct i40e_aqc_remove_cloud_filters_completion { I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion); +/* Replace filter Command 0x025F + * uses the i40e_aqc_replace_cloud_filters, + * and the generic indirect completion structure + */ +struct i40e_filter_data { + u8 filter_type; + u8 input[3]; +}; + +I40E_CHECK_STRUCT_LEN(4, i40e_filter_data); + +struct i40e_aqc_replace_cloud_filters_cmd { + u8 valid_flags; +#define I40E_AQC_REPLACE_L1_FILTER 0x0 +#define I40E_AQC_REPLACE_CLOUD_FILTER 0x1 +#define I40E_AQC_GET_CLOUD_FILTERS 0x2 +#define I40E_AQC_MIRROR_CLOUD_FILTER 0x4 +#define I40E_AQC_HIGH_PRIORITY_CLOUD_FILTER 0x8 + u8 old_filter_type; + u8 new_filter_type; + u8 tr_bit; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd); + +struct i40e_aqc_replace_cloud_filters_cmd_buf { + u8 data[32]; +/* Filter type INPUT codes*/ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_ENTRIES_MAX 3 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED BIT(7) + +/* Field Vector offsets */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_MAC_DA 0 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_ETH 6 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG 7 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_VLAN 8 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_OVLAN 9 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_IVLAN 10 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TUNNLE_KEY 11 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC 12 +/* big FLU */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IP_DA 14 +/* big FLU */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_OIP_DA 15 + +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_INNER_VLAN 37 + struct 
i40e_filter_data filters[8]; +}; + +I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf); + /* Add Mirror Rule (indirect or direct 0x0260) * Delete Mirror Rule (indirect or direct 0x0261) * note: some rule types (4,5) do not use an external buffer. From aaf66502b624784c2ff3cd54834e2598d1c40027 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 27 Oct 2017 02:35:56 -0700 Subject: [PATCH 1607/2177] i40e: Clean up of cloud filters Introduce the cloud filter data structure and cleanup of cloud filters associated with the device. Signed-off-by: Amritha Nambiar Acked-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 9 ++++++++ drivers/net/ethernet/intel/i40e/i40e_main.c | 24 +++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index f3c501efddc8..b938bb4a70f7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -253,6 +253,12 @@ struct i40e_fdir_filter { u32 fd_id; }; +struct i40e_cloud_filter { + struct hlist_node cloud_node; + unsigned long cookie; + u16 seid; /* filter control */ +}; + #define I40E_ETH_P_LLDP 0x88cc #define I40E_DCB_PRIO_TYPE_STRICT 0 @@ -420,6 +426,9 @@ struct i40e_pf { struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; u16 pending_udp_bitmap; + struct hlist_head cloud_filter_list; + u16 num_cloud_filters; + enum i40e_interrupt_policy int_policy; u16 rx_itr_default; u16 tx_itr_default; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 128f2595fcd1..fbe34500ded2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6936,6 +6936,26 @@ static void i40e_fdir_filter_exit(struct i40e_pf *pf) I40E_L3_SRC_MASK | I40E_L3_DST_MASK); } +/** + * i40e_cloud_filter_exit - Cleans up the cloud filters + * @pf: Pointer to PF + * + * This function destroys the hlist where all the cloud filters + * were saved. 
+ **/ +static void i40e_cloud_filter_exit(struct i40e_pf *pf) +{ + struct i40e_cloud_filter *cfilter; + struct hlist_node *node; + + hlist_for_each_entry_safe(cfilter, node, + &pf->cloud_filter_list, cloud_node) { + hlist_del(&cfilter->cloud_node); + kfree(cfilter); + } + pf->num_cloud_filters = 0; +} + /** * i40e_close - Disables a network interface * @netdev: network interface device structure @@ -12196,6 +12216,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]); if (!vsi) { dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n"); + i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); return -EAGAIN; } @@ -13030,6 +13051,8 @@ static void i40e_remove(struct pci_dev *pdev) if (pf->vsi[pf->lan_vsi]) i40e_vsi_release(pf->vsi[pf->lan_vsi]); + i40e_cloud_filter_exit(pf); + /* remove attached clients */ if (pf->flags & I40E_FLAG_IWARP_ENABLED) { ret_code = i40e_lan_del_device(pf); @@ -13261,6 +13284,7 @@ static void i40e_shutdown(struct pci_dev *pdev) del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); + i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); /* Client close must be called explicitly here because the timer From 2f4b411a3d6766e6362ffbf00e0495a2dfe92507 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 27 Oct 2017 02:36:01 -0700 Subject: [PATCH 1608/2177] i40e: Enable cloud filters via tc-flower
This patch enables tc-flower based hardware offloads. A tc flower filter provided by the kernel is configured as a driver-specific cloud filter. The patch implements the functions and admin queue commands needed to support cloud filters in the driver and adds cloud filters to configure these tc-flower filters. The classification function of the filter is to direct matched packets to a traffic class. The hardware traffic class is set based on the classid reserved in the range :ffe0 - :ffef.
Match Dst MAC and route to TC0: prio 1 flower dst_mac 3c:fd:fe:a0:d6:70 skip_sw\ hw_tc 1
Match Dst IPv4, Dst Port and route to TC1: prio 2 flower dst_ip 192.168.3.5/32\ ip_proto udp dst_port 25 skip_sw\ hw_tc 2
Match Dst IPv6, Dst Port and route to TC1: prio 3 flower dst_ip fe8::200:1\ ip_proto udp dst_port 66 skip_sw\ hw_tc 2
Delete tc flower filter: an illustrative command is shown after this description.
Flow Director Sideband is disabled while configuring cloud filters via tc-flower and until any cloud filter exists.
Unsupported matches when cloud filters are added using the enhanced big buffer cloud filter mode of the underlying switch include: 1. Source port and source IP. 2. Combined MAC address and IP fields. 3. Not specifying an L4 port.
These filter matches can, however, be used to redirect traffic to the main VSI (tc 0), which does not require the enhanced big buffer cloud filter support.
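For completeness, a full command sequence corresponding to the match examples above might look as follows (the device name eth0, the priorities and the handle are illustrative, and a mqprio/channel configuration with the relevant traffic classes is assumed to already be in place):

	# tc qdisc add dev eth0 ingress
	# ethtool -K eth0 hw-tc-offload on
	# tc filter add dev eth0 protocol ip parent ffff: \
		prio 1 flower dst_mac 3c:fd:fe:a0:d6:70 skip_sw \
		hw_tc 1
	# tc filter add dev eth0 protocol ip parent ffff: \
		prio 2 flower dst_ip 192.168.3.5/32 \
		ip_proto udp dst_port 25 skip_sw \
		hw_tc 2
	# tc filter add dev eth0 protocol ipv6 parent ffff: \
		prio 3 flower dst_ip fe8::200:1 \
		ip_proto udp dst_port 66 skip_sw \
		hw_tc 2
	# tc filter del dev eth0 parent ffff: prio 1 handle 0x1 flower

The hw_tc argument selects a classid in the reserved :ffe0 - :ffef range introduced earlier in this series, which the driver translates back into a hardware traffic class for the cloud filter.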
Signed-off-by: Amritha Nambiar Signed-off-by: Kiran Patil Signed-off-by: Anjali Singhai Jain Signed-off-by: Jingjing Wu Acked-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 54 +- .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 3 + drivers/net/ethernet/intel/i40e/i40e_common.c | 189 ++++ drivers/net/ethernet/intel/i40e/i40e_main.c | 957 +++++++++++++++++- .../net/ethernet/intel/i40e/i40e_prototype.h | 16 + drivers/net/ethernet/intel/i40e/i40e_type.h | 1 + .../ethernet/intel/i40evf/i40e_adminq_cmd.h | 3 + 7 files changed, 1192 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b938bb4a70f7..5829715fa342 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -55,6 +55,8 @@ #include #include #include +#include +#include #include "i40e_type.h" #include "i40e_prototype.h" #include "i40e_client.h" @@ -253,10 +255,56 @@ struct i40e_fdir_filter { u32 fd_id; }; +#define I40E_CLOUD_FIELD_OMAC 0x01 +#define I40E_CLOUD_FIELD_IMAC 0x02 +#define I40E_CLOUD_FIELD_IVLAN 0x04 +#define I40E_CLOUD_FIELD_TEN_ID 0x08 +#define I40E_CLOUD_FIELD_IIP 0x10 + +#define I40E_CLOUD_FILTER_FLAGS_OMAC I40E_CLOUD_FIELD_OMAC +#define I40E_CLOUD_FILTER_FLAGS_IMAC I40E_CLOUD_FIELD_IMAC +#define I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN (I40E_CLOUD_FIELD_IMAC | \ + I40E_CLOUD_FIELD_IVLAN) +#define I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID (I40E_CLOUD_FIELD_IMAC | \ + I40E_CLOUD_FIELD_TEN_ID) +#define I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC (I40E_CLOUD_FIELD_OMAC | \ + I40E_CLOUD_FIELD_IMAC | \ + I40E_CLOUD_FIELD_TEN_ID) +#define I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID (I40E_CLOUD_FIELD_IMAC | \ + I40E_CLOUD_FIELD_IVLAN | \ + I40E_CLOUD_FIELD_TEN_ID) +#define I40E_CLOUD_FILTER_FLAGS_IIP I40E_CLOUD_FIELD_IIP + struct i40e_cloud_filter { struct hlist_node cloud_node; unsigned long cookie; - u16 seid; /* filter control */ + /* cloud filter input set follows */ + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + __be16 vlan_id; + u16 seid; /* filter control */ + __be16 dst_port; + __be16 src_port; + u32 tenant_id; + union { + struct { + struct in_addr dst_ip; + struct in_addr src_ip; + } v4; + struct { + struct in6_addr dst_ip6; + struct in6_addr src_ip6; + } v6; + } ip; +#define dst_ipv6 ip.v6.dst_ip6.s6_addr32 +#define src_ipv6 ip.v6.src_ip6.s6_addr32 +#define dst_ipv4 ip.v4.dst_ip.s_addr +#define src_ipv4 ip.v4.src_ip.s_addr + u16 n_proto; /* Ethernet Protocol */ + u8 ip_proto; /* IPPROTO value */ + u8 flags; +#define I40E_CLOUD_TNL_TYPE_NONE 0xff + u8 tunnel_type; }; #define I40E_ETH_P_LLDP 0x88cc @@ -492,6 +540,8 @@ struct i40e_pf { #define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(27) #define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(28) #define I40E_FLAG_TC_MQPRIO BIT(29) +#define I40E_FLAG_FD_SB_INACTIVE BIT(30) +#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT(31) struct i40e_client_instance *cinst; bool stat_offsets_loaded; @@ -574,6 +624,8 @@ struct i40e_pf { u16 phy_led_val; u16 override_q_count; + u16 last_sw_conf_flags; + u16 last_sw_conf_valid_flags; }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 9f1f5786dcc2..b0188b8f91ba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1392,6 +1392,9 @@ struct i40e_aqc_cloud_filters_element_data { struct { u8 data[16]; } v6; + struct { + __le16 data[8]; + } 
raw_v6; } ipaddr; __le16 flags; #define I40E_AQC_ADD_CLOUD_FILTER_SHIFT 0 diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index a9460e0e9cb7..0203665cb53c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -5436,5 +5436,194 @@ i40e_add_pinfo_to_list(struct i40e_hw *hw, status = i40e_aq_write_ppp(hw, (void *)sec, sec->data_end, track_id, &offset, &info, NULL); + + return status; +} + +/** + * i40e_aq_add_cloud_filters + * @hw: pointer to the hardware structure + * @seid: VSI seid to add cloud filters from + * @filters: Buffer which contains the filters to be added + * @filter_count: number of filters contained in the buffer + * + * Set the cloud filters for a given VSI. The contents of the + * i40e_aqc_cloud_filters_element_data are filled in by the caller + * of the function. + * + **/ +enum i40e_status_code +i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_data *filters, + u8 filter_count) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_remove_cloud_filters *cmd = + (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw; + enum i40e_status_code status; + u16 buff_len; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_add_cloud_filters); + + buff_len = filter_count * sizeof(*filters); + desc.datalen = cpu_to_le16(buff_len); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + cmd->num_filters = filter_count; + cmd->seid = cpu_to_le16(seid); + + status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL); + + return status; +} + +/** + * i40e_aq_add_cloud_filters_bb + * @hw: pointer to the hardware structure + * @seid: VSI seid to add cloud filters from + * @filters: Buffer which contains the filters in big buffer to be added + * @filter_count: number of filters contained in the buffer + * + * Set the big buffer cloud filters for a given VSI. The contents of the + * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the + * function. + * + **/ +i40e_status +i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_bb *filters, + u8 filter_count) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_remove_cloud_filters *cmd = + (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw; + i40e_status status; + u16 buff_len; + int i; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_add_cloud_filters); + + buff_len = filter_count * sizeof(*filters); + desc.datalen = cpu_to_le16(buff_len); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + cmd->num_filters = filter_count; + cmd->seid = cpu_to_le16(seid); + cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB; + + for (i = 0; i < filter_count; i++) { + u16 tnl_type; + u32 ti; + + tnl_type = (le16_to_cpu(filters[i].element.flags) & + I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >> + I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT; + + /* Due to hardware eccentricities, the VNI for Geneve is shifted + * one more byte further than normally used for Tenant ID in + * other tunnel types. 
+ */ + if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) { + ti = le32_to_cpu(filters[i].element.tenant_id); + filters[i].element.tenant_id = cpu_to_le32(ti << 8); + } + } + + status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL); + + return status; +} + +/** + * i40e_aq_rem_cloud_filters + * @hw: pointer to the hardware structure + * @seid: VSI seid to remove cloud filters from + * @filters: Buffer which contains the filters to be removed + * @filter_count: number of filters contained in the buffer + * + * Remove the cloud filters for a given VSI. The contents of the + * i40e_aqc_cloud_filters_element_data are filled in by the caller + * of the function. + * + **/ +enum i40e_status_code +i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_data *filters, + u8 filter_count) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_remove_cloud_filters *cmd = + (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw; + enum i40e_status_code status; + u16 buff_len; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_remove_cloud_filters); + + buff_len = filter_count * sizeof(*filters); + desc.datalen = cpu_to_le16(buff_len); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + cmd->num_filters = filter_count; + cmd->seid = cpu_to_le16(seid); + + status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL); + + return status; +} + +/** + * i40e_aq_rem_cloud_filters_bb + * @hw: pointer to the hardware structure + * @seid: VSI seid to remove cloud filters from + * @filters: Buffer which contains the filters in big buffer to be removed + * @filter_count: number of filters contained in the buffer + * + * Remove the big buffer cloud filters for a given VSI. The contents of the + * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the + * function. + * + **/ +i40e_status +i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_bb *filters, + u8 filter_count) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_remove_cloud_filters *cmd = + (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw; + i40e_status status; + u16 buff_len; + int i; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_remove_cloud_filters); + + buff_len = filter_count * sizeof(*filters); + desc.datalen = cpu_to_le16(buff_len); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + cmd->num_filters = filter_count; + cmd->seid = cpu_to_le16(seid); + cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB; + + for (i = 0; i < filter_count; i++) { + u16 tnl_type; + u32 ti; + + tnl_type = (le16_to_cpu(filters[i].element.flags) & + I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >> + I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT; + + /* Due to hardware eccentricities, the VNI for Geneve is shifted + * one more byte further than normally used for Tenant ID in + * other tunnel types. 
+ */ + if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) { + ti = le32_to_cpu(filters[i].element.tenant_id); + filters[i].element.tenant_id = cpu_to_le32(ti << 8); + } + } + + status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL); + return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index fbe34500ded2..dfecaeda0654 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -69,6 +69,15 @@ static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); +static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); +static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); +static int i40e_get_capabilities(struct i40e_pf *pf, + enum i40e_admin_queue_opc list_type); + /* i40e_pci_tbl - PCI Device ID Table * @@ -5480,7 +5489,11 @@ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) **/ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) { + enum i40e_admin_queue_err last_aq_status; + struct i40e_cloud_filter *cfilter; struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct hlist_node *node; int ret, i; /* Reset rss size that was stored when reconfiguring rss for @@ -5521,6 +5534,29 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) "Failed to reset tx rate for ch->seid %u\n", ch->seid); + /* delete cloud filters associated with this channel */ + hlist_for_each_entry_safe(cfilter, node, + &pf->cloud_filter_list, cloud_node) { + if (cfilter->seid != ch->seid) + continue; + + hash_del(&cfilter->cloud_node); + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, + cfilter, + false); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, + false); + last_aq_status = pf->hw.aq.asq_last_status; + if (ret) + dev_info(&pf->pdev->dev, + "Failed to delete cloud filter, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, last_aq_status)); + kfree(cfilter); + } + /* delete VSI from FW */ ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, NULL); @@ -5971,6 +6007,63 @@ static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, return ch->initialized ? true : false; } +/** + * i40e_validate_and_set_switch_mode - sets up switch mode correctly + * @vsi: ptr to VSI which has PF backing + * + * Sets up switch mode correctly if it needs to be changed and perform + * what are allowed modes. 
+ **/ +static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi) +{ + u8 mode; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret; + + ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities); + if (ret) + return -EINVAL; + + if (hw->dev_caps.switch_mode) { + /* if switch mode is set, support mode2 (non-tunneled for + * cloud filter) for now + */ + u32 switch_mode = hw->dev_caps.switch_mode & + I40E_SWITCH_MODE_MASK; + if (switch_mode >= I40E_CLOUD_FILTER_MODE1) { + if (switch_mode == I40E_CLOUD_FILTER_MODE2) + return 0; + dev_err(&pf->pdev->dev, + "Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n", + hw->dev_caps.switch_mode); + return -EINVAL; + } + } + + /* Set Bit 7 to be valid */ + mode = I40E_AQ_SET_SWITCH_BIT7_VALID; + + /* Set L4type to both TCP and UDP support */ + mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH; + + /* Set cloud filter mode */ + mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; + + /* Prep mode field for set_switch_config */ + ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags, + pf->last_sw_conf_valid_flags, + mode, NULL); + if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH) + dev_err(&pf->pdev->dev, + "couldn't set switch config bits, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, + hw->aq.asq_last_status)); + + return ret; +} + /** * i40e_create_queue_channel - function to create channel * @vsi: VSI to be configured @@ -6750,13 +6843,720 @@ exit: return ret; } +/** + * i40e_set_cld_element - sets cloud filter element data + * @filter: cloud filter rule + * @cld: ptr to cloud filter element data + * + * This is helper function to copy data into cloud filter element + **/ +static inline void +i40e_set_cld_element(struct i40e_cloud_filter *filter, + struct i40e_aqc_cloud_filters_element_data *cld) +{ + int i, j; + u32 ipa; + + memset(cld, 0, sizeof(*cld)); + ether_addr_copy(cld->outer_mac, filter->dst_mac); + ether_addr_copy(cld->inner_mac, filter->src_mac); + + if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6) + return; + + if (filter->n_proto == ETH_P_IPV6) { +#define IPV6_MAX_INDEX (ARRAY_SIZE(filter->dst_ipv6) - 1) + for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6); + i++, j += 2) { + ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]); + ipa = cpu_to_le32(ipa); + memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa)); + } + } else { + ipa = be32_to_cpu(filter->dst_ipv4); + memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa)); + } + + cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id)); + + /* tenant_id is not supported by FW now, once the support is enabled + * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id) + */ + if (filter->tenant_id) + return; +} + +/** + * i40e_add_del_cloud_filter - Add/del cloud filter + * @vsi: pointer to VSI + * @filter: cloud filter rule + * @add: if true, add, if false, delete + * + * Add or delete a cloud filter for a specific flow spec. + * Returns 0 if the filter were successfully added. 
+ **/ +static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, bool add) +{ + struct i40e_aqc_cloud_filters_element_data cld_filter; + struct i40e_pf *pf = vsi->back; + int ret; + static const u16 flag_table[128] = { + [I40E_CLOUD_FILTER_FLAGS_OMAC] = + I40E_AQC_ADD_CLOUD_FILTER_OMAC, + [I40E_CLOUD_FILTER_FLAGS_IMAC] = + I40E_AQC_ADD_CLOUD_FILTER_IMAC, + [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN] = + I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN, + [I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] = + I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID, + [I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] = + I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC, + [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] = + I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID, + [I40E_CLOUD_FILTER_FLAGS_IIP] = + I40E_AQC_ADD_CLOUD_FILTER_IIP, + }; + + if (filter->flags >= ARRAY_SIZE(flag_table)) + return I40E_ERR_CONFIG; + + /* copy element needed to add cloud filter from filter */ + i40e_set_cld_element(filter, &cld_filter); + + if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE) + cld_filter.flags = cpu_to_le16(filter->tunnel_type << + I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT); + + if (filter->n_proto == ETH_P_IPV6) + cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] | + I40E_AQC_ADD_CLOUD_FLAGS_IPV6); + else + cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] | + I40E_AQC_ADD_CLOUD_FLAGS_IPV4); + + if (add) + ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid, + &cld_filter, 1); + else + ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid, + &cld_filter, 1); + if (ret) + dev_dbg(&pf->pdev->dev, + "Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n", + add ? "add" : "delete", filter->dst_port, ret, + pf->hw.aq.asq_last_status); + else + dev_info(&pf->pdev->dev, + "%s cloud filter for VSI: %d\n", + add ? "Added" : "Deleted", filter->seid); + return ret; +} + +/** + * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf + * @vsi: pointer to VSI + * @filter: cloud filter rule + * @add: if true, add, if false, delete + * + * Add or delete a cloud filter for a specific flow spec using big buffer. + * Returns 0 if the filter were successfully added. 
+ **/ +static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add) +{ + struct i40e_aqc_cloud_filters_element_bb cld_filter; + struct i40e_pf *pf = vsi->back; + int ret; + + /* Both (src/dst) valid mac_addr are not supported */ + if ((is_valid_ether_addr(filter->dst_mac) && + is_valid_ether_addr(filter->src_mac)) || + (is_multicast_ether_addr(filter->dst_mac) && + is_multicast_ether_addr(filter->src_mac))) + return -EINVAL; + + /* Make sure port is specified, otherwise bail out, for channel + * specific cloud filter needs 'L4 port' to be non-zero + */ + if (!filter->dst_port) + return -EINVAL; + + /* adding filter using src_port/src_ip is not supported at this stage */ + if (filter->src_port || filter->src_ipv4 || + !ipv6_addr_any(&filter->ip.v6.src_ip6)) + return -EINVAL; + + /* copy element needed to add cloud filter from filter */ + i40e_set_cld_element(filter, &cld_filter.element); + + if (is_valid_ether_addr(filter->dst_mac) || + is_valid_ether_addr(filter->src_mac) || + is_multicast_ether_addr(filter->dst_mac) || + is_multicast_ether_addr(filter->src_mac)) { + /* MAC + IP : unsupported mode */ + if (filter->dst_ipv4) + return -EINVAL; + + /* since we validated that L4 port must be valid before + * we get here, start with respective "flags" value + * and update if vlan is present or not + */ + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT); + + if (filter->vlan_id) { + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT); + } + + } else if (filter->dst_ipv4 || + !ipv6_addr_any(&filter->ip.v6.dst_ip6)) { + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT); + if (filter->n_proto == ETH_P_IPV6) + cld_filter.element.flags |= + cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6); + else + cld_filter.element.flags |= + cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4); + } else { + dev_err(&pf->pdev->dev, + "either mac or ip has to be valid for cloud filter\n"); + return -EINVAL; + } + + /* Now copy L4 port in Byte 6..7 in general fields */ + cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] = + be16_to_cpu(filter->dst_port); + + if (add) { + /* Validate current device switch mode, change if necessary */ + ret = i40e_validate_and_set_switch_mode(vsi); + if (ret) { + dev_err(&pf->pdev->dev, + "failed to set switch mode, ret %d\n", + ret); + return ret; + } + + ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid, + &cld_filter, 1); + } else { + ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid, + &cld_filter, 1); + } + + if (ret) + dev_dbg(&pf->pdev->dev, + "Failed to %s cloud filter(big buffer) err %d aq_err %d\n", + add ? "add" : "delete", ret, pf->hw.aq.asq_last_status); + else + dev_info(&pf->pdev->dev, + "%s cloud filter for VSI: %d, L4 port: %d\n", + add ? 
"add" : "delete", filter->seid, + ntohs(filter->dst_port)); + return ret; +} + +/** + * i40e_parse_cls_flower - Parse tc flower filters provided by kernel + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct tc_cls_flower_offload + * @filter: Pointer to cloud filter structure + * + **/ +static int i40e_parse_cls_flower(struct i40e_vsi *vsi, + struct tc_cls_flower_offload *f, + struct i40e_cloud_filter *filter) +{ + u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; + struct i40e_pf *pf = vsi->back; + u8 field_flags = 0; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { + dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n", + f->dissector->used_keys); + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + + if (mask->keyid != 0) + field_flags |= I40E_CLOUD_FIELD_TEN_ID; + + filter->tenant_id = be32_to_cpu(key->keyid); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); + + if (n_proto_key == ETH_P_ALL) { + n_proto_key = 0; + n_proto_mask = 0; + } + filter->n_proto = n_proto_key & n_proto_mask; + filter->ip_proto = key->ip_proto; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + + struct flow_dissector_key_eth_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + + /* use is_broadcast and is_zero to check for all 0xf or 0 */ + if (!is_zero_ether_addr(mask->dst)) { + if (is_broadcast_ether_addr(mask->dst)) { + field_flags |= I40E_CLOUD_FIELD_OMAC; + } else { + dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n", + mask->dst); + return I40E_ERR_CONFIG; + } + } + + if (!is_zero_ether_addr(mask->src)) { + if (is_broadcast_ether_addr(mask->src)) { + field_flags |= I40E_CLOUD_FIELD_IMAC; + } else { + dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n", + mask->src); + return I40E_ERR_CONFIG; + } + } + ether_addr_copy(filter->dst_mac, key->dst); + ether_addr_copy(filter->src_mac, key->src); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + + if (mask->vlan_id) { + if (mask->vlan_id == VLAN_VID_MASK) { + field_flags |= I40E_CLOUD_FIELD_IVLAN; + + } else { + dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n", + mask->vlan_id); + return I40E_ERR_CONFIG; + } + } + + filter->vlan_id = cpu_to_be16(key->vlan_id); + } 
+ + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + f->key); + + addr_type = key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->mask); + + if (mask->dst) { + if (mask->dst == cpu_to_be32(0xffffffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + mask->dst = be32_to_cpu(mask->dst); + dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n", + &mask->dst); + return I40E_ERR_CONFIG; + } + } + + if (mask->src) { + if (mask->src == cpu_to_be32(0xffffffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + mask->src = be32_to_cpu(mask->src); + dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n", + &mask->src); + return I40E_ERR_CONFIG; + } + } + + if (field_flags & I40E_CLOUD_FIELD_TEN_ID) { + dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n"); + return I40E_ERR_CONFIG; + } + filter->dst_ipv4 = key->dst; + filter->src_ipv4 = key->src; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->mask); + + /* src and dest IPV6 address should not be LOOPBACK + * (0:0:0:0:0:0:0:1), which can be represented as ::1 + */ + if (ipv6_addr_loopback(&key->dst) || + ipv6_addr_loopback(&key->src)) { + dev_err(&pf->pdev->dev, + "Bad ipv6, addr is LOOPBACK\n"); + return I40E_ERR_CONFIG; + } + if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + field_flags |= I40E_CLOUD_FIELD_IIP; + + memcpy(&filter->src_ipv6, &key->src.s6_addr32, + sizeof(filter->src_ipv6)); + memcpy(&filter->dst_ipv6, &key->dst.s6_addr32, + sizeof(filter->dst_ipv6)); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + + if (mask->src) { + if (mask->src == cpu_to_be16(0xffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n", + be16_to_cpu(mask->src)); + return I40E_ERR_CONFIG; + } + } + + if (mask->dst) { + if (mask->dst == cpu_to_be16(0xffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n", + be16_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + + filter->dst_port = key->dst; + filter->src_port = key->src; + + switch (filter->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + dev_err(&pf->pdev->dev, + "Only UDP and TCP transport are supported\n"); + return -EINVAL; + } + } + filter->flags = field_flags; + return 0; +} + +/** + * i40e_handle_tclass: Forward to a traffic class on the device + * @vsi: Pointer to VSI + * @tc: traffic class index on the device + * @filter: Pointer to cloud filter structure + * + **/ +static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc, + struct i40e_cloud_filter *filter) +{ + struct i40e_channel *ch, 
*ch_tmp; + + /* direct to a traffic class on the same device */ + if (tc == 0) { + filter->seid = vsi->seid; + return 0; + } else if (vsi->tc_config.enabled_tc & BIT(tc)) { + if (!filter->dst_port) { + dev_err(&vsi->back->pdev->dev, + "Specify destination port to direct to traffic class that is not default\n"); + return -EINVAL; + } + if (list_empty(&vsi->ch_list)) + return -EINVAL; + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, + list) { + if (ch->seid == vsi->tc_seid_map[tc]) + filter->seid = ch->seid; + } + return 0; + } + dev_err(&vsi->back->pdev->dev, "TC is not enabled\n"); + return -EINVAL; +} + +/** + * i40e_configure_clsflower - Configure tc flower filters + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct tc_cls_flower_offload + * + **/ +static int i40e_configure_clsflower(struct i40e_vsi *vsi, + struct tc_cls_flower_offload *cls_flower) +{ + int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); + struct i40e_cloud_filter *filter = NULL; + struct i40e_pf *pf = vsi->back; + int err = 0; + + if (tc < 0) { + dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) + return -EBUSY; + + if (pf->fdir_pf_active_filters || + (!hlist_empty(&pf->fdir_filter_list))) { + dev_err(&vsi->back->pdev->dev, + "Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n"); + return -EINVAL; + } + + if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) { + dev_err(&vsi->back->pdev->dev, + "Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n"); + vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED; + vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->cookie = cls_flower->cookie; + + err = i40e_parse_cls_flower(vsi, cls_flower, filter); + if (err < 0) + goto err; + + err = i40e_handle_tclass(vsi, tc, filter); + if (err < 0) + goto err; + + /* Add cloud filter */ + if (filter->dst_port) + err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true); + else + err = i40e_add_del_cloud_filter(vsi, filter, true); + + if (err) { + dev_err(&pf->pdev->dev, + "Failed to add cloud filter, err %s\n", + i40e_stat_str(&pf->hw, err)); + err = i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status); + goto err; + } + + /* add filter to the ordered list */ + INIT_HLIST_NODE(&filter->cloud_node); + + hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list); + + pf->num_cloud_filters++; + + return err; +err: + kfree(filter); + return err; +} + +/** + * i40e_find_cloud_filter - Find the could filter in the list + * @vsi: Pointer to VSI + * @cookie: filter specific cookie + * + **/ +static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi, + unsigned long *cookie) +{ + struct i40e_cloud_filter *filter = NULL; + struct hlist_node *node2; + + hlist_for_each_entry_safe(filter, node2, + &vsi->back->cloud_filter_list, cloud_node) + if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie))) + return filter; + return NULL; +} + +/** + * i40e_delete_clsflower - Remove tc flower filters + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct tc_cls_flower_offload + * + **/ +static int i40e_delete_clsflower(struct i40e_vsi *vsi, + struct tc_cls_flower_offload *cls_flower) +{ + struct i40e_cloud_filter *filter = NULL; + struct i40e_pf *pf = vsi->back; + int err = 0; + + filter = i40e_find_cloud_filter(vsi, 
&cls_flower->cookie); + + if (!filter) + return -EINVAL; + + hash_del(&filter->cloud_node); + + if (filter->dst_port) + err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false); + else + err = i40e_add_del_cloud_filter(vsi, filter, false); + + kfree(filter); + if (err) { + dev_err(&pf->pdev->dev, + "Failed to delete cloud filter, err %s\n", + i40e_stat_str(&pf->hw, err)); + return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status); + } + + pf->num_cloud_filters--; + if (!pf->num_cloud_filters) + if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) && + !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) { + pf->flags |= I40E_FLAG_FD_SB_ENABLED; + pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER; + pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + } + return 0; +} + +/** + * i40e_setup_tc_cls_flower - flower classifier offloads + * @netdev: net device to configure + * @type_data: offload data + **/ +static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, + struct tc_cls_flower_offload *cls_flower) +{ + struct i40e_vsi *vsi = np->vsi; + + if (cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return i40e_configure_clsflower(vsi, cls_flower); + case TC_CLSFLOWER_DESTROY: + return i40e_delete_clsflower(vsi, cls_flower); + case TC_CLSFLOWER_STATS: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct i40e_netdev_priv *np = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return i40e_setup_tc_cls_flower(np, type_data); + + default: + return -EOPNOTSUPP; + } +} + +static int i40e_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct i40e_netdev_priv *np = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb, + np, np); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { - if (type != TC_SETUP_MQPRIO) + switch (type) { + case TC_SETUP_MQPRIO: + return i40e_setup_tc(netdev, type_data); + case TC_SETUP_BLOCK: + return i40e_setup_tc_block(netdev, type_data); + default: return -EOPNOTSUPP; - - return i40e_setup_tc(netdev, type_data); + } } /** @@ -6954,6 +7754,13 @@ static void i40e_cloud_filter_exit(struct i40e_pf *pf) kfree(cfilter); } pf->num_cloud_filters = 0; + + if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) && + !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) { + pf->flags |= I40E_FLAG_FD_SB_ENABLED; + pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER; + pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + } } /** @@ -8061,7 +8868,8 @@ end_reconstitute: * i40e_get_capabilities - get info about the HW * @pf: the PF struct **/ -static int i40e_get_capabilities(struct i40e_pf *pf) +static int i40e_get_capabilities(struct i40e_pf *pf, + enum i40e_admin_queue_opc list_type) { struct i40e_aqc_list_capabilities_element_resp *cap_buf; u16 data_size; @@ -8076,9 +8884,8 @@ static int i40e_get_capabilities(struct i40e_pf *pf) /* this loads the data into the hw struct for us */ err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len, - &data_size, - i40e_aqc_opc_list_func_capabilities, - NULL); + &data_size, list_type, + NULL); /* data loaded, buffer no longer needed */ 
kfree(cap_buf); @@ -8095,26 +8902,44 @@ static int i40e_get_capabilities(struct i40e_pf *pf) } } while (err); - if (pf->hw.debug_mask & I40E_DEBUG_USER) - dev_info(&pf->pdev->dev, - "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n", - pf->hw.pf_id, pf->hw.func_caps.num_vfs, - pf->hw.func_caps.num_msix_vectors, - pf->hw.func_caps.num_msix_vectors_vf, - pf->hw.func_caps.fd_filters_guaranteed, - pf->hw.func_caps.fd_filters_best_effort, - pf->hw.func_caps.num_tx_qp, - pf->hw.func_caps.num_vsis); - + if (pf->hw.debug_mask & I40E_DEBUG_USER) { + if (list_type == i40e_aqc_opc_list_func_capabilities) { + dev_info(&pf->pdev->dev, + "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n", + pf->hw.pf_id, pf->hw.func_caps.num_vfs, + pf->hw.func_caps.num_msix_vectors, + pf->hw.func_caps.num_msix_vectors_vf, + pf->hw.func_caps.fd_filters_guaranteed, + pf->hw.func_caps.fd_filters_best_effort, + pf->hw.func_caps.num_tx_qp, + pf->hw.func_caps.num_vsis); + } else if (list_type == i40e_aqc_opc_list_dev_capabilities) { + dev_info(&pf->pdev->dev, + "switch_mode=0x%04x, function_valid=0x%08x\n", + pf->hw.dev_caps.switch_mode, + pf->hw.dev_caps.valid_functions); + dev_info(&pf->pdev->dev, + "SR-IOV=%d, num_vfs for all function=%u\n", + pf->hw.dev_caps.sr_iov_1_1, + pf->hw.dev_caps.num_vfs); + dev_info(&pf->pdev->dev, + "num_vsis=%u, num_rx:%u, num_tx=%u\n", + pf->hw.dev_caps.num_vsis, + pf->hw.dev_caps.num_rx_qp, + pf->hw.dev_caps.num_tx_qp); + } + } + if (list_type == i40e_aqc_opc_list_func_capabilities) { #define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \ + pf->hw.func_caps.num_vfs) - if (pf->hw.revision_id == 0 && (DEF_NUM_VSI > pf->hw.func_caps.num_vsis)) { - dev_info(&pf->pdev->dev, - "got num_vsis %d, setting num_vsis to %d\n", - pf->hw.func_caps.num_vsis, DEF_NUM_VSI); - pf->hw.func_caps.num_vsis = DEF_NUM_VSI; + if (pf->hw.revision_id == 0 && + pf->hw.func_caps.num_vsis < DEF_NUM_VSI) { + dev_info(&pf->pdev->dev, + "got num_vsis %d, setting num_vsis to %d\n", + pf->hw.func_caps.num_vsis, DEF_NUM_VSI); + pf->hw.func_caps.num_vsis = DEF_NUM_VSI; + } } - return 0; } @@ -8156,6 +8981,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) if (!vsi) { dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n"); pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; return; } } @@ -8177,6 +9003,45 @@ static void i40e_fdir_teardown(struct i40e_pf *pf) i40e_vsi_release(vsi); } +/** + * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs + * @vsi: PF main vsi + * @seid: seid of main or channel VSIs + * + * Rebuilds cloud filters associated with main VSI and channel VSIs if they + * existed before reset + **/ +static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid) +{ + struct i40e_cloud_filter *cfilter; + struct i40e_pf *pf = vsi->back; + struct hlist_node *node; + i40e_status ret; + + /* Add cloud filters back if they exist */ + hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list, + cloud_node) { + if (cfilter->seid != seid) + continue; + + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, + true); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + + if (ret) { + dev_dbg(&pf->pdev->dev, + "Failed to rebuild cloud filter, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return ret; + } + } + return 0; +} + /** * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before 
reset * @vsi: PF main vsi @@ -8216,6 +9081,13 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) credits, ch->seid); } + ret = i40e_rebuild_cloud_filters(vsi, ch->seid); + if (ret) { + dev_dbg(&vsi->back->pdev->dev, + "Failed to rebuild cloud filters for channel VSI %u\n", + ch->seid); + return ret; + } } return 0; } @@ -8382,7 +9254,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_verify_eeprom(pf); i40e_clear_pxe_mode(hw); - ret = i40e_get_capabilities(pf); + ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (ret) goto end_core_reset; @@ -8503,6 +9375,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) vsi->seid); } + ret = i40e_rebuild_cloud_filters(vsi, vsi->seid); + if (ret) + goto end_unlock; + /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs * for this main VSI if they exist */ @@ -9426,6 +10302,7 @@ static int i40e_init_msix(struct i40e_pf *pf) (pf->num_fdsb_msix == 0)) { dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; } if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && (pf->num_vmdq_msix == 0)) { @@ -9543,6 +10420,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED | I40E_FLAG_VMDQ_ENABLED); + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; /* rework the queue expectations without MSIX */ i40e_determine_queue_usage(pf); @@ -10283,9 +11161,13 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) /* Enable filters and mark for reset */ if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) need_reset = true; - /* enable FD_SB only if there is MSI-X vector */ - if (pf->num_fdsb_msix > 0) + /* enable FD_SB only if there is MSI-X vector and no cloud + * filters exist + */ + if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) { pf->flags |= I40E_FLAG_FD_SB_ENABLED; + pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + } } else { /* turn off filters, mark for reset and clear SW filter list */ if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { @@ -10294,6 +11176,8 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) } pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_SB_AUTO_DISABLED); + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + /* reset fd counters */ pf->fd_add_err = 0; pf->fd_atr_cnt = 0; @@ -10355,6 +11239,12 @@ static int i40e_set_features(struct net_device *netdev, else i40e_vlan_stripping_disable(vsi); + if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) { + dev_err(&pf->pdev->dev, + "Offloaded tc filters active, can't turn hw_tc_offload off"); + return -EINVAL; + } + need_reset = i40e_set_ntuple(pf, features); if (need_reset) @@ -10874,7 +11764,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - netdev->hw_features |= NETIF_F_NTUPLE; + netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + hw_features = hw_enc_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; @@ -12179,8 +13070,10 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) */ if ((pf->hw.pf_id == 0) && - !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) + !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) { flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; + pf->last_sw_conf_flags = flags; + } if (pf->hw.pf_id == 0) { u16 valid_flags; @@ -12196,6 +13089,7 @@ static int i40e_setup_pf_switch(struct 
i40e_pf *pf, bool reinit) pf->hw.aq.asq_last_status)); /* not a fatal problem, just keep going */ } + pf->last_sw_conf_valid_flags = valid_flags; } /* first time setup */ @@ -12293,6 +13187,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_DCB_ENABLED | I40E_FLAG_SRIOV_ENABLED | I40E_FLAG_VMDQ_ENABLED); + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED | I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED | @@ -12307,6 +13202,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_FD_ATR_ENABLED | I40E_FLAG_DCB_ENABLED | I40E_FLAG_VMDQ_ENABLED); + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; } else { /* Not enough queues for all TCs */ if ((pf->flags & I40E_FLAG_DCB_CAPABLE) && @@ -12330,6 +13226,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) queues_left -= 1; /* save 1 queue for FD */ } else { pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n"); } } @@ -12633,7 +13530,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n"); i40e_clear_pxe_mode(hw); - err = i40e_get_capabilities(pf); + err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (err) goto err_adminq_setup; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 92869f57b52b..3bb6659db822 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -283,6 +283,22 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_bb *filters, + u8 filter_count); +enum i40e_status_code +i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 vsi, + struct i40e_aqc_cloud_filters_element_data *filters, + u8 filter_count); +enum i40e_status_code +i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi, + struct i40e_aqc_cloud_filters_element_data *filters, + u8 filter_count); +i40e_status +i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_bb *filters, + u8 filter_count); i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw, struct i40e_lldp_variables *lldp_cfg); /* i40e_common */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index e4e5a0c864b7..00d4833e9925 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -291,6 +291,7 @@ struct i40e_hw_capabilities { #define I40E_CLOUD_FILTER_MODE1 0x6 #define I40E_CLOUD_FILTER_MODE2 0x7 #define I40E_CLOUD_FILTER_MODE3 0x8 +#define I40E_SWITCH_MODE_MASK 0xF u32 management_mode; u32 mng_protocols_over_mctp; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index af82c303de7b..06b04572c518 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ 
b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1360,6 +1360,9 @@ struct i40e_aqc_cloud_filters_element_data { struct { u8 data[16]; } v6; + struct { + __le16 data[8]; + } raw_v6; } ipaddr; __le16 flags; #define I40E_AQC_ADD_CLOUD_FILTER_SHIFT 0 From c0752f2bd6ee77aa9334da4f69e8f54a325d282b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 14:42:03 +0300 Subject: [PATCH 1609/2177] net/mlx5e: Introduce stats group API Currently the mlx5e driver has multiple groups of stats; each group serves a different purpose and may or may not depend on hardware capabilities. The problem with the current implementation is that there is no clear API to create a new group of stats. This change defines a new API to create a group of stats and simplifies the way of handling them by defining a new struct "mlx5e_stats_grp" which has the following three function pointers: - get_num_stats() - returns the number of counters in the group. - fill_strings() - fills the counter strings within the group. - fill_stats() - fills the counter values within the group. The above function pointers are used within the ethtool callbacks while calling "ethtool -S" from userspace. This change also switches the SW group to use the new API. Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 16 +-- .../ethernet/mellanox/mlx5/core/en_stats.c | 107 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 50 ++------ 4 files changed, 127 insertions(+), 48 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 100fe4ecad9b..f391c7cb7656 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ fpga/ipsec.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ - en_tx.o en_rx.o en_rx_am.o en_txrx.o vxlan.o \ + en_tx.o en_rx.o en_rx_am.o en_txrx.o en_stats.o vxlan.o \ en_arfs.o en_fs_ethtool.o en_selftest.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 81a112e40fe3..dfc440d7278d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -176,9 +176,13 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { + int i, num_stats = 0; + switch (sset) { case ETH_SS_STATS: - return NUM_SW_COUNTERS + + for (i = 0; i < mlx5e_num_stats_grps; i++) + num_stats += mlx5e_stats_grps[i].get_num_stats(priv); + return num_stats + MLX5E_NUM_Q_CNTRS(priv) + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS(priv) + NUM_PCIE_COUNTERS(priv) + @@ -211,9 +215,8 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) int i, j, tc, prio, idx = 0; unsigned long pfc_combined; - /* SW counters */ - for (i = 0; i < NUM_SW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); + for (i = 0; i < mlx5e_num_stats_grps; i++) + idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); /* Q counters */ for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) @@ -354,9 +357,8 @@ void
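/*
 * Illustrative sketch (kept as C comments/code so it reads with the diff):
 * the stats group API described in patch 1609 is a table of three callbacks
 * per group, which the ethtool entry points iterate.  All demo_* names and
 * the example counters below are hypothetical; this only shows the shape of
 * the design, not the mlx5e code.
 */
#include <stdio.h>
#include <string.h>

#define DEMO_GSTRING_LEN 32

struct demo_priv {
	unsigned long rx_packets;
	unsigned long tx_packets;
};

struct demo_stats_grp {
	int (*get_num_stats)(struct demo_priv *priv);
	int (*fill_strings)(struct demo_priv *priv, char *data, int idx);
	int (*fill_stats)(struct demo_priv *priv, unsigned long *data, int idx);
};

static int demo_sw_get_num_stats(struct demo_priv *priv)
{
	return 2;
}

static int demo_sw_fill_strings(struct demo_priv *priv, char *data, int idx)
{
	strcpy(data + (idx++) * DEMO_GSTRING_LEN, "rx_packets");
	strcpy(data + (idx++) * DEMO_GSTRING_LEN, "tx_packets");
	return idx;
}

static int demo_sw_fill_stats(struct demo_priv *priv, unsigned long *data,
			      int idx)
{
	data[idx++] = priv->rx_packets;
	data[idx++] = priv->tx_packets;
	return idx;
}

/* Adding another group, as patches 1610-1616 do, is just another array entry. */
static const struct demo_stats_grp demo_stats_grps[] = {
	{
		.get_num_stats = demo_sw_get_num_stats,
		.fill_strings  = demo_sw_fill_strings,
		.fill_stats    = demo_sw_fill_stats,
	},
};

static const int demo_num_stats_grps =
	sizeof(demo_stats_grps) / sizeof(demo_stats_grps[0]);

int main(void)
{
	struct demo_priv priv = { .rx_packets = 10, .tx_packets = 7 };
	char names[16][DEMO_GSTRING_LEN];
	unsigned long vals[16];
	int i, idx, total = 0;

	/* What a get_sset_count() callback does: sum what every group reports. */
	for (i = 0; i < demo_num_stats_grps; i++)
		total += demo_stats_grps[i].get_num_stats(&priv);

	/* What get_strings()/get_ethtool_stats() do: each group appends its part. */
	for (i = 0, idx = 0; i < demo_num_stats_grps; i++)
		idx = demo_stats_grps[i].fill_strings(&priv, &names[0][0], idx);
	for (i = 0, idx = 0; i < demo_num_stats_grps; i++)
		idx = demo_stats_grps[i].fill_stats(&priv, vals, idx);

	for (i = 0; i < total; i++)
		printf("%s: %lu\n", names[i], vals[i]);
	return 0;
}
/* End of sketch; the en_ethtool.c hunk of patch 1609 continues below. */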
mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, channels = &priv->channels; mutex_unlock(&priv->state_lock); - for (i = 0; i < NUM_SW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, - sw_stats_desc, i); + for (i = 0; i < mlx5e_num_stats_grps; i++) + idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c new file mode 100644 index 000000000000..25a1a6d8aa9d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "en.h" + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) + +static int mlx5e_grp_sw_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_SW_COUNTERS; +} + +static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i); + return idx; +} + +const struct mlx5e_stats_grp mlx5e_stats_grps[] = { + { + .get_num_stats = mlx5e_grp_sw_get_num_stats, + .fill_strings = mlx5e_grp_sw_fill_strings, + .fill_stats = mlx5e_grp_sw_fill_stats, + } +}; + +const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index f8637213afc0..6d2d8abf1929 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -91,45 +91,6 @@ struct mlx5e_sw_stats { u64 link_down_events_phy; }; -static const struct counter_desc sw_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, -}; - struct mlx5e_qcounter_stats { u32 rx_out_of_buffer; }; @@ -423,7 +384,6 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) #define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) #define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) @@ -470,6 +430,16 @@ struct mlx5e_stats { struct mlx5e_pcie_stats pcie; }; +struct mlx5e_priv; +struct mlx5e_stats_grp { + int (*get_num_stats)(struct mlx5e_priv *priv); + int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); + int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); +}; + +extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; +extern const int mlx5e_num_stats_grps; + static const struct counter_desc mlx5e_pme_status_desc[] = { { "module_unplug", 8 }, }; From fd8dcdb8d2e72f6ab7caf63392bece01344b4b47 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 
Aug 2017 14:46:33 +0300 Subject: [PATCH 1610/2177] net/mlx5e: Switch Q counters to use the stats group API Switch the Q counters to use the new stats group API. Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 10 ------ .../ethernet/mellanox/mlx5/core/en_stats.c | 36 ++++++++++++++++++- .../ethernet/mellanox/mlx5/core/en_stats.h | 5 --- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index dfc440d7278d..35ba40af2a73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -166,7 +166,6 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) return err ? false : rx_pause | tx_pause; } -#define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) #define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num) #define MLX5E_NUM_SQ_STATS(priv) \ (NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc) @@ -183,7 +182,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) for (i = 0; i < mlx5e_num_stats_grps; i++) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); return num_stats + - MLX5E_NUM_Q_CNTRS(priv) + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS(priv) + NUM_PCIE_COUNTERS(priv) + MLX5E_NUM_RQ_STATS(priv) + @@ -218,10 +216,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - /* Q counters */ - for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); - /* VPORT counters */ for (i = 0; i < NUM_VPORT_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, @@ -360,10 +354,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, - q_stats_desc, i); - for (i = 0; i < NUM_VPORT_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, vport_stats_desc, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 25a1a6d8aa9d..64b344720a31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -96,12 +96,46 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) + +static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv) +{ + return priv->q_counter ? 
NUM_Q_COUNTERS : 0; +} + +static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, q_stats_desc, i); + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, .fill_strings = mlx5e_grp_sw_fill_strings, .fill_stats = mlx5e_grp_sw_fill_stats, - } + }, + { + .get_num_stats = mlx5e_grp_q_get_num_stats, + .fill_strings = mlx5e_grp_q_fill_strings, + .fill_stats = mlx5e_grp_q_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 6d2d8abf1929..b82ecb1fa353 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -95,10 +95,6 @@ struct mlx5e_qcounter_stats { u32 rx_out_of_buffer; }; -static const struct counter_desc q_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, -}; - #define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) #define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ vstats->query_vport_out, c) @@ -384,7 +380,6 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) #define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) From 40cab9f16cc378f61f5cff0710cdd6caa7db549b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 14:47:58 +0300 Subject: [PATCH 1611/2177] net/mlx5e: Switch vport counters to use the stats group API Switch the vport counters to use the new stats group API. 
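The counter tables consolidated into en_stats.c by this series all follow the same descriptor idea: each entry pairs a display string with a byte offset, either into a CPU-side stats struct (MLX5E_DECLARE_STAT) or into a big-endian firmware query buffer (VPORT_COUNTER_OFF and friends). The sketch below, with hypothetical demo_* names and a made-up stats struct, shows the offsetof() flavour of that technique; it is an illustration of the pattern, not the mlx5 macros.

/* Descriptor-driven counter read: generic code walks a {name, offset} table
 * instead of hard-coding one line per field.  Hypothetical names only.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_sw_stats {
	uint64_t rx_packets;
	uint64_t rx_bytes;
	uint64_t tx_packets;
};

struct demo_counter_desc {
	const char *format;	/* name reported to "ethtool -S" */
	size_t offset;		/* byte offset of the field in the stats struct */
};

/* Stringify the field name and record its offset in one step. */
#define DEMO_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)

static const struct demo_counter_desc demo_sw_stats_desc[] = {
	{ DEMO_DECLARE_STAT(struct demo_sw_stats, rx_packets) },
	{ DEMO_DECLARE_STAT(struct demo_sw_stats, rx_bytes) },
	{ DEMO_DECLARE_STAT(struct demo_sw_stats, tx_packets) },
};

#define DEMO_NUM_SW_COUNTERS \
	(sizeof(demo_sw_stats_desc) / sizeof(demo_sw_stats_desc[0]))

/* Counterpart of a READ_CTR64_CPU-style helper: fetch counter i by offset. */
static uint64_t demo_read_ctr64(const void *stats,
				const struct demo_counter_desc *desc, size_t i)
{
	return *(const uint64_t *)((const char *)stats + desc[i].offset);
}

int main(void)
{
	struct demo_sw_stats sw = {
		.rx_packets = 3,
		.rx_bytes = 1500,
		.tx_packets = 2,
	};
	size_t i;

	for (i = 0; i < DEMO_NUM_SW_COUNTERS; i++)
		printf("%s: %llu\n", demo_sw_stats_desc[i].format,
		       (unsigned long long)demo_read_ctr64(&sw,
							   demo_sw_stats_desc,
							   i));
	return 0;
}

Because the name and the offset live side by side in one table, a group's fill_strings() and fill_stats() callbacks reduce to a loop over the same descriptor array, which is the shape of the helpers added throughout these patches.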
Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 11 +-- .../ethernet/mellanox/mlx5/core/en_stats.c | 77 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 45 ----------- 3 files changed, 78 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 35ba40af2a73..08089f1d2c4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -182,7 +182,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) for (i = 0; i < mlx5e_num_stats_grps; i++) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); return num_stats + - NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS(priv) + + NUM_PPORT_COUNTERS(priv) + NUM_PCIE_COUNTERS(priv) + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + @@ -216,11 +216,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - /* VPORT counters */ - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_stats_desc[i].format); - /* PPORT counters */ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, @@ -354,10 +349,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, - vport_stats_desc, i); - for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, pport_802_3_stats_desc, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 64b344720a31..199e34f204a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -125,6 +125,78 @@ static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +static const struct counter_desc vport_stats_desc[] = { + { "rx_vport_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_vport_unicast_bytes", + VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_vport_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_vport_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_vport_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_vport_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_vport_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_vport_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_vport_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_vport_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { 
"rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, +}; + +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) + +static int mlx5e_grp_vport_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_VPORT_COUNTERS; +} + +static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -136,6 +208,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_q_fill_strings, .fill_stats = mlx5e_grp_q_fill_stats, }, + { + .get_num_stats = mlx5e_grp_vport_get_num_stats, + .fill_strings = mlx5e_grp_vport_fill_strings, + .fill_stats = mlx5e_grp_vport_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index b82ecb1fa353..610208aed767 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -95,7 +95,6 @@ struct mlx5e_qcounter_stats { u32 rx_out_of_buffer; }; -#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) #define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ vstats->query_vport_out, c) @@ -103,49 +102,6 @@ struct mlx5e_vport_stats { __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; }; -static const struct counter_desc vport_stats_desc[] = { - { "rx_vport_unicast_packets", - VPORT_COUNTER_OFF(received_eth_unicast.packets) }, - { "rx_vport_unicast_bytes", - VPORT_COUNTER_OFF(received_eth_unicast.octets) }, - { "tx_vport_unicast_packets", - VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, - { "tx_vport_unicast_bytes", - VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, - { "rx_vport_multicast_packets", - VPORT_COUNTER_OFF(received_eth_multicast.packets) }, - { "rx_vport_multicast_bytes", - VPORT_COUNTER_OFF(received_eth_multicast.octets) }, - { "tx_vport_multicast_packets", - VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, - { "tx_vport_multicast_bytes", - VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, - { "rx_vport_broadcast_packets", - VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, - { "rx_vport_broadcast_bytes", - VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, - { "tx_vport_broadcast_packets", - VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, - { "tx_vport_broadcast_bytes", - VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, - { "rx_vport_rdma_unicast_packets", - 
VPORT_COUNTER_OFF(received_ib_unicast.packets) }, - { "rx_vport_rdma_unicast_bytes", - VPORT_COUNTER_OFF(received_ib_unicast.octets) }, - { "tx_vport_rdma_unicast_packets", - VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, - { "tx_vport_rdma_unicast_bytes", - VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, - { "rx_vport_rdma_multicast_packets", - VPORT_COUNTER_OFF(received_ib_multicast.packets) }, - { "rx_vport_rdma_multicast_bytes", - VPORT_COUNTER_OFF(received_ib_multicast.octets) }, - { "tx_vport_rdma_multicast_packets", - VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, - { "tx_vport_rdma_multicast_bytes", - VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, -}; - #define PPORT_802_3_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) @@ -380,7 +336,6 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) From 6e6ef814d27b1fb3cd30fe62d700208065656401 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 14:49:57 +0300 Subject: [PATCH 1612/2177] net/mlx5e: Switch IEEE 802.3 counters to use stats group API Switch the IEEE 802.3 counters to use the new stats group API. Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 9 --- .../ethernet/mellanox/mlx5/core/en_stats.c | 57 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 28 +-------- 3 files changed, 58 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 08089f1d2c4c..65e7c0a986ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -216,11 +216,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - /* PPORT counters */ - for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_802_3_stats_desc[i].format); - for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format); @@ -349,10 +344,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, - pport_802_3_stats_desc, i); - for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, pport_2863_stats_desc, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 199e34f204a6..1ce296e4c9f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -197,6 +197,58 @@ static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +static const struct counter_desc 
pport_802_3_stats_desc[] = { + { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, + { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, + { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) + +static int mlx5e_grp_802_3_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_802_3_COUNTERS; +} + +static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -213,6 +265,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_vport_fill_strings, .fill_stats = mlx5e_grp_vport_fill_stats, }, + { + .get_num_stats = mlx5e_grp_802_3_get_num_stats, + .fill_strings = mlx5e_grp_802_3_fill_strings, + .fill_stats = mlx5e_grp_802_3_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 610208aed767..5a489aa8fef3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -102,9 +102,6 @@ struct mlx5e_vport_stats { __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; }; -#define PPORT_802_3_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) #define PPORT_802_3_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) @@ -150,27 +147,6 @@ struct mlx5e_pport_stats { __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; -static const struct counter_desc pport_802_3_stats_desc[] = { - { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, - { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, - { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) 
}, - { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, - { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, - { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, - { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, - { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, - { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, - { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, - { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, - { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, - { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, - { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, - { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, - { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, - { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, - { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, -}; - static const struct counter_desc pport_2863_stats_desc[] = { { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, @@ -336,7 +312,6 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) \ @@ -358,8 +333,7 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) -#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_802_3_COUNTERS + \ - NUM_PPORT_2863_COUNTERS + \ +#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_2863_COUNTERS + \ NUM_PPORT_2819_COUNTERS + \ NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ From fc8e64a3118ee13bc4cafa6a31ea74daf2d644d9 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 14:51:27 +0300 Subject: [PATCH 1613/2177] net/mlx5e: Switch RFC 2863 counters to use stats group API Switch the RFC 2863 counters to use the new stats group API. 
Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 8 ---- .../ethernet/mellanox/mlx5/core/en_stats.c | 42 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 13 +----- 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 65e7c0a986ca..79ac92a2834f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -216,10 +216,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2863_stats_desc[i].format); - for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); @@ -344,10 +340,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, - pport_2863_stats_desc, i); - for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, pport_2819_stats_desc, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1ce296e4c9f5..1b2fc0eb1b3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -249,6 +249,43 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2863_stats_desc[] = { + { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, + { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, + { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, +}; + +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) + +static int mlx5e_grp_2863_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_2863_COUNTERS; +} + +static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -270,6 +307,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_802_3_fill_strings, .fill_stats = mlx5e_grp_802_3_fill_stats, }, + { + .get_num_stats = mlx5e_grp_2863_get_num_stats, + .fill_strings = mlx5e_grp_2863_fill_strings, + .fill_stats = mlx5e_grp_2863_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 
5a489aa8fef3..58dc22e5ac53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -105,9 +105,6 @@ struct mlx5e_vport_stats { #define PPORT_802_3_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) -#define PPORT_2863_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_2863_cntrs_grp_data_layout.c##_high) #define PPORT_2863_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ counter_set.eth_2863_cntrs_grp_data_layout.c##_high) @@ -147,12 +144,6 @@ struct mlx5e_pport_stats { __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; -static const struct counter_desc pport_2863_stats_desc[] = { - { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, - { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, - { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, -}; - static const struct counter_desc pport_2819_stats_desc[] = { { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, @@ -312,7 +303,6 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) \ (ARRAY_SIZE(pport_phy_statistical_stats_desc) * \ @@ -333,8 +323,7 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) -#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_2863_COUNTERS + \ - NUM_PPORT_2819_COUNTERS + \ +#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_2819_COUNTERS + \ NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ NUM_PPORT_PRIO + \ From e0e0def9e2a8c6320b5d8cb5b79b795a5a7df732 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 14:56:48 +0300 Subject: [PATCH 1614/2177] net/mlx5e: Switch RFC 2819 counters to use stats group API Switch the RFC 2819 counters to use the new stats group API. 
Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 8 --- .../ethernet/mellanox/mlx5/core/en_stats.c | 52 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 23 +------- 3 files changed, 53 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 79ac92a2834f..b5a9de1150f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -216,10 +216,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2819_stats_desc[i].format); - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_phy_statistical_stats_desc[i].format); @@ -340,10 +336,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, - pport_2819_stats_desc, i); - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, pport_phy_statistical_stats_desc, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1b2fc0eb1b3f..7cdcc60e913b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -286,6 +286,53 @@ static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2819_stats_desc[] = { + { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, + { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, + { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) + +static int mlx5e_grp_2819_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_2819_COUNTERS; +} + +static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, 
pport_2819_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -312,6 +359,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_2863_fill_strings, .fill_stats = mlx5e_grp_2863_fill_stats, }, + { + .get_num_stats = mlx5e_grp_2819_get_num_stats, + .fill_strings = mlx5e_grp_2819_fill_strings, + .fill_stats = mlx5e_grp_2819_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 58dc22e5ac53..168fad3ab2aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -108,9 +108,6 @@ struct mlx5e_vport_stats { #define PPORT_2863_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ counter_set.eth_2863_cntrs_grp_data_layout.c##_high) -#define PPORT_2819_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_2819_cntrs_grp_data_layout.c##_high) #define PPORT_2819_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ counter_set.eth_2819_cntrs_grp_data_layout.c##_high) @@ -144,22 +141,6 @@ struct mlx5e_pport_stats { __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; -static const struct counter_desc pport_2819_stats_desc[] = { - { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, - { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, - { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, - { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, - { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, - { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, - { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, - { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, - { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, - { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, - { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, - { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, - { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, -}; - static const struct counter_desc pport_phy_statistical_stats_desc[] = { { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, @@ -303,7 +284,6 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) \ (ARRAY_SIZE(pport_phy_statistical_stats_desc) * \ MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) @@ -323,8 +303,7 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) -#define NUM_PPORT_COUNTERS(priv) 
(NUM_PPORT_2819_COUNTERS + \ - NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ +#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ NUM_PPORT_PRIO + \ NUM_PPORT_ETH_EXT_COUNTERS(priv)) From 2e4df0b2415744548f612f26a33c701bd2ce37d8 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 14:58:47 +0300 Subject: [PATCH 1615/2177] net/mlx5e: Switch physical statistical counters to use stats group API Switch the physical statistical counters to use the new stats group API. Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 8 ---- .../ethernet/mellanox/mlx5/core/en_stats.c | 45 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 14 +----- 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b5a9de1150f9..547b7fa48637 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -216,10 +216,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_phy_statistical_stats_desc[i].format); - for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_eth_ext_stats_desc[i].format); @@ -336,10 +332,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, - pport_phy_statistical_stats_desc, i); - for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, pport_eth_ext_stats_desc, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 7cdcc60e913b..3838f109ceb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -333,6 +333,46 @@ static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +#define PPORT_PHY_STATISTICAL_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +static const struct counter_desc pport_phy_statistical_stats_desc[] = { + { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, +}; + +#define NUM_PPORT_PHY_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc) + +static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) +{ + return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ? 
+ NUM_PPORT_PHY_COUNTERS : 0; +} + +static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + for (i = 0; i < NUM_PPORT_PHY_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + for (i = 0; i < NUM_PPORT_PHY_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i); + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -364,6 +404,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_2819_fill_strings, .fill_stats = mlx5e_grp_2819_fill_stats, }, + { + .get_num_stats = mlx5e_grp_phy_get_num_stats, + .fill_strings = mlx5e_grp_phy_fill_strings, + .fill_stats = mlx5e_grp_phy_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 168fad3ab2aa..964a1c69cb51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -111,9 +111,6 @@ struct mlx5e_vport_stats { #define PPORT_2819_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ counter_set.eth_2819_cntrs_grp_data_layout.c##_high) -#define PPORT_PHY_STATISTICAL_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.phys_layer_statistical_cntrs.c##_high) #define PPORT_PHY_STATISTICAL_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ counter_set.phys_layer_statistical_cntrs.c##_high) @@ -141,11 +138,6 @@ struct mlx5e_pport_stats { __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; -static const struct counter_desc pport_phy_statistical_stats_desc[] = { - { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, - { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, -}; - static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, @@ -284,9 +276,6 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) \ - (ARRAY_SIZE(pport_phy_statistical_stats_desc) * \ - MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) #define NUM_PCIE_PERF_COUNTERS(priv) \ (ARRAY_SIZE(pcie_perf_stats_desc) * \ MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) @@ -303,8 +292,7 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) -#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ - NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ +#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ NUM_PPORT_PRIO + \ NUM_PPORT_ETH_EXT_COUNTERS(priv)) #define NUM_PCIE_COUNTERS(priv) (NUM_PCIE_PERF_COUNTERS(priv) + \ From 3488bd4c3549ea805c48de522a8ede1edc902e4b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 15:00:40 +0300 
Subject: [PATCH 1616/2177] net/mlx5e: Switch ethernet extended counters to use stats group API Switch the ethernet extended counters to use the new stats group API. Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 8 ---- .../ethernet/mellanox/mlx5/core/en_stats.c | 47 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 13 +---- 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 547b7fa48637..2da0bb88fa39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -216,10 +216,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_eth_ext_stats_desc[i].format); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pcie_perf_stats_desc[i].format); @@ -332,10 +328,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, - pport_eth_ext_stats_desc, i); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, pcie_perf_stats_desc, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 3838f109ceb1..a4edfd8726f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -373,6 +373,48 @@ static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +#define PPORT_ETH_EXT_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_eth_ext_stats_desc[] = { + { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, +}; + +#define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc) + +static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv) +{ + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + return NUM_PPORT_ETH_EXT_COUNTERS; + + return 0; +} + +static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_eth_ext_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i); + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -409,6 +451,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_phy_fill_strings, .fill_stats = 
mlx5e_grp_phy_fill_stats, }, + { + .get_num_stats = mlx5e_grp_eth_ext_get_num_stats, + .fill_strings = mlx5e_grp_eth_ext_fill_strings, + .fill_stats = mlx5e_grp_eth_ext_fill_stats, + } }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 964a1c69cb51..c0e84394c3fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -121,9 +121,6 @@ struct mlx5e_vport_stats { MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ counter_set.eth_per_prio_grp_data_layout.c##_high) #define NUM_PPORT_PRIO 8 -#define PPORT_ETH_EXT_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_extended_cntrs_grp_data_layout.c##_high) #define PPORT_ETH_EXT_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) @@ -154,10 +151,6 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; -static const struct counter_desc pport_eth_ext_stats_desc[] = { - { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, -}; - #define PCIE_PERF_OFF(c) \ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) #define PCIE_PERF_GET(pcie_stats, c) \ @@ -289,12 +282,8 @@ static const struct counter_desc sq_stats_desc[] = { ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ ARRAY_SIZE(pport_per_prio_pfc_stats_desc) -#define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ - (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ - MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) #define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ - NUM_PPORT_PRIO + \ - NUM_PPORT_ETH_EXT_COUNTERS(priv)) + NUM_PPORT_PRIO) #define NUM_PCIE_COUNTERS(priv) (NUM_PCIE_PERF_COUNTERS(priv) + \ NUM_PCIE_PERF_COUNTERS64(priv) +\ NUM_PCIE_PERF_STALL_COUNTERS(priv)) From 9fd2b5f137f5f723c03f5018acb822dcebfba8f0 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 15:01:58 +0300 Subject: [PATCH 1617/2177] net/mlx5e: Switch pcie counters to use stats group API Switch the pcie counters to use the new stats group API. 
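All of the mlx5e conversions in this run implement the same three-callback contract declared in en_stats.h, so each group only has to describe its own counters and the generic ethtool code does the rest. Below is a minimal sketch of such a group; the "demo" descriptor table, counter source and function names are invented purely for illustration, while struct mlx5e_stats_grp, struct counter_desc, ARRAY_SIZE and ETH_GSTRING_LEN are the real definitions used throughout these patches:

static const struct counter_desc demo_stats_desc[] = {
	{ "demo_events", 0 },	/* ethtool string + offset into a raw counter buffer */
};

#define NUM_DEMO_COUNTERS ARRAY_SIZE(demo_stats_desc)

static int mlx5e_grp_demo_get_num_stats(struct mlx5e_priv *priv)
{
	return NUM_DEMO_COUNTERS;	/* must match what the two fill callbacks emit */
}

static int mlx5e_grp_demo_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
{
	int i;

	for (i = 0; i < NUM_DEMO_COUNTERS; i++)
		strcpy(data + (idx++) * ETH_GSTRING_LEN, demo_stats_desc[i].format);
	return idx;	/* always return the next free slot */
}

static int mlx5e_grp_demo_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
{
	int i;

	for (i = 0; i < NUM_DEMO_COUNTERS; i++)
		data[idx++] = 0;	/* a real group reads its hardware or software counter here */
	return idx;
}

Such a group is then appended to mlx5e_stats_grps[] with its three callbacks, exactly as the hunks below do for the PCIe counters. Because each conversion also deletes the corresponding open-coded loops from en_ethtool.c, by the end of the run ethtool only walks mlx5e_stats_grps[], and a group's position in that array fixes its position in the ethtool -S output.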
Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 25 ----- .../ethernet/mellanox/mlx5/core/en_stats.c | 94 ++++++++++++++++++- .../ethernet/mellanox/mlx5/core/en_stats.h | 32 ------- 3 files changed, 93 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2da0bb88fa39..8515ae815cbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -183,7 +183,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); return num_stats + NUM_PPORT_COUNTERS(priv) + - NUM_PCIE_COUNTERS(priv) + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + @@ -216,18 +215,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc[i].format); - - for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc64[i].format); - - for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stall_stats_desc[i].format); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -328,18 +315,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc, i); - - for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc64, i); - - for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stall_stats_desc, i); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index a4edfd8726f3..700362a00fd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -415,6 +415,93 @@ static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +#define PCIE_PERF_OFF(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) +static const struct counter_desc pcie_perf_stats_desc[] = { + { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, + { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, +}; + +#define PCIE_PERF_OFF64(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pcie_perf_stats_desc64[] = { + { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, +}; + +static const struct counter_desc pcie_perf_stall_stats_desc[] = { + { "outbound_pci_stalled_rd", 
PCIE_PERF_OFF(outbound_stalled_reads) }, + { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, + { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, + { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, +}; + +#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc) +#define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64) +#define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc) + +static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv) +{ + int num_stats = 0; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + num_stats += NUM_PCIE_PERF_COUNTERS; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + num_stats += NUM_PCIE_PERF_COUNTERS64; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + num_stats += NUM_PCIE_PERF_STALL_COUNTERS; + + return num_stats; +} + +static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc64[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stall_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i); + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -455,7 +542,12 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .get_num_stats = mlx5e_grp_eth_ext_get_num_stats, .fill_strings = mlx5e_grp_eth_ext_fill_strings, .fill_stats = mlx5e_grp_eth_ext_fill_stats, - } + }, + { + .get_num_stats = mlx5e_grp_pcie_get_num_stats, + .fill_strings = mlx5e_grp_pcie_fill_strings, + .fill_stats = mlx5e_grp_pcie_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index c0e84394c3fc..8afff1981b25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -151,14 +151,10 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; -#define PCIE_PERF_OFF(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) #define 
PCIE_PERF_GET(pcie_stats, c) \ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ counter_set.pcie_perf_cntrs_grp_data_layout.c) -#define PCIE_PERF_OFF64(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) #define PCIE_PERF_GET64(pcie_stats, c) \ MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) @@ -167,22 +163,6 @@ struct mlx5e_pcie_stats { __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; }; -static const struct counter_desc pcie_perf_stats_desc[] = { - { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, - { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, -}; - -static const struct counter_desc pcie_perf_stats_desc64[] = { - { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, -}; - -static const struct counter_desc pcie_perf_stall_stats_desc[] = { - { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, - { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, - { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, - { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, -}; - struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -269,24 +249,12 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_PCIE_PERF_COUNTERS(priv) \ - (ARRAY_SIZE(pcie_perf_stats_desc) * \ - MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) -#define NUM_PCIE_PERF_COUNTERS64(priv) \ - (ARRAY_SIZE(pcie_perf_stats_desc64) * \ - MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) -#define NUM_PCIE_PERF_STALL_COUNTERS(priv) \ - (ARRAY_SIZE(pcie_perf_stall_stats_desc) * \ - MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ ARRAY_SIZE(pport_per_prio_pfc_stats_desc) #define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ NUM_PPORT_PRIO) -#define NUM_PCIE_COUNTERS(priv) (NUM_PCIE_PERF_COUNTERS(priv) + \ - NUM_PCIE_PERF_COUNTERS64(priv) +\ - NUM_PCIE_PERF_STALL_COUNTERS(priv)) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) From e6000651cf009280fedee6cbf951747a0beaffb4 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 15:03:03 +0300 Subject: [PATCH 1618/2177] net/mlx5e: Switch per prio traffic counters to use stats group API Switch the per prio traffic counters to use the new stats group API. 
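The per-prio descriptors differ from the fixed-name groups in that their format strings contain a %d placeholder, and fill_strings expands every descriptor once per priority. The standalone user-space sketch below is not kernel code; it only borrows the four format strings from pport_per_prio_traffic_stats_desc and lets NUM_PRIO stand in for NUM_PPORT_PRIO (8) to show the names that end up in ethtool -S:

#include <stdio.h>

#define NUM_PRIO 8	/* stands in for NUM_PPORT_PRIO */

int main(void)
{
	static const char * const formats[] = {
		"rx_prio%d_bytes", "rx_prio%d_packets",
		"tx_prio%d_bytes", "tx_prio%d_packets",
	};
	char name[32];
	int prio, i;

	for (prio = 0; prio < NUM_PRIO; prio++)
		for (i = 0; i < 4; i++) {
			snprintf(name, sizeof(name), formats[i], prio);
			printf("%s\n", name);	/* rx_prio0_bytes ... tx_prio7_packets */
		}
	return 0;
}

With four descriptors and eight priorities the group always contributes 4 * 8 = 32 entries, which is exactly what mlx5e_grp_per_prio_traffic_get_num_stats() reports.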
Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 13 ----- .../ethernet/mellanox/mlx5/core/en_stats.c | 50 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 11 ---- 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 8515ae815cbc..b17460e14b29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -182,7 +182,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) for (i = 0; i < mlx5e_num_stats_grps; i++) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); return num_stats + - NUM_PPORT_COUNTERS(priv) + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + @@ -215,12 +214,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { - for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_traffic_stats_desc[i].format, prio); - } - pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { @@ -315,12 +308,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { - for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], - pport_per_prio_traffic_stats_desc, i); - } - pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 700362a00fd0..5d00d38f9bd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -502,6 +502,51 @@ static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { + { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, +}; + +#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS ARRAY_SIZE(pport_per_prio_traffic_stats_desc) + +static int mlx5e_grp_per_prio_traffic_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO; +} + +static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); + } + + return idx; +} + +static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < 
NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i); + } + + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -548,6 +593,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_pcie_fill_strings, .fill_stats = mlx5e_grp_pcie_fill_stats, }, + { + .get_num_stats = mlx5e_grp_per_prio_traffic_get_num_stats, + .fill_strings = mlx5e_grp_per_prio_traffic_fill_strings, + .fill_stats = mlx5e_grp_per_prio_traffic_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 8afff1981b25..8b5b622306a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -135,13 +135,6 @@ struct mlx5e_pport_stats { __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; -static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { - { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, - { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, - { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, - { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, -}; - static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { /* %s is "global" or "prio{i}" */ { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, @@ -249,12 +242,8 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ - ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ ARRAY_SIZE(pport_per_prio_pfc_stats_desc) -#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ - NUM_PPORT_PRIO) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) From 4377bea27696f45834d77b8e3c7206874d5a66e6 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 15:04:47 +0300 Subject: [PATCH 1619/2177] net/mlx5e: Switch per prio pfc counters to use stats group API Switch the per prio pfc counters to use the new stats group API. Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 73 +----------- .../ethernet/mellanox/mlx5/core/en_stats.c | 111 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 14 --- 3 files changed, 113 insertions(+), 85 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b17460e14b29..6680b24bf1c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -136,42 +136,9 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); } -static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - u8 pfc_en_tx; - u8 pfc_en_rx; - int err; - - if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return 0; - - err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); - - return err ? 
0 : pfc_en_tx | pfc_en_rx; -} - -static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - u32 rx_pause; - u32 tx_pause; - int err; - - if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return false; - - err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); - - return err ? false : rx_pause | tx_pause; -} - #define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num) #define MLX5E_NUM_SQ_STATS(priv) \ (NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc) -#define MLX5E_NUM_PFC_COUNTERS(priv) \ - ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ - NUM_PPORT_PER_PRIO_PFC_COUNTERS) int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { @@ -184,7 +151,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) return num_stats + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + - MLX5E_NUM_PFC_COUNTERS(priv) + ARRAY_SIZE(mlx5e_pme_status_desc) + ARRAY_SIZE(mlx5e_pme_error_desc) + mlx5e_ipsec_get_count(priv); @@ -208,30 +174,11 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) { - int i, j, tc, prio, idx = 0; - unsigned long pfc_combined; + int i, j, tc, idx = 0; for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - pfc_combined = mlx5e_query_pfc_combined(priv); - for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { - for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - char pfc_string[ETH_GSTRING_LEN]; - - snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, pfc_string); - } - } - - if (mlx5e_query_global_pause_combined(priv)) { - for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, "global"); - } - } - /* port module event counters */ for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); @@ -293,8 +240,7 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, { struct mlx5e_channels *channels; struct mlx5_priv *mlx5_priv; - int i, j, tc, prio, idx = 0; - unsigned long pfc_combined; + int i, j, tc, idx = 0; if (!data) return; @@ -308,21 +254,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - pfc_combined = mlx5e_query_pfc_combined(priv); - for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { - for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], - pport_per_prio_pfc_stats_desc, i); - } - } - - if (mlx5e_query_global_pause_combined(priv)) { - for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], - pport_per_prio_pfc_stats_desc, i); - } - } - /* port module event counters */ mlx5_priv = &priv->mdev->priv; for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 5d00d38f9bd0..c9f3be940934 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -502,6 
+502,9 @@ static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +#define PPORT_PER_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_prio_grp_data_layout.c##_high) static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, @@ -547,6 +550,109 @@ static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, return idx; } +static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, +}; + +#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc) + +static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 pfc_en_tx; + u8 pfc_en_rx; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); + + return err ? 0 : pfc_en_tx | pfc_en_rx; +} + +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + return err ? false : rx_pause | tx_pause; +} + +static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv) +{ + return (mlx5e_query_global_pause_combined(priv) + + hweight8(mlx5e_query_pfc_combined(priv))) * + NUM_PPORT_PER_PRIO_PFC_COUNTERS; +} + +static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, "global"); + } + } + + return idx; +} + +static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, i); + } + } + + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -598,6 +704,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { 
.fill_strings = mlx5e_grp_per_prio_traffic_fill_strings, .fill_stats = mlx5e_grp_per_prio_traffic_fill_stats, }, + { + .get_num_stats = mlx5e_grp_per_prio_pfc_get_num_stats, + .fill_strings = mlx5e_grp_per_prio_pfc_fill_strings, + .fill_stats = mlx5e_grp_per_prio_pfc_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 8b5b622306a9..8e92cd80c638 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -114,9 +114,6 @@ struct mlx5e_vport_stats { #define PPORT_PHY_STATISTICAL_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ counter_set.phys_layer_statistical_cntrs.c##_high) -#define PPORT_PER_PRIO_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_per_prio_grp_data_layout.c##_high) #define PPORT_PER_PRIO_GET(pstats, prio, c) \ MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ counter_set.eth_per_prio_grp_data_layout.c##_high) @@ -135,15 +132,6 @@ struct mlx5e_pport_stats { __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; -static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { - /* %s is "global" or "prio{i}" */ - { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, - { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, - { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, - { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, - { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, -}; - #define PCIE_PERF_GET(pcie_stats, c) \ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ counter_set.pcie_perf_cntrs_grp_data_layout.c) @@ -242,8 +230,6 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; -#define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ - ARRAY_SIZE(pport_per_prio_pfc_stats_desc) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) From 0e6f01a49d9c006e3ee3104ca5d4ccf722e154db Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 15:05:58 +0300 Subject: [PATCH 1620/2177] net/mlx5e: Switch pme counters to use stats group API Switch the pme counters to use the new stats group API. 
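Unlike the PPCNT-based groups, the port-module-event descriptors carry literal byte offsets into the pme_stats counter arrays and are read with MLX5E_READ_CTR64_CPU. The standalone user-space sketch below uses simplified stand-ins (demo_counter_desc, demo_read_ctr64) rather than the kernel helpers, but shows the same "name plus byte offset into a raw buffer" idea:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_counter_desc {
	const char *format;	/* ethtool string */
	size_t offset;		/* byte offset into the raw counter buffer */
};

static uint64_t demo_read_ctr64(const void *base, const struct demo_counter_desc *desc)
{
	uint64_t val;

	memcpy(&val, (const uint8_t *)base + desc->offset, sizeof(val));
	return val;
}

int main(void)
{
	static const struct demo_counter_desc descs[] = {
		{ "module_unplug", 8 },		/* offset borrowed from mlx5e_pme_status_desc */
		{ "module_high_temp", 48 },	/* offset borrowed from mlx5e_pme_error_desc */
	};
	uint64_t counters[8] = { 0 };
	size_t i;

	counters[1] = 3;	/* byte offset 8 lands on counters[1] */
	counters[6] = 1;	/* byte offset 48 lands on counters[6] */
	for (i = 0; i < sizeof(descs) / sizeof(descs[0]); i++)
		printf("%s = %llu\n", descs[i].format,
		       (unsigned long long)demo_read_ctr64(counters, &descs[i]));
	return 0;
}

In the real driver the two descriptor tables index two separate buffers (pme_stats.status_counters and pme_stats.error_counters); they are collapsed into one array here only to keep the sketch short.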
Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 20 ------- .../ethernet/mellanox/mlx5/core/en_stats.c | 54 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 10 ---- 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 6680b24bf1c7..6de948819034 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -151,8 +151,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) return num_stats + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + - ARRAY_SIZE(mlx5e_pme_status_desc) + - ARRAY_SIZE(mlx5e_pme_error_desc) + mlx5e_ipsec_get_count(priv); case ETH_SS_PRIV_FLAGS: @@ -179,13 +177,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - /* port module event counters */ - for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); - - for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); - /* IPSec counters */ idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN); @@ -239,7 +230,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, struct ethtool_stats *stats, u64 *data) { struct mlx5e_channels *channels; - struct mlx5_priv *mlx5_priv; int i, j, tc, idx = 0; if (!data) @@ -254,16 +244,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - /* port module event counters */ - mlx5_priv = &priv->mdev->priv; - for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, - mlx5e_pme_status_desc, i); - - for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, - mlx5e_pme_error_desc, i); - /* IPSec counters */ idx += mlx5e_ipsec_get_stats(priv, data + idx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index c9f3be940934..b120957ea940 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -653,6 +653,55 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, return idx; } +static const struct counter_desc mlx5e_pme_status_desc[] = { + { "module_unplug", 8 }, +}; + +static const struct counter_desc mlx5e_pme_error_desc[] = { + { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ + { "module_high_temp", 48 }, /* high temperature */ + { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ +}; + +#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) +#define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc) + +static int mlx5e_grp_pme_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS; +} + +static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); + + for (i = 0; i < 
NUM_PME_ERR_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); + + return idx; +} + +static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + struct mlx5_priv *mlx5_priv = &priv->mdev->priv; + int i; + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, + mlx5e_pme_status_desc, i); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, + mlx5e_pme_error_desc, i); + + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -709,6 +758,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_per_prio_pfc_fill_strings, .fill_stats = mlx5e_grp_per_prio_pfc_fill_stats, }, + { + .get_num_stats = mlx5e_grp_pme_get_num_stats, + .fill_strings = mlx5e_grp_pme_fill_strings, + .fill_stats = mlx5e_grp_pme_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 8e92cd80c638..800d72a22d91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -252,14 +252,4 @@ struct mlx5e_stats_grp { extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; extern const int mlx5e_num_stats_grps; -static const struct counter_desc mlx5e_pme_status_desc[] = { - { "module_unplug", 8 }, -}; - -static const struct counter_desc mlx5e_pme_error_desc[] = { - { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ - { "module_high_temp", 48 }, /* high temperature */ - { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ -}; - #endif /* __MLX5_EN_STATS_H__ */ From e185d43f59ccde68dff474f1f2b38b62f915d74c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 15:07:20 +0300 Subject: [PATCH 1621/2177] net/mlx5e: Switch ipsec counters to use stats group API Switch the ipsec counters to use the new stats group API. 
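Because the IPsec code already exposes its own count/strings/stats helpers, the group added below is just a thin adapter, and the only subtlety is index bookkeeping: the helpers return how many entries they wrote, while a group callback must return the next free slot, and the string helper expects a pointer to the slot it should start writing at. A sketch of that adapter pattern, with an invented mlx5e_example_get_strings()/mlx5e_example_get_stats() API standing in for the real IPsec helpers:

static int mlx5e_grp_wrap_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
{
	/* the helper writes N names starting at string slot idx and returns N */
	return idx + mlx5e_example_get_strings(priv, data + idx * ETH_GSTRING_LEN);
}

static int mlx5e_grp_wrap_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
{
	/* the helper writes N values starting at &data[idx] and returns N */
	return idx + mlx5e_example_get_stats(priv, data + idx);
}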
Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 10 +------- .../ethernet/mellanox/mlx5/core/en_stats.c | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 6de948819034..ff21348b7623 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -31,7 +31,6 @@ */ #include "en.h" -#include "en_accel/ipsec.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -150,8 +149,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); return num_stats + MLX5E_NUM_RQ_STATS(priv) + - MLX5E_NUM_SQ_STATS(priv) + - mlx5e_ipsec_get_count(priv); + MLX5E_NUM_SQ_STATS(priv); case ETH_SS_PRIV_FLAGS: return ARRAY_SIZE(mlx5e_priv_flags); @@ -177,9 +175,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - /* IPSec counters */ - idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return; @@ -244,9 +239,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - /* IPSec counters */ - idx += mlx5e_ipsec_get_stats(priv, data + idx); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index b120957ea940..930a8224e013 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -31,6 +31,7 @@ */ #include "en.h" +#include "en_accel/ipsec.h" static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, @@ -702,6 +703,24 @@ static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static int mlx5e_grp_ipsec_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_ipsec_get_count(priv); +} + +static int mlx5e_grp_ipsec_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + return idx + mlx5e_ipsec_get_strings(priv, + data + idx * ETH_GSTRING_LEN); +} + +static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + return idx + mlx5e_ipsec_get_stats(priv, data + idx); +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -763,6 +782,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_pme_fill_strings, .fill_stats = mlx5e_grp_pme_fill_stats, }, + { + .get_num_stats = mlx5e_grp_ipsec_get_num_stats, + .fill_strings = mlx5e_grp_ipsec_fill_strings, + .fill_stats = mlx5e_grp_ipsec_fill_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); From 1fe850062c9ee15a3bea1ae90aef386a492a1c5e Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 23 Aug 2017 15:08:19 +0300 Subject: [PATCH 1622/2177] net/mlx5e: Switch channels counters to use stats group API Switch the channels counters to use the new stats group API. 
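The channel group's size scales with the live configuration (channels times RQ descriptors, plus channels times traffic classes times SQ descriptors), and its fill callbacks bail out early on the MLX5E_STATE_OPENED check so nothing is emitted while the interface is down. A purely hypothetical sizing example, assuming 8 channels, 2 traffic classes and the descriptor counts moved by this patch (22 RQ, 14 SQ):

	int num_rq = 22 * 8;		/* NUM_RQ_STATS * channels       = 176 */
	int num_sq = 14 * 8 * 2;	/* NUM_SQ_STATS * channels * TCs = 224 */
	int total  = num_rq + num_sq;	/* reported by get_num_stats()   = 400 */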
Signed-off-by: Kamal Heib Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 47 +------- .../ethernet/mellanox/mlx5/core/en_stats.c | 103 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_stats.h | 45 -------- 3 files changed, 106 insertions(+), 89 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ff21348b7623..b34aa8efb036 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -135,10 +135,6 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); } -#define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num) -#define MLX5E_NUM_SQ_STATS(priv) \ - (NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc) - int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { int i, num_stats = 0; @@ -147,10 +143,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) case ETH_SS_STATS: for (i = 0; i < mlx5e_num_stats_grps; i++) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); - return num_stats + - MLX5E_NUM_RQ_STATS(priv) + - MLX5E_NUM_SQ_STATS(priv); - + return num_stats; case ETH_SS_PRIV_FLAGS: return ARRAY_SIZE(mlx5e_priv_flags); case ETH_SS_TEST: @@ -170,26 +163,10 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) { - int i, j, tc, idx = 0; + int i, idx = 0; for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - return; - - /* per channel counters */ - for (i = 0; i < priv->channels.num; i++) - for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - rq_stats_desc[j].format, i); - - for (tc = 0; tc < priv->channels.params.num_tc; tc++) - for (i = 0; i < priv->channels.num; i++) - for (j = 0; j < NUM_SQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - sq_stats_desc[j].format, - priv->channel_tc2txq[i][tc]); } void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) @@ -224,8 +201,7 @@ static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, struct ethtool_stats *stats, u64 *data) { - struct mlx5e_channels *channels; - int i, j, tc, idx = 0; + int i, idx = 0; if (!data) return; @@ -233,27 +209,10 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_stats(priv, true); - channels = &priv->channels; mutex_unlock(&priv->state_lock); for (i = 0; i < mlx5e_num_stats_grps; i++) idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - return; - - /* per channel counters */ - for (i = 0; i < channels->num; i++) - for (j = 0; j < NUM_RQ_STATS; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats, - rq_stats_desc, j); - - for (tc = 0; tc < priv->channels.params.num_tc; tc++) - for (i = 0; i < channels->num; i++) - for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats, - sq_stats_desc, j); } static void mlx5e_get_ethtool_stats(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 930a8224e013..8bc30484ecc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -721,6 +721,104 @@ static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx + mlx5e_ipsec_get_stats(priv, data + idx); } +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, +}; + +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) + +static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) +{ + return (NUM_RQ_STATS * priv->channels.num) + + (NUM_SQ_STATS * priv->channels.num * priv->channels.params.num_tc); +} + +static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i, j, tc; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return idx; + + for (i = 0; i < priv->channels.num; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); + + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < priv->channels.num; i++) + for (j = 0; j < 
NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + sq_stats_desc[j].format, + priv->channel_tc2txq[i][tc]); + + return idx; +} + +static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + struct mlx5e_channels *channels = &priv->channels; + int i, j, tc; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return idx; + + for (i = 0; i < channels->num; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats, + rq_stats_desc, j); + + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < channels->num; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats, + sq_stats_desc, j); + + return idx; +} + const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, @@ -787,6 +885,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .fill_strings = mlx5e_grp_ipsec_fill_strings, .fill_stats = mlx5e_grp_ipsec_fill_stats, }, + { + .get_num_stats = mlx5e_grp_channels_get_num_stats, + .fill_strings = mlx5e_grp_channels_fill_strings, + .fill_stats = mlx5e_grp_channels_fill_stats, + } }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 800d72a22d91..d094663edd9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -169,31 +169,6 @@ struct mlx5e_rq_stats { u64 cache_waive; }; -static const struct counter_desc rq_stats_desc[] = { - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, -}; - struct mlx5e_sq_stats { /* commonly accessed in data path */ u64 packets; @@ -213,26 +188,6 @@ struct mlx5e_sq_stats { u64 dropped; }; -static const struct counter_desc sq_stats_desc[] = { - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, - { 
MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, -}; - -#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) -#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) - struct mlx5e_stats { struct mlx5e_sw_stats sw; struct mlx5e_qcounter_stats qcnt; From a4346210c4e092de50594b728300766121a3b00d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 31 Oct 2017 17:36:42 +0100 Subject: [PATCH 1623/2177] l2tp: remove ->ref() and ->deref() The ->ref() and ->deref() callbacks are unused since PPP stopped using them in ee40fb2e1eb5 ("l2tp: protect sock pointer of struct pppol2tp_session with RCU"). We can thus remove them from struct l2tp_session and drop the do_ref parameter of l2tp_session_get*(). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 52 +++++++---------------------------------- net/l2tp/l2tp_core.h | 10 +++----- net/l2tp/l2tp_debugfs.c | 4 +--- net/l2tp/l2tp_ip.c | 4 +--- net/l2tp/l2tp_ip6.c | 4 +--- net/l2tp/l2tp_netlink.c | 20 +++++++--------- net/l2tp/l2tp_ppp.c | 10 +++----- 7 files changed, 25 insertions(+), 79 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a1d56e143fcd..216f49aec16f 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -216,12 +216,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) } EXPORT_SYMBOL_GPL(l2tp_tunnel_get); -/* Lookup a session. A new reference is held on the returned session. - * Optionally calls session->ref() too if do_ref is true. - */ +/* Lookup a session. A new reference is held on the returned session. 
*/ struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, - u32 session_id, bool do_ref) + u32 session_id) { struct hlist_head *session_list; struct l2tp_session *session; @@ -235,8 +233,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net, hlist_for_each_entry_rcu(session, session_list, global_hlist) { if (session->session_id == session_id) { l2tp_session_inc_refcount(session); - if (do_ref && session->ref) - session->ref(session); rcu_read_unlock_bh(); return session; @@ -252,8 +248,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net, hlist_for_each_entry(session, session_list, hlist) { if (session->session_id == session_id) { l2tp_session_inc_refcount(session); - if (do_ref && session->ref) - session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; @@ -265,8 +259,7 @@ struct l2tp_session *l2tp_session_get(const struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get); -struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, - bool do_ref) +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; struct l2tp_session *session; @@ -277,8 +270,6 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { l2tp_session_inc_refcount(session); - if (do_ref && session->ref) - session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -295,8 +286,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth); * This is very inefficient but is only used by management interfaces. */ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, - const char *ifname, - bool do_ref) + const char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); int hash; @@ -307,8 +297,6 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { l2tp_session_inc_refcount(session); - if (do_ref && session->ref) - session->ref(session); rcu_read_unlock_bh(); return session; @@ -489,9 +477,6 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length); else kfree_skb(skb); - - if (session->deref) - (*session->deref)(session); } /* Dequeue skbs from the session's reorder_q, subject to packet order. @@ -520,8 +505,6 @@ start: session->reorder_skip = 1; __skb_unlink(skb, &session->reorder_q); kfree_skb(skb); - if (session->deref) - (*session->deref)(session); continue; } @@ -694,9 +677,6 @@ discard: * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. - * - * session->ref() must have been called prior to l2tp_recv_common(). - * session->deref() will be called automatically after skb is processed. 
*/ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, @@ -863,9 +843,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, discard: atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); - - if (session->deref) - (*session->deref)(session); } EXPORT_SYMBOL(l2tp_recv_common); @@ -879,8 +856,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session) while ((skb = skb_dequeue(&session->reorder_q))) { atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); - if (session->deref) - (*session->deref)(session); } return 0; } @@ -972,13 +947,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } /* Find the session context */ - session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); + session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id); if (!session || !session->recv_skb) { - if (session) { - if (session->deref) - session->deref(session); + if (session) l2tp_session_dec_refcount(session); - } /* Not found? Pass to userspace to deal with */ l2tp_info(tunnel, L2TP_MSG_DATA, @@ -1322,9 +1294,6 @@ again: if (test_and_set_bit(0, &session->dead)) goto again; - if (session->ref != NULL) - (*session->ref)(session); - write_unlock_bh(&tunnel->hlist_lock); __l2tp_session_unhash(session); @@ -1333,9 +1302,6 @@ again: if (session->session_close != NULL) (*session->session_close)(session); - if (session->deref != NULL) - (*session->deref)(session); - l2tp_session_dec_refcount(session); write_lock_bh(&tunnel->hlist_lock); @@ -1759,15 +1725,13 @@ int l2tp_session_delete(struct l2tp_session *session) if (test_and_set_bit(0, &session->dead)) return 0; - if (session->ref) - (*session->ref)(session); __l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close != NULL) (*session->session_close)(session); - if (session->deref) - (*session->deref)(session); + l2tp_session_dec_refcount(session); + return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 77caa5966736..92c90e72259c 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -129,8 +129,6 @@ struct l2tp_session { int (*build_header)(struct l2tp_session *session, void *buf); void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); void (*session_close)(struct l2tp_session *session); - void (*ref)(struct l2tp_session *session); - void (*deref)(struct l2tp_session *session); #if IS_ENABLED(CONFIG_L2TP_DEBUGFS) void (*show)(struct seq_file *m, void *priv); #endif @@ -245,12 +243,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, - u32 session_id, bool do_ref); -struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, - bool do_ref); + u32 session_id); +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, - const char *ifname, - bool do_ref); + const char *ifname); struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 53bae54c4d6e..eb69411bcb47 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct 
l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx); pd->session_idx++; if (pd->session == NULL) { @@ -241,8 +241,6 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) l2tp_dfs_seq_tunnel_show(m, pd->tunnel); } else { l2tp_dfs_seq_session_show(m, pd->session); - if (pd->session->deref) - pd->session->deref(pd->session); l2tp_session_dec_refcount(pd->session); } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4d322c1b7233..6dbe450400a2 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -143,7 +143,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_get(net, NULL, session_id, true); + session = l2tp_session_get(net, NULL, session_id); if (!session) goto discard; @@ -205,8 +205,6 @@ pass_up: return sk_receive_skb(sk, skb, 1); discard_sess: - if (session->deref) - session->deref(session); l2tp_session_dec_refcount(session); goto discard; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 88b397c30d86..59ebb6e4f735 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -156,7 +156,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_get(net, NULL, session_id, true); + session = l2tp_session_get(net, NULL, session_id); if (!session) goto discard; @@ -219,8 +219,6 @@ pass_up: return sk_receive_skb(sk, skb, 1); discard_sess: - if (session->deref) - session->deref(session); l2tp_session_dec_refcount(session); goto discard; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f04fb347d251..a1f24fb2be98 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -48,8 +48,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; -static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, - bool do_ref) +static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info) { u32 tunnel_id; u32 session_id; @@ -60,15 +59,14 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); - session = l2tp_session_get_by_ifname(net, ifname, do_ref); + session = l2tp_session_get_by_ifname(net, ifname); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_get(net, tunnel_id); if (tunnel) { - session = l2tp_session_get(net, tunnel, session_id, - do_ref); + session = l2tp_session_get(net, tunnel, session_id); l2tp_tunnel_dec_refcount(tunnel); } } @@ -649,7 +647,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf &cfg); if (ret >= 0) { - session = l2tp_session_get(net, tunnel, session_id, false); + session = l2tp_session_get(net, tunnel, session_id); if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); @@ -669,7 +667,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; u16 pw_type; - session = l2tp_nl_session_get(info, true); + session = l2tp_nl_session_get(info); if 
(session == NULL) { ret = -ENODEV; goto out; @@ -683,8 +681,6 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); - if (session->deref) - session->deref(session); l2tp_session_dec_refcount(session); out: @@ -696,7 +692,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf int ret = 0; struct l2tp_session *session; - session = l2tp_nl_session_get(info, false); + session = l2tp_nl_session_get(info); if (session == NULL) { ret = -ENODEV; goto out; @@ -828,7 +824,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int ret; - session = l2tp_nl_session_get(info, false); + session = l2tp_nl_session_get(info); if (session == NULL) { ret = -ENODEV; goto err; @@ -874,7 +870,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_get_nth(tunnel, si, false); + session = l2tp_session_get_nth(tunnel, si); if (session == NULL) { ti++; tunnel = NULL; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 845aba543dce..535db8319769 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -742,7 +742,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = peer_tunnel_id; - session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); + session = l2tp_session_get(sock_net(sk), tunnel, session_id); if (session) { drop_refcnt = true; ps = l2tp_session_priv(session); @@ -1193,13 +1193,11 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, /* resend to session ioctl handler */ struct l2tp_session *session = l2tp_session_get(sock_net(sk), tunnel, - stats.session_id, true); + stats.session_id); if (session) { err = pppol2tp_session_ioctl(session, cmd, arg); - if (session->deref) - session->deref(session); l2tp_session_dec_refcount(session); } else { err = -EBADR; @@ -1615,7 +1613,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx); pd->session_idx++; if (pd->session == NULL) { @@ -1758,8 +1756,6 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) pppol2tp_seq_tunnel_show(m, pd->tunnel); } else { pppol2tp_seq_session_show(m, pd->session); - if (pd->session->deref) - pd->session->deref(pd->session); l2tp_session_dec_refcount(pd->session); } From 9ff672ba4eefd0821b4ad696511d7b4e94b7e539 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 31 Oct 2017 17:36:44 +0100 Subject: [PATCH 1624/2177] l2tp: remove l2tp specific refcount debugging With conversion to refcount_t, such manual debugging code doesn't make sense anymore. The tunnel part was already dropped by 54652eb12c1b ("l2tp: hold tunnel while looking up sessions in l2tp_netlink"). Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.h | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 92c90e72259c..9534e16965cc 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -294,37 +294,17 @@ static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel) /* Session reference counts. Incremented when code obtains a reference * to a session. */ -static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session) +static inline void l2tp_session_inc_refcount(struct l2tp_session *session) { refcount_inc(&session->ref_count); } -static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session) +static inline void l2tp_session_dec_refcount(struct l2tp_session *session) { if (refcount_dec_and_test(&session->ref_count)) l2tp_session_free(session); } -#ifdef L2TP_REFCNT_DEBUG -#define l2tp_session_inc_refcount(_s) \ -do { \ - pr_debug("l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_s)->name, \ - refcount_read(&_s->ref_count)); \ - l2tp_session_inc_refcount_1(_s); \ -} while (0) -#define l2tp_session_dec_refcount(_s) \ -do { \ - pr_debug("l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_s)->name, \ - refcount_read(&_s->ref_count)); \ - l2tp_session_dec_refcount_1(_s); \ -} while (0) -#else -#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s) -#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s) -#endif - #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ From c7fa745d988812c4dea7dbc645f025c5bfa4917e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 31 Oct 2017 17:36:45 +0100 Subject: [PATCH 1625/2177] l2tp: remove l2tp_tunnel_count and l2tp_session_count These variables have never been used. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 216f49aec16f..7c8d1eb757a5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -100,8 +100,6 @@ struct l2tp_skb_cb { #define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) -static atomic_t l2tp_tunnel_count; -static atomic_t l2tp_session_count; static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ @@ -359,10 +357,6 @@ int l2tp_session_register(struct l2tp_session *session, hlist_add_head(&session->hlist, head); write_unlock_bh(&tunnel->hlist_lock); - /* Ignore management session in session count value */ - if (session->session_id != 0) - atomic_inc(&l2tp_session_count); - return 0; err_tlock_pnlock: @@ -1251,7 +1245,6 @@ static void l2tp_tunnel_destruct(struct sock *sk) spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_del_rcu(&tunnel->list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - atomic_dec(&l2tp_tunnel_count); l2tp_tunnel_closeall(tunnel); @@ -1632,7 +1625,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); - atomic_inc(&l2tp_tunnel_count); /* Bump the reference count. The tunnel context is deleted * only when this drops to zero. 
Must be done before list insertion @@ -1678,8 +1670,6 @@ void l2tp_session_free(struct l2tp_session *session) if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); sock_put(tunnel->sock); session->tunnel = NULL; l2tp_tunnel_dec_refcount(tunnel); From 675080f2391f8b6ecb2c807e3eb7693e12847502 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 31 Oct 2017 17:36:46 +0100 Subject: [PATCH 1626/2177] l2tp: remove field 'dev' from struct l2tp_eth This field has never been used. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index d29bfee291cb..3e2dec1fb0f5 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -41,7 +41,6 @@ /* via netdev_priv() */ struct l2tp_eth { - struct net_device *dev; struct sock *tunnel_sock; struct l2tp_session *session; atomic_long_t tx_bytes; @@ -60,9 +59,6 @@ struct l2tp_eth_sess { static int l2tp_eth_dev_init(struct net_device *dev) { - struct l2tp_eth *priv = netdev_priv(dev); - - priv->dev = dev; eth_hw_addr_random(dev); eth_broadcast_addr(dev->broadcast); netdev_lockdep_set_classes(dev); @@ -315,7 +311,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, l2tp_eth_adjust_mtu(tunnel, session, dev); priv = netdev_priv(dev); - priv->dev = dev; priv->session = session; priv->tunnel_sock = tunnel->sock; From 7dfbe5b64fe8f619bc056a7f81421e8beab373f1 Mon Sep 17 00:00:00 2001 From: "Brenda J. Butler" Date: Tue, 31 Oct 2017 14:25:46 -0400 Subject: [PATCH 1627/2177] tc-testing: very simple example test cases As part of documentation, supply some very simple test cases to illustrate how test cases work. One test case shows commands in the setup, command, verify and teardown stages. Other test cases show how to have a working test case that does not have commands in the setup, verify and/or teardown stages. Specifically, the command lists for setup and teardown can be empty. And the verify command must have a command, but it can be /bin/true. The regex must have a string, we recommend a single space, and the count of matches must be zero if you do not want to use the match feature of verify. Verify will always look for a return code of success (0) so we give /bin/true when we do not want to make a check there. Also, update the documentation for testcases to be more specific in the cases of: - accepting non-success return codes in setup and teardown stages - how to write the test when no setup, teardown and/or verify are desired. To run the example test cases: $ sudo -E ./tdc.py -f creating-testcases/example.json -l 1f: (example) simple test to test framework 2f: (example) simple test, no need for verify 3f: (example) simple test, no need for setup or teardown (or verify) $ sudo -E ./tdc.py -f creating-testcases/example.json Test 1f: simple test to test framework Test 2f: simple test, no need for verify Test 3f: simple test, no need for setup or teardown (or verify) All test results: 1..3 ok 1 1f simple test to test framework ok 2 2f simple test, no need for verify ok 3 3f simple test, no need for setup or teardown (or verify) $ Signed-off-by: Brenda J. Butler Acked-by: Lucas Bates Signed-off-by: David S. 
Miller --- .../creating-testcases/AddingTestCases.txt | 12 ++++ .../creating-testcases/example.json | 55 +++++++++++++++++++ .../creating-testcases/template.json | 15 ++++- 3 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/tc-testing/creating-testcases/example.json diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt index 4e09257bc443..00438331ba47 100644 --- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt +++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt @@ -34,6 +34,12 @@ category: A list of single-word descriptions covering what the command setup: The list of commands required to ensure the command under test succeeds. For example: if testing a filter, the command to create the qdisc would appear here. + This list can be empty. + Each command can be a string to be executed, or a list consisting + of a string which is a command to be executed, followed by 1 or + more acceptable exit codes for this command. + If only a string is given for the command, then an exit code of 0 + will be expected. cmdUnderTest: The tc command being tested itself. expExitCode: The code returned by the command under test upon its termination. tdc will compare this value against the actual returned value. @@ -49,6 +55,12 @@ matchCount: How many times the regex in matchPattern should match. A value teardown: The list of commands to clean up after the test is completed. The environment should be returned to the same state as when this test was started: qdiscs deleted, actions flushed, etc. + This list can be empty. + Each command can be a string to be executed, or a list consisting + of a string which is a command to be executed, followed by 1 or + more acceptable exit codes for this command. + If only a string is given for the command, then an exit code of 0 + will be expected. 
SETUP/TEARDOWN ERRORS diff --git a/tools/testing/selftests/tc-testing/creating-testcases/example.json b/tools/testing/selftests/tc-testing/creating-testcases/example.json new file mode 100644 index 000000000000..5ec501200970 --- /dev/null +++ b/tools/testing/selftests/tc-testing/creating-testcases/example.json @@ -0,0 +1,55 @@ +[ + { + "id": "1f", + "name": "simple test to test framework", + "category": [ + "example" + ], + "setup": [ + "mkdir mytest" + ], + "cmdUnderTest": "touch mytest/blorfl", + "expExitCode": "0", + "verifyCmd": "ls mytest/* | grep '[b]lorfl'", + "matchPattern": "orfl", + "matchCount": "1", + "teardown": [ + "rm -rf mytest" + ] + }, + { + "id": "2f", + "name": "simple test, no need for verify", + "category": [ + "example" + ], + "setup": [ + "mkdir mytest", + "touch mytest/blorfl" + ], + "cmdUnderTest": "ls mytest/blorfl", + "expExitCode": "0", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "rm -rf mytest" + ] + }, + { + "id": "3f", + "name": "simple test, no need for setup or teardown (or verify)", + "category": [ + "example" + ], + "setup": [ + ], + "cmdUnderTest": "ip l l lo", + "expExitCode": "0", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + ] + } +] diff --git a/tools/testing/selftests/tc-testing/creating-testcases/template.json b/tools/testing/selftests/tc-testing/creating-testcases/template.json index 87971744bdd4..8b99b86d65bd 100644 --- a/tools/testing/selftests/tc-testing/creating-testcases/template.json +++ b/tools/testing/selftests/tc-testing/creating-testcases/template.json @@ -26,7 +26,13 @@ "" ], "setup": [ - "" + "", + [ + "", + 0, + 1, + 255 + ] ], "cmdUnderTest": "", "expExitCode": "", @@ -34,7 +40,12 @@ "matchPattern": "", "matchCount": "", "teardown": [ - "" + "", + [ + "", + 0, + 255 + ] ] } ] From a5a5efe9998d6f28263324b8aa3e77a1b6cc465e Mon Sep 17 00:00:00 2001 From: "Brenda J. Butler" Date: Tue, 31 Oct 2017 14:26:54 -0400 Subject: [PATCH 1628/2177] tc-testing: gitignore, ignore standard python artifacts Ignore .pyc files, "python compiled" files, that get created when a python script is run. They should never be committed. Signed-off-by: Brenda J. Butler Acked-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index c18dd8d83cee..7a60b85e148f 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1 +1,2 @@ __pycache__/ +*.pyc From 181d6610f5311868f092d033f0273140dd44fb8a Mon Sep 17 00:00:00 2001 From: "Brenda J. Butler" Date: Tue, 31 Oct 2017 14:27:28 -0400 Subject: [PATCH 1629/2177] tc-testing: split config file Move the config customization into a site-local file tdc_config_local.py, so that updates of the tdc test software does not require hand-editing of the config. This patch includes a template for the site-local customization file. In addition, this makes it easy to revert to a stock tdc environment for testing the test framework and/or the core tests. Also it makes it harder for any custom config to be submitted back to the kernel tdc. Signed-off-by: Brenda J. Butler Acked-by: Lucas Bates Signed-off-by: David S. 
Miller --- .../selftests/tc-testing/tdc_config.py | 14 +++++++++++ .../tc-testing/tdc_config_local_template.py | 23 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 tools/testing/selftests/tc-testing/tdc_config_local_template.py diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index b6352515c1b5..596558445713 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -17,3 +17,17 @@ NAMES = { # Name of the namespace to use 'NS': 'tcut' } + + +ENVIR = { } + +# put customizations in tdc_config_local.py +try: + from tdc_config_local import * +except ImportError as ie: + pass + +try: + NAMES.update(EXTRA_NAMES) +except NameError as ne: + pass diff --git a/tools/testing/selftests/tc-testing/tdc_config_local_template.py b/tools/testing/selftests/tc-testing/tdc_config_local_template.py new file mode 100644 index 000000000000..d48fc732a399 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc_config_local_template.py @@ -0,0 +1,23 @@ +""" +tdc_config_local.py - tdc plugin-writer-specified values + +Copyright (C) 2017 bjb@mojatatu.com +""" + +import os + +ENVIR = os.environ.copy() + +ENV_LD_LIBRARY_PATH = os.getenv('LD_LIBRARY_PATH', '') +ENV_OTHER_LIB = os.getenv('OTHER_LIB', '') + + +# example adding value to NAMES, without editing tdc_config.py +EXTRA_NAMES = dict() +EXTRA_NAMES['SOME_BIN'] = os.path.join(os.getenv('OTHER_BIN', ''), 'some_bin') + + +# example adding values to ENVIR, without editing tdc_config.py +ENVIR['VALGRIND_LIB'] = '/usr/lib/valgrind' +ENVIR['VALGRIND_BIN'] = '/usr/bin/valgrind' +ENVIR['VGDB_BIN'] = '/usr/bin/vgdb' From 7ae677fb85f46c6b1c0545a0558c507ab940d7c4 Mon Sep 17 00:00:00 2001 From: "Brenda J. Butler" Date: Tue, 31 Oct 2017 14:28:02 -0400 Subject: [PATCH 1630/2177] tc-testing: correction to docstring in get_unique_item Signed-off-by: Brenda J. Butler Acked-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py index c3254f861fb2..6f99a4efe761 100644 --- a/tools/testing/selftests/tc-testing/tdc_helper.py +++ b/tools/testing/selftests/tc-testing/tdc_helper.py @@ -15,7 +15,7 @@ def get_categorized_testlist(alltests, ucat): def get_unique_item(lst): - """ For a list, return a set of the unique items in the list. """ + """ For a list, return a list of the unique items in the list. """ return list(set(lst)) From 6c26c3fbc3295ce71f5e03caccc9eeb7369123ca Mon Sep 17 00:00:00 2001 From: "Brenda J. Butler" Date: Tue, 31 Oct 2017 14:28:35 -0400 Subject: [PATCH 1631/2177] tc-testing: better check if thing is list Check if tcase[k] is an instance of a list (is or is derived from list) instead of checking if it is a list. This will be useful if the data structures change to be something that implements list, instead of being an actual list. In that case, this code will not have to change. Signed-off-by: Brenda J. Butler Acked-by: Lucas Bates Signed-off-by: David S. 
Miller --- tools/testing/selftests/tc-testing/tdc_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py index 6f99a4efe761..d935cbc34cc3 100644 --- a/tools/testing/selftests/tc-testing/tdc_helper.py +++ b/tools/testing/selftests/tc-testing/tdc_helper.py @@ -57,7 +57,7 @@ def print_sll(items): def print_test_case(tcase): """ Pretty-printing of a given test case. """ for k in tcase.keys(): - if (type(tcase[k]) == list): + if (isinstance(tcase[k], list)): print(k + ":") print_list(tcase[k]) else: From 170b8ffa71412eeacb20589b807f4bc75a61e507 Mon Sep 17 00:00:00 2001 From: "Brenda J. Butler" Date: Tue, 31 Oct 2017 14:29:03 -0400 Subject: [PATCH 1632/2177] tc-testing: better test case file error reporting tdc.py reads a bunch of test cases in json files. When a json file cannot be parsed, tdc just exits and does not run any tests. This patch will cause tdc to print a message with the file name and line number, then that file will be ignored and the rest of the tests will be processed. Signed-off-by: Brenda J. Butler Acked-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 5508730ee1ea..a674fe9a7f1e 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -179,15 +179,20 @@ def has_blank_ids(idlist): def load_from_file(filename): """ - Open the JSON file containing the test cases and return them as an - ordered dictionary object. + Open the JSON file containing the test cases and return them + as list of ordered dictionary objects. """ - with open(filename) as test_data: - testlist = json.load(test_data, object_pairs_hook=OrderedDict) - idlist = get_id_list(testlist) - if (has_blank_ids(idlist)): - for k in testlist: - k['filename'] = filename + try: + with open(filename) as test_data: + testlist = json.load(test_data, object_pairs_hook=OrderedDict) + except json.JSONDecodeError as jde: + print('IGNORING test case file {}\n\tBECAUSE: {}'.format(filename, jde)) + testlist = list() + else: + idlist = get_id_list(testlist) + if (has_blank_ids(idlist)): + for k in testlist: + k['filename'] = filename return testlist From 1f233f327913f3dee0602cba9c64df1903772b55 Mon Sep 17 00:00:00 2001 From: Vijaya Mohan Guvva Date: Tue, 31 Oct 2017 16:04:53 -0700 Subject: [PATCH 1633/2177] liquidio: switchdev support for LiquidIO NIC Enable switchdev for SRIOV capable LiquidIO NIC. It registers a representor netdev (with switchdev_ops) for each SRIOV VF created. It also has changes to send representor interface configurations like admin state and MTU to LiquidIO firmware and to retrieve HW counted VF stats for VF representor. Signed-off-by: Vijaya Mohan Guvva Signed-off-by: Satanand Burla Signed-off-by: Raghu Vatsavayi Signed-off-by: Felix Manlunas Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/liquidio/Makefile | 2 +- .../net/ethernet/cavium/liquidio/lio_main.c | 25 + .../net/ethernet/cavium/liquidio/lio_vf_rep.c | 621 ++++++++++++++++++ .../net/ethernet/cavium/liquidio/lio_vf_rep.h | 47 ++ .../cavium/liquidio/liquidio_common.h | 49 ++ .../ethernet/cavium/liquidio/octeon_device.h | 7 + 6 files changed, 750 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c create mode 100644 drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile index c4d411d1aa28..cad4fe1ffe55 100644 --- a/drivers/net/ethernet/cavium/liquidio/Makefile +++ b/drivers/net/ethernet/cavium/liquidio/Makefile @@ -17,7 +17,7 @@ liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \ octeon_droq.o \ octeon_nic.o -liquidio-objs := lio_main.o octeon_console.o $(liquidio-y) +liquidio-objs := lio_main.o octeon_console.o lio_vf_rep.o $(liquidio-y) obj-$(CONFIG_LIQUIDIO_VF) += liquidio_vf.o diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 8ea24d68e824..80784122e6e9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -3309,6 +3310,29 @@ static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx, return 0; } +static int +lio_pf_switchdev_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct lio *lio = GET_LIO(dev); + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = ETH_ALEN; + ether_addr_copy(attr->u.ppid.id, + (void *)&lio->linfo.hw_addr + 2); + break; + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct switchdev_ops lio_pf_switchdev_ops = { + .switchdev_port_attr_get = lio_pf_switchdev_attr_get, +}; + static const struct net_device_ops lionetdevops = { .ndo_open = liquidio_open, .ndo_stop = liquidio_stop, @@ -3583,6 +3607,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) * netdev tasks. */ netdev->netdev_ops = &lionetdevops; + SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops); lio = GET_LIO(netdev); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c new file mode 100644 index 000000000000..67ff7a143e9e --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -0,0 +1,621 @@ +/********************************************************************** + * Author: Cavium, Inc. + * + * Contact: support@cavium.com + * Please include "LiquidIO" in the subject. + * + * Copyright (c) 2003-2017 Cavium, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, Version 2, as + * published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful, but + * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or + * NONINFRINGEMENT. See the GNU General Public License for more details. 
+ ***********************************************************************/ +#include +#include +#include "liquidio_common.h" +#include "octeon_droq.h" +#include "octeon_iq.h" +#include "response_manager.h" +#include "octeon_device.h" +#include "octeon_nic.h" +#include "octeon_main.h" +#include "octeon_network.h" +#include +#include "lio_vf_rep.h" +#include "octeon_network.h" + +static int lio_vf_rep_open(struct net_device *ndev); +static int lio_vf_rep_stop(struct net_device *ndev); +static int lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev); +static void lio_vf_rep_tx_timeout(struct net_device *netdev); +static int lio_vf_rep_phys_port_name(struct net_device *dev, + char *buf, size_t len); +static void lio_vf_rep_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats64); +static int lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu); + +static const struct net_device_ops lio_vf_rep_ndev_ops = { + .ndo_open = lio_vf_rep_open, + .ndo_stop = lio_vf_rep_stop, + .ndo_start_xmit = lio_vf_rep_pkt_xmit, + .ndo_tx_timeout = lio_vf_rep_tx_timeout, + .ndo_get_phys_port_name = lio_vf_rep_phys_port_name, + .ndo_get_stats64 = lio_vf_rep_get_stats64, + .ndo_change_mtu = lio_vf_rep_change_mtu, +}; + +static void +lio_vf_rep_send_sc_complete(struct octeon_device *oct, + u32 status, void *ptr) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; + struct lio_vf_rep_sc_ctx *ctx = + (struct lio_vf_rep_sc_ctx *)sc->ctxptr; + struct lio_vf_rep_resp *resp = + (struct lio_vf_rep_resp *)sc->virtrptr; + + if (status != OCTEON_REQUEST_TIMEOUT && READ_ONCE(resp->status)) + WRITE_ONCE(resp->status, 0); + + complete(&ctx->complete); +} + +static int +lio_vf_rep_send_soft_command(struct octeon_device *oct, + void *req, int req_size, + void *resp, int resp_size) +{ + int tot_resp_size = sizeof(struct lio_vf_rep_resp) + resp_size; + int ctx_size = sizeof(struct lio_vf_rep_sc_ctx); + struct octeon_soft_command *sc = NULL; + struct lio_vf_rep_resp *rep_resp; + struct lio_vf_rep_sc_ctx *ctx; + void *sc_req; + int err; + + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct, req_size, + tot_resp_size, ctx_size); + if (!sc) + return -ENOMEM; + + ctx = (struct lio_vf_rep_sc_ctx *)sc->ctxptr; + memset(ctx, 0, ctx_size); + init_completion(&ctx->complete); + + sc_req = (struct lio_vf_rep_req *)sc->virtdptr; + memcpy(sc_req, req, req_size); + + rep_resp = (struct lio_vf_rep_resp *)sc->virtrptr; + memset(rep_resp, 0, tot_resp_size); + WRITE_ONCE(rep_resp->status, 1); + + sc->iq_no = 0; + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, + OPCODE_NIC_VF_REP_CMD, 0, 0, 0); + sc->callback = lio_vf_rep_send_sc_complete; + sc->callback_arg = sc; + sc->wait_time = LIO_VF_REP_REQ_TMO_MS; + + err = octeon_send_soft_command(oct, sc); + if (err == IQ_SEND_FAILED) + goto free_buff; + + wait_for_completion_timeout(&ctx->complete, + msecs_to_jiffies + (2 * LIO_VF_REP_REQ_TMO_MS)); + err = READ_ONCE(rep_resp->status) ? 
-EBUSY : 0; + if (err) + dev_err(&oct->pci_dev->dev, "VF rep send config failed\n"); + + if (resp) + memcpy(resp, (rep_resp + 1), resp_size); +free_buff: + octeon_free_soft_command(oct, sc); + + return err; +} + +static int +lio_vf_rep_open(struct net_device *ndev) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev); + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + oct = vf_rep->oct; + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_STATE; + rep_cfg.ifidx = vf_rep->ifidx; + rep_cfg.rep_state.state = LIO_VF_REP_STATE_UP; + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, + sizeof(rep_cfg), NULL, 0); + + if (ret) { + dev_err(&oct->pci_dev->dev, + "VF_REP open failed with err %d\n", ret); + return -EIO; + } + + atomic_set(&vf_rep->ifstate, (atomic_read(&vf_rep->ifstate) | + LIO_IFSTATE_RUNNING)); + + netif_carrier_on(ndev); + netif_start_queue(ndev); + + return 0; +} + +static int +lio_vf_rep_stop(struct net_device *ndev) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev); + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + oct = vf_rep->oct; + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_STATE; + rep_cfg.ifidx = vf_rep->ifidx; + rep_cfg.rep_state.state = LIO_VF_REP_STATE_DOWN; + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, + sizeof(rep_cfg), NULL, 0); + + if (ret) { + dev_err(&oct->pci_dev->dev, + "VF_REP dev stop failed with err %d\n", ret); + return -EIO; + } + + atomic_set(&vf_rep->ifstate, (atomic_read(&vf_rep->ifstate) & + ~LIO_IFSTATE_RUNNING)); + + netif_tx_disable(ndev); + netif_carrier_off(ndev); + + return 0; +} + +static void +lio_vf_rep_tx_timeout(struct net_device *ndev) +{ + netif_trans_update(ndev); + + netif_wake_queue(ndev); +} + +static void +lio_vf_rep_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats64) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); + + stats64->tx_packets = vf_rep->stats.tx_packets; + stats64->tx_bytes = vf_rep->stats.tx_bytes; + stats64->tx_dropped = vf_rep->stats.tx_dropped; + + stats64->rx_packets = vf_rep->stats.rx_packets; + stats64->rx_bytes = vf_rep->stats.rx_bytes; + stats64->rx_dropped = vf_rep->stats.rx_dropped; +} + +static int +lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev); + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + oct = vf_rep->oct; + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_MTU; + rep_cfg.ifidx = vf_rep->ifidx; + rep_cfg.rep_mtu.mtu = cpu_to_be32(new_mtu); + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, + sizeof(rep_cfg), NULL, 0); + if (ret) { + dev_err(&oct->pci_dev->dev, + "Change MTU failed with err %d\n", ret); + return -EIO; + } + + ndev->mtu = new_mtu; + + return 0; +} + +static int +lio_vf_rep_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); + struct octeon_device *oct = vf_rep->oct; + int ret; + + ret = snprintf(buf, len, "pf%dvf%d", oct->pf_num, + vf_rep->ifidx - oct->pf_num * 64 - 1); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + +static struct net_device * +lio_vf_rep_get_ndev(struct octeon_device *oct, int ifidx) +{ + int vf_id, max_vfs = CN23XX_MAX_VFS_PER_PF + 1; + int vfid_mask = max_vfs - 1; + + if (ifidx <= oct->pf_num * max_vfs || + ifidx >= oct->pf_num * max_vfs + max_vfs) + return NULL; + + /* ifidx 1-63 for PF0 VFs + * ifidx 65-127 for 
PF1 VFs + */ + vf_id = (ifidx & vfid_mask) - 1; + + return oct->vf_rep_list.ndev[vf_id]; +} + +static void +lio_vf_rep_copy_packet(struct octeon_device *oct, + struct sk_buff *skb, + int len) +{ + if (likely(len > MIN_SKB_SIZE)) { + struct octeon_skb_page_info *pg_info; + unsigned char *va; + + pg_info = ((struct octeon_skb_page_info *)(skb->cb)); + if (pg_info->page) { + va = page_address(pg_info->page) + + pg_info->page_offset; + memcpy(skb->data, va, MIN_SKB_SIZE); + skb_put(skb, MIN_SKB_SIZE); + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + pg_info->page, + pg_info->page_offset + MIN_SKB_SIZE, + len - MIN_SKB_SIZE, + LIO_RXBUFFER_SZ); + } else { + struct octeon_skb_page_info *pg_info = + ((struct octeon_skb_page_info *)(skb->cb)); + + skb_copy_to_linear_data(skb, page_address(pg_info->page) + + pg_info->page_offset, len); + skb_put(skb, len); + put_page(pg_info->page); + } +} + +static int +lio_vf_rep_pkt_recv(struct octeon_recv_info *recv_info, void *buf) +{ + struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt; + struct lio_vf_rep_desc *vf_rep; + struct net_device *vf_ndev; + struct octeon_device *oct; + union octeon_rh *rh; + struct sk_buff *skb; + int i, ifidx; + + oct = lio_get_device(recv_pkt->octeon_id); + if (!oct) + goto free_buffers; + + skb = recv_pkt->buffer_ptr[0]; + rh = &recv_pkt->rh; + ifidx = rh->r.ossp; + + vf_ndev = lio_vf_rep_get_ndev(oct, ifidx); + if (!vf_ndev) + goto free_buffers; + + vf_rep = netdev_priv(vf_ndev); + if (!(atomic_read(&vf_rep->ifstate) & LIO_IFSTATE_RUNNING) || + recv_pkt->buffer_count > 1) + goto free_buffers; + + skb->dev = vf_ndev; + + /* Multiple buffers are not used for vf_rep packets. + * So just buffer_size[0] is valid. + */ + lio_vf_rep_copy_packet(oct, skb, recv_pkt->buffer_size[0]); + + skb_pull(skb, rh->r_dh.len * BYTES_PER_DHLEN_UNIT); + skb->protocol = eth_type_trans(skb, skb->dev); + skb->ip_summed = CHECKSUM_NONE; + + netif_rx(skb); + + octeon_free_recv_info(recv_info); + + return 0; + +free_buffers: + for (i = 0; i < recv_pkt->buffer_count; i++) + recv_buffer_free(recv_pkt->buffer_ptr[i]); + + octeon_free_recv_info(recv_info); + + return 0; +} + +static void +lio_vf_rep_packet_sent_callback(struct octeon_device *oct, + u32 status, void *buf) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; + struct sk_buff *skb = sc->ctxptr; + struct net_device *ndev = skb->dev; + + dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr, + sc->datasize, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + octeon_free_soft_command(oct, sc); + + if (octnet_iq_is_full(oct, sc->iq_no)) + return; + + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); +} + +static int +lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev); + struct net_device *parent_ndev = vf_rep->parent_ndev; + struct octeon_device *oct = vf_rep->oct; + struct octeon_instr_pki_ih3 *pki_ih3; + struct octeon_soft_command *sc; + struct lio *parent_lio; + int status; + + parent_lio = GET_LIO(parent_ndev); + + if (!(atomic_read(&vf_rep->ifstate) & LIO_IFSTATE_RUNNING) || + skb->len <= 0) + goto xmit_failed; + + if (octnet_iq_is_full(vf_rep->oct, parent_lio->txq)) { + dev_err(&oct->pci_dev->dev, "VF rep: Device IQ full\n"); + netif_stop_queue(ndev); + return NETDEV_TX_BUSY; + } + + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct, 0, 0, 0); + if (!sc) { + dev_err(&oct->pci_dev->dev, "VF rep: Soft command alloc failed\n"); + goto xmit_failed; + } + + /* Multiple buffers are not 
used for vf_rep packets. */ + if (skb_shinfo(skb)->nr_frags != 0) { + dev_err(&oct->pci_dev->dev, "VF rep: nr_frags != 0. Dropping packet\n"); + goto xmit_failed; + } + + sc->dmadptr = dma_map_single(&oct->pci_dev->dev, + skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&oct->pci_dev->dev, sc->dmadptr)) { + dev_err(&oct->pci_dev->dev, "VF rep: DMA mapping failed\n"); + goto xmit_failed; + } + + sc->virtdptr = skb->data; + sc->datasize = skb->len; + sc->ctxptr = skb; + sc->iq_no = parent_lio->txq; + + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, OPCODE_NIC_VF_REP_PKT, + vf_rep->ifidx, 0, 0); + pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3; + pki_ih3->tagtype = ORDERED_TAG; + + sc->callback = lio_vf_rep_packet_sent_callback; + sc->callback_arg = sc; + + status = octeon_send_soft_command(oct, sc); + if (status == IQ_SEND_FAILED) { + dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr, + sc->datasize, DMA_TO_DEVICE); + goto xmit_failed; + } + + if (status == IQ_SEND_STOP) + netif_stop_queue(ndev); + + netif_trans_update(ndev); + + return NETDEV_TX_OK; + +xmit_failed: + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; +} + +static int +lio_vf_rep_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); + struct net_device *parent_ndev = vf_rep->parent_ndev; + struct lio *lio = GET_LIO(parent_ndev); + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = ETH_ALEN; + ether_addr_copy(attr->u.ppid.id, + (void *)&lio->linfo.hw_addr + 2); + break; + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct switchdev_ops lio_vf_rep_switchdev_ops = { + .switchdev_port_attr_get = lio_vf_rep_attr_get, +}; + +static void +lio_vf_rep_fetch_stats(struct work_struct *work) +{ + struct cavium_wk *wk = (struct cavium_wk *)work; + struct lio_vf_rep_desc *vf_rep = wk->ctxptr; + struct lio_vf_rep_stats stats; + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + oct = vf_rep->oct; + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_STATS; + rep_cfg.ifidx = vf_rep->ifidx; + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, sizeof(rep_cfg), + &stats, sizeof(stats)); + + if (!ret) { + octeon_swap_8B_data((u64 *)&stats, (sizeof(stats) >> 3)); + memcpy(&vf_rep->stats, &stats, sizeof(stats)); + } + + schedule_delayed_work(&vf_rep->stats_wk.work, + msecs_to_jiffies(LIO_VF_REP_STATS_POLL_TIME_MS)); +} + +int +lio_vf_rep_create(struct octeon_device *oct) +{ + struct lio_vf_rep_desc *vf_rep; + struct net_device *ndev; + int i, num_vfs; + + if (!oct->sriov_info.sriov_enabled) + return 0; + + num_vfs = oct->sriov_info.num_vfs_alloced; + + oct->vf_rep_list.num_vfs = 0; + for (i = 0; i < num_vfs; i++) { + ndev = alloc_etherdev(sizeof(struct lio_vf_rep_desc)); + + if (!ndev) { + dev_err(&oct->pci_dev->dev, + "VF rep device %d creation failed\n", i); + goto cleanup; + } + + ndev->min_mtu = LIO_MIN_MTU_SIZE; + ndev->max_mtu = LIO_MAX_MTU_SIZE; + ndev->netdev_ops = &lio_vf_rep_ndev_ops; + SWITCHDEV_SET_OPS(ndev, &lio_vf_rep_switchdev_ops); + + vf_rep = netdev_priv(ndev); + memset(vf_rep, 0, sizeof(*vf_rep)); + + vf_rep->ndev = ndev; + vf_rep->oct = oct; + vf_rep->parent_ndev = oct->props[0].netdev; + vf_rep->ifidx = (oct->pf_num * 64) + i + 1; + + eth_hw_addr_random(ndev); + + if (register_netdev(ndev)) { + dev_err(&oct->pci_dev->dev, "VF rep nerdev registration failed\n"); + + free_netdev(ndev); + goto cleanup; + } + + 
netif_carrier_off(ndev); + + INIT_DELAYED_WORK(&vf_rep->stats_wk.work, + lio_vf_rep_fetch_stats); + vf_rep->stats_wk.ctxptr = (void *)vf_rep; + schedule_delayed_work(&vf_rep->stats_wk.work, + msecs_to_jiffies + (LIO_VF_REP_STATS_POLL_TIME_MS)); + oct->vf_rep_list.num_vfs++; + oct->vf_rep_list.ndev[i] = ndev; + } + + if (octeon_register_dispatch_fn(oct, OPCODE_NIC, + OPCODE_NIC_VF_REP_PKT, + lio_vf_rep_pkt_recv, oct)) { + dev_err(&oct->pci_dev->dev, "VF rep Dispatch func registration failed\n"); + + goto cleanup; + } + + return 0; + +cleanup: + for (i = 0; i < oct->vf_rep_list.num_vfs; i++) { + ndev = oct->vf_rep_list.ndev[i]; + oct->vf_rep_list.ndev[i] = NULL; + if (ndev) { + vf_rep = netdev_priv(ndev); + cancel_delayed_work_sync + (&vf_rep->stats_wk.work); + unregister_netdev(ndev); + free_netdev(ndev); + } + } + + oct->vf_rep_list.num_vfs = 0; + + return -1; +} + +void +lio_vf_rep_destroy(struct octeon_device *oct) +{ + struct lio_vf_rep_desc *vf_rep; + struct net_device *ndev; + int i; + + if (!oct->sriov_info.sriov_enabled) + return; + + for (i = 0; i < oct->vf_rep_list.num_vfs; i++) { + ndev = oct->vf_rep_list.ndev[i]; + oct->vf_rep_list.ndev[i] = NULL; + if (ndev) { + vf_rep = netdev_priv(ndev); + cancel_delayed_work_sync + (&vf_rep->stats_wk.work); + netif_tx_disable(ndev); + netif_carrier_off(ndev); + + unregister_netdev(ndev); + free_netdev(ndev); + } + } + + oct->vf_rep_list.num_vfs = 0; +} diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h new file mode 100644 index 000000000000..5a9ec9851426 --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h @@ -0,0 +1,47 @@ +/********************************************************************** + * Author: Cavium, Inc. + * + * Contact: support@cavium.com + * Please include "LiquidIO" in the subject. + * + * Copyright (c) 2003-2017 Cavium, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, Version 2, as + * published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful, but + * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or + * NONINFRINGEMENT. See the GNU General Public License for more + * details. + * + * This file may also be available under a different license from Cavium. + * Contact Cavium, Inc. for more information + **********************************************************************/ + +/*! 
\file octeon_vf_main.h + * \brief Host Driver: This file defines vf_rep related macros and structures + */ +#ifndef __LIO_VF_REP_H__ +#define __LIO_VF_REP_H__ +#define LIO_VF_REP_REQ_TMO_MS 5000 +#define LIO_VF_REP_STATS_POLL_TIME_MS 200 + +struct lio_vf_rep_desc { + struct net_device *parent_ndev; + struct net_device *ndev; + struct octeon_device *oct; + struct lio_vf_rep_stats stats; + struct cavium_wk stats_wk; + atomic_t ifstate; + int ifidx; +}; + +struct lio_vf_rep_sc_ctx { + struct completion complete; +}; + +int lio_vf_rep_create(struct octeon_device *oct); +void lio_vf_rep_destroy(struct octeon_device *oct); +#endif diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 2033a65cd97a..3aceb78caa86 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -89,6 +89,9 @@ enum octeon_tag_type { #define VF_DRV_REMOVED -1 #define VF_DRV_MACADDR_CHANGED 2 +#define OPCODE_NIC_VF_REP_PKT 0x15 +#define OPCODE_NIC_VF_REP_CMD 0x16 + #define CORE_DRV_TEST_SCATTER_OP 0xFFF5 /* Application codes advertised by the core driver initialization packet. */ @@ -909,4 +912,50 @@ struct lio_time { s64 sec; /* seconds */ s64 nsec; /* nanoseconds */ }; + +struct lio_vf_rep_stats { + u64 tx_packets; + u64 tx_bytes; + u64 tx_dropped; + + u64 rx_packets; + u64 rx_bytes; + u64 rx_dropped; +}; + +enum lio_vf_rep_req_type { + LIO_VF_REP_REQ_NONE, + LIO_VF_REP_REQ_STATE, + LIO_VF_REP_REQ_MTU, + LIO_VF_REP_REQ_STATS +}; + +enum { + LIO_VF_REP_STATE_DOWN, + LIO_VF_REP_STATE_UP +}; + +struct lio_vf_rep_req { + u8 req_type; + u8 ifidx; + u8 rsvd[6]; + + union { + struct lio_vf_rep_mtu { + u32 mtu; + u32 rsvd; + } rep_mtu; + + struct lio_vf_rep_state { + u8 state; + u8 rsvd[7]; + } rep_state; + }; +}; + +struct lio_vf_rep_resp { + u64 rh; + u8 status; + u8 rsvd[7]; +}; #endif diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 33d19c4509bc..c97e067a8974 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -391,6 +391,11 @@ struct octeon_ioq_vector { u32 ioq_num; }; +struct lio_vf_rep_list { + int num_vfs; + struct net_device *ndev[CN23XX_MAX_VFS_PER_PF]; +}; + /** The Octeon device. * Each Octeon device has this structure to represent all its * components. @@ -568,6 +573,8 @@ struct octeon_device { atomic_t *adapter_fw_state; /* per-adapter, lio_fw_state */ bool ptp_enable; + + struct lio_vf_rep_list vf_rep_list; }; #define OCT_DRV_ONLINE 1 From d4be8ebefb449c43b7daa5c9d23b22cd20c17258 Mon Sep 17 00:00:00 2001 From: Vijaya Mohan Guvva Date: Tue, 31 Oct 2017 16:04:57 -0700 Subject: [PATCH 1634/2177] liquidio: Configure switchdev with devlink Enable and disable switchdev on SRIOV capable LiquidIO NIC with devlink. Create representor netdev for each SRIOV VF function on SRIOV enable and and do the cleanup on SRIOV disable. Signed-off-by: Vijaya Mohan Guvva Signed-off-by: Satanand Burla Signed-off-by: Raghu Vatsavayi Signed-off-by: Felix Manlunas Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/Kconfig | 1 + .../net/ethernet/cavium/liquidio/lio_main.c | 93 +++++++++++++++++++ .../net/ethernet/cavium/liquidio/lio_vf_rep.c | 6 ++ .../cavium/liquidio/liquidio_common.h | 1 + .../ethernet/cavium/liquidio/octeon_device.h | 7 ++ 5 files changed, 108 insertions(+) diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index dcbce6cac63e..63be75eb34d2 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -53,6 +53,7 @@ config THUNDER_NIC_RGX config LIQUIDIO tristate "Cavium LiquidIO support" depends on 64BIT + depends on MAY_USE_DEVLINK imply PTP_1588_CLOCK select FW_LOADER select LIBCRC32C diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 80784122e6e9..f27f0afd0ecf 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -35,6 +35,7 @@ #include "cn68xx_device.h" #include "cn23xx_pf_device.h" #include "liquidio_image.h" +#include "lio_vf_rep.h" MODULE_AUTHOR("Cavium Networks, "); MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Driver"); @@ -1603,6 +1604,8 @@ static int liquidio_stop_nic_module(struct octeon_device *oct) oct->cmd_resp_state = OCT_DRV_OFFLINE; spin_unlock_bh(&oct->cmd_resp_wqlock); + lio_vf_rep_destroy(oct); + for (i = 0; i < oct->ifcount; i++) { lio = GET_LIO(oct->props[i].netdev); for (j = 0; j < oct->num_oqs; j++) @@ -1613,6 +1616,12 @@ static int liquidio_stop_nic_module(struct octeon_device *oct) for (i = 0; i < oct->ifcount; i++) liquidio_destroy_nic_device(oct, i); + if (oct->devlink) { + devlink_unregister(oct->devlink); + devlink_free(oct->devlink); + oct->devlink = NULL; + } + dev_dbg(&oct->pci_dev->dev, "Network interfaces stopped\n"); return 0; } @@ -3310,10 +3319,67 @@ static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx, return 0; } +static int +liquidio_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct lio_devlink_priv *priv; + struct octeon_device *oct; + + priv = devlink_priv(devlink); + oct = priv->oct; + + *mode = oct->eswitch_mode; + + return 0; +} + +static int +liquidio_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct lio_devlink_priv *priv; + struct octeon_device *oct; + int ret = 0; + + priv = devlink_priv(devlink); + oct = priv->oct; + + if (!(oct->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP)) + return -EINVAL; + + if (oct->eswitch_mode == mode) + return 0; + + switch (mode) { + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + oct->eswitch_mode = mode; + ret = lio_vf_rep_create(oct); + break; + + case DEVLINK_ESWITCH_MODE_LEGACY: + lio_vf_rep_destroy(oct); + oct->eswitch_mode = mode; + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static const struct devlink_ops liquidio_devlink_ops = { + .eswitch_mode_get = liquidio_eswitch_mode_get, + .eswitch_mode_set = liquidio_eswitch_mode_set, +}; + static int lio_pf_switchdev_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct lio *lio = GET_LIO(dev); + struct octeon_device *oct = lio->oct_dev; + + if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return -EOPNOTSUPP; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: @@ -3462,6 +3528,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) u32 resp_size, ctx_size, data_size; u32 ifidx_or_pfnum; struct lio_version *vdata; + struct devlink *devlink; + struct lio_devlink_priv *lio_devlink; /* This is to 
handle link status changes */ octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC, @@ -3794,6 +3862,26 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) octeon_free_soft_command(octeon_dev, sc); } + devlink = devlink_alloc(&liquidio_devlink_ops, + sizeof(struct lio_devlink_priv)); + if (!devlink) { + dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n"); + goto setup_nic_wait_intr; + } + + lio_devlink = devlink_priv(devlink); + lio_devlink->oct = octeon_dev; + + if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) { + devlink_free(devlink); + dev_err(&octeon_dev->pci_dev->dev, + "devlink registration failed\n"); + goto setup_nic_wait_intr; + } + + octeon_dev->devlink = devlink; + octeon_dev->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; + return 0; setup_nic_dev_fail: @@ -3888,6 +3976,7 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs) } if (!num_vfs) { + lio_vf_rep_destroy(oct); ret = lio_pci_sriov_disable(oct); } else if (num_vfs > oct->sriov_info.max_vfs) { dev_err(&oct->pci_dev->dev, @@ -3899,6 +3988,10 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs) ret = octeon_enable_sriov(oct); dev_info(&oct->pci_dev->dev, "oct->pf_num:%d num_vfs:%d\n", oct->pf_num, num_vfs); + ret = lio_vf_rep_create(oct); + if (ret) + dev_info(&oct->pci_dev->dev, + "vf representor create failed"); } return ret; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index 67ff7a143e9e..de0c80d150f3 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -515,6 +515,9 @@ lio_vf_rep_create(struct octeon_device *oct) struct net_device *ndev; int i, num_vfs; + if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return 0; + if (!oct->sriov_info.sriov_enabled) return 0; @@ -599,6 +602,9 @@ lio_vf_rep_destroy(struct octeon_device *oct) struct net_device *ndev; int i; + if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + if (!oct->sriov_info.sriov_enabled) return; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 3aceb78caa86..441cc78faff1 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -114,6 +114,7 @@ enum octeon_tag_type { /* App specific capabilities from firmware to pf driver */ #define LIQUIDIO_TIME_SYNC_CAP 0x1 +#define LIQUIDIO_SWITCHDEV_CAP 0x2 static inline u32 incr_index(u32 index, u32 count, u32 max) { diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index c97e067a8974..63b0c758a0a6 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -23,6 +23,7 @@ #define _OCTEON_DEVICE_H_ #include +#include /** PCI VendorId Device Id */ #define OCTEON_CN68XX_PCIID 0x91177d @@ -396,6 +397,10 @@ struct lio_vf_rep_list { struct net_device *ndev[CN23XX_MAX_VFS_PER_PF]; }; +struct lio_devlink_priv { + struct octeon_device *oct; +}; + /** The Octeon device. * Each Octeon device has this structure to represent all its * components. 
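The liquidio_eswitch_mode_get/set callbacks added in this patch follow the generic devlink pattern for flipping a PF between legacy SR-IOV and switchdev (VF representor) modes. The following is a minimal, driver-agnostic sketch of that pattern; it assumes the devlink_ops callback signatures and the devlink_alloc()/devlink_register() calls shown in the patch, and all my_* identifiers are placeholders rather than kernel or liquidio symbols.

/*
 * Illustrative sketch only: generic devlink eswitch-mode plumbing.
 * The priv structure is obtained with devlink_priv() from the devlink
 * instance allocated via devlink_alloc(&my_devlink_ops, sizeof(priv))
 * and registered with devlink_register(), as done in the patch above.
 */
#include <net/devlink.h>

struct my_devlink_priv {
	enum devlink_eswitch_mode eswitch_mode;
};

static int my_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
	struct my_devlink_priv *priv = devlink_priv(devlink);

	*mode = priv->eswitch_mode;
	return 0;
}

static int my_eswitch_mode_set(struct devlink *devlink, u16 mode)
{
	struct my_devlink_priv *priv = devlink_priv(devlink);

	if (mode == priv->eswitch_mode)
		return 0;

	switch (mode) {
	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
		/* create one representor netdev per VF here */
		priv->eswitch_mode = mode;
		return 0;
	case DEVLINK_ESWITCH_MODE_LEGACY:
		/* tear the representors back down here */
		priv->eswitch_mode = mode;
		return 0;
	default:
		return -EINVAL;
	}
}

static const struct devlink_ops my_devlink_ops = {
	.eswitch_mode_get = my_eswitch_mode_get,
	.eswitch_mode_set = my_eswitch_mode_set,
};

User space then selects the mode through the devlink tool, for example "devlink dev eswitch set pci/0000:03:00.0 mode switchdev", which is the path that ends up invoking lio_vf_rep_create() above.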
@@ -575,6 +580,8 @@ struct octeon_device { bool ptp_enable; struct lio_vf_rep_list vf_rep_list; + struct devlink *devlink; + enum devlink_eswitch_mode eswitch_mode; }; #define OCT_DRV_ONLINE 1 From 638f5b90d46016372a8e3e0a434f199cc5e12b8c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 31 Oct 2017 18:16:05 -0700 Subject: [PATCH 1635/2177] bpf: reduce verifier memory consumption the verifier got progressively smarter over time and size of its internal state grew as well. Time to reduce the memory consumption. Before: sizeof(struct bpf_verifier_state) = 6520 After: sizeof(struct bpf_verifier_state) = 896 It's done by observing that majority of BPF programs use little to no stack whereas verifier kept all of 512 stack slots ready always. Instead dynamically reallocate struct verifier state when stack access is detected. Runtime difference before vs after is within a noise. The number of processed instructions stays the same. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/bpf/verifier.c | 8 +- include/linux/bpf_verifier.h | 16 +- kernel/bpf/verifier.c | 437 ++++++++++++------ 3 files changed, 305 insertions(+), 156 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 3d3dcac1c942..a8c7615546a9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -76,9 +76,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int nfp_bpf_check_exit(struct nfp_prog *nfp_prog, - const struct bpf_verifier_env *env) + struct bpf_verifier_env *env) { - const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0; u64 imm; if (nfp_prog->act == NN_ACT_XDP) @@ -144,9 +144,9 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, static int nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - const struct bpf_verifier_env *env, u8 reg_no) + struct bpf_verifier_env *env, u8 reg_no) { - const struct bpf_reg_state *reg = &env->cur_state.regs[reg_no]; + const struct bpf_reg_state *reg = cur_regs(env) + reg_no; int err; if (reg->type != PTR_TO_CTX && diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index feeaea93d959..3b0976aaac75 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -88,14 +88,19 @@ enum bpf_stack_slot_type { #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +struct bpf_stack_state { + struct bpf_reg_state spilled_ptr; + u8 slot_type[BPF_REG_SIZE]; +}; + /* state of the program: * type of all registers and stack info */ struct bpf_verifier_state { struct bpf_reg_state regs[MAX_BPF_REG]; - u8 stack_slot_type[MAX_BPF_STACK]; - struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; struct bpf_verifier_state *parent; + int allocated_stack; + struct bpf_stack_state *stack; }; /* linked list of verifier states used to prune search */ @@ -145,7 +150,7 @@ struct bpf_verifier_env { struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ - struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning 
optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ void *analyzer_priv; /* pointer to external analyzer's private data */ @@ -159,6 +164,11 @@ struct bpf_verifier_env { struct bpf_verifer_log log; }; +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +{ + return env->cur_state->regs; +} + int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, void *priv); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d906775e12c1..5f26f7ad124f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -276,43 +276,132 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, ")"); } } - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (state->stack_slot_type[i] == STACK_SPILL) - verbose(env, " fp%d=%s", -MAX_BPF_STACK + i, - reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]); + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] == STACK_SPILL) + verbose(env, " fp%d=%s", + -MAX_BPF_STACK + i * BPF_REG_SIZE, + reg_type_str[state->stack[i].spilled_ptr.type]); } verbose(env, "\n"); } -static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) +static int copy_stack_state(struct bpf_verifier_state *dst, + const struct bpf_verifier_state *src) { - struct bpf_verifier_stack_elem *elem; - int insn_idx; + if (!src->stack) + return 0; + if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) { + /* internal bug, make state invalid to reject the program */ + memset(dst, 0, sizeof(*dst)); + return -EFAULT; + } + memcpy(dst->stack, src->stack, + sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE)); + return 0; +} + +/* do_check() starts with zero-sized stack in struct bpf_verifier_state to + * make it consume minimal amount of memory. check_stack_write() access from + * the program calls into realloc_verifier_state() to grow the stack size. + * Note there is a non-zero 'parent' pointer inside bpf_verifier_state + * which this function copies over. 
It points to previous bpf_verifier_state + * which is never reallocated + */ +static int realloc_verifier_state(struct bpf_verifier_state *state, int size, + bool copy_old) +{ + u32 old_size = state->allocated_stack; + struct bpf_stack_state *new_stack; + int slot = size / BPF_REG_SIZE; + + if (size <= old_size || !size) { + if (copy_old) + return 0; + state->allocated_stack = slot * BPF_REG_SIZE; + if (!size && old_size) { + kfree(state->stack); + state->stack = NULL; + } + return 0; + } + new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state), + GFP_KERNEL); + if (!new_stack) + return -ENOMEM; + if (copy_old) { + if (state->stack) + memcpy(new_stack, state->stack, + sizeof(*new_stack) * (old_size / BPF_REG_SIZE)); + memset(new_stack + old_size / BPF_REG_SIZE, 0, + sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE); + } + state->allocated_stack = slot * BPF_REG_SIZE; + kfree(state->stack); + state->stack = new_stack; + return 0; +} + +static void free_verifier_state(struct bpf_verifier_state *state) +{ + kfree(state->stack); + kfree(state); +} + +/* copy verifier state from src to dst growing dst stack space + * when necessary to accommodate larger src stack + */ +static int copy_verifier_state(struct bpf_verifier_state *dst, + const struct bpf_verifier_state *src) +{ + int err; + + err = realloc_verifier_state(dst, src->allocated_stack, false); + if (err) + return err; + memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack)); + return copy_stack_state(dst, src); +} + +static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, + int *insn_idx) +{ + struct bpf_verifier_state *cur = env->cur_state; + struct bpf_verifier_stack_elem *elem, *head = env->head; + int err; if (env->head == NULL) - return -1; + return -ENOENT; - memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state)); - insn_idx = env->head->insn_idx; + if (cur) { + err = copy_verifier_state(cur, &head->st); + if (err) + return err; + } + if (insn_idx) + *insn_idx = head->insn_idx; if (prev_insn_idx) - *prev_insn_idx = env->head->prev_insn_idx; - elem = env->head->next; - kfree(env->head); + *prev_insn_idx = head->prev_insn_idx; + elem = head->next; + kfree(head); env->head = elem; env->stack_size--; - return insn_idx; + return 0; } static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { + struct bpf_verifier_state *cur = env->cur_state; struct bpf_verifier_stack_elem *elem; + int err; - elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); if (!elem) goto err; - memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state)); + err = copy_verifier_state(&elem->st, cur); + if (err) + return NULL; elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; elem->next = env->head; @@ -325,7 +414,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, return &elem->st; err: /* pop all elements and return */ - while (pop_stack(env, NULL) >= 0); + while (!pop_stack(env, NULL, NULL)); return NULL; } @@ -550,7 +639,7 @@ static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno) static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, enum reg_arg_type t) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state->regs; if (regno >= MAX_BPF_REG) { verbose(env, "R%d is invalid\n", regno); @@ -563,7 +652,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 
regno, verbose(env, "R%d !read_ok\n", regno); return -EACCES; } - mark_reg_read(&env->cur_state, regno); + mark_reg_read(env->cur_state, regno); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { @@ -601,10 +690,21 @@ static int check_stack_write(struct bpf_verifier_env *env, struct bpf_verifier_state *state, int off, int size, int value_regno) { - int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; + int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; + + err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE), + true); + if (err) + return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits */ + if (!env->allow_ptr_leaks && + state->stack[spi].slot_type[0] == STACK_SPILL && + size != BPF_REG_SIZE) { + verbose(env, "attempt to corrupt spilled pointer on stack\n"); + return -EACCES; + } if (value_regno >= 0 && is_spillable_regtype(state->regs[value_regno].type)) { @@ -616,17 +716,18 @@ static int check_stack_write(struct bpf_verifier_env *env, } /* save register state */ - state->spilled_regs[spi] = state->regs[value_regno]; - state->spilled_regs[spi].live |= REG_LIVE_WRITTEN; + state->stack[spi].spilled_ptr = state->regs[value_regno]; + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; for (i = 0; i < BPF_REG_SIZE; i++) - state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + state->stack[spi].slot_type[i] = STACK_SPILL; } else { /* regular write of data into stack */ - state->spilled_regs[spi] = (struct bpf_reg_state) {}; + state->stack[spi].spilled_ptr = (struct bpf_reg_state) {}; for (i = 0; i < size; i++) - state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; + state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = + STACK_MISC; } return 0; } @@ -637,10 +738,10 @@ static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slo while (parent) { /* if read wasn't screened by an earlier write ... */ - if (state->spilled_regs[slot].live & REG_LIVE_WRITTEN) + if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN) break; /* ... 
then we depend on parent's value */ - parent->spilled_regs[slot].live |= REG_LIVE_READ; + parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ; state = parent; parent = state->parent; } @@ -650,34 +751,37 @@ static int check_stack_read(struct bpf_verifier_env *env, struct bpf_verifier_state *state, int off, int size, int value_regno) { - u8 *slot_type; - int i, spi; + int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; + u8 *stype; - slot_type = &state->stack_slot_type[MAX_BPF_STACK + off]; + if (state->allocated_stack <= slot) { + verbose(env, "invalid read from stack off %d+0 size %d\n", + off, size); + return -EACCES; + } + stype = state->stack[spi].slot_type; - if (slot_type[0] == STACK_SPILL) { + if (stype[0] == STACK_SPILL) { if (size != BPF_REG_SIZE) { verbose(env, "invalid size of register spill\n"); return -EACCES; } for (i = 1; i < BPF_REG_SIZE; i++) { - if (slot_type[i] != STACK_SPILL) { + if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { verbose(env, "corrupted spill memory\n"); return -EACCES; } } - spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; - if (value_regno >= 0) { /* restore register state from stack */ - state->regs[value_regno] = state->spilled_regs[spi]; + state->regs[value_regno] = state->stack[spi].spilled_ptr; mark_stack_slot_read(state, spi); } return 0; } else { for (i = 0; i < size; i++) { - if (slot_type[i] != STACK_MISC) { + if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) { verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -694,7 +798,8 @@ static int check_stack_read(struct bpf_verifier_env *env, static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct bpf_map *map = env->cur_state.regs[regno].map_ptr; + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_map *map = regs[regno].map_ptr; if (off < 0 || size <= 0 || off + size > map->value_size) { verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", @@ -706,9 +811,9 @@ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, /* check read/write into a map element with possible variable offset */ static int check_map_access(struct bpf_verifier_env *env, u32 regno, - int off, int size) + int off, int size) { - struct bpf_verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *reg = &state->regs[regno]; int err; @@ -783,7 +888,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; if (off < 0 || size <= 0 || (u64)off + size > reg->range) { @@ -797,7 +902,7 @@ static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; int err; @@ -866,7 +971,7 @@ static bool __is_pointer_value(bool allow_ptr_leaks, static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { - return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); + return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); } static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, @@ -968,8 +1073,9 @@ static int 
check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn int bpf_size, enum bpf_access_type t, int value_regno) { - struct bpf_verifier_state *state = &env->cur_state; - struct bpf_reg_state *reg = &state->regs[regno]; + struct bpf_verifier_state *state = env->cur_state; + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = regs + regno; int size, err = 0; size = bpf_size_to_bytes(bpf_size); @@ -993,7 +1099,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown(env, state->regs, value_regno); + mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; @@ -1028,14 +1134,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * case, we know the offset is zero. */ if (reg_type == SCALAR_VALUE) - mark_reg_unknown(env, state->regs, value_regno); + mark_reg_unknown(env, regs, value_regno); else - mark_reg_known_zero(env, state->regs, + mark_reg_known_zero(env, regs, value_regno); - state->regs[value_regno].id = 0; - state->regs[value_regno].off = 0; - state->regs[value_regno].range = 0; - state->regs[value_regno].type = reg_type; + regs[value_regno].id = 0; + regs[value_regno].off = 0; + regs[value_regno].range = 0; + regs[value_regno].type = reg_type; } } else if (reg->type == PTR_TO_STACK) { @@ -1061,19 +1167,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (env->prog->aux->stack_depth < -off) env->prog->aux->stack_depth = -off; - if (t == BPF_WRITE) { - if (!env->allow_ptr_leaks && - state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && - size != BPF_REG_SIZE) { - verbose(env, "attempt to corrupt spilled pointer on stack\n"); - return -EACCES; - } + if (t == BPF_WRITE) err = check_stack_write(env, state, off, size, value_regno); - } else { + else err = check_stack_read(env, state, off, size, value_regno); - } } else if (reg_is_pkt_pointer(reg)) { if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { verbose(env, "cannot write into packet\n"); @@ -1087,7 +1186,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } err = check_packet_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown(env, state->regs, value_regno); + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -1095,11 +1194,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && - state->regs[value_regno].type == SCALAR_VALUE) { + regs[value_regno].type == SCALAR_VALUE) { /* b/h/w load zero-extends, mark upper bits as known 0 */ - state->regs[value_regno].var_off = tnum_cast( - state->regs[value_regno].var_off, size); - __update_reg_bounds(&state->regs[value_regno]); + regs[value_regno].var_off = + tnum_cast(regs[value_regno].var_off, size); + __update_reg_bounds(®s[value_regno]); } return err; } @@ -1156,9 +1255,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct bpf_verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *regs = state->regs; - int off, i; + int off, i, slot, 
spi; if (regs[regno].type != PTR_TO_STACK) { /* Allow zero-byte read from NULL, regardless of pointer type */ @@ -1198,7 +1297,11 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, } for (i = 0; i < access_size; i++) { - if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { + slot = -(off + i) - 1; + spi = slot / BPF_REG_SIZE; + if (state->allocated_stack <= slot || + state->stack[spi].slot_type[slot % BPF_REG_SIZE] != + STACK_MISC) { verbose(env, "invalid indirect read from stack off %d+%d size %d\n", off, i, access_size); return -EACCES; @@ -1211,7 +1314,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; switch (reg->type) { case PTR_TO_PACKET: @@ -1229,7 +1332,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { - struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; enum bpf_reg_type expected_type, type = reg->type; int err = 0; @@ -1514,7 +1617,7 @@ static int check_raw_mode(const struct bpf_func_proto *fn) */ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct bpf_verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *regs = state->regs, *reg; int i; @@ -1522,10 +1625,10 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) if (reg_is_pkt_pointer_any(®s[i])) mark_reg_unknown(env, regs, i); - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (state->stack_slot_type[i] != STACK_SPILL) + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; - reg = &state->spilled_regs[i / BPF_REG_SIZE]; + reg = &state->stack[i].spilled_ptr; if (reg_is_pkt_pointer_any(reg)) __mark_reg_unknown(reg); } @@ -1533,9 +1636,8 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { - struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1603,6 +1705,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return err; } + regs = cur_regs(env); /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@ -1691,7 +1794,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg) { - struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; + struct bpf_reg_state *regs = cur_regs(env), *dst_reg; bool known = tnum_is_const(off_reg->var_off); s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; @@ -1703,13 +1806,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg = ®s[dst]; if (WARN_ON_ONCE(known && (smin_val != smax_val))) { - print_verifier_state(env, &env->cur_state); + print_verifier_state(env, env->cur_state); verbose(env, "verifier internal error: known but bad 
sbounds\n"); return -EINVAL; } if (WARN_ON_ONCE(known && (umin_val != umax_val))) { - print_verifier_state(env, &env->cur_state); + print_verifier_state(env, env->cur_state); verbose(env, "verifier internal error: known but bad ubounds\n"); return -EINVAL; @@ -1890,7 +1993,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_reg_state *dst_reg, struct bpf_reg_state src_reg) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); u8 opcode = BPF_OP(insn->code); bool src_known, dst_known; s64 smin_val, smax_val; @@ -2111,7 +2214,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg; + struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; u8 opcode = BPF_OP(insn->code); int rc; @@ -2185,12 +2288,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(env, &env->cur_state); + print_verifier_state(env, env->cur_state); verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(env, &env->cur_state); + print_verifier_state(env, env->cur_state); verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } @@ -2200,7 +2303,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* check validity of 32-bit and 64-bit arithmetic operations */ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); u8 opcode = BPF_OP(insn->code); int err; @@ -2421,10 +2524,10 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, /* keep the maximum range already checked */ regs[i].range = max(regs[i].range, new_range); - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (state->stack_slot_type[i] != STACK_SPILL) + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; - reg = &state->spilled_regs[i / BPF_REG_SIZE]; + reg = &state->stack[i].spilled_ptr; if (reg->type == type && reg->id == dst_reg->id) reg->range = max_t(u16, reg->range, new_range); } @@ -2674,17 +2777,17 @@ static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, for (i = 0; i < MAX_BPF_REG; i++) mark_map_reg(regs, i, id, is_null); - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (state->stack_slot_type[i] != STACK_SPILL) + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; - mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null); + mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); } } static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state; + struct bpf_verifier_state *other_branch, *this_branch = env->cur_state; struct bpf_reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -2876,7 +2979,7 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) /* verify BPF_LD_IMM64 instruction */ static int check_ld_imm(struct bpf_verifier_env *env, struct 
bpf_insn *insn) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); int err; if (BPF_SIZE(insn->code) != BPF_DW) { @@ -2937,7 +3040,7 @@ static bool may_access_skb(enum bpf_prog_type type) */ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); u8 mode = BPF_MODE(insn->code); int i, err; @@ -2999,7 +3102,7 @@ static int check_return_code(struct bpf_verifier_env *env) return 0; } - reg = &env->cur_state.regs[BPF_REG_0]; + reg = cur_regs(env) + BPF_REG_0; if (reg->type != SCALAR_VALUE) { verbose(env, "At program exit the register R0 is not a known value (%s)\n", reg_type_str[reg->type]); @@ -3363,6 +3466,57 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; } +static bool stacksafe(struct bpf_verifier_state *old, + struct bpf_verifier_state *cur, + struct idpair *idmap) +{ + int i, spi; + + /* if explored stack has more populated slots than current stack + * such stacks are not equivalent + */ + if (old->allocated_stack > cur->allocated_stack) + return false; + + /* walk slots of the explored stack and ignore any additional + * slots in the current stack, since explored(safe) state + * didn't use them + */ + for (i = 0; i < old->allocated_stack; i++) { + spi = i / BPF_REG_SIZE; + + if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) + continue; + if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + /* Ex: old explored (safe) state has STACK_SPILL in + * this stack slot, but current has has STACK_MISC -> + * this verifier states are not equivalent, + * return false to continue verification of this path + */ + return false; + if (i % BPF_REG_SIZE) + continue; + if (old->stack[spi].slot_type[0] != STACK_SPILL) + continue; + if (!regsafe(&old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, + idmap)) + /* when explored and current stack slot are both storing + * spilled registers, check that stored pointers types + * are the same as well. + * Ex: explored safe path could have stored + * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} + * but current path has stored: + * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} + * such verifier states are not equivalent. + * return false to continue verification of this path + */ + return false; + } + return true; +} + /* compare two verifier states * * all states stored in state_list are known to be valid, since @@ -3407,37 +3561,8 @@ static bool states_equal(struct bpf_verifier_env *env, goto out_free; } - for (i = 0; i < MAX_BPF_STACK; i++) { - if (old->stack_slot_type[i] == STACK_INVALID) - continue; - if (old->stack_slot_type[i] != cur->stack_slot_type[i]) - /* Ex: old explored (safe) state has STACK_SPILL in - * this stack slot, but current has has STACK_MISC -> - * this verifier states are not equivalent, - * return false to continue verification of this path - */ - goto out_free; - if (i % BPF_REG_SIZE) - continue; - if (old->stack_slot_type[i] != STACK_SPILL) - continue; - if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE], - &cur->spilled_regs[i / BPF_REG_SIZE], - idmap)) - /* when explored and current stack slot are both storing - * spilled registers, check that stored pointers types - * are the same as well. 
- * Ex: explored safe path could have stored - * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} - * but current path has stored: - * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} - * such verifier states are not equivalent. - * return false to continue verification of this path - */ - goto out_free; - else - continue; - } + if (!stacksafe(old, cur, idmap)) + goto out_free; ret = true; out_free: kfree(idmap); @@ -3473,17 +3598,19 @@ static bool do_propagate_liveness(const struct bpf_verifier_state *state, } } /* ... and stack slots */ - for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) { - if (parent->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL) + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && + i < parent->allocated_stack / BPF_REG_SIZE; i++) { + if (parent->stack[i].slot_type[0] != STACK_SPILL) continue; - if (state->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL) + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; - if (parent->spilled_regs[i].live & REG_LIVE_READ) + if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) continue; - if (writes && (state->spilled_regs[i].live & REG_LIVE_WRITTEN)) + if (writes && + (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN)) continue; - if (state->spilled_regs[i].live & REG_LIVE_READ) { - parent->spilled_regs[i].live |= REG_LIVE_READ; + if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) { + parent->stack[i].spilled_ptr.live |= REG_LIVE_READ; touched = true; } } @@ -3513,6 +3640,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; + struct bpf_verifier_state *cur = env->cur_state; int i; sl = env->explored_states[insn_idx]; @@ -3523,7 +3651,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; while (sl != STATE_LIST_MARK) { - if (states_equal(env, &sl->state, &env->cur_state)) { + if (states_equal(env, &sl->state, cur)) { /* reached equivalent register/stack state, * prune the search. * Registers read by the continuation are read by us. @@ -3534,7 +3662,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * they'll be immediately forgotten as we're pruning * this state and will pop a new one. */ - propagate_liveness(&sl->state, &env->cur_state); + propagate_liveness(&sl->state, cur); return 1; } sl = sl->next; @@ -3546,16 +3674,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * it will be rejected. Since there are no loops, we won't be * seeing this 'insn_idx' instruction again on the way to bpf_exit */ - new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER); + new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); if (!new_sl) return -ENOMEM; /* add new state to the head of linked list */ - memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state)); + copy_verifier_state(&new_sl->state, cur); new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; /* connect new state to parentage chain */ - env->cur_state.parent = &new_sl->state; + cur->parent = &new_sl->state; /* clear write marks in current state: the writes we did are not writes * our child did, so they don't screen off its reads from us. * (There are no read marks in current state, because reads always mark @@ -3563,10 +3691,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * explored_states can get read marks.) 
*/ for (i = 0; i < BPF_REG_FP; i++) - env->cur_state.regs[i].live = REG_LIVE_NONE; - for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) - if (env->cur_state.stack_slot_type[i * BPF_REG_SIZE] == STACK_SPILL) - env->cur_state.spilled_regs[i].live = REG_LIVE_NONE; + cur->regs[i].live = REG_LIVE_NONE; + for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++) + if (cur->stack[i].slot_type[0] == STACK_SPILL) + cur->stack[i].spilled_ptr.live = REG_LIVE_NONE; return 0; } @@ -3581,15 +3709,19 @@ static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, static int do_check(struct bpf_verifier_env *env) { - struct bpf_verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state; struct bpf_insn *insns = env->prog->insnsi; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *regs; int insn_cnt = env->prog->len; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; bool do_print_state = false; - init_reg_state(env, regs); + state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); + if (!state) + return -ENOMEM; + env->cur_state = state; + init_reg_state(env, state->regs); state->parent = NULL; insn_idx = 0; for (;;) { @@ -3637,7 +3769,7 @@ static int do_check(struct bpf_verifier_env *env) else verbose(env, "\nfrom %d to %d:", prev_insn_idx, insn_idx); - print_verifier_state(env, &env->cur_state); + print_verifier_state(env, state); do_print_state = false; } @@ -3651,6 +3783,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + regs = cur_regs(env); if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -3818,8 +3951,10 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; process_bpf_exit: - insn_idx = pop_stack(env, &prev_insn_idx); - if (insn_idx < 0) { + err = pop_stack(env, &prev_insn_idx, &insn_idx); + if (err < 0) { + if (err != -ENOENT) + return err; break; } else { do_print_state = true; @@ -4359,9 +4494,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); + free_verifier_state(env->cur_state); + env->cur_state = NULL; skip_full_check: - while (pop_stack(env, NULL) >= 0); + while (!pop_stack(env, NULL, NULL)); free_states(env); if (ret == 0) @@ -4464,9 +4601,11 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); + free_verifier_state(env->cur_state); + env->cur_state = NULL; skip_full_check: - while (pop_stack(env, NULL) >= 0); + while (!pop_stack(env, NULL, NULL)); free_states(env); mutex_unlock(&bpf_verifier_lock); From 5b32fe070c2ddf31adc42c26dab8af346b652538 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Oct 2017 15:55:13 -0400 Subject: [PATCH 1636/2177] net: dsa: get ports within parsing code There is no point into hiding the -EINVAL error code in ERR_PTR from a dsa_get_ports function, simply get the "ports" node directly from within the dsa_parse_ports_dn function. This also has the effect to make the pdata and device tree handling code symmetrical inside _dsa_register_switch. At the same time, rename dsa_parse_ports_dn to dsa_parse_ports_of because _of is a more common suffix for device tree parsing functions. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 4d1ccf87c59c..9d57f8dee9a1 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -590,11 +590,17 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) return 0; } -static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) +static int dsa_parse_ports_of(struct device_node *dn, struct dsa_switch *ds) { - struct device_node *port; - int err; + struct device_node *ports, *port; u32 reg; + int err; + + ports = of_get_child_by_name(dn, "ports"); + if (!ports) { + dev_err(ds->dev, "no ports child node found\n"); + return -EINVAL; + } for_each_available_child_of_node(ports, port) { err = of_property_read_u32(port, "reg", ®); @@ -665,26 +671,11 @@ static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) return 0; } -static struct device_node *dsa_get_ports(struct dsa_switch *ds, - struct device_node *np) -{ - struct device_node *ports; - - ports = of_get_child_by_name(np, "ports"); - if (!ports) { - dev_err(ds->dev, "no ports child node found\n"); - return ERR_PTR(-EINVAL); - } - - return ports; -} - static int _dsa_register_switch(struct dsa_switch *ds) { struct dsa_chip_data *pdata = ds->dev->platform_data; struct device_node *np = ds->dev->of_node; struct dsa_switch_tree *dst; - struct device_node *ports; u32 tree, index; int i, err; @@ -693,11 +684,7 @@ static int _dsa_register_switch(struct dsa_switch *ds) if (err) return err; - ports = dsa_get_ports(ds, np); - if (IS_ERR(ports)) - return PTR_ERR(ports); - - err = dsa_parse_ports_dn(ports, ds); + err = dsa_parse_ports_of(np, ds); if (err) return err; } else { From fd223e2e66eb076b5dda586db9a5a3c99f76f99a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Oct 2017 15:55:14 -0400 Subject: [PATCH 1637/2177] net: dsa: add port parse functions Add symmetrical DSA port parsing functions for pdata and device tree, used to parse and validate a given port node or platform data. They don't do much for the moment but will be extended later on to assign a port type and get device references. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9d57f8dee9a1..a0ee91cd3814 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -590,9 +590,17 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) return 0; } +static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) +{ + dp->dn = dn; + + return 0; +} + static int dsa_parse_ports_of(struct device_node *dn, struct dsa_switch *ds) { struct device_node *ports, *port; + struct dsa_port *dp; u32 reg; int err; @@ -610,22 +618,45 @@ static int dsa_parse_ports_of(struct device_node *dn, struct dsa_switch *ds) if (reg >= ds->num_ports) return -EINVAL; - ds->ports[reg].dn = port; + dp = &ds->ports[reg]; + + err = dsa_port_parse_of(dp, port); + if (err) + return err; } return 0; } +static int dsa_port_parse(struct dsa_port *dp, const char *name, + struct device *dev) +{ + dp->name = name; + + return 0; +} + static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) { bool valid_name_found = false; + struct dsa_port *dp; + struct device *dev; + const char *name; unsigned int i; + int err; for (i = 0; i < DSA_MAX_PORTS; i++) { - if (!cd->port_names[i]) + name = cd->port_names[i]; + dev = cd->netdev[i]; + dp = &ds->ports[i]; + + if (!name) continue; - ds->ports[i].name = cd->port_names[i]; + err = dsa_port_parse(dp, name, dev); + if (err) + return err; + valid_name_found = true; } From 6d4e5c570c2d66c806ecc6bd851fcf881fe8a38e Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Oct 2017 15:55:15 -0400 Subject: [PATCH 1638/2177] net: dsa: get port type at parse time Assign a port's type at parsed time instead of waiting for the tree to be completed. Because this is now done earlier, we can use the port's type in dsa_port_is_* helpers instead of digging again in topology description. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a0ee91cd3814..895c38427ef0 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -87,23 +87,17 @@ static void dsa_dst_del_ds(struct dsa_switch_tree *dst, */ static bool dsa_port_is_valid(struct dsa_port *port) { - return !!(port->dn || port->name); + return port->type != DSA_PORT_TYPE_UNUSED; } static bool dsa_port_is_dsa(struct dsa_port *port) { - if (port->name && !strcmp(port->name, "dsa")) - return true; - else - return !!of_parse_phandle(port->dn, "link", 0); + return port->type == DSA_PORT_TYPE_DSA; } static bool dsa_port_is_cpu(struct dsa_port *port) { - if (port->name && !strcmp(port->name, "cpu")) - return true; - else - return !!of_parse_phandle(port->dn, "ethernet", 0); + return port->type == DSA_PORT_TYPE_CPU; } static bool dsa_ds_find_port_dn(struct dsa_switch *ds, @@ -183,8 +177,6 @@ static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds) err = dsa_port_complete(dst, ds, port, index); if (err != 0) return err; - - port->type = DSA_PORT_TYPE_DSA; } return 0; @@ -500,8 +492,6 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, dst->cpu_dp->master = ethernet_dev; } - port->type = DSA_PORT_TYPE_CPU; - tag_protocol = ds->ops->get_tag_protocol(ds); tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(tag_ops)) { @@ -534,8 +524,6 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) err = dsa_cpu_parse(port, index, dst, ds); if (err) return err; - } else { - port->type = DSA_PORT_TYPE_USER; } } @@ -592,6 +580,17 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) { + struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); + struct device_node *link = of_parse_phandle(dn, "link", 0); + + if (ethernet) { + dp->type = DSA_PORT_TYPE_CPU; + } else if (link) { + dp->type = DSA_PORT_TYPE_DSA; + } else { + dp->type = DSA_PORT_TYPE_USER; + } + dp->dn = dn; return 0; @@ -631,6 +630,14 @@ static int dsa_parse_ports_of(struct device_node *dn, struct dsa_switch *ds) static int dsa_port_parse(struct dsa_port *dp, const char *name, struct device *dev) { + if (!strcmp(name, "cpu")) { + dp->type = DSA_PORT_TYPE_CPU; + } else if (!strcmp(name, "dsa")) { + dp->type = DSA_PORT_TYPE_DSA; + } else { + dp->type = DSA_PORT_TYPE_USER; + } + dp->name = name; return 0; From cbabb0ac01052f79cf96d7b7e3d3451ffd275864 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Oct 2017 15:55:17 -0400 Subject: [PATCH 1639/2177] net: dsa: get master device at port parsing time Fetching the master device can be done directly when a port is parsed from device tree or pdata, instead of waiting until dsa_dst_parse. Now that -EPROBE_DEFER is returned before we add the switch to the tree, there is no need to check for this error after dsa_dst_parse. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 895c38427ef0..8cd84c1b3dc0 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -470,27 +470,9 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, { const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; - struct net_device *ethernet_dev; - struct device_node *ethernet; - if (port->dn) { - ethernet = of_parse_phandle(port->dn, "ethernet", 0); - if (!ethernet) - return -EINVAL; - ethernet_dev = of_find_net_device_by_node(ethernet); - if (!ethernet_dev) - return -EPROBE_DEFER; - } else { - ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]); - if (!ethernet_dev) - return -EPROBE_DEFER; - dev_put(ethernet_dev); - } - - if (!dst->cpu_dp) { + if (!dst->cpu_dp) dst->cpu_dp = port; - dst->cpu_dp->master = ethernet_dev; - } tag_protocol = ds->ops->get_tag_protocol(ds); tag_ops = dsa_resolve_tag_protocol(tag_protocol); @@ -584,7 +566,14 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) struct device_node *link = of_parse_phandle(dn, "link", 0); if (ethernet) { + struct net_device *master; + + master = of_find_net_device_by_node(ethernet); + if (!master) + return -EPROBE_DEFER; + dp->type = DSA_PORT_TYPE_CPU; + dp->master = master; } else if (link) { dp->type = DSA_PORT_TYPE_DSA; } else { @@ -631,7 +620,16 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name, struct device *dev) { if (!strcmp(name, "cpu")) { + struct net_device *master; + + master = dsa_dev_to_net_device(dev); + if (!master) + return -EPROBE_DEFER; + + dev_put(master); + dp->type = DSA_PORT_TYPE_CPU; + dp->master = master; } else if (!strcmp(name, "dsa")) { dp->type = DSA_PORT_TYPE_DSA; } else { @@ -773,14 +771,8 @@ static int _dsa_register_switch(struct dsa_switch *ds) } err = dsa_dst_parse(dst); - if (err) { - if (err == -EPROBE_DEFER) { - dsa_dst_del_ds(dst, ds, ds->index); - return err; - } - + if (err) goto out_del_dst; - } err = dsa_dst_apply(dst); if (err) { From 1838fa89a22cbc9ec87e995683e241a82d87e6df Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Oct 2017 15:55:18 -0400 Subject: [PATCH 1640/2177] net: dsa: get port name at parse time Get the optional "label" property and assign a default one directly at parse time instead of doing it when creating the slave. For legacy, simply assign the port name stored in cd->port_names. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 10 +++++----- net/dsa/legacy.c | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 8cd84c1b3dc0..3c134ff26863 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -260,11 +260,6 @@ static int dsa_user_port_apply(struct dsa_port *port) const char *name = port->name; int err; - if (port->dn) - name = of_get_property(port->dn, "label", NULL); - if (!name) - name = "eth%d"; - err = dsa_slave_create(port, name); if (err) { dev_warn(ds->dev, "Failed to create slave %d: %d\n", @@ -564,6 +559,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) { struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); struct device_node *link = of_parse_phandle(dn, "link", 0); + const char *name = of_get_property(dn, "label", NULL); if (ethernet) { struct net_device *master; @@ -577,7 +573,11 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) } else if (link) { dp->type = DSA_PORT_TYPE_DSA; } else { + if (!name) + name = "eth%d"; + dp->type = DSA_PORT_TYPE_USER; + dp->name = name; } dp->dn = dn; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index ed7aae342fca..afe6e1539bd0 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -115,6 +115,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, name = cd->port_names[i]; if (name == NULL) continue; + dp->name = name; if (!strcmp(name, "cpu")) { if (dst->cpu_dp) { From 951259aa60180e2897d28b538bf68a3a213da471 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Oct 2017 15:55:19 -0400 Subject: [PATCH 1641/2177] net: dsa: remove name arg from slave create Now that slave dsa_port always have their name set, there is no need to pass it to dsa_slave_create() anymore. Remove this argument. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 3 +-- net/dsa/dsa_priv.h | 2 +- net/dsa/legacy.c | 2 +- net/dsa/slave.c | 3 ++- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 3c134ff26863..797d1156b4e6 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -257,10 +257,9 @@ static void dsa_cpu_port_unapply(struct dsa_port *port) static int dsa_user_port_apply(struct dsa_port *port) { struct dsa_switch *ds = port->ds; - const char *name = port->name; int err; - err = dsa_slave_create(port, name); + err = dsa_slave_create(port); if (err) { dev_warn(ds->dev, "Failed to create slave %d: %d\n", port->index, err); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 1e65afd6989e..253a613c40cd 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -163,7 +163,7 @@ void dsa_port_fixed_link_unregister_of(struct dsa_port *dp); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); -int dsa_slave_create(struct dsa_port *port, const char *name); +int dsa_slave_create(struct dsa_port *dp); void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index afe6e1539bd0..0511fe2feff7 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -197,7 +197,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, if (dsa_is_user_port(ds, i)) continue; - ret = dsa_slave_create(&ds->ports[i], cd->port_names[i]); + ret = dsa_slave_create(&ds->ports[i]); if (ret < 0) netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", index, i, cd->port_names[i], ret); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 808e205227c3..48b954a76b0d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1142,11 +1142,12 @@ static void dsa_slave_notify(struct net_device *dev, unsigned long val) call_dsa_notifiers(val, dev, &rinfo.info); } -int dsa_slave_create(struct dsa_port *port, const char *name) +int dsa_slave_create(struct dsa_port *port) { struct dsa_port *cpu_dp = port->cpu_dp; struct net_device *master = cpu_dp->master; struct dsa_switch *ds = port->ds; + const char *name = port->name; struct net_device *slave_dev; struct dsa_slave_priv *p; int ret; From 4f6265d485ea0a2507692ded8ed47b323f49587c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 27 Oct 2017 17:37:12 -0700 Subject: [PATCH 1642/2177] netlink: Allow ext_ack to carry non-error messages The NLMSGERR API already carries data (eg, a cookie) on the success path. Allow a message string to be returned as well. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 767c84e10e20..26ded4239611 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2313,17 +2313,16 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, * requests to cap the error message, and get extra error data if * requested. 
*/ + if (nlk_has_extack && extack && extack->_msg) + tlvlen += nla_total_size(strlen(extack->_msg) + 1); + if (err) { if (!(nlk->flags & NETLINK_F_CAP_ACK)) payload += nlmsg_len(nlh); else flags |= NLM_F_CAPPED; - if (nlk_has_extack && extack) { - if (extack->_msg) - tlvlen += nla_total_size(strlen(extack->_msg) + 1); - if (extack->bad_attr) - tlvlen += nla_total_size(sizeof(u32)); - } + if (nlk_has_extack && extack && extack->bad_attr) + tlvlen += nla_total_size(sizeof(u32)); } else { flags |= NLM_F_CAPPED; @@ -2348,10 +2347,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); if (nlk_has_extack && extack) { + if (extack->_msg) { + WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, + extack->_msg)); + } if (err) { - if (extack->_msg) - WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, - extack->_msg)); if (extack->bad_attr && !WARN_ON((u8 *)extack->bad_attr < in_skb->data || (u8 *)extack->bad_attr >= in_skb->data + From 6c31e5a91fde2e718e59c8a627c56451f88be54c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 27 Oct 2017 17:37:13 -0700 Subject: [PATCH 1643/2177] net: Add extack to fib_notifier_info Add extack to fib_notifier_info and plumb through stack to call_fib_rule_notifiers, call_fib_entry_notifiers and call_fib6_entry_notifiers. This allows notifer handlers to return messages to user. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 1 + net/core/fib_rules.c | 9 ++++++--- net/ipv4/fib_trie.c | 13 ++++++++----- net/ipv6/ip6_fib.c | 15 +++++++++------ 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 54cd6b839d2f..c91ec732afd6 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -9,6 +9,7 @@ struct fib_notifier_info { struct net *net; int family; + struct netlink_ext_ack *extack; }; enum fib_event_type { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a6d97c1d810..fafd0a41e3f7 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -314,10 +314,12 @@ static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, static int call_fib_rule_notifiers(struct net *net, enum fib_event_type event_type, struct fib_rule *rule, - struct fib_rules_ops *ops) + struct fib_rules_ops *ops, + struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = ops->family, + .info.extack = extack, .rule = rule, }; @@ -609,7 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); @@ -749,7 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } } - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, + NULL); notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index aaa1ba09afaa..5ddc4aefff12 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -102,9 +102,11 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, static int 
call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, - int dst_len, struct fib_alias *fa) + int dst_len, struct fib_alias *fa, + struct netlink_ext_ack *extack) { struct fib_entry_notifier_info info = { + .info.extack = extack, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, @@ -1214,7 +1216,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->fa_default = -1; call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, - key, plen, new_fa); + key, plen, new_fa, extack); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1269,7 +1271,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, event, key, plen, new_fa); + call_fib_entry_notifiers(net, event, key, plen, new_fa, extack); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1569,7 +1571,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, return -ESRCH; call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, - fa_to_delete); + fa_to_delete, extack); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1886,7 +1888,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb) call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, n->key, - KEYLENGTH - fa->fa_slen, fa); + KEYLENGTH - fa->fa_slen, fa, + NULL); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1ada9672d198..2e2804f5823e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -353,9 +353,11 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, static int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, - struct rt6_info *rt) + struct rt6_info *rt, + struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { + .info.extack = extack, .rt = rt, }; @@ -868,7 +870,8 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc) + struct nl_info *info, struct mx6_config *mxc, + struct netlink_ext_ack *extack) { struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&rt->rt6i_table->tb6_lock)); @@ -1011,7 +1014,7 @@ add: rcu_assign_pointer(rt->rt6i_node, fn); rcu_assign_pointer(*ins, rt); call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, - rt); + rt, extack); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; @@ -1040,7 +1043,7 @@ add: rt->dst.rt6_next = iter->dst.rt6_next; rcu_assign_pointer(*ins, rt); call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, - rt); + rt, extack); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { @@ -1225,7 +1228,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } #endif - err = fib6_add_rt2node(fn, rt, info, mxc); + err = fib6_add_rt2node(fn, rt, info, mxc, extack); if (!err) { fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); @@ -1686,7 +1689,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, fib6_purge_rt(rt, fn, net); - call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt); + call_fib6_entry_notifiers(net, 
FIB_EVENT_ENTRY_DEL, rt, NULL); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); rt6_release(rt); From 1f279233affe115dd3f65c89716a4f4315bb4cfe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 27 Oct 2017 17:37:14 -0700 Subject: [PATCH 1644/2177] mlxsw: spectrum_router: Return extack message on abort due to fib rules Adding a FIB rule on a spectrum platform silently aborts FIB offload: $ ip ru add pref 99 from all to 192.168.1.1 table 10 $ dmesg -c [ 623.144736] mlxsw_spectrum 0000:03:00.0: FIB abort triggered. Note that FIB entries are no longer being offloaded to this device. This patch reworks FIB rule handling to return a message to the user: $ ip ru add pref 99 from all to 8.8.8.8 table 11 Error: spectrum: FIB rules not supported. Aborting offload. spectrum currently only checks whether the fib rule is a default rule or an l3mdev rule, both of which it knows how to handle. Any other it aborts FIB offload. Move the processing to check the rule type inline with the user request. If the rule is an unsupported one, then a work queue entry is used to abort the offload. Change the rule delete handling to just return since it does nothing at the moment. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 108 +++++++++++------- 1 file changed, 66 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 27b632cac991..9fe4cdb23189 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5234,7 +5234,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - struct fib_rule *rule; bool replace, append; int err; @@ -5256,12 +5255,11 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - rule = fib_work->fr_info.rule; - if (!fib4_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib_abort(mlxsw_sp); - fib_rule_put(rule); + case FIB_EVENT_RULE_ADD: + /* if we get here, a rule was added that we do not support. + * just do the fib_abort + */ + mlxsw_sp_router_fib_abort(mlxsw_sp); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: @@ -5279,7 +5277,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - struct fib_rule *rule; bool replace; int err; @@ -5298,12 +5295,11 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); mlxsw_sp_rt6_release(fib_work->fen6_info.rt); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - rule = fib_work->fr_info.rule; - if (!fib6_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib_abort(mlxsw_sp); - fib_rule_put(rule); + case FIB_EVENT_RULE_ADD: + /* if we get here, a rule was added that we do not support. 
+ * just do the fib_abort + */ + mlxsw_sp_router_fib_abort(mlxsw_sp); break; } rtnl_unlock(); @@ -5315,7 +5311,6 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - struct fib_rule *rule; bool replace; int err; @@ -5347,12 +5342,11 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) &fib_work->ven_info); dev_put(fib_work->ven_info.dev); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - rule = fib_work->fr_info.rule; - if (!ipmr_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib_abort(mlxsw_sp); - fib_rule_put(rule); + case FIB_EVENT_RULE_ADD: + /* if we get here, a rule was added that we do not support. + * just do the fib_abort + */ + mlxsw_sp_router_fib_abort(mlxsw_sp); break; } rtnl_unlock(); @@ -5363,7 +5357,6 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { struct fib_entry_notifier_info *fen_info; - struct fib_rule_notifier_info *fr_info; struct fib_nh_notifier_info *fnh_info; switch (fib_work->event) { @@ -5379,13 +5372,6 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, */ fib_info_hold(fib_work->fen_info.fi); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - fr_info = container_of(info, struct fib_rule_notifier_info, - info); - fib_work->fr_info = *fr_info; - fib_rule_get(fib_work->fr_info.rule); - break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: fnh_info = container_of(info, struct fib_nh_notifier_info, @@ -5400,7 +5386,6 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; - struct fib_rule_notifier_info *fr_info; switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ @@ -5411,13 +5396,6 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, fib_work->fen6_info = *fen6_info; rt6_hold(fib_work->fen6_info.rt); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - fr_info = container_of(info, struct fib_rule_notifier_info, - info); - fib_work->fr_info = *fr_info; - fib_rule_get(fib_work->fr_info.rule); - break; } } @@ -5437,12 +5415,47 @@ mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); dev_hold(fib_work->ven_info.dev); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); - fib_rule_get(fib_work->fr_info.rule); + } +} + +static int mlxsw_sp_router_fib_rule_event(unsigned long event, + struct fib_notifier_info *info, + struct mlxsw_sp *mlxsw_sp) +{ + struct netlink_ext_ack *extack = info->extack; + struct fib_rule_notifier_info *fr_info; + struct fib_rule *rule; + int err = 0; + + /* nothing to do at the moment */ + if (event == FIB_EVENT_RULE_DEL) + return 0; + + if (mlxsw_sp->router->aborted) + return 0; + + fr_info = container_of(info, struct fib_rule_notifier_info, info); + rule = fr_info->rule; + + switch (info->family) { + case AF_INET: + if (!fib4_rule_default(rule) && !rule->l3mdev) + err = -1; + break; + case AF_INET6: + if (!fib6_rule_default(rule) && !rule->l3mdev) + err = -1; + break; + case RTNL_FAMILY_IPMR: + if 
(!ipmr_rule_default(rule) && !rule->l3mdev) + err = -1; break; } + + if (err < 0) + NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload"); + + return err; } /* Called with rcu_read_lock() */ @@ -5452,17 +5465,28 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_fib_event_work *fib_work; struct fib_notifier_info *info = ptr; struct mlxsw_sp_router *router; + int err; if (!net_eq(info->net, &init_net) || (info->family != AF_INET && info->family != AF_INET6 && info->family != RTNL_FAMILY_IPMR)) return NOTIFY_DONE; + router = container_of(nb, struct mlxsw_sp_router, fib_nb); + + switch (event) { + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + err = mlxsw_sp_router_fib_rule_event(event, info, + router->mlxsw_sp); + if (!err) + return NOTIFY_DONE; + } + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); if (WARN_ON(!fib_work)) return NOTIFY_BAD; - router = container_of(nb, struct mlxsw_sp_router, fib_nb); fib_work->mlxsw_sp = router->mlxsw_sp; fib_work->event = event; From aa7365e19f8410659ec30503cd8ce866a176c9f4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 28 Oct 2017 05:05:46 +0000 Subject: [PATCH 1645/2177] net: bcmgenet: Avoid calling platform_device_put() twice in bcmgenet_mii_exit() Remove platform_device_put() call after platform_device_unregister() from function bcmgenet_mii_exit(), otherwise, we will call platform_device_put() twice. Fixes: 9a4e79697009 ("net: bcmgenet: utilize generic Broadcom UniMAC MDIO controller driver") Signed-off-by: Wei Yongjun Acked-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index ba3fcfdaa0bc..5333274a283c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -571,5 +571,4 @@ void bcmgenet_mii_exit(struct net_device *dev) of_phy_deregister_fixed_link(dn); of_node_put(priv->phy_dn); platform_device_unregister(priv->mii_pdev); - platform_device_put(priv->mii_pdev); } From db32919005d81a375093936a1f625f2d08c2d299 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:46:21 +0800 Subject: [PATCH 1646/2177] ip_vti: remove the useless err_count check in vti_xmit Unlike ipip and gre, ip_vti never uses err_count in vti4_err, so there is no need to check err_count in vti_xmit; its value is always 0. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 58465c0a8682..949f432a5f04 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -198,15 +198,6 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } - mtu = dst_mtu(dst); if (skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); From e8992e408273334375bfe8f03267dcdef3a8c2e6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 28 Oct 2017 08:25:30 -0400 Subject: [PATCH 1647/2177] forcedeth: replace pci_alloc_consistent with dma_alloc_coherent The function pci_alloc_consistent is obsolete, so it is replaced with dma_alloc_coherent. Signed-off-by: Zhu Yanjun Signed-off-by: David S. 
Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 61 +++++++++++++++++-------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 88128ce61471..31a943860f32 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1024,12 +1024,18 @@ static void free_rings(struct net_device *dev) if (!nv_optimized(np)) { if (np->rx_ring.orig) - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (np->rx_ring_size + np->tx_ring_size), - np->rx_ring.orig, np->ring_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc) * + (np->rx_ring_size + + np->tx_ring_size), + np->rx_ring.orig, np->ring_addr); } else { if (np->rx_ring.ex) - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size), - np->rx_ring.ex, np->ring_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc_ex) * + (np->rx_ring_size + + np->tx_ring_size), + np->rx_ring.ex, np->ring_addr); } kfree(np->rx_skb); kfree(np->tx_skb); @@ -4596,13 +4602,17 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri /* allocate new rings */ if (!nv_optimized(np)) { - rxtx_ring = pci_alloc_consistent(np->pci_dev, - sizeof(struct ring_desc) * (ring->rx_pending + ring->tx_pending), - &ring_addr); + rxtx_ring = dma_alloc_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc) * + (ring->rx_pending + + ring->tx_pending), + &ring_addr, GFP_ATOMIC); } else { - rxtx_ring = pci_alloc_consistent(np->pci_dev, - sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending), - &ring_addr); + rxtx_ring = dma_alloc_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc_ex) * + (ring->rx_pending + + ring->tx_pending), + &ring_addr, GFP_ATOMIC); } rx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->rx_pending, GFP_KERNEL); tx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->tx_pending, GFP_KERNEL); @@ -4610,12 +4620,18 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri /* fall back to old rings */ if (!nv_optimized(np)) { if (rxtx_ring) - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (ring->rx_pending + ring->tx_pending), - rxtx_ring, ring_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc) * + (ring->rx_pending + + ring->tx_pending), + rxtx_ring, ring_addr); } else { if (rxtx_ring) - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending), - rxtx_ring, ring_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc_ex) * + (ring->rx_pending + + ring->tx_pending), + rxtx_ring, ring_addr); } kfree(rx_skbuff); @@ -5740,16 +5756,21 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) np->tx_ring_size = TX_RING_DEFAULT; if (!nv_optimized(np)) { - np->rx_ring.orig = pci_alloc_consistent(pci_dev, - sizeof(struct ring_desc) * (np->rx_ring_size + np->tx_ring_size), - &np->ring_addr); + np->rx_ring.orig = dma_alloc_coherent(&pci_dev->dev, + sizeof(struct ring_desc) * + (np->rx_ring_size + + np->tx_ring_size), + &np->ring_addr, + GFP_ATOMIC); if (!np->rx_ring.orig) goto out_unmap; np->tx_ring.orig = &np->rx_ring.orig[np->rx_ring_size]; } else { - np->rx_ring.ex = pci_alloc_consistent(pci_dev, - sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size), - &np->ring_addr); + np->rx_ring.ex = dma_alloc_coherent(&pci_dev->dev, + sizeof(struct ring_desc_ex) * + 
(np->rx_ring_size + + np->tx_ring_size), + &np->ring_addr, GFP_ATOMIC); if (!np->rx_ring.ex) goto out_unmap; np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size]; From 68ad08c4f8ee6254ab814d15b9765f7f82ed0ae7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 28 Oct 2017 14:38:45 -0500 Subject: [PATCH 1648/2177] net: decnet: dn_nsp_in: use swap macro in dn_nsp_rx_packet Make use of the swap macro and remove unnecessary variable tmp. This makes the code easier to read and maintain. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/decnet/dn_nsp_in.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 7ac086d5c0c0..1b2120645730 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -776,12 +776,8 @@ static int dn_nsp_rx_packet(struct net *net, struct sock *sk2, * Swap src & dst and look up in the normal way. */ if (unlikely(cb->rt_flags & DN_RT_F_RTS)) { - __le16 tmp = cb->dst_port; - cb->dst_port = cb->src_port; - cb->src_port = tmp; - tmp = cb->dst; - cb->dst = cb->src; - cb->src = tmp; + swap(cb->dst_port, cb->src_port); + swap(cb->dst, cb->src); } /* From 3a7943ba5b7763488753305fddb0a50e911e0617 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 28 Oct 2017 15:39:48 -0500 Subject: [PATCH 1649/2177] net: decnet: dn_nsp_out: use swap macro in dn_mk_ack_header Make use of the swap macro and remove unnecessary variable tmp. This makes the code easier to read and maintain. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/decnet/dn_nsp_out.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index e50a4adfcf7e..56a52a004c56 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -313,11 +313,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c ackcrs |= 0x8000; /* If this is an "other data/ack" message, swap acknum and ackcrs */ - if (other) { - unsigned short tmp = acknum; - acknum = ackcrs; - ackcrs = tmp; - } + if (other) + swap(acknum, ackcrs); /* Set "cross subchannel" bit in ackcrs */ ackcrs |= 0x2000; From 54df7ef51193641010a934c231c5aba117aea46d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 28 Oct 2017 15:48:47 -0500 Subject: [PATCH 1650/2177] net: dccp: ccids: lib: packet_history: use swap macro in tfrc_rx_hist_swap Make use of the swap macro and remove unnecessary variable tmp. This makes the code easier to read and maintain. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/dccp/ccids/lib/packet_history.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 08df7a3acb3d..876e18592d71 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -149,10 +149,8 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { const u8 idx_a = tfrc_rx_hist_index(h, a), idx_b = tfrc_rx_hist_index(h, b); - struct tfrc_rx_hist_entry *tmp = h->ring[idx_a]; - h->ring[idx_a] = h->ring[idx_b]; - h->ring[idx_b] = tmp; + swap(h->ring[idx_a], h->ring[idx_b]); } /* From 81ba4e1ac4f1d3e9d980ca0f003dd3038559b3fa Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sat, 28 Oct 2017 17:17:13 -0700 Subject: [PATCH 1651/2177] selftests/bpf: remove useless bpf_trace_printk sockmap test is using two programs that use bpf_trace_printk() which prints into trace_pipe, but nothing is reading it. Remove it. Fixes: 6f6d33f3b3d0 ("bpf: selftests add sockmap tests") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/sockmap_parse_prog.c | 3 --- tools/testing/selftests/bpf/sockmap_verdict_prog.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c index fae3b96c3aa4..a1dec2b6d9c5 100644 --- a/tools/testing/selftests/bpf/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c @@ -29,9 +29,6 @@ int bpf_prog1(struct __sk_buff *skb) * fields. */ d[7] = 1; - - bpf_printk("parse: data[0] = (%u): local_port %i remote %i\n", - d[0], lport, bpf_ntohl(rport)); return skb->len; } diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c index 2cd2d552938b..d7bea972cb21 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -58,8 +58,6 @@ int bpf_prog2(struct __sk_buff *skb) d[6] = 0xe; d[7] = 0xf; - bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); - if (!map) return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0); return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0); From 28ef7de70dde7762cb81eecf411e2c78a25c457d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 29 Oct 2017 13:30:25 +0000 Subject: [PATCH 1652/2177] mkiss: remove redundant assignment of len to ax->mtu Variable len is being assigned a value that is never read, hence the assignment is redundant and can be removed. Cleans up clang warning: drivers/net/hamradio/mkiss.c:443:3: warning: Value stored to 'len' is never read Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/hamradio/mkiss.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 54bf8e6e4a09..c180b480f8ef 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -440,7 +440,6 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) ax_changedmtu(ax); if (len > ax->mtu) { /* Sigh, shouldn't occur BUT ... 
*/ - len = ax->mtu; printk(KERN_ERR "mkiss: %s: truncating oversized transmit packet!\n", ax->dev->name); dev->stats.tx_dropped++; netif_start_queue(dev); From 2d1d7df8a3652697da7f7929791d555e6c5981c2 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 30 Oct 2017 11:23:28 +0100 Subject: [PATCH 1653/2177] net: mvpp2: set the Rx FIFO size depending on the port speeds for PPv2.2 The Rx FIFO size was set to the same value for all ports. This patch sets it depending on the maximum speed a given port can handle. This is only working for PPv2.2. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 52 ++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 794a3b6aa573..2b0ae35d2168 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -504,9 +504,13 @@ #define MVPP2_TX_DESC_ALIGN (MVPP2_DESC_ALIGNED_SIZE - 1) /* RX FIFO constants */ -#define MVPP2_RX_FIFO_PORT_DATA_SIZE 0x2000 -#define MVPP2_RX_FIFO_PORT_ATTR_SIZE 0x80 -#define MVPP2_RX_FIFO_PORT_MIN_PKT 0x80 +#define MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB 0x8000 +#define MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB 0x2000 +#define MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB 0x1000 +#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_32KB 0x200 +#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_8KB 0x80 +#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB 0x40 +#define MVPP2_RX_FIFO_PORT_MIN_PKT 0x80 /* RX buffer constants */ #define MVPP2_SKB_SHINFO_SIZE \ @@ -7764,9 +7768,42 @@ static void mvpp2_rx_fifo_init(struct mvpp2 *priv) for (port = 0; port < MVPP2_MAX_PORTS; port++) { mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port), - MVPP2_RX_FIFO_PORT_DATA_SIZE); + MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB); mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port), - MVPP2_RX_FIFO_PORT_ATTR_SIZE); + MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB); + } + + mvpp2_write(priv, MVPP2_RX_MIN_PKT_SIZE_REG, + MVPP2_RX_FIFO_PORT_MIN_PKT); + mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1); +} + +static void mvpp22_rx_fifo_init(struct mvpp2 *priv) +{ + int port; + + /* The FIFO size parameters are set depending on the maximum speed a + * given port can handle: + * - Port 0: 10Gbps + * - Port 1: 2.5Gbps + * - Ports 2 and 3: 1Gbps + */ + + mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(0), + MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB); + mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(0), + MVPP2_RX_FIFO_PORT_ATTR_SIZE_32KB); + + mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(1), + MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB); + mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(1), + MVPP2_RX_FIFO_PORT_ATTR_SIZE_8KB); + + for (port = 2; port < MVPP2_MAX_PORTS; port++) { + mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port), + MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB); + mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port), + MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB); } mvpp2_write(priv, MVPP2_RX_MIN_PKT_SIZE_REG, @@ -7870,7 +7907,10 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) } /* Rx Fifo Init */ - mvpp2_rx_fifo_init(priv); + if (priv->hw_version == MVPP21) + mvpp2_rx_fifo_init(priv); + else + mvpp22_rx_fifo_init(priv); if (priv->hw_version == MVPP21) writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT, From 7c10f9742d76ec18bed5de14f5f4ed08859f7b7a Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 30 Oct 2017 11:23:29 +0100 Subject: [PATCH 1654/2177] net: mvpp2: initialize the Tx FIFO size So far only the Rx FIFO size was initialized. 
For PPv2.2 the Tx FIFO size can be set as well. This patch initializes the Tx FIFO size for PPv2.2 controllers to 3K. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 2b0ae35d2168..7a6e6ae0a074 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -38,11 +38,12 @@ #include #include -/* RX Fifo Registers */ +/* Fifo Registers */ #define MVPP2_RX_DATA_FIFO_SIZE_REG(port) (0x00 + 4 * (port)) #define MVPP2_RX_ATTR_FIFO_SIZE_REG(port) (0x20 + 4 * (port)) #define MVPP2_RX_MIN_PKT_SIZE_REG 0x60 #define MVPP2_RX_FIFO_INIT_REG 0x64 +#define MVPP22_TX_FIFO_SIZE_REG(port) (0x8860 + 4 * (port)) /* RX DMA Top Registers */ #define MVPP2_RX_CTRL_REG(port) (0x140 + 4 * (port)) @@ -512,6 +513,10 @@ #define MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB 0x40 #define MVPP2_RX_FIFO_PORT_MIN_PKT 0x80 +/* TX FIFO constants */ +#define MVPP22_TX_FIFO_DATA_SIZE_10KB 0xa +#define MVPP22_TX_FIFO_DATA_SIZE_3KB 0x3 + /* RX buffer constants */ #define MVPP2_SKB_SHINFO_SIZE \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) @@ -7811,6 +7816,16 @@ static void mvpp22_rx_fifo_init(struct mvpp2 *priv) mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1); } +/* Initialize Tx FIFO's */ +static void mvpp22_tx_fifo_init(struct mvpp2 *priv) +{ + int port; + + for (port = 0; port < MVPP2_MAX_PORTS; port++) + mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port), + MVPP22_TX_FIFO_DATA_SIZE_3KB); +} + static void mvpp2_axi_init(struct mvpp2 *priv) { u32 val, rdval, wrval; @@ -7906,11 +7921,13 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) return err; } - /* Rx Fifo Init */ - if (priv->hw_version == MVPP21) + /* Fifo Init */ + if (priv->hw_version == MVPP21) { mvpp2_rx_fifo_init(priv); - else + } else { mvpp22_rx_fifo_init(priv); + mvpp22_tx_fifo_init(priv); + } if (priv->hw_version == MVPP21) writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT, From 1d7d15d79fb4660bec3a86e748c50aac7c5d2121 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 30 Oct 2017 11:23:30 +0100 Subject: [PATCH 1655/2177] net: mvpp2: initialize the RSS tables This patch initialize the RSS tables to evenly (depending on the packets RSS hashes) distribute the packets across port Rx queues. This helps to handle packets on different CPUs to improve performances, as more queues will be used in parallel. Signed-off-by: Antoine Tenart Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvpp2.c | 50 ++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 7a6e6ae0a074..54d80df1c1ac 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -83,6 +83,16 @@ #define MVPP2_PRS_TCAM_CTRL_REG 0x1230 #define MVPP2_PRS_TCAM_EN_MASK BIT(0) +/* RSS Registers */ +#define MVPP22_RSS_INDEX 0x1500 +#define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) ((idx) << 8) +#define MVPP22_RSS_INDEX_TABLE(idx) ((idx) << 8) +#define MVPP22_RSS_INDEX_QUEUE(idx) ((idx) << 16) +#define MVPP22_RSS_TABLE_ENTRY 0x1508 +#define MVPP22_RSS_TABLE 0x1510 +#define MVPP22_RSS_TABLE_POINTER(p) (p) +#define MVPP22_RSS_WIDTH 0x150c + /* Classifier Registers */ #define MVPP2_CLS_MODE_REG 0x1800 #define MVPP2_CLS_MODE_ACTIVE_MASK BIT(0) @@ -746,6 +756,10 @@ enum mvpp2_prs_l3_cast { #define MVPP2_CLS_FLOWS_TBL_SIZE 512 #define MVPP2_CLS_FLOWS_TBL_DATA_WORDS 3 #define MVPP2_CLS_LKP_TBL_SIZE 64 +#define MVPP2_CLS_RX_QUEUES 256 + +/* RSS constants */ +#define MVPP22_RSS_TABLE_ENTRIES 32 /* BM constants */ #define MVPP2_BM_POOLS_NUM 8 @@ -6788,6 +6802,39 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port) } } +static void mvpp22_init_rss(struct mvpp2_port *port) +{ + struct mvpp2 *priv = port->priv; + int i; + + /* Set the table width: replace the whole classifier Rx queue number + * with the ones configured in RSS table entries. + */ + mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(0)); + mvpp2_write(priv, MVPP22_RSS_WIDTH, 8); + + /* Loop through the classifier Rx Queues and map them to a RSS table. + * Map them all to the first table (0) by default. + */ + for (i = 0; i < MVPP2_CLS_RX_QUEUES; i++) { + mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(i)); + mvpp2_write(priv, MVPP22_RSS_TABLE, + MVPP22_RSS_TABLE_POINTER(0)); + } + + /* Configure the first table to evenly distribute the packets across + * real Rx Queues. The table entries map a hash to an port Rx Queue. + */ + for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) { + u32 sel = MVPP22_RSS_INDEX_TABLE(0) | + MVPP22_RSS_INDEX_TABLE_ENTRY(i); + mvpp2_write(priv, MVPP22_RSS_INDEX, sel); + + mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY, i % port->nrxqs); + } + +} + static int mvpp2_open(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); @@ -6862,6 +6909,9 @@ static int mvpp2_open(struct net_device *dev) mvpp2_start_dev(port); + if (priv->hw_version == MVPP22) + mvpp22_init_rss(port); + return 0; err_free_link_irq: From 1d17db08c056c1f7f4abbff6aff8711b7c3a3b7f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 30 Oct 2017 11:23:31 +0100 Subject: [PATCH 1656/2177] net: mvpp2: limit TSO segments and use stop/wake thresholds Too many TSO descriptors can be required for the default queue size, when using small MSS values for example. Prevent this by adding a maximum number of allowed TSO segments (300). In addition set a stop and a wake thresholds to stop the queue when there's no room for a 1 "worst case scenario skb". Wake up the queue when the number of descriptors is low enough. Signed-off-by: Antoine Tenart Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvpp2.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 54d80df1c1ac..340b4d682951 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -493,6 +493,13 @@ /* Maximum number of TXQs used by single port */ #define MVPP2_MAX_TXQ 8 +/* MVPP2_MAX_TSO_SEGS is the maximum number of fragments to allow in the GSO + * skb. As we need a maxium of two descriptors per fragments (1 header, 1 data), + * multiply this value by two to count the maximum number of skb descs needed. + */ +#define MVPP2_MAX_TSO_SEGS 300 +#define MVPP2_MAX_SKB_DESCS (MVPP2_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) + /* Dfault number of RXQs in use */ #define MVPP2_DEFAULT_RXQ 4 @@ -1045,6 +1052,9 @@ struct mvpp2_txq_pcpu { */ int count; + int wake_threshold; + int stop_threshold; + /* Number of Tx DMA descriptors reserved for each CPU */ int reserved_num; @@ -5393,7 +5403,7 @@ static void mvpp2_txq_done(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, txq_pcpu->count -= tx_done; if (netif_tx_queue_stopped(nq)) - if (txq_pcpu->size - txq_pcpu->count >= MAX_SKB_FRAGS + 1) + if (txq_pcpu->count <= txq_pcpu->wake_threshold) netif_tx_wake_queue(nq); } @@ -5636,6 +5646,9 @@ static int mvpp2_txq_init(struct mvpp2_port *port, txq_pcpu->txq_put_index = 0; txq_pcpu->txq_get_index = 0; + txq_pcpu->stop_threshold = txq->size - MVPP2_MAX_SKB_DESCS; + txq_pcpu->wake_threshold = txq_pcpu->stop_threshold / 2; + txq_pcpu->tso_headers = dma_alloc_coherent(port->dev->dev.parent, txq_pcpu->size * TSO_HEADER_SIZE, @@ -6508,7 +6521,7 @@ out: wmb(); mvpp2_aggr_txq_pend_desc_add(port, frags); - if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) + if (txq_pcpu->count >= txq_pcpu->stop_threshold) netif_tx_stop_queue(nq); u64_stats_update_begin(&stats->syncp); @@ -7732,6 +7745,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->features = features | NETIF_F_RXCSUM; dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO; dev->vlan_features |= features; + dev->gso_max_segs = MVPP2_MAX_TSO_SEGS; /* MTU range: 68 - 9676 */ dev->min_mtu = ETH_MIN_MTU; From 02856a3ba6333c536f13d27cc847fcb442a23d9b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 30 Oct 2017 11:23:32 +0100 Subject: [PATCH 1657/2177] net: mvpp2: use the aggr txq size define everywhere Cosmetic patch using the MVPP2_AGGR_TXQ_SIZE everywhere instead of the size field of aggr_txq, as the size never change and is always equal to the MVPP2_AGGR_TXQ_SIZE define. Signed-off-by: Antoine Tenart Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvpp2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 340b4d682951..981fedeef67b 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5055,7 +5055,7 @@ static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending) static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv, struct mvpp2_tx_queue *aggr_txq, int num) { - if ((aggr_txq->count + num) > aggr_txq->size) { + if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) { /* Update number of occupied aggregated Tx descriptors */ int cpu = smp_processor_id(); u32 val = mvpp2_read(priv, MVPP2_AGGR_TXQ_STATUS_REG(cpu)); @@ -5063,7 +5063,7 @@ static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv, aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK; } - if ((aggr_txq->count + num) > aggr_txq->size) + if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) return -ENOMEM; return 0; @@ -5447,7 +5447,7 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev, if (!aggr_txq->descs) return -ENOMEM; - aggr_txq->last_desc = aggr_txq->size - 1; + aggr_txq->last_desc = MVPP2_AGGR_TXQ_SIZE - 1; /* Aggr TXQ no reset WA */ aggr_txq->next_desc_to_proc = mvpp2_read(priv, From 6eb5d375cefcbd60ebb4251b150ea95d47140fe0 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 30 Oct 2017 11:23:33 +0100 Subject: [PATCH 1658/2177] net: mvpp2: simplify the Tx desc set DMA logic Two functions were always used to set the DMA addresses in Tx descriptors, because this address is split into a base+offset in the descriptors. A mask was used to come up with the base and offset addresses and two functions were called, mvpp2_txdesc_dma_addr_set() and mvpp2_txdesc_offset_set(). This patch moves the base+offset calculation logic to mvpp2_txdesc_dma_addr_set(), and removes mvpp2_txdesc_offset_set() to simplify things. Signed-off-by: Antoine Tenart Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvpp2.c | 39 ++++++++++------------------ 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 981fedeef67b..965b6a829a5d 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -1290,13 +1290,20 @@ static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port, struct mvpp2_tx_desc *tx_desc, dma_addr_t dma_addr) { + dma_addr_t addr, offset; + + addr = dma_addr & ~MVPP2_TX_DESC_ALIGN; + offset = dma_addr & MVPP2_TX_DESC_ALIGN; + if (port->priv->hw_version == MVPP21) { - tx_desc->pp21.buf_dma_addr = dma_addr; + tx_desc->pp21.buf_dma_addr = addr; + tx_desc->pp21.packet_offset = offset; } else { - u64 val = (u64)dma_addr; + u64 val = (u64)addr; tx_desc->pp22.buf_dma_addr_ptp &= ~GENMASK_ULL(40, 0); tx_desc->pp22.buf_dma_addr_ptp |= val; + tx_desc->pp22.packet_offset = offset; } } @@ -1339,16 +1346,6 @@ static void mvpp2_txdesc_cmd_set(struct mvpp2_port *port, tx_desc->pp22.command = command; } -static void mvpp2_txdesc_offset_set(struct mvpp2_port *port, - struct mvpp2_tx_desc *tx_desc, - unsigned int offset) -{ - if (port->priv->hw_version == MVPP21) - tx_desc->pp21.packet_offset = offset; - else - tx_desc->pp22.packet_offset = offset; -} - static unsigned int mvpp2_txdesc_offset_get(struct mvpp2_port *port, struct mvpp2_tx_desc *tx_desc) { @@ -6292,10 +6289,7 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb, goto cleanup; } - mvpp2_txdesc_offset_set(port, tx_desc, - buf_dma_addr & MVPP2_TX_DESC_ALIGN); - mvpp2_txdesc_dma_addr_set(port, tx_desc, - buf_dma_addr & ~MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr); if (i == (skb_shinfo(skb)->nr_frags - 1)) { /* Last descriptor */ @@ -6338,8 +6332,7 @@ static inline void mvpp2_tso_put_hdr(struct sk_buff *skb, addr = txq_pcpu->tso_headers_dma + txq_pcpu->txq_put_index * TSO_HEADER_SIZE; - mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN); - mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, addr); mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) | MVPP2_TXD_F_DESC | @@ -6368,10 +6361,7 @@ static inline int mvpp2_tso_put_data(struct sk_buff *skb, return -ENOMEM; } - mvpp2_txdesc_offset_set(port, tx_desc, - buf_dma_addr & MVPP2_TX_DESC_ALIGN); - mvpp2_txdesc_dma_addr_set(port, tx_desc, - buf_dma_addr & ~MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr); if (!left) { mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC); @@ -6483,10 +6473,7 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) goto out; } - mvpp2_txdesc_offset_set(port, tx_desc, - buf_dma_addr & MVPP2_TX_DESC_ALIGN); - mvpp2_txdesc_dma_addr_set(port, tx_desc, - buf_dma_addr & ~MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr); tx_cmd = mvpp2_skb_tx_csum(port, skb); From 1a3fbd3fdec55007f1b18cf96c4a9e486c6e875b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 30 Oct 2017 12:56:33 +0200 Subject: [PATCH 1659/2177] net: bridge: add neigh_suppress to bridge port policies Add an entry for IFLA_BRPORT_NEIGH_SUPPRESS to bridge port policies. Fixes: 821f1b21cabb ("bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood") Signed-off-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e732403669c6..26aeb0b5cf30 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -651,6 +651,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 }, + [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ From 8caae31a8c2759fcdca7edc2bd41bba8d230a5cc Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 30 Oct 2017 21:22:03 +0530 Subject: [PATCH 1660/2177] atm: iphase: Fix space before '[' error. Fix checkpatch.pl error: ERROR: space prohibited before open square bracket '['. Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/atm/iphase.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index fc72b763fdd7..a785c6e69757 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -880,7 +880,7 @@ static void ia_phy_write(struct iadev_priv *iadev, static void ia_suni_pm7345_init_ds3(struct iadev_priv *iadev) { - static const struct ia_reg suni_ds3_init [] = { + static const struct ia_reg suni_ds3_init[] = { { SUNI_DS3_FRM_INTR_ENBL, 0x17 }, { SUNI_DS3_FRM_CFG, 0x01 }, { SUNI_DS3_TRAN_CFG, 0x01 }, @@ -898,7 +898,7 @@ static void ia_suni_pm7345_init_ds3(struct iadev_priv *iadev) static void ia_suni_pm7345_init_e3(struct iadev_priv *iadev) { - static const struct ia_reg suni_e3_init [] = { + static const struct ia_reg suni_e3_init[] = { { SUNI_E3_FRM_FRAM_OPTIONS, 0x04 }, { SUNI_E3_FRM_MAINT_OPTIONS, 0x20 }, { SUNI_E3_FRM_FRAM_INTR_ENBL, 0x1d }, @@ -918,7 +918,7 @@ static void ia_suni_pm7345_init_e3(struct iadev_priv *iadev) static void ia_suni_pm7345_init(struct iadev_priv *iadev) { - static const struct ia_reg suni_init [] = { + static const struct ia_reg suni_init[] = { /* Enable RSOP loss of signal interrupt. */ { SUNI_INTR_ENBL, 0x28 }, /* Clear error counters. */ From 3051fbec206eb6967b7fdecedb63ebb1ed67a1a7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 30 Oct 2017 10:07:17 -0700 Subject: [PATCH 1661/2177] net: sit: Update lookup to handle links set to L3 slave Using SIT tunnels with VRFs works fine if the underlay device is in a VRF and the link parameter is set to the VRF device. e.g., ip tunnel add jtun mode sit remote local dev myvrf Update the device check to allow the link to be the enslaved device as well. e.g., ip tunnel add jtun mode sit remote local dev eth4 where eth4 is enslaved to myvrf. Reported-by: Jeff Barnhill <0xeffeff@gmail.com> Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/sit.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a799f5258614..d60ddcb0bfe2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -91,29 +91,35 @@ struct sit_net { * Must be invoked with rcu_read_lock */ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, - struct net_device *dev, __be32 remote, __be32 local) + struct net_device *dev, + __be32 remote, __be32 local, + int sifindex) { unsigned int h0 = HASH(remote); unsigned int h1 = HASH(local); struct ip_tunnel *t; struct sit_net *sitn = net_generic(net, sit_net_id); + int ifindex = dev ? 
dev->ifindex : 0; for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (!dev || !t->parms.link || ifindex == t->parms.link || + sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { if (remote == t->parms.iph.daddr && - (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (!dev || !t->parms.link || ifindex == t->parms.link || + sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { if (local == t->parms.iph.saddr && - (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (!dev || !t->parms.link || ifindex == t->parms.link || + sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } @@ -486,6 +492,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) const int code = icmp_hdr(skb)->code; unsigned int data_len = 0; struct ip_tunnel *t; + int sifindex; int err; switch (type) { @@ -517,10 +524,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = -ENOENT; - t = ipip6_tunnel_lookup(dev_net(skb->dev), - skb->dev, - iph->daddr, - iph->saddr); + sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; + t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, + iph->daddr, iph->saddr, sifindex); if (!t) goto out; @@ -633,10 +639,12 @@ static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct ip_tunnel *tunnel; + int sifindex; int err; + sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, - iph->saddr, iph->daddr); + iph->saddr, iph->daddr, sifindex); if (tunnel) { struct pcpu_sw_netstats *tstats; @@ -704,10 +712,13 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { const struct iphdr *iph; struct ip_tunnel *tunnel; + int sifindex; + + sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; iph = ip_hdr(skb); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, - iph->saddr, iph->daddr); + iph->saddr, iph->daddr, sifindex); if (tunnel) { const struct tnl_ptk_info *tpi; From 07c41a295c5f25928a7cb689fdec816bd0089fe8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 30 Oct 2017 13:50:22 -0700 Subject: [PATCH 1662/2177] bpf: avoid rcu_dereference inside bpf_event_mutex lock region During perf event attaching/detaching bpf programs, the tp_event->prog_array change is protected by the bpf_event_mutex lock in both the attaching and detaching functions. Although tp_event->prog_array is an rcu pointer, rcu_dereference is not needed to access it since the mutex lock will guarantee ordering. Verified through "make C=2" that the sparse locking check is still happy with the new change. Also change the label name in perf_event_{attach,detach}_bpf_prog from "out" to "unlock" to reflect the code action after the label. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- kernel/trace/bpf_trace.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 136aa6bb0422..506efe6e8ed9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -769,20 +769,19 @@ int perf_event_attach_bpf_prog(struct perf_event *event, mutex_lock(&bpf_event_mutex); if (event->prog) - goto out; + goto unlock; - old_array = rcu_dereference_protected(event->tp_event->prog_array, - lockdep_is_held(&bpf_event_mutex)); + old_array = event->tp_event->prog_array; ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); if (ret < 0) - goto out; + goto unlock; /* set the new array to event->tp_event and set event->prog */ event->prog = prog; rcu_assign_pointer(event->tp_event->prog_array, new_array); bpf_prog_array_free(old_array); -out: +unlock: mutex_unlock(&bpf_event_mutex); return ret; } @@ -796,11 +795,9 @@ void perf_event_detach_bpf_prog(struct perf_event *event) mutex_lock(&bpf_event_mutex); if (!event->prog) - goto out; - - old_array = rcu_dereference_protected(event->tp_event->prog_array, - lockdep_is_held(&bpf_event_mutex)); + goto unlock; + old_array = event->tp_event->prog_array; ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); if (ret < 0) { bpf_prog_array_delete_safe(old_array, event->prog); @@ -812,6 +809,6 @@ void perf_event_detach_bpf_prog(struct perf_event *event) bpf_prog_put(event->prog); event->prog = NULL; -out: +unlock: mutex_unlock(&bpf_event_mutex); } From 2fd2f61e6d409f66d9f178ad708b9ced8defbf59 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Oct 2017 14:05:12 -0700 Subject: [PATCH 1663/2177] drivers/net: ntb_netdev: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Jon Mason Cc: Dave Jiang Cc: Allen Hubbe Cc: linux-ntb@googlegroups.com Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ntb_netdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 0250aa9ae2cb..9f6f7ccd44f7 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -230,10 +230,10 @@ err: return NETDEV_TX_BUSY; } -static void ntb_netdev_tx_timer(unsigned long data) +static void ntb_netdev_tx_timer(struct timer_list *t) { - struct net_device *ndev = (struct net_device *)data; - struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev *dev = from_timer(dev, t, tx_timer); + struct net_device *ndev = dev->ndev; if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) { mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time)); @@ -269,7 +269,7 @@ static int ntb_netdev_open(struct net_device *ndev) } } - setup_timer(&dev->tx_timer, ntb_netdev_tx_timer, (unsigned long)ndev); + timer_setup(&dev->tx_timer, ntb_netdev_tx_timer, 0); netif_carrier_off(ndev); ntb_transport_link_up(dev->qp); From 1556770a1a071435ba7e67c1bc809099dc1de849 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Oct 2017 14:05:41 -0700 Subject: [PATCH 1664/2177] drivers/net: tundra: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. 
Miller" Cc: Philippe Reynes Cc: "yuval.shaia@oracle.com" Cc: Eric Dumazet Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/tundra/tsi108_eth.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index c2d15d9c0c33..0624b71ab5d4 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -164,7 +164,7 @@ static struct platform_driver tsi_eth_driver = { }, }; -static void tsi108_timed_checker(unsigned long dev_ptr); +static void tsi108_timed_checker(struct timer_list *t); #ifdef DEBUG static void dump_eth_one(struct net_device *dev) @@ -1370,7 +1370,7 @@ static int tsi108_open(struct net_device *dev) napi_enable(&data->napi); - setup_timer(&data->timer, tsi108_timed_checker, (unsigned long)dev); + timer_setup(&data->timer, tsi108_timed_checker, 0); mod_timer(&data->timer, jiffies + 1); tsi108_restart_rx(data, dev); @@ -1666,10 +1666,10 @@ regs_fail: * Thus, we have to do it using a timer. */ -static void tsi108_timed_checker(unsigned long dev_ptr) +static void tsi108_timed_checker(struct timer_list *t) { - struct net_device *dev = (struct net_device *)dev_ptr; - struct tsi108_prv_data *data = netdev_priv(dev); + struct tsi108_prv_data *data = from_timer(data, t, timer); + struct net_device *dev = data->dev; tsi108_check_phy(dev); tsi108_check_rxring(dev); From 31b102bb501bea50ebc10f4aecf9d788305b8b87 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Oct 2017 14:06:45 -0700 Subject: [PATCH 1665/2177] net: tipc: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Jon Maloy Cc: Ying Xue Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: tipc-discussion@lists.sourceforge.net Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/tipc/discover.c | 6 +++--- net/tipc/monitor.c | 6 +++--- net/tipc/node.c | 8 ++++---- net/tipc/socket.c | 10 +++++----- net/tipc/subscr.c | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 02462d67d191..92e4828c6b09 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -224,9 +224,9 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) * * Called whenever a link setup request timer associated with a bearer expires. 
*/ -static void disc_timeout(unsigned long data) +static void disc_timeout(struct timer_list *t) { - struct tipc_link_req *req = (struct tipc_link_req *)data; + struct tipc_link_req *req = from_timer(req, t, timer); struct sk_buff *skb; int max_delay; @@ -292,7 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); - setup_timer(&req->timer, disc_timeout, (unsigned long)req); + timer_setup(&req->timer, disc_timeout, 0); mod_timer(&req->timer, jiffies + req->timer_intv); b->link_req = req; *skb = skb_clone(req->buf, GFP_ATOMIC); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 9e109bb1a207..b9c32557d73c 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -578,9 +578,9 @@ void tipc_mon_get_state(struct net *net, u32 addr, read_unlock_bh(&mon->lock); } -static void mon_timeout(unsigned long m) +static void mon_timeout(struct timer_list *t) { - struct tipc_monitor *mon = (void *)m; + struct tipc_monitor *mon = from_timer(mon, t, timer); struct tipc_peer *self; int best_member_cnt = dom_size(mon->peer_cnt) - 1; @@ -623,7 +623,7 @@ int tipc_mon_create(struct net *net, int bearer_id) self->is_up = true; self->is_head = true; INIT_LIST_HEAD(&self->list); - setup_timer(&mon->timer, mon_timeout, (unsigned long)mon); + timer_setup(&mon->timer, mon_timeout, 0); mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); mod_timer(&mon->timer, jiffies + mon->timer_intv); return 0; diff --git a/net/tipc/node.c b/net/tipc/node.c index 89f8ac73bf65..009a81631280 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -153,7 +153,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete); static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void tipc_node_delete(struct tipc_node *node); -static void tipc_node_timeout(unsigned long data); +static void tipc_node_timeout(struct timer_list *t); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); static struct tipc_node *tipc_node_find(struct net *net, u32 addr); static void tipc_node_put(struct tipc_node *node); @@ -361,7 +361,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) goto exit; } tipc_node_get(n); - setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); + timer_setup(&n->timer, tipc_node_timeout, 0); n->keepalive_intv = U32_MAX; hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { @@ -500,9 +500,9 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) /* tipc_node_timeout - handle expiration of node timer */ -static void tipc_node_timeout(unsigned long data) +static void tipc_node_timeout(struct timer_list *t) { - struct tipc_node *n = (struct tipc_node *)data; + struct tipc_node *n = from_timer(n, t, timer); struct tipc_link_entry *le; struct sk_buff_head xmitq; int bearer_id; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ea61c32f6b80..5d18c0caa92b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -125,7 +125,7 @@ static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern); -static void tipc_sk_timeout(unsigned long data); +static void tipc_sk_timeout(struct timer_list *t); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int 
tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, @@ -464,7 +464,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, NAMED_H_SIZE, 0); msg_set_origport(msg, tsk->portid); - setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); + timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); sk->sk_shutdown = 0; sk->sk_backlog_rcv = tipc_sk_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; @@ -2530,14 +2530,14 @@ static int tipc_shutdown(struct socket *sock, int how) return res; } -static void tipc_sk_timeout(unsigned long data) +static void tipc_sk_timeout(struct timer_list *t) { - struct tipc_sock *tsk = (struct tipc_sock *)data; + struct sock *sk = from_timer(sk, t, sk_timer); + struct tipc_sock *tsk = tipc_sk(sk); u32 peer_port = tsk_peer_port(tsk); u32 peer_node = tsk_peer_node(tsk); u32 own_node = tsk_own_node(tsk); u32 own_port = tsk->portid; - struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct sk_buff *skb = NULL; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index be3d9e3183dc..251065dfd8df 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -133,9 +133,9 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, node); } -static void tipc_subscrp_timeout(unsigned long data) +static void tipc_subscrp_timeout(struct timer_list *t) { - struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscription *sub = from_timer(sub, t, timer); struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); @@ -303,7 +303,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, tipc_subscrb_get(subscriber); spin_unlock_bh(&subscriber->lock); - setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); + timer_setup(&sub->timer, tipc_subscrp_timeout, 0); timeout = htohl(sub->evt.s.timeout, swap); if (timeout != TIPC_WAIT_FOREVER) From 4c31606920bab227de10d2c9bf9b4dd8d4327638 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 27 Oct 2017 00:51:04 -0500 Subject: [PATCH 1666/2177] net: netrom: nr_route: refactor code in nr_add_node Code refactoring in order to make the code easier to read and maintain. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/netrom/nr_route.c | 59 ++++++++++++------------------------------- 1 file changed, 16 insertions(+), 43 deletions(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 0c59354e280e..fba4b4c8efaf 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, static void nr_remove_neigh(struct nr_neigh *); +/* re-sort the routes in quality order. */ +static void re_sort_routes(struct nr_node *nr_node, int x, int y) +{ + if (nr_node->routes[y].quality > nr_node->routes[x].quality) { + if (nr_node->which == x) + nr_node->which = y; + else if (nr_node->which == y) + nr_node->which = x; + + swap(nr_node->routes[x], nr_node->routes[y]); + } +} + /* * Add a new route to a node, and in the process add the node and the * neighbour if it is new. 
@@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, { struct nr_node *nr_node; struct nr_neigh *nr_neigh; - struct nr_route nr_route; int i, found; struct net_device *odev; @@ -251,49 +263,10 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, /* Now re-sort the routes in quality order */ switch (nr_node->count) { case 3: - if (nr_node->routes[1].quality > nr_node->routes[0].quality) { - switch (nr_node->which) { - case 0: - nr_node->which = 1; - break; - case 1: - nr_node->which = 0; - break; - } - nr_route = nr_node->routes[0]; - nr_node->routes[0] = nr_node->routes[1]; - nr_node->routes[1] = nr_route; - } - if (nr_node->routes[2].quality > nr_node->routes[1].quality) { - switch (nr_node->which) { - case 1: nr_node->which = 2; - break; - - case 2: nr_node->which = 1; - break; - - default: - break; - } - nr_route = nr_node->routes[1]; - nr_node->routes[1] = nr_node->routes[2]; - nr_node->routes[2] = nr_route; - } + re_sort_routes(nr_node, 0, 1); + re_sort_routes(nr_node, 1, 2); case 2: - if (nr_node->routes[1].quality > nr_node->routes[0].quality) { - switch (nr_node->which) { - case 0: nr_node->which = 1; - break; - - case 1: nr_node->which = 0; - break; - - default: break; - } - nr_route = nr_node->routes[0]; - nr_node->routes[0] = nr_node->routes[1]; - nr_node->routes[1] = nr_route; - } + re_sort_routes(nr_node, 0, 1); case 1: break; } From 31f74f0f4e8dfe5b30ff59d9a38a370fbb725f38 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 27 Oct 2017 00:51:08 -0500 Subject: [PATCH 1667/2177] net: netrom: nr_route: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/netrom/nr_route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index fba4b4c8efaf..75e6ba970fde 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -265,6 +265,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, case 3: re_sort_routes(nr_node, 0, 1); re_sort_routes(nr_node, 1, 2); + /* fall through */ case 2: re_sort_routes(nr_node, 0, 1); case 1: @@ -357,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n switch (i) { case 0: nr_node->routes[0] = nr_node->routes[1]; + /* fall through */ case 1: nr_node->routes[1] = nr_node->routes[2]; case 2: @@ -526,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev) switch (i) { case 0: t->routes[0] = t->routes[1]; + /* fall through */ case 1: t->routes[1] = t->routes[2]; case 2: From cdb583cfe7d79b784a74b97deb2997fccb5dcf9b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 30 Oct 2017 15:17:20 -0700 Subject: [PATCH 1668/2177] net: dsa: b53: Have b53_hdr_setup() enable/disable tagging Have b53_hdr_setup() check what kind of tagging protocol is configured (Broadcom or none) and apply the correct settings in both cases. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index fa37f501f10b..a7ca62ba27b7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -545,6 +545,7 @@ EXPORT_SYMBOL(b53_disable_port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) { + bool tag_en = !!(ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_BRCM); struct b53_device *dev = ds->priv; u8 hdr_ctl, val; u16 reg; @@ -567,7 +568,10 @@ void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) /* Enable Broadcom tags for IMP port */ b53_read8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, &hdr_ctl); - hdr_ctl |= val; + if (tag_en) + hdr_ctl |= val; + else + hdr_ctl &= ~val; b53_write8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, hdr_ctl); /* Registers below are only accessible on newer devices */ @@ -578,14 +582,20 @@ void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) * allow us to tag outgoing frames */ b53_read16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_RX_DIS, &reg); - reg &= ~BIT(port); + if (tag_en) + reg &= ~BIT(port); + else + reg |= BIT(port); b53_write16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_RX_DIS, reg); /* Enable transmission of Broadcom tags from the switch (CPU RX) to * allow delivering frames to the per-port net_devices */ b53_read16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_TX_DIS, &reg); - reg &= ~BIT(port); + if (tag_en) + reg &= ~BIT(port); + else + reg |= BIT(port); b53_write16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_TX_DIS, reg); } EXPORT_SYMBOL(b53_brcm_hdr_setup); From da13c59b9936dfedcf9f2203bd29fbf83ad672bf Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Mon, 30 Oct 2017 19:38:52 -0400 Subject: [PATCH 1669/2177] net: display hw address of source machine during ipv6 DAD failure This patch updates the error messages displayed in the kernel log to include the hw address of the source machine that caused IPv6 duplicate address detection failures. Examples: a) When we receive a NA packet from another machine advertising our address: ICMPv6: NA: 34:ab:cd:56:11:e8 advertised our address 2001:db8:: on eth0! b) When we detect DAD failure during address assignment to an interface: IPv6: eth0: IPv6 duplicate address 2001:db8:: used by 34:ab:cd:56:11:e8 detected! v2: Changed %pI6 to %pI6c in ndisc_recv_na() Changed the v6 address in the commit message to 2001:db8:: Suggested-by: Igor Lubashev Signed-off-by: Vishwanath Pai Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 2 +- net/ipv6/addrconf.c | 6 +++--- net/ipv6/ndisc.c | 9 +++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 15b5ffd7253d..2a616ea53956 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -208,7 +208,7 @@ void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); -void addrconf_dad_failure(struct inet6_ifaddr *ifp); +void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, const struct in6_addr *src_addr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5a8a10229a07..cfa374c8b54c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1987,7 +1987,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) return err; } -void addrconf_dad_failure(struct inet6_ifaddr *ifp) +void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(ifp->idev->dev); @@ -1997,8 +1997,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) return; } - net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", - ifp->idev->dev->name, &ifp->addr); + net_info_ratelimited("%s: IPv6 duplicate address %pI6c used by %pM detected!\n", + ifp->idev->dev->name, &ifp->addr, eth_hdr(skb)->h_source); spin_lock_bh(&ifp->lock); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 266a530414d7..f9c3ffe04382 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -46,6 +46,7 @@ #endif #include +#include #include #include #include @@ -822,7 +823,7 @@ have_ifp: * who is doing DAD * so fail our DAD process */ - addrconf_dad_failure(ifp); + addrconf_dad_failure(skb, ifp); return; } else { /* @@ -975,7 +976,7 @@ static void ndisc_recv_na(struct sk_buff *skb) if (ifp) { if (skb->pkt_type != PACKET_LOOPBACK && (ifp->flags & IFA_F_TENTATIVE)) { - addrconf_dad_failure(ifp); + addrconf_dad_failure(skb, ifp); return; } /* What should we make now? The advertisement @@ -989,8 +990,8 @@ static void ndisc_recv_na(struct sk_buff *skb) */ if (skb->pkt_type != PACKET_LOOPBACK) ND_PRINTK(1, warn, - "NA: someone advertises our address %pI6 on %s!\n", - &ifp->addr, ifp->idev->dev->name); + "NA: %pM advertised our address %pI6c on %s!\n", + eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); in6_ifa_put(ifp); return; } From 2e39748a4231a893f057567e9b880ab34ea47aef Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 30 Oct 2017 19:39:56 -0700 Subject: [PATCH 1670/2177] bpf: document answers to common questions about BPF to address common misconceptions about what BPF is and what it's not add short BPF Q&A that clarifies core BPF design principles and answers some common questions. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- Documentation/bpf/bpf_design_QA.txt | 156 ++++++++++++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 157 insertions(+) create mode 100644 Documentation/bpf/bpf_design_QA.txt diff --git a/Documentation/bpf/bpf_design_QA.txt b/Documentation/bpf/bpf_design_QA.txt new file mode 100644 index 000000000000..f3e458a0bb2f --- /dev/null +++ b/Documentation/bpf/bpf_design_QA.txt @@ -0,0 +1,156 @@ +BPF extensibility and applicability to networking, tracing, security +in the linux kernel and several user space implementations of BPF +virtual machine led to a number of misunderstanding on what BPF actually is. +This short QA is an attempt to address that and outline a direction +of where BPF is heading long term. + +Q: Is BPF a generic instruction set similar to x64 and arm64? +A: NO. + +Q: Is BPF a generic virtual machine ? +A: NO. + +BPF is generic instruction set _with_ C calling convention. + +Q: Why C calling convention was chosen? +A: Because BPF programs are designed to run in the linux kernel + which is written in C, hence BPF defines instruction set compatible + with two most used architectures x64 and arm64 (and takes into + consideration important quirks of other architectures) and + defines calling convention that is compatible with C calling + convention of the linux kernel on those architectures. + +Q: can multiple return values be supported in the future? +A: NO. BPF allows only register R0 to be used as return value. + +Q: can more than 5 function arguments be supported in the future? +A: NO. BPF calling convention only allows registers R1-R5 to be used + as arguments. BPF is not a standalone instruction set. + (unlike x64 ISA that allows msft, cdecl and other conventions) + +Q: can BPF programs access instruction pointer or return address? +A: NO. + +Q: can BPF programs access stack pointer ? +A: NO. Only frame pointer (register R10) is accessible. + From compiler point of view it's necessary to have stack pointer. + For example LLVM defines register R11 as stack pointer in its + BPF backend, but it makes sure that generated code never uses it. + +Q: Does C-calling convention diminishes possible use cases? +A: YES. BPF design forces addition of major functionality in the form + of kernel helper functions and kernel objects like BPF maps with + seamless interoperability between them. It lets kernel call into + BPF programs and programs call kernel helpers with zero overhead. + As all of them were native C code. That is particularly the case + for JITed BPF programs that are indistinguishable from + native kernel C code. + +Q: Does it mean that 'innovative' extensions to BPF code are disallowed? +A: Soft yes. At least for now until BPF core has support for + bpf-to-bpf calls, indirect calls, loops, global variables, + jump tables, read only sections and all other normal constructs + that C code can produce. + +Q: Can loops be supported in a safe way? +A: It's not clear yet. BPF developers are trying to find a way to + support bounded loops where the verifier can guarantee that + the program terminates in less than 4096 instructions. + +Q: How come LD_ABS and LD_IND instruction are present in BPF whereas + C code cannot express them and has to use builtin intrinsics? +A: This is artifact of compatibility with classic BPF. Modern + networking code in BPF performs better without them. + See 'direct packet access'. + +Q: It seems not all BPF instructions are one-to-one to native CPU. + For example why BPF_JNE and other compare and jumps are not cpu-like? 
+A: This was necessary to avoid introducing flags into ISA which are + impossible to make generic and efficient across CPU architectures. + +Q: why BPF_DIV instruction doesn't map to x64 div? +A: Because if we picked one-to-one relationship to x64 it would have made + it more complicated to support on arm64 and other archs. Also it + needs div-by-zero runtime check. + +Q: why there is no BPF_SDIV for signed divide operation? +A: Because it would be rarely used. llvm errors in such case and + prints a suggestion to use unsigned divide instead + +Q: Why BPF has implicit prologue and epilogue? +A: Because architectures like sparc have register windows and in general + there are enough subtle differences between architectures, so naive + store return address into stack won't work. Another reason is BPF has + to be safe from division by zero (and legacy exception path + of LD_ABS insn). Those instructions need to invoke epilogue and + return implicitly. + +Q: Why BPF_JLT and BPF_JLE instructions were not introduced in the beginning? +A: Because classic BPF didn't have them and BPF authors felt that compiler + workaround would be acceptable. Turned out that programs lose performance + due to lack of these compare instructions and they were added. + These two instructions is a perfect example what kind of new BPF + instructions are acceptable and can be added in the future. + These two already had equivalent instructions in native CPUs. + New instructions that don't have one-to-one mapping to HW instructions + will not be accepted. + +Q: BPF 32-bit subregisters have a requirement to zero upper 32-bits of BPF + registers which makes BPF inefficient virtual machine for 32-bit + CPU architectures and 32-bit HW accelerators. Can true 32-bit registers + be added to BPF in the future? +A: NO. The first thing to improve performance on 32-bit archs is to teach + LLVM to generate code that uses 32-bit subregisters. Then second step + is to teach verifier to mark operations where zero-ing upper bits + is unnecessary. Then JITs can take advantage of those markings and + drastically reduce size of generated code and improve performance. + +Q: Does BPF have a stable ABI? +A: YES. BPF instructions, arguments to BPF programs, set of helper + functions and their arguments, recognized return codes are all part + of ABI. However when tracing programs are using bpf_probe_read() helper + to walk kernel internal datastructures and compile with kernel + internal headers these accesses can and will break with newer + kernels. The union bpf_attr -> kern_version is checked at load time + to prevent accidentally loading kprobe-based bpf programs written + for a different kernel. Networking programs don't do kern_version check. + +Q: How much stack space a BPF program uses? +A: Currently all program types are limited to 512 bytes of stack + space, but the verifier computes the actual amount of stack used + and both interpreter and most JITed code consume necessary amount. + +Q: Can BPF be offloaded to HW? +A: YES. BPF HW offload is supported by NFP driver. + +Q: Does classic BPF interpreter still exist? +A: NO. Classic BPF programs are converted into extend BPF instructions. + +Q: Can BPF call arbitrary kernel functions? +A: NO. BPF programs can only call a set of helper functions which + is defined for every program type. + +Q: Can BPF overwrite arbitrary kernel memory? +A: NO. Tracing bpf programs can _read_ arbitrary memory with bpf_probe_read() + and bpf_probe_read_str() helpers. 
Networking programs cannot read + arbitrary memory, since they don't have access to these helpers. + Programs can never read or write arbitrary memory directly. + +Q: Can BPF overwrite arbitrary user memory? +A: Sort-of. Tracing BPF programs can overwrite the user memory + of the current task with bpf_probe_write_user(). Every time such + program is loaded the kernel will print warning message, so + this helper is only useful for experiments and prototypes. + Tracing BPF programs are root only. + +Q: When bpf_trace_printk() helper is used the kernel prints nasty + warning message. Why is that? +A: This is done to nudge program authors into better interfaces when + programs need to pass data to user space. Like bpf_perf_event_output() + can be used to efficiently stream data via perf ring buffer. + BPF maps can be used for asynchronous data sharing between kernel + and user space. bpf_trace_printk() should only be used for debugging. + +Q: Can BPF functionality such as new program or map types, new + helpers, etc be added out of kernel module code? +A: NO. diff --git a/MAINTAINERS b/MAINTAINERS index c4d21b302409..66471f7d77d4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2713,6 +2713,7 @@ L: linux-kernel@vger.kernel.org S: Supported F: arch/x86/net/bpf_jit* F: Documentation/networking/filter.txt +F: Documentation/bpf/ F: include/linux/bpf* F: include/linux/filter.h F: include/uapi/linux/bpf* From 516b29edc3b3324016ffe90d661463b9ed5ec473 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 30 Oct 2017 21:42:57 -0700 Subject: [PATCH 1671/2177] net: phy: Cosmetic fixes to phylink/sfp/sfp-bus.c Perform a number of stylistic changes to phylink.c, sfp.c and sfp-bus.c: - align with netdev-style comments - align function arguments to the opening parenthesis - remove blank lines - fixup a few lines over 80 columns Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 13 ++++++------- drivers/net/phy/sfp-bus.c | 11 +++-------- drivers/net/phy/sfp.c | 27 +++++++++++++-------------- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index bcb4755bcd95..05c8f1c10e36 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -357,7 +357,7 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat * 1 1 0 1 TX */ static void phylink_resolve_flow(struct phylink *pl, - struct phylink_link_state *state) + struct phylink_link_state *state) { int new_pause = 0; @@ -506,7 +506,8 @@ static int phylink_register_sfp(struct phylink *pl, struct device_node *np) } struct phylink *phylink_create(struct net_device *ndev, struct device_node *np, - phy_interface_t iface, const struct phylink_mac_ops *ops) + phy_interface_t iface, + const struct phylink_mac_ops *ops) { struct phylink *pl; int ret; @@ -585,7 +586,7 @@ void phylink_phy_change(struct phy_device *phydev, bool up, bool do_carrier) phylink_run_resolve(pl); netdev_dbg(pl->netdev, "phy link %s %s/%s/%s\n", up ? 
"up" : "down", - phy_modes(phydev->interface), + phy_modes(phydev->interface), phy_speed_to_str(phydev->speed), phy_duplex_to_str(phydev->duplex)); } @@ -823,7 +824,7 @@ static void phylink_get_ksettings(const struct phylink_link_state *state, } int phylink_ethtool_ksettings_get(struct phylink *pl, - struct ethtool_link_ksettings *kset) + struct ethtool_link_ksettings *kset) { struct phylink_link_state link_state; @@ -870,7 +871,7 @@ int phylink_ethtool_ksettings_get(struct phylink *pl, EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_get); int phylink_ethtool_ksettings_set(struct phylink *pl, - const struct ethtool_link_ksettings *kset) + const struct ethtool_link_ksettings *kset) { struct ethtool_link_ksettings our_kset; struct phylink_link_state config; @@ -1337,8 +1338,6 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) } EXPORT_SYMBOL_GPL(phylink_mii_ioctl); - - static int phylink_sfp_module_insert(void *upstream, const struct sfp_eeprom_id *id) { diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 5cb5384697ea..8a1b1f4c1b7c 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -26,7 +26,6 @@ struct sfp_bus { bool started; }; - int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) { @@ -208,7 +207,6 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, } EXPORT_SYMBOL_GPL(sfp_parse_support); - static LIST_HEAD(sfp_buses); static DEFINE_MUTEX(sfp_mutex); @@ -295,7 +293,6 @@ static void sfp_unregister_bus(struct sfp_bus *bus) bus->registered = false; } - int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo) { if (!bus->registered) @@ -305,7 +302,7 @@ int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo) EXPORT_SYMBOL_GPL(sfp_get_module_info); int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, - u8 *data) + u8 *data) { if (!bus->registered) return -ENOIOCTLCMD; @@ -330,8 +327,8 @@ void sfp_upstream_stop(struct sfp_bus *bus) EXPORT_SYMBOL_GPL(sfp_upstream_stop); struct sfp_bus *sfp_register_upstream(struct device_node *np, - struct net_device *ndev, void *upstream, - const struct sfp_upstream_ops *ops) + struct net_device *ndev, void *upstream, + const struct sfp_upstream_ops *ops) { struct sfp_bus *bus = sfp_bus_get(np); int ret = 0; @@ -368,7 +365,6 @@ void sfp_unregister_upstream(struct sfp_bus *bus) } EXPORT_SYMBOL_GPL(sfp_unregister_upstream); - /* Socket driver entry points */ int sfp_add_phy(struct sfp_bus *bus, struct phy_device *phydev) { @@ -395,7 +391,6 @@ void sfp_remove_phy(struct sfp_bus *bus) } EXPORT_SYMBOL_GPL(sfp_remove_phy); - void sfp_link_up(struct sfp_bus *bus) { const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index baee371bf767..448465da0422 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -88,15 +88,12 @@ static const enum gpiod_flags gpio_flags[] = { #define T_PROBE_INIT msecs_to_jiffies(300) #define T_PROBE_RETRY msecs_to_jiffies(100) -/* - * SFP modules appear to always have their PHY configured for bus address +/* SFP modules appear to always have their PHY configured for bus address * 0x56 (which with mdio-i2c, translates to a PHY address of 22). */ #define SFP_PHY_ADDR 22 -/* - * Give this long for the PHY to reset. - */ +/* Give this long for the PHY to reset. 
*/ #define T_PHY_RESET_MS 50 static DEFINE_MUTEX(sfp_mutex); @@ -150,10 +147,10 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state) /* If the module is present, drive the signals */ if (sfp->gpio[GPIO_TX_DISABLE]) gpiod_direction_output(sfp->gpio[GPIO_TX_DISABLE], - state & SFP_F_TX_DISABLE); + state & SFP_F_TX_DISABLE); if (state & SFP_F_RATE_SELECT) gpiod_direction_output(sfp->gpio[GPIO_RATE_SELECT], - state & SFP_F_RATE_SELECT); + state & SFP_F_RATE_SELECT); } else { /* Otherwise, let them float to the pull-ups */ if (sfp->gpio[GPIO_TX_DISABLE]) @@ -164,7 +161,7 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state) } static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr, - void *buf, size_t len) + void *buf, size_t len) { struct i2c_msg msgs[2]; int ret; @@ -186,7 +183,7 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr, } static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf, - size_t len) + size_t len) { return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len); } @@ -220,7 +217,6 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) return 0; } - /* Interface */ static unsigned int sfp_get_state(struct sfp *sfp) { @@ -295,7 +291,8 @@ static void sfp_sm_next(struct sfp *sfp, unsigned int state, sfp_sm_set_timer(sfp, timeout); } -static void sfp_sm_ins_next(struct sfp *sfp, unsigned int state, unsigned int timeout) +static void sfp_sm_ins_next(struct sfp *sfp, unsigned int state, + unsigned int timeout) { sfp->sm_mod_state = state; sfp_sm_set_timer(sfp, timeout); @@ -370,7 +367,8 @@ static void sfp_sm_link_check_los(struct sfp *sfp) static void sfp_sm_fault(struct sfp *sfp, bool warn) { if (sfp->sm_retries && !--sfp->sm_retries) { - dev_err(sfp->dev, "module persistently indicates fault, disabling\n"); + dev_err(sfp->dev, + "module persistently indicates fault, disabling\n"); sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0); } else { if (warn) @@ -461,7 +459,8 @@ static int sfp_sm_mod_probe(struct sfp *sfp) memcpy(date, sfp->id.ext.datecode, 8); date[8] = '\0'; - dev_info(sfp->dev, "module %s %s rev %s sn %s dc %s\n", vendor, part, rev, sn, date); + dev_info(sfp->dev, "module %s %s rev %s sn %s dc %s\n", + vendor, part, rev, sn, date); /* We only support SFP modules, not the legacy GBIC modules. */ if (sfp->id.base.phys_id != SFP_PHYS_ID_SFP || @@ -651,7 +650,7 @@ static int sfp_module_info(struct sfp *sfp, struct ethtool_modinfo *modinfo) } static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee, - u8 *data) + u8 *data) { unsigned int first, last, len; int ret; From 54a2fc628a4aa172c62c34ef466b31c2c2e8ce9c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 30 Oct 2017 21:42:58 -0700 Subject: [PATCH 1672/2177] net: phy: Fix sfp.c build against GPIO definitions include/gpio.h does not contain the references we want, we should be including linux/gpio/consumer.h instead. Fixes: 73970055450e ("sfp: add SFP module support") Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/sfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 448465da0422..e381811e5f11 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include From 687d4f2bea322077cc70be0555ad2a1cd48b812a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 31 Oct 2017 10:08:23 +0000 Subject: [PATCH 1673/2177] net: ethernet: slicoss: remove redundant initialization of idx Variable idx is being initialized and later on over-written by a new value in a do-loop without the initial value ever being read. Hence the initialization is redundant and can be removed. Cleans up clang warning: drivers/net/ethernet/alacritech/slicoss.c:358:15: warning: Value stored to 'idx' during its initialization is never read Signed-off-by: Colin Ian King Acked-by: Lino Sanfilippo Signed-off-by: David S. Miller --- drivers/net/ethernet/alacritech/slicoss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c index 15a8096c60df..0b60921c392f 100644 --- a/drivers/net/ethernet/alacritech/slicoss.c +++ b/drivers/net/ethernet/alacritech/slicoss.c @@ -355,10 +355,10 @@ static void slic_xmit_complete(struct slic_device *sdev) { struct slic_tx_queue *txq = &sdev->txq; struct net_device *dev = sdev->netdev; - unsigned int idx = txq->done_idx; struct slic_tx_buffer *buff; unsigned int frames = 0; unsigned int bytes = 0; + unsigned int idx; /* Limit processing to SLIC_MAX_TX_COMPLETIONS frames to avoid that new * completions during processing keeps the loop running endlessly. From f02b2320b27c16b644691267ee3b5c110846f49e Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 31 Oct 2017 18:25:37 +0800 Subject: [PATCH 1674/2177] ppp: Destroy the mutex when cleanup The mutex_destroy only makes sense when DEBUG_MUTEX is enabled. For good readability, it's better to invoke it in the exit func when the init func invokes mutex_init. Signed-off-by: Gao Feng Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index af7f93ed1487..44891335f9af 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -960,6 +960,7 @@ static __net_exit void ppp_exit_net(struct net *net) unregister_netdevice_many(&list); rtnl_unlock(); + mutex_destroy(&pn->all_ppp_mutex); idr_destroy(&pn->units_idr); } From 85cf7a62924927119bce5e65b008fcb8ed455914 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 31 Oct 2017 12:01:47 +0000 Subject: [PATCH 1675/2177] net: hso: remove redundant unused variable dev The pointer dev is being assigned but is never used, hence it is redundant and can be removed. Cleans up clang warning: drivers/net/usb/hso.c:2280:2: warning: Value stored to 'dev' is never read Signed-off-by: Colin Ian King Reviewed-by: Johan Hovold Signed-off-by: David S. 
Miller --- drivers/net/usb/hso.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index d7a3379ea668..42d7edcc3106 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2263,7 +2263,6 @@ static void hso_serial_common_free(struct hso_serial *serial) static int hso_serial_common_create(struct hso_serial *serial, int num_urbs, int rx_size, int tx_size) { - struct device *dev; int minor; int i; @@ -2277,7 +2276,6 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs, serial->parent->dev = tty_port_register_device_attr(&serial->port, tty_drv, minor, &serial->parent->interface->dev, serial->parent, hso_serial_dev_groups); - dev = serial->parent->dev; /* fill in specific data for later use */ serial->minor = minor; From dc82673f0cb5175dcec87041441cdb107932cd07 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 31 Oct 2017 13:28:16 +0000 Subject: [PATCH 1676/2177] sctp: fix error return code in sctp_send_add_streams() Fix to return error code -ENOMEM from the sctp_make_strreset_addstrm() error handling case instead of 0. 'retval' can be overwritten to 0 after the call to sctp_stream_alloc_out(). Fixes: e090abd0d81c ("sctp: factor out stream->out allocation") Signed-off-by: Wei Yongjun Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/stream.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 03764fc2b652..b8c8cabb1a58 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -393,7 +393,7 @@ int sctp_send_add_streams(struct sctp_association *asoc, { struct sctp_stream *stream = &asoc->stream; struct sctp_chunk *chunk = NULL; - int retval = -ENOMEM; + int retval; __u32 outcnt, incnt; __u16 out, in; @@ -425,8 +425,10 @@ int sctp_send_add_streams(struct sctp_association *asoc, } chunk = sctp_make_strreset_addstrm(asoc, out, in); - if (!chunk) + if (!chunk) { + retval = -ENOMEM; goto out; + } asoc->strreset_chunk = chunk; sctp_chunk_hold(asoc->strreset_chunk); From 032a480202245e384fdbcac92da720d697384d8e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 31 Oct 2017 14:32:38 +0100 Subject: [PATCH 1677/2177] ipv4: fix validate_source for VRF setup David reported breakages of VRF scenarios due to the commit 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled."): the local addresses based test is too strict when VRFs are in place. With this change we fall back to a full lookup when custom fib rules are in place, so that we address the VRF use case and possibly other similar issues in non-trivial setups. v1 -> v2: - fix build breakage when CONFIG_IP_MULTIPLE_TABLES is not defined, reported by the kbuild test robot Reported-by: David Ahern Fixes: 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled.") Signed-off-by: Paolo Abeni Acked-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/fib_frontend.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f02819134ba2..f52d27a422c3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -73,6 +73,11 @@ fail: fib_free_table(main_table); return -ENOMEM; } + +static bool fib4_has_custom_rules(struct net *net) +{ + return false; +} #else struct fib_table *fib_new_table(struct net *net, u32 id) @@ -128,6 +133,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } return NULL; } + +static bool fib4_has_custom_rules(struct net *net) +{ + return net->ipv4.fib_has_custom_rules; +} #endif /* CONFIG_IP_MULTIPLE_TABLES */ static void fib_replace_table(struct net *net, struct fib_table *old, @@ -405,10 +415,12 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { if (IN_DEV_ACCEPT_LOCAL(idev)) goto ok; - /* if no local routes are added from user space we can check - * for local addresses looking-up the ifaddr table + /* with custom local routes in place, checking local addresses + * only will be too optimistic, with custom rules, checking + * local addresses only can be too strict, e.g. due to vrf */ - if (net->ipv4.fib_has_custom_local_routes) + if (net->ipv4.fib_has_custom_local_routes || + fib4_has_custom_rules(net)) goto full_check; if (inet_lookup_ifaddr_rcu(net, src)) return -EINVAL; From e0337f92f6f36040ba91215bbe47203c410f472f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 31 Oct 2017 14:23:24 +0000 Subject: [PATCH 1678/2177] net: macb: remove redundant assignment to variable work_done Variable work_done is set to zero and this value is never read, instead it is set to another value a few statements later. Remove the redundant assignment. Cleans up clang warning: drivers/net/ethernet/cadence/macb_main.c:1221:2: warning: Value stored to 'work_done' is never read Signed-off-by: Colin Ian King Tested-by: Alexander Dahl Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6df2cad61647..5dafcde67e45 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1218,8 +1218,6 @@ static int macb_poll(struct napi_struct *napi, int budget) status = macb_readl(bp, RSR); macb_writel(bp, RSR, status); - work_done = 0; - netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n", (unsigned long)status, budget); From 6978729fbd02ced7060f15b9f7e1ba5b39d9bbf3 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 31 Oct 2017 14:29:47 +0000 Subject: [PATCH 1679/2177] sfc: support rx-fcs and rx-all Ethernet FCS inclusion (rx-fcs) is supported on EF10 NICs, conditional on a firmware capability bit (MC_CMD_GET_CAPABILITIES_OUT_RX_INCLUDE_FCS). To receive frames with bad FCS (rx-all) we just don't return the discard flag EFX_RX_PKT_DISCARD from efx_ef10_handle_rx_event_errors() or efx_farch_handle_rx_not_ok(). Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/ef10.c | 16 ++++++++++++---- drivers/net/ethernet/sfc/efx.c | 14 ++++++++++---- drivers/net/ethernet/sfc/farch.c | 4 ++++ drivers/net/ethernet/sfc/mcdi_port.c | 4 ++++ 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 13f72f5b18d2..19a91881fbf9 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -674,6 +674,10 @@ static int efx_ef10_probe(struct efx_nic *efx) efx->rx_packet_len_offset = ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE; + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_INCLUDE_FCS_LBN)) + efx->net_dev->hw_features |= NETIF_F_RXFCS; + rc = efx_mcdi_port_get_number(efx); if (rc < 0) goto fail5; @@ -3199,11 +3203,15 @@ static u16 efx_ef10_handle_rx_event_errors(struct efx_channel *channel, const efx_qword_t *event) { struct efx_nic *efx = channel->efx; + bool handled = false; if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)) { - if (!efx->loopback_selftest) - channel->n_rx_eth_crc_err += n_packets; - return EFX_RX_PKT_DISCARD; + if (!(efx->net_dev->features & NETIF_F_RXALL)) { + if (!efx->loopback_selftest) + channel->n_rx_eth_crc_err += n_packets; + return EFX_RX_PKT_DISCARD; + } + handled = true; } if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR)) { if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN && @@ -3274,7 +3282,7 @@ static u16 efx_ef10_handle_rx_event_errors(struct efx_channel *channel, return 0; } - WARN_ON(1); /* No error bits were recognised */ + WARN_ON(!handled); /* No error bits were recognised */ return 0; } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 8fdcf7aaf997..6668e371405c 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2315,8 +2315,11 @@ static int efx_set_features(struct net_device *net_dev, netdev_features_t data) return rc; } - /* If Rx VLAN filter is changed, update filters via mac_reconfigure */ - if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) { + /* If Rx VLAN filter is changed, update filters via mac_reconfigure. + * If rx-fcs is changed, mac_reconfigure updates that too. + */ + if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_RXFCS)) { /* efx_set_rx_mode() will schedule MAC work to update filters * when a new features are finally set in net_dev. */ @@ -3242,7 +3245,7 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) /* Determine netdevice features */ net_dev->features |= (efx->type->offload_features | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_RXCSUM); + NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL); if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) net_dev->features |= NETIF_F_TSO6; /* Check whether device supports TSO */ @@ -3253,7 +3256,10 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | NETIF_F_RXCSUM); - net_dev->hw_features = net_dev->features & ~efx->fixed_features; + net_dev->hw_features |= net_dev->features & ~efx->fixed_features; + + /* Disable receiving frames with bad FCS, by default. */ + net_dev->features &= ~NETIF_F_RXALL; /* Disable VLAN filtering by default. It may be enforced if * the feature is fixed (i.e. 
VLAN filters are required to diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index ba45150f53c7..6608dfe455b1 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -927,6 +927,10 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue, } #endif + if (efx->net_dev->features & NETIF_F_RXALL) + /* don't discard frame for CRC error */ + rx_ev_eth_crc_err = false; + /* The frame must be discarded if any of these are true. */ return (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_tobe_disc | rx_ev_pause_frm) ? diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index c7407d129c7d..6e1f282b2976 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -1029,6 +1029,10 @@ int efx_mcdi_set_mac(struct efx_nic *efx) MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, + SET_MAC_IN_FLAG_INCLUDE_FCS, + !!(efx->net_dev->features & NETIF_F_RXFCS)); + switch (efx->wanted_fc) { case EFX_FC_RX | EFX_FC_TX: fcntl = MC_CMD_FCNTL_BIDIR; From 855a34b1b4893c7d9fd39e3f61834354f2149ba7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 31 Oct 2017 16:37:18 +0200 Subject: [PATCH 1680/2177] MAINTAINERS: Add lib/net_utils.c to NETWORKING (general) It looks like the best place in MAINTAINERS data base to cover this orphaned module. Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 66471f7d77d4..9a24f56e0451 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9438,6 +9438,7 @@ F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h F: include/uapi/linux/net_namespace.h F: tools/testing/selftests/net/ +F: lib/net_utils.c F: lib/random32.c NETWORKING [IPSEC] From a95157d72d638913422773ea6faa384ebe4d366d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 31 Oct 2017 14:37:55 +0000 Subject: [PATCH 1681/2177] net: thunderx: remove a couple of redundant assignments The assignment to pointer msg is redundant as it is never read, so remove msg. Also remove the first assignment to qset as this is not read before the next re-assignment of a new value to qset in the for-loop. Cleans up two clang warnings: drivers/net/ethernet/cavium/thunder/nic_main.c:589:2: warning: Value stored to 'msg' is never read drivers/net/ethernet/cavium/thunder/nic_main.c:611:2: warning: Value stored to 'qset' is never read Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index fb770b0182d3..988c06a28e5e 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -584,9 +584,6 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) static void nic_send_rss_size(struct nicpf *nic, int vf) { union nic_mbx mbx = {}; - u64 *msg; - - msg = (u64 *)&mbx; mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size; @@ -608,7 +605,6 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) rssi_base = nic->rssi_base[cfg->vf_id] + cfg->tbl_offset; rssi = rssi_base; - qset = cfg->vf_id; for (; rssi < (rssi_base + cfg->tbl_len); rssi++) { u8 svf = cfg->ind_tbl[idx] >> 3; From 909fb9ae322154648d7354bcf7cdd0f705fb67f1 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 31 Oct 2017 15:48:00 +0100 Subject: [PATCH 1682/2177] net: dsa: lan9303: Transmit using ALR when unicast lan9303_xmit_use_arl() introduced in the previous patch set is wrong. The chip floods broadcast and unknown multicast frames. The effect is that broadcasts and multicasts are duplicated on egress. It is not possible to configure the chip to direct unknown multicasts to the CPU port only. This means that only unicast frames can be transmitted using ALR lookup. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/tag_lan9303.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 64092325aac3..537ca991fafe 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -46,7 +46,7 @@ /* Decide whether to transmit using ALR lookup, or transmit directly to * port using tag. ALR learning is performed only when using ALR lookup. - * If the two external ports are bridged and the packet is not STP BPDU, + * If the two external ports are bridged and the frame is unicast, + * then use ALR lookup to allow ALR learning on CPU port. * Otherwise transmit directly to port with STP state override. * See also: lan9303_separate_ports() and lan9303.pdf 6.4.10.1 @@ -55,7 +55,7 @@ static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr) { struct lan9303 *chip = dp->ds->priv; - return chip->is_bridged && !ether_addr_equal(dest_addr, eth_stp_addr); + return chip->is_bridged && !is_multicast_ether_addr(dest_addr); } static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) From e9292f2c03851ef81bef38579a0ee9c42140e586 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 31 Oct 2017 15:48:01 +0100 Subject: [PATCH 1683/2177] net: dsa: lan9303: Add STP ALR entry on port 0 STP BPDUs arriving on user ports must be sent to the CPU port only, for processing by the SW bridge. Add an ALR entry with STP state override to fix that. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303-core.c | 2 ++ include/linux/dsa/lan9303.h | 2 ++ net/dsa/tag_lan9303.c | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 4c412bd52319..c4afc8f1a66d 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -773,6 +773,7 @@ static int lan9303_separate_ports(struct lan9303 *chip) { int ret; + lan9303_alr_del_port(chip, eth_stp_addr, 0); ret = lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_MIRROR, LAN9303_SWE_PORT_MIRROR_SNIFFER_PORT0 | LAN9303_SWE_PORT_MIRROR_MIRRORED_PORT1 | @@ -797,6 +798,7 @@ static void lan9303_bridge_ports(struct lan9303 *chip) lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_STATE, chip->swe_port_state); + lan9303_alr_add_port(chip, eth_stp_addr, 0, true); } static int lan9303_handle_reset(struct lan9303 *chip) diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index 05d8d136baab..b2110e69630f 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -34,3 +34,5 @@ struct lan9303 { **/ struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS]; }; + +#define eth_stp_addr eth_reserved_addr_base diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 537ca991fafe..18f45cd9f625 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -42,7 +42,6 @@ #define LAN9303_TAG_LEN 4 # define LAN9303_TAG_TX_USE_ALR BIT(3) # define LAN9303_TAG_TX_STP_OVERRIDE BIT(4) -#define eth_stp_addr eth_reserved_addr_base /* Decide whether to transmit using ALR lookup, or transmit directly to * port using tag. ALR learning is performed only when using ALR lookup. From f849772915e5501c25f69f4815daa8cd86194634 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 31 Oct 2017 15:48:02 +0100 Subject: [PATCH 1684/2177] net: dsa: lan9303: lan9303_rcv set skb->offload_fwd_mark The chip floods broadcast and unknown multicast frames. On receive, set skb->offload_fwd_mark to prevent the SW from flooding to the same ports. One exception: Because the ALR is set up to forward STP BPDUs only to CPU, the SW bridge should flood STP BPDUs if local STP is not enabled. This is achieved by not setting skb->offload_fwd_mark on STP BPDUs. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/tag_lan9303.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 18f45cd9f625..e526c8967b98 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -126,6 +126,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, skb_pull_rcsum(skb, 2 + 2); memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), 2 * ETH_ALEN); + skb->offload_fwd_mark = !ether_addr_equal(skb->data - ETH_HLEN, + eth_stp_addr); return skb; } From 0ba9a3b65c794982f4dc7fcdc8110c327359916b Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 1 Nov 2017 08:52:59 +0530 Subject: [PATCH 1685/2177] cxgb4: save additional filter tuple field shifts in tp_params Save additional filter tuple field shifts in tp_params based on configured filter tuple fields. Also, save the combined filter tuple mask based on configured filter tuple fields. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 12 ++++++++++-- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 18 ++++++++++++++++-- drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 3 +++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a57761b28edc..e2c75b73595f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -287,10 +287,18 @@ struct tp_params { * places we store their offsets here, or a -1 if the field isn't * present. */ - int vlan_shift; - int vnic_shift; + int fcoe_shift; int port_shift; + int vnic_shift; + int vlan_shift; + int tos_shift; int protocol_shift; + int ethertype_shift; + int macmatch_shift; + int matchtype_shift; + int frag_shift; + + u64 hash_filter_mask; }; struct vpd_params { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index c289ca1efc1b..efe9d3a20135 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -8816,11 +8816,21 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok) * shift positions of several elements of the Compressed Filter Tuple * for this adapter which we need frequently ... */ - adap->params.tp.vlan_shift = t4_filter_field_shift(adap, VLAN_F); - adap->params.tp.vnic_shift = t4_filter_field_shift(adap, VNIC_ID_F); + adap->params.tp.fcoe_shift = t4_filter_field_shift(adap, FCOE_F); adap->params.tp.port_shift = t4_filter_field_shift(adap, PORT_F); + adap->params.tp.vnic_shift = t4_filter_field_shift(adap, VNIC_ID_F); + adap->params.tp.vlan_shift = t4_filter_field_shift(adap, VLAN_F); + adap->params.tp.tos_shift = t4_filter_field_shift(adap, TOS_F); adap->params.tp.protocol_shift = t4_filter_field_shift(adap, PROTOCOL_F); + adap->params.tp.ethertype_shift = t4_filter_field_shift(adap, + ETHERTYPE_F); + adap->params.tp.macmatch_shift = t4_filter_field_shift(adap, + MACMATCH_F); + adap->params.tp.matchtype_shift = t4_filter_field_shift(adap, + MPSHITTYPE_F); + adap->params.tp.frag_shift = t4_filter_field_shift(adap, + FRAGMENTATION_F); /* If TP_INGRESS_CONFIG.VNID == 0, then TP_VLAN_PRI_MAP.VNIC_ID * represents the presence of an Outer VLAN instead of a VNIC ID. @@ -8828,6 +8838,10 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok) if ((adap->params.tp.ingress_config & VNIC_F) == 0) adap->params.tp.vnic_shift = -1; + v = t4_read_reg(adap, LE_3_DB_HASH_MASK_GEN_IPV4_T6_A); + adap->params.tp.hash_filter_mask = v; + v = t4_read_reg(adap, LE_4_DB_HASH_MASK_GEN_IPV4_T6_A); + adap->params.tp.hash_filter_mask |= ((u64)v << 32); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 483fb7644355..44713bad0045 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -2933,6 +2933,9 @@ #define SSRAMINTPERR_V(x) ((x) << SSRAMINTPERR_S) #define SSRAMINTPERR_F SSRAMINTPERR_V(1U) +#define LE_3_DB_HASH_MASK_GEN_IPV4_T6_A 0x19eac +#define LE_4_DB_HASH_MASK_GEN_IPV4_T6_A 0x19eb0 + #define NCSI_INT_CAUSE_A 0x1a0d8 #define CIM_DM_PRTY_ERR_S 8 From 5c31254e35a8a5767c3b23377c34018d8bdd0567 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 1 Nov 2017 08:53:00 +0530 Subject: [PATCH 1686/2177] cxgb4: initialize hash-filter configuration Add support for hash-filter configuration on T6. Also, do basic checks for the related initialization. 
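As a rough illustration of the tuple-packing technique these cxgb4 hash-filter patches build on (a minimal sketch only, not code from the driver; the struct and helper names below are hypothetical), per-field shift offsets like the ones saved in tp_params can be combined into a single packed filter tuple by OR-ing each configured field's value at its shift and skipping fields whose shift is -1:

/* Illustrative sketch only: pack a filter tuple from per-field shifts.
 * Fields with shift == -1 are treated as not present in the current
 * configuration, mirroring the -1 convention used for the saved shifts.
 */
#include <stdint.h>

struct tuple_field {
	int shift;       /* bit position of the field, or -1 if absent */
	uint64_t value;  /* field value to place at that position */
};

static uint64_t pack_filter_tuple(const struct tuple_field *fields, int n)
{
	uint64_t tuple = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (fields[i].shift < 0)
			continue;
		tuple |= fields[i].value << fields[i].shift;
	}
	return tuple;
}

A caller would fill in one entry per configured field (port, VLAN, protocol and so on) and apply the adapter's combined filter tuple mask to the result when building the actual request.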
Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 6 +++++ .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 22 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 1 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 14 ++++++++---- drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 14 ++++++++++++ drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 1 + 6 files changed, 54 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index e2c75b73595f..5f021e6062b0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -366,6 +366,7 @@ struct adapter_params { unsigned char crypto; /* HW capability for crypto */ unsigned char bypass; + unsigned char hash_filter; unsigned int ofldq_wr_cred; bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ @@ -1140,6 +1141,11 @@ static inline int is_offload(const struct adapter *adap) return adap->params.offload; } +static inline int is_hashfilter(const struct adapter *adap) +{ + return adap->params.hash_filter; +} + static inline int is_pci_uld(const struct adapter *adap) { return adap->params.crypto; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 89272f29f807..566bd2d3737c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -915,3 +915,25 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) complete(&ctx->completion); } } + +int init_hash_filter(struct adapter *adap) +{ + /* On T6, verify the necessary register configs and warn the user in + * case of improper config + */ + if (is_t6(adap->params.chip)) { + if (TCAM_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_0_A)) != 4) + goto err; + + if (HASH_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_1_A)) != 4) + goto err; + } else { + dev_err(adap->pdev_dev, "Hash filter supported only on T6\n"); + return -EINVAL; + } + adap->params.hash_filter = 1; + return 0; +err: + dev_warn(adap->pdev_dev, "Invalid hash filter config!\n"); + return -EINVAL; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index 23742cb1c69f..d3c1a8fafd32 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -45,4 +45,5 @@ int delete_filter(struct adapter *adapter, unsigned int fidx); int writable_filter(struct filter_entry *f); void clear_all_filters(struct adapter *adapter); +int init_hash_filter(struct adapter *adap); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e16078ddb39f..4b07cfe8c66c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3963,7 +3963,8 @@ static int adap_init0(struct adapter *adap) if (ret < 0) goto bye; - if (caps_cmd.ofldcaps) { + if (caps_cmd.ofldcaps || + (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) { /* query offload-related parameters */ params[0] = FW_PARAM_DEV(NTID); params[1] = FW_PARAM_PFVF(SERVER_START); @@ -4000,8 +4001,13 @@ static int adap_init0(struct adapter *adap) adap->vres.ddp.size = val[4] - val[3] + 1; adap->params.ofldq_wr_cred = val[5]; - adap->params.offload = 1; - 
adap->num_ofld_uld += 1; + if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) { + if (init_hash_filter(adap) < 0) + goto bye; + } else { + adap->params.offload = 1; + adap->num_ofld_uld += 1; + } } if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); @@ -5171,7 +5177,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) cxgb4_init_tc_flower(adapter); } - if (is_offload(adapter)) { + if (is_offload(adapter) || is_hashfilter(adapter)) { if (t4_read_reg(adapter, LE_DB_CONFIG_A) & HASHEN_F) { u32 hash_base, hash_reg; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 44713bad0045..623f453bd327 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -2933,6 +2933,20 @@ #define SSRAMINTPERR_V(x) ((x) << SSRAMINTPERR_S) #define SSRAMINTPERR_F SSRAMINTPERR_V(1U) +#define LE_DB_RSP_CODE_0_A 0x19c74 + +#define TCAM_ACTV_HIT_S 0 +#define TCAM_ACTV_HIT_M 0x1fU +#define TCAM_ACTV_HIT_V(x) ((x) << TCAM_ACTV_HIT_S) +#define TCAM_ACTV_HIT_G(x) (((x) >> TCAM_ACTV_HIT_S) & TCAM_ACTV_HIT_M) + +#define LE_DB_RSP_CODE_1_A 0x19c78 + +#define HASH_ACTV_HIT_S 25 +#define HASH_ACTV_HIT_M 0x1fU +#define HASH_ACTV_HIT_V(x) ((x) << HASH_ACTV_HIT_S) +#define HASH_ACTV_HIT_G(x) (((x) >> HASH_ACTV_HIT_S) & HASH_ACTV_HIT_M) + #define LE_3_DB_HASH_MASK_GEN_IPV4_T6_A 0x19eac #define LE_4_DB_HASH_MASK_GEN_IPV4_T6_A 0x19eb0 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 2ba890926c73..57eb4ad3485d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1092,6 +1092,7 @@ enum fw_caps_config_switch { enum fw_caps_config_nic { FW_CAPS_CONFIG_NIC = 0x00000001, FW_CAPS_CONFIG_NIC_VM = 0x00000002, + FW_CAPS_CONFIG_NIC_HASHFILTER = 0x00000020, }; enum fw_caps_config_ofld { From 12b276fbf6e092adca08a8125afcc4e7f530a0b6 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 1 Nov 2017 08:53:01 +0530 Subject: [PATCH 1687/2177] cxgb4: add support to create hash filters Add support to create hash (exact-match) filters based on the value of 'hash' field in ch_filter_specification. Allocate SMT/L2T entries if DMAC-rewrite/SMAC-rewrite is requested. Allocate CLIP entry in case of IPv6 filter. Use cpl_act_open_req[6] to send hash filter create request to hw. Also, the filter tuple is calculated as part of sending this request. Hash-filter reply is processed on getting cpl_act_open_rpl. In case of success, various bits/fields in filter-tcb are set per filter requirement, such as enabling filter hitcnts, and/or various header rewrite operations, such as VLAN-rewrite, NAT or (L3/L4)-rewrite, and SMAC/DMAC-rewrite. In case of failure, clear the filter entry and release any hw resources occupied by it. The patch also moves the functions set_tcb_field, set_tcb_tflag and configure_filter_smac towards beginning of file. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 10 +- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 569 ++++++++++++++++-- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 + drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 5 + drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h | 17 + 6 files changed, 560 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 5f021e6062b0..bb7f0e4c9a81 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1050,6 +1050,7 @@ struct ch_filter_specification { * matching that doesn't exist as a (value, mask) tuple. */ uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ + u32 hash:1; /* 0 => wild-card, 1 => exact-match */ /* Packet dispatch information. Ingress packets which match the * filter rules will be dropped, passed to the host or switched back @@ -1107,7 +1108,14 @@ enum { }; enum { - NAT_MODE_ALL = 7, /* NAT on entire 4-tuple */ + NAT_MODE_NONE = 0, /* No NAT performed */ + NAT_MODE_DIP, /* NAT on Dst IP */ + NAT_MODE_DIP_DP, /* NAT on Dst IP, Dst Port */ + NAT_MODE_DIP_DP_SIP, /* NAT on Dst IP, Dst Port and Src IP */ + NAT_MODE_DIP_DP_SP, /* NAT on Dst IP, Dst Port and Src Port */ + NAT_MODE_SIP_SP, /* NAT on Src IP and Src Port */ + NAT_MODE_DIP_SIP_SP, /* NAT on Dst IP, Src IP and Src Port */ + NAT_MODE_ALL /* NAT on entire 4-tuple */ }; /* Host shadow copy of ingress filter entry. This is in host native format diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 566bd2d3737c..cf8ca695f27d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -35,6 +35,8 @@ #include "cxgb4.h" #include "t4_regs.h" #include "t4_tcb.h" +#include "t4_values.h" +#include "clip_tbl.h" #include "l2t.h" #include "smt.h" #include "t4fw_api.h" @@ -50,6 +52,141 @@ static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask) return !(conf & conf_mask) && is_field_set(val, mask); } +static int set_tcb_field(struct adapter *adap, struct filter_entry *f, + unsigned int ftid, u16 word, u64 mask, u64 val, + int no_reply) +{ + struct cpl_set_tcb_field *req; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid); + req->reply_ctrl = htons(REPLY_CHAN_V(0) | + QUEUENO_V(adap->sge.fw_evtq.abs_id) | + NO_REPLY_V(no_reply)); + req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(ftid)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adap, skb); + return 0; +} + +/* Set one of the t_flags bits in the TCB. + */ +static int set_tcb_tflag(struct adapter *adap, struct filter_entry *f, + unsigned int ftid, unsigned int bit_pos, + unsigned int val, int no_reply) +{ + return set_tcb_field(adap, f, ftid, TCB_T_FLAGS_W, 1ULL << bit_pos, + (unsigned long long)val << bit_pos, no_reply); +} + +static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) +{ + int err; + + /* do a set-tcb for smac-sel and CWR bit.. 
*/ + err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); + if (err) + goto smac_err; + + err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, + TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), + TCB_SMAC_SEL_V(f->smt->idx), 1); + if (!err) + return 0; + +smac_err: + dev_err(adap->pdev_dev, "filter %u smac config failed with error %u\n", + f->tid, err); + return err; +} + +static void set_nat_params(struct adapter *adap, struct filter_entry *f, + unsigned int tid, bool dip, bool sip, bool dp, + bool sp) +{ + if (dip) { + if (f->fs.type) { + set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W, + WORD_MASK, f->fs.nat_lip[15] | + f->fs.nat_lip[14] << 8 | + f->fs.nat_lip[13] << 16 | + f->fs.nat_lip[12] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 1, + WORD_MASK, f->fs.nat_lip[11] | + f->fs.nat_lip[10] << 8 | + f->fs.nat_lip[9] << 16 | + f->fs.nat_lip[8] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 2, + WORD_MASK, f->fs.nat_lip[7] | + f->fs.nat_lip[6] << 8 | + f->fs.nat_lip[5] << 16 | + f->fs.nat_lip[4] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 3, + WORD_MASK, f->fs.nat_lip[3] | + f->fs.nat_lip[2] << 8 | + f->fs.nat_lip[1] << 16 | + f->fs.nat_lip[0] << 24, 1); + } else { + set_tcb_field(adap, f, tid, TCB_RX_FRAG3_LEN_RAW_W, + WORD_MASK, f->fs.nat_lip[3] | + f->fs.nat_lip[2] << 8 | + f->fs.nat_lip[1] << 16 | + f->fs.nat_lip[0] << 24, 1); + } + } + + if (sip) { + if (f->fs.type) { + set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W, + WORD_MASK, f->fs.nat_fip[15] | + f->fs.nat_fip[14] << 8 | + f->fs.nat_fip[13] << 16 | + f->fs.nat_fip[12] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 1, + WORD_MASK, f->fs.nat_fip[11] | + f->fs.nat_fip[10] << 8 | + f->fs.nat_fip[9] << 16 | + f->fs.nat_fip[8] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 2, + WORD_MASK, f->fs.nat_fip[7] | + f->fs.nat_fip[6] << 8 | + f->fs.nat_fip[5] << 16 | + f->fs.nat_fip[4] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 3, + WORD_MASK, f->fs.nat_fip[3] | + f->fs.nat_fip[2] << 8 | + f->fs.nat_fip[1] << 16 | + f->fs.nat_fip[0] << 24, 1); + + } else { + set_tcb_field(adap, f, tid, + TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W, + WORD_MASK, f->fs.nat_fip[3] | + f->fs.nat_fip[2] << 8 | + f->fs.nat_fip[1] << 16 | + f->fs.nat_fip[0] << 24, 1); + } + } + + set_tcb_field(adap, f, tid, TCB_PDU_HDR_LEN_W, WORD_MASK, + (dp ? f->fs.nat_lport : 0) | + (sp ? f->fs.nat_fport << 16 : 0), 1); +} + /* Validate filter spec against configuration done on the card. */ static int validate_filter(struct net_device *dev, struct ch_filter_specification *fs) @@ -484,10 +621,8 @@ int delete_filter(struct adapter *adapter, unsigned int fidx) void clear_filter(struct adapter *adap, struct filter_entry *f) { /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. + * need to free any existing L2T, SMT, CLIP entries of filter + * rule. */ if (f->l2t) cxgb4_l2t_release(f->l2t); @@ -495,6 +630,9 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) if (f->smt) cxgb4_smt_release(f->smt); + if (f->fs.hash && f->fs.type) + cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); + /* The zeroing of the filter rule below clears the filter valid, * pending, locked flags, l2t pointer, etc. 
so it's all we need for * this operation. @@ -564,6 +702,269 @@ static void fill_default_mask(struct ch_filter_specification *fs) fs->mask.fport = ~0; } +static u64 hash_filter_ntuple(struct ch_filter_specification *fs, + struct net_device *dev) +{ + struct adapter *adap = netdev2adap(dev); + struct tp_params *tp = &adap->params.tp; + u64 ntuple = 0; + + /* Initialize each of the fields which we care about which are present + * in the Compressed Filter Tuple. + */ + if (tp->vlan_shift >= 0 && fs->mask.ivlan) + ntuple |= (FT_VLAN_VLD_F | fs->val.ivlan) << tp->vlan_shift; + + if (tp->port_shift >= 0 && fs->mask.iport) + ntuple |= (u64)fs->val.iport << tp->port_shift; + + if (tp->protocol_shift >= 0) { + if (!fs->val.proto) + ntuple |= (u64)IPPROTO_TCP << tp->protocol_shift; + else + ntuple |= (u64)fs->val.proto << tp->protocol_shift; + } + + if (tp->tos_shift >= 0 && fs->mask.tos) + ntuple |= (u64)(fs->val.tos) << tp->tos_shift; + + if (tp->vnic_shift >= 0) { + if ((adap->params.tp.ingress_config & VNIC_F) && + fs->mask.pfvf_vld) + ntuple |= (u64)((fs->val.pfvf_vld << 16) | + (fs->val.pf << 13) | + (fs->val.vf)) << tp->vnic_shift; + else + ntuple |= (u64)((fs->val.ovlan_vld << 16) | + (fs->val.ovlan)) << tp->vnic_shift; + } + + if (tp->macmatch_shift >= 0 && fs->mask.macidx) + ntuple |= (u64)(fs->val.macidx) << tp->macmatch_shift; + + if (tp->ethertype_shift >= 0 && fs->mask.ethtype) + ntuple |= (u64)(fs->val.ethtype) << tp->ethertype_shift; + + if (tp->matchtype_shift >= 0 && fs->mask.matchtype) + ntuple |= (u64)(fs->val.matchtype) << tp->matchtype_shift; + + if (tp->frag_shift >= 0 && fs->mask.frag) + ntuple |= (u64)(fs->val.frag) << tp->frag_shift; + + if (tp->fcoe_shift >= 0 && fs->mask.fcoe) + ntuple |= (u64)(fs->val.fcoe) << tp->fcoe_shift; + return ntuple; +} + +static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, + unsigned int qid_filterid, struct adapter *adap) +{ + struct cpl_t6_act_open_req6 *t6req = NULL; + struct cpl_act_open_req6 *req = NULL; + + t6req = (struct cpl_t6_act_open_req6 *)__skb_put(skb, sizeof(*t6req)); + INIT_TP_WR(t6req, 0); + req = (struct cpl_act_open_req6 *)t6req; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_filterid)); + req->local_port = cpu_to_be16(f->fs.val.lport); + req->peer_port = cpu_to_be16(f->fs.val.fport); + req->local_ip_hi = *(__be64 *)(&f->fs.val.lip); + req->local_ip_lo = *(((__be64 *)&f->fs.val.lip) + 1); + req->peer_ip_hi = *(__be64 *)(&f->fs.val.fip); + req->peer_ip_lo = *(((__be64 *)&f->fs.val.fip) + 1); + req->opt0 = cpu_to_be64(NAGLE_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + DELACK_V(f->fs.hitcnts) | + L2T_IDX_V(f->l2t ? f->l2t->idx : 0) | + SMAC_SEL_V((cxgb4_port_viid(f->dev) & + 0x7F) << 1) | + TX_CHAN_V(f->fs.eport) | + NO_CONG_V(f->fs.rpttid) | + ULP_MODE_V(f->fs.nat_mode ? 
+ ULP_MODE_TCPDDP : ULP_MODE_NONE) | + TCAM_BYPASS_F | NON_OFFLOAD_F); + t6req->params = cpu_to_be64(FILTER_TUPLE_V(hash_filter_ntuple(&f->fs, + f->dev))); + t6req->opt2 = htonl(RSS_QUEUE_VALID_F | + RSS_QUEUE_V(f->fs.iq) | + TX_QUEUE_V(f->fs.nat_mode) | + T5_OPT_2_VALID_F | + RX_CHANNEL_F | + CONG_CNTRL_V((f->fs.action == FILTER_DROP) | + (f->fs.dirsteer << 1)) | + PACE_V((f->fs.maskhash) | + ((f->fs.dirsteerhash) << 1)) | + CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); +} + +static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, + unsigned int qid_filterid, struct adapter *adap) +{ + struct cpl_t6_act_open_req *t6req = NULL; + struct cpl_act_open_req *req = NULL; + + t6req = (struct cpl_t6_act_open_req *)__skb_put(skb, sizeof(*t6req)); + INIT_TP_WR(t6req, 0); + req = (struct cpl_act_open_req *)t6req; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_filterid)); + req->local_port = cpu_to_be16(f->fs.val.lport); + req->peer_port = cpu_to_be16(f->fs.val.fport); + req->local_ip = f->fs.val.lip[0] | f->fs.val.lip[1] << 8 | + f->fs.val.lip[2] << 16 | f->fs.val.lip[3] << 24; + req->peer_ip = f->fs.val.fip[0] | f->fs.val.fip[1] << 8 | + f->fs.val.fip[2] << 16 | f->fs.val.fip[3] << 24; + req->opt0 = cpu_to_be64(NAGLE_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + DELACK_V(f->fs.hitcnts) | + L2T_IDX_V(f->l2t ? f->l2t->idx : 0) | + SMAC_SEL_V((cxgb4_port_viid(f->dev) & + 0x7F) << 1) | + TX_CHAN_V(f->fs.eport) | + NO_CONG_V(f->fs.rpttid) | + ULP_MODE_V(f->fs.nat_mode ? + ULP_MODE_TCPDDP : ULP_MODE_NONE) | + TCAM_BYPASS_F | NON_OFFLOAD_F); + + t6req->params = cpu_to_be64(FILTER_TUPLE_V(hash_filter_ntuple(&f->fs, + f->dev))); + t6req->opt2 = htonl(RSS_QUEUE_VALID_F | + RSS_QUEUE_V(f->fs.iq) | + TX_QUEUE_V(f->fs.nat_mode) | + T5_OPT_2_VALID_F | + RX_CHANNEL_F | + CONG_CNTRL_V((f->fs.action == FILTER_DROP) | + (f->fs.dirsteer << 1)) | + PACE_V((f->fs.maskhash) | + ((f->fs.dirsteerhash) << 1)) | + CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); +} + +static int cxgb4_set_hash_filter(struct net_device *dev, + struct ch_filter_specification *fs, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct tid_info *t = &adapter->tids; + struct filter_entry *f; + struct sk_buff *skb; + int iq, atid, size; + int ret = 0; + u32 iconf; + + fill_default_mask(fs); + ret = validate_filter(dev, fs); + if (ret) + return ret; + + iq = get_filter_steerq(dev, fs); + if (iq < 0) + return iq; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return -ENOMEM; + + f->fs = *fs; + f->ctx = ctx; + f->dev = dev; + f->fs.iq = iq; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, + f->fs.eport, f->fs.dmac); + if (!f->l2t) { + ret = -ENOMEM; + goto out_err; + } + } + + /* If the new filter requires loopback Source MAC rewriting then + * we need to allocate a SMT entry for the filter. 
+ */ + if (f->fs.newsmac) { + f->smt = cxgb4_smt_alloc_switching(f->dev, f->fs.smac); + if (!f->smt) { + if (f->l2t) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + } + ret = -ENOMEM; + goto free_l2t; + } + } + + atid = cxgb4_alloc_atid(t, f); + if (atid < 0) + goto free_smt; + + iconf = adapter->params.tp.ingress_config; + if (iconf & VNIC_F) { + f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf; + f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf; + f->fs.val.ovlan_vld = fs->val.pfvf_vld; + f->fs.mask.ovlan_vld = fs->mask.pfvf_vld; + } + + size = sizeof(struct cpl_t6_act_open_req); + if (f->fs.type) { + ret = cxgb4_clip_get(f->dev, (const u32 *)&f->fs.val.lip, 1); + if (ret) + goto free_atid; + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto free_clip; + } + + mk_act_open_req6(f, skb, + ((adapter->sge.fw_evtq.abs_id << 14) | atid), + adapter); + } else { + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto free_atid; + } + + mk_act_open_req(f, skb, + ((adapter->sge.fw_evtq.abs_id << 14) | atid), + adapter); + } + + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_SETUP, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; + +free_clip: + cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); + +free_atid: + cxgb4_free_atid(t, atid); + +free_smt: + if (f->smt) { + cxgb4_smt_release(f->smt); + f->smt = NULL; + } + +free_l2t: + if (f->l2t) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + } + +out_err: + kfree(f); + return ret; +} + /* Check a Chelsio Filter Request for validity, convert it into our internal * format and send it to the hardware. Return 0 on success, an error number * otherwise. We attach any provided filter operation context to the internal @@ -580,6 +981,14 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, u32 iconf; int iq, ret; + if (fs->hash) { + if (is_hashfilter(adapter)) + return cxgb4_set_hash_filter(dev, fs, ctx); + netdev_err(dev, "%s: Exact-match filters only supported with Hash Filter configuration\n", + __func__); + return -EINVAL; + } + max_fidx = adapter->tids.nftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) @@ -789,60 +1198,126 @@ out: return ret; } -static int set_tcb_field(struct adapter *adap, struct filter_entry *f, - unsigned int ftid, u16 word, u64 mask, u64 val, - int no_reply) +static int configure_filter_tcb(struct adapter *adap, unsigned int tid, + struct filter_entry *f) { - struct cpl_set_tcb_field *req; - struct sk_buff *skb; + if (f->fs.hitcnts) + set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W, + TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) | + TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M), + TCB_TIMESTAMP_V(0ULL) | + TCB_RTT_TS_RECENT_AGE_V(0ULL), + 1); - skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC); - if (!skb) - return -ENOMEM; + if (f->fs.newdmac) + set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1, + 1); - req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req)); - memset(req, 0, sizeof(*req)); - INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid); - req->reply_ctrl = htons(REPLY_CHAN_V(0) | - QUEUENO_V(adap->sge.fw_evtq.abs_id) | - NO_REPLY_V(no_reply)); - req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(ftid)); - req->mask = cpu_to_be64(mask); - req->val = cpu_to_be64(val); - set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); - t4_ofld_send(adap, skb); + if (f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) + set_tcb_tflag(adap, f, tid, TF_CCTRL_RFR_S, 1, + 1); + if 
(f->fs.newsmac) + configure_filter_smac(adap, f); + + if (f->fs.nat_mode) { + switch (f->fs.nat_mode) { + case NAT_MODE_DIP: + set_nat_params(adap, f, tid, true, false, false, false); + break; + + case NAT_MODE_DIP_DP: + set_nat_params(adap, f, tid, true, false, true, false); + break; + + case NAT_MODE_DIP_DP_SIP: + set_nat_params(adap, f, tid, true, true, true, false); + break; + case NAT_MODE_DIP_DP_SP: + set_nat_params(adap, f, tid, true, false, true, true); + break; + + case NAT_MODE_SIP_SP: + set_nat_params(adap, f, tid, false, true, false, true); + break; + + case NAT_MODE_DIP_SIP_SP: + set_nat_params(adap, f, tid, true, true, false, true); + break; + + case NAT_MODE_ALL: + set_nat_params(adap, f, tid, true, true, true, true); + break; + + default: + pr_err("%s: Invalid NAT mode: %d\n", + __func__, f->fs.nat_mode); + return -EINVAL; + } + } return 0; } -/* Set one of the t_flags bits in the TCB. - */ -static int set_tcb_tflag(struct adapter *adap, struct filter_entry *f, - unsigned int ftid, unsigned int bit_pos, - unsigned int val, int no_reply) +void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl) { - return set_tcb_field(adap, f, ftid, TCB_T_FLAGS_W, 1ULL << bit_pos, - (unsigned long long)val << bit_pos, no_reply); -} + unsigned int ftid = TID_TID_G(AOPEN_ATID_G(ntohl(rpl->atid_status))); + unsigned int status = AOPEN_STATUS_G(ntohl(rpl->atid_status)); + struct tid_info *t = &adap->tids; + unsigned int tid = GET_TID(rpl); + struct filter_ctx *ctx = NULL; + struct filter_entry *f; -static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) -{ - int err; + dev_dbg(adap->pdev_dev, "%s: tid = %u; atid = %u; status = %u\n", + __func__, tid, ftid, status); - /* do a set-tcb for smac-sel and CWR bit.. */ - err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); - if (err) - goto smac_err; + f = lookup_atid(t, ftid); + if (!f) { + dev_err(adap->pdev_dev, "%s:could not find filter entry", + __func__); + return; + } + ctx = f->ctx; + f->ctx = NULL; - err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, - TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), - TCB_SMAC_SEL_V(f->smt->idx), 1); - if (!err) - return 0; + switch (status) { + case CPL_ERR_NONE: + f->tid = tid; + f->pending = 0; + f->valid = 1; + cxgb4_insert_tid(t, f, f->tid, 0); + cxgb4_free_atid(t, ftid); + if (ctx) { + ctx->tid = f->tid; + ctx->result = 0; + } + if (configure_filter_tcb(adap, tid, f)) { + clear_filter(adap, f); + cxgb4_remove_tid(t, 0, tid, 0); + kfree(f); + if (ctx) { + ctx->result = -EINVAL; + complete(&ctx->completion); + } + return; + } + break; -smac_err: - dev_err(adap->pdev_dev, "filter %u smac config failed with error %u\n", - f->tid, err); - return err; + default: + dev_err(adap->pdev_dev, "%s: filter creation PROBLEM; status = %u\n", + __func__, status); + + if (ctx) { + if (status == CPL_ERR_TCAM_FULL) + ctx->result = -EAGAIN; + else + ctx->result = -EINVAL; + } + clear_filter(adap, f); + cxgb4_free_atid(t, ftid); + kfree(f); + } + if (ctx) + complete(&ctx->completion); } /* Handle a filter write/deletion reply. 
*/ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index d3c1a8fafd32..7480d65550a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -37,7 +37,10 @@ #include "t4_msg.h" +#define WORD_MASK 0xffffffff + void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl); +void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl); void clear_filter(struct adapter *adap, struct filter_entry *f); int set_filter_wr(struct adapter *adapter, int fidx); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 4b07cfe8c66c..77b4bd958748 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -572,6 +572,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_set_tcb_rpl *p = (void *)rsp; filter_rpl(q->adap, p); + } else if (opcode == CPL_ACT_OPEN_RPL) { + const struct cpl_act_open_rpl *p = (void *)rsp; + + hash_filter_rpl(q->adap, p); } else dev_err(q->adap->pdev_dev, "unexpected CPL %#x on FW event queue\n", opcode); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index ce4838d907da..7e12f241145b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -286,6 +286,7 @@ struct work_request_hdr { #define RX_CHANNEL_S 26 #define RX_CHANNEL_V(x) ((x) << RX_CHANNEL_S) +#define RX_CHANNEL_F RX_CHANNEL_V(1U) #define WND_SCALE_EN_S 28 #define WND_SCALE_EN_V(x) ((x) << WND_SCALE_EN_S) @@ -315,6 +316,10 @@ struct cpl_pass_open_req { #define DELACK_V(x) ((x) << DELACK_S) #define DELACK_F DELACK_V(1U) +#define NON_OFFLOAD_S 7 +#define NON_OFFLOAD_V(x) ((x) << NON_OFFLOAD_S) +#define NON_OFFLOAD_F NON_OFFLOAD_V(1U) + #define DSCP_S 22 #define DSCP_M 0x3F #define DSCP_V(x) ((x) << DSCP_S) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index c1c76663034d..c7201eb7b14c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -42,6 +42,23 @@ #define TCB_T_FLAGS_W 1 +#define TF_CCTRL_ECE_S 60 #define TF_CCTRL_CWR_S 61 +#define TF_CCTRL_RFR_S 62 +#define TCB_TIMESTAMP_W 5 +#define TCB_TIMESTAMP_S 0 +#define TCB_TIMESTAMP_M 0xffffffffULL +#define TCB_TIMESTAMP_V(x) ((x) << TCB_TIMESTAMP_S) + +#define TCB_RTT_TS_RECENT_AGE_W 6 +#define TCB_RTT_TS_RECENT_AGE_S 0 +#define TCB_RTT_TS_RECENT_AGE_M 0xffffffffULL +#define TCB_RTT_TS_RECENT_AGE_V(x) ((x) << TCB_RTT_TS_RECENT_AGE_S) + +#define TCB_SND_UNA_RAW_W 10 +#define TCB_RX_FRAG2_PTR_RAW_W 27 +#define TCB_RX_FRAG3_LEN_RAW_W 29 +#define TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W 30 +#define TCB_PDU_HDR_LEN_W 31 #endif /* __T4_TCB_H */ From 3b0b3bee56dd4e5cd1976a046f391a1435d727b2 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 1 Nov 2017 08:53:02 +0530 Subject: [PATCH 1688/2177] cxgb4: add support to delete hash filter Use a combined ulptx work-request to send hash filter deletion request to hw. Hash filter deletion reply is processed on getting cpl_abort_rpl_rss. Release any L2T/SMT/CLIP entries on filter deletion. Also, free up the corresponding filter entry. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 159 +++++++++++++++++- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 2 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 + .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 2 +- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 4 +- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 4 +- drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h | 5 + 7 files changed, 173 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index cf8ca695f27d..eb6ba9824501 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -87,6 +87,59 @@ static int set_tcb_tflag(struct adapter *adap, struct filter_entry *f, (unsigned long long)val << bit_pos, no_reply); } +static void mk_abort_req_ulp(struct cpl_abort_req *abort_req, unsigned int tid) +{ + struct ulp_txpkt *txpkt = (struct ulp_txpkt *)abort_req; + struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1); + + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->len = htonl(DIV_ROUND_UP(sizeof(*abort_req), 16)); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + sc->len = htonl(sizeof(*abort_req) - sizeof(struct work_request_hdr)); + OPCODE_TID(abort_req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); + abort_req->rsvd0 = htonl(0); + abort_req->rsvd1 = 0; + abort_req->cmd = CPL_ABORT_NO_RST; +} + +static void mk_abort_rpl_ulp(struct cpl_abort_rpl *abort_rpl, unsigned int tid) +{ + struct ulp_txpkt *txpkt = (struct ulp_txpkt *)abort_rpl; + struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1); + + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->len = htonl(DIV_ROUND_UP(sizeof(*abort_rpl), 16)); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + sc->len = htonl(sizeof(*abort_rpl) - sizeof(struct work_request_hdr)); + OPCODE_TID(abort_rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); + abort_rpl->rsvd0 = htonl(0); + abort_rpl->rsvd1 = 0; + abort_rpl->cmd = CPL_ABORT_NO_RST; +} + +static void mk_set_tcb_ulp(struct filter_entry *f, + struct cpl_set_tcb_field *req, + unsigned int word, u64 mask, u64 val, + u8 cookie, int no_reply) +{ + struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; + struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1); + + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->len = htonl(DIV_ROUND_UP(sizeof(*req), 16)); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + sc->len = htonl(sizeof(*req) - sizeof(struct work_request_hdr)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, f->tid)); + req->reply_ctrl = htons(NO_REPLY_V(no_reply) | REPLY_CHAN_V(0) | + QUEUENO_V(0)); + req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(cookie)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + sc = (struct ulptx_idata *)(req + 1); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); + sc->len = htonl(0); +} + static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) { int err; @@ -1110,12 +1163,74 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return ret; } +static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct tid_info *t = &adapter->tids; + struct cpl_abort_req *abort_req; + struct cpl_abort_rpl *abort_rpl; + struct cpl_set_tcb_field *req; + struct ulptx_idata *aligner; + struct 
work_request_hdr *wr; + struct filter_entry *f; + struct sk_buff *skb; + unsigned int wrlen; + int ret; + + netdev_dbg(dev, "%s: filter_id = %d ; nftids = %d\n", + __func__, filter_id, adapter->tids.nftids); + + if (filter_id > adapter->tids.ntids) + return -E2BIG; + + f = lookup_tid(t, filter_id); + if (!f) { + netdev_err(dev, "%s: no filter entry for filter_id = %d", + __func__, filter_id); + return -EINVAL; + } + + ret = writable_filter(f); + if (ret) + return ret; + + if (!f->valid) + return -EINVAL; + + f->ctx = ctx; + f->pending = 1; + wrlen = roundup(sizeof(*wr) + (sizeof(*req) + sizeof(*aligner)) + + sizeof(*abort_req) + sizeof(*abort_rpl), 16); + skb = alloc_skb(wrlen, GFP_KERNEL); + if (!skb) { + netdev_err(dev, "%s: could not allocate skb ..\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + req = (struct cpl_set_tcb_field *)__skb_put(skb, wrlen); + INIT_ULPTX_WR(req, wrlen, 0, 0); + wr = (struct work_request_hdr *)req; + wr++; + req = (struct cpl_set_tcb_field *)wr; + mk_set_tcb_ulp(f, req, TCB_RSS_INFO_W, TCB_RSS_INFO_V(TCB_RSS_INFO_M), + TCB_RSS_INFO_V(adapter->sge.fw_evtq.abs_id), 0, 1); + aligner = (struct ulptx_idata *)(req + 1); + abort_req = (struct cpl_abort_req *)(aligner + 1); + mk_abort_req_ulp(abort_req, f->tid); + abort_rpl = (struct cpl_abort_rpl *)(abort_req + 1); + mk_abort_rpl_ulp(abort_rpl, f->tid); + t4_ofld_send(adapter, skb); + return 0; +} + /* Check a delete filter request for validity and send it to the hardware. * Return 0 on success, an error number otherwise. We attach any provided * filter operation context to the internal filter specification in order to * facilitate signaling completion of the operation. */ int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); @@ -1123,6 +1238,14 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, unsigned int max_fidx; int ret; + if (fs && fs->hash) { + if (is_hashfilter(adapter)) + return cxgb4_del_hash_filter(dev, filter_id, ctx); + netdev_err(dev, "%s: Exact-match filters only supported with Hash Filter configuration\n", + __func__); + return -EINVAL; + } + max_fidx = adapter->tids.nftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) @@ -1173,18 +1296,19 @@ out: return ret; } -int cxgb4_del_filter(struct net_device *dev, int filter_id) +int cxgb4_del_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs) { struct filter_ctx ctx; int ret; /* If we are shutting down the adapter do not wait for completion */ if (netdev2adap(dev)->flags & SHUTTING_DOWN) - return __cxgb4_del_filter(dev, filter_id, NULL); + return __cxgb4_del_filter(dev, filter_id, fs, NULL); init_completion(&ctx.completion); - ret = __cxgb4_del_filter(dev, filter_id, &ctx); + ret = __cxgb4_del_filter(dev, filter_id, fs, &ctx); if (ret) goto out; @@ -1258,6 +1382,35 @@ static int configure_filter_tcb(struct adapter *adap, unsigned int tid, return 0; } +void hash_del_filter_rpl(struct adapter *adap, + const struct cpl_abort_rpl_rss *rpl) +{ + unsigned int status = rpl->status; + struct tid_info *t = &adap->tids; + unsigned int tid = GET_TID(rpl); + struct filter_ctx *ctx = NULL; + struct filter_entry *f; + + dev_dbg(adap->pdev_dev, "%s: status = %u; tid = %u\n", + __func__, status, tid); + + f = lookup_tid(t, tid); + if (!f) { + dev_err(adap->pdev_dev, "%s:could not find filter entry", + __func__); + return; 
+ } + ctx = f->ctx; + f->ctx = NULL; + clear_filter(adap, f); + cxgb4_remove_tid(t, 0, tid, 0); + kfree(f); + if (ctx) { + ctx->result = 0; + complete(&ctx->completion); + } +} + void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl) { unsigned int ftid = TID_TID_G(AOPEN_ATID_G(ntohl(rpl->atid_status))); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index 7480d65550a8..9475abd3384e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -41,6 +41,8 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl); void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl); +void hash_del_filter_rpl(struct adapter *adap, + const struct cpl_abort_rpl_rss *rpl); void clear_filter(struct adapter *adap, struct filter_entry *f); int set_filter_wr(struct adapter *adapter, int fidx); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 77b4bd958748..35709c7f7c5b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -576,6 +576,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_act_open_rpl *p = (void *)rsp; hash_filter_rpl(q->adap, p); + } else if (opcode == CPL_ABORT_RPL_RSS) { + const struct cpl_abort_rpl_rss *p = (void *)rsp; + + hash_del_filter_rpl(q->adap, p); } else dev_err(q->adap->pdev_dev, "unexpected CPL %#x on FW event queue\n", opcode); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 614db014ef18..ed377e2e9f8a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -730,7 +730,7 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, if (!ch_flower) return -ENOENT; - ret = cxgb4_del_filter(dev, ch_flower->filter_id); + ret = cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs); if (ret) goto err; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 48970ba08bdc..cd0cd13a964d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -380,7 +380,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) return -EINVAL; } - ret = cxgb4_del_filter(dev, filter_id); + ret = cxgb4_del_filter(dev, filter_id, NULL); if (ret) goto out; @@ -399,7 +399,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) if (!test_bit(j, link->tid_map)) continue; - ret = __cxgb4_del_filter(dev, j, NULL); + ret = __cxgb4_del_filter(dev, j, NULL, NULL); if (ret) goto out; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 52324c77a4fe..a1c850861cbf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -217,10 +217,12 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs, struct filter_ctx *ctx); int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, struct filter_ctx *ctx); int cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs); -int cxgb4_del_filter(struct net_device *dev, int 
filter_id); +int cxgb4_del_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs); int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, u64 *hitcnt, u64 *bytecnt); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index c7201eb7b14c..3297ce025e8b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -46,6 +46,11 @@ #define TF_CCTRL_CWR_S 61 #define TF_CCTRL_RFR_S 62 +#define TCB_RSS_INFO_W 3 +#define TCB_RSS_INFO_S 0 +#define TCB_RSS_INFO_M 0x3ffULL +#define TCB_RSS_INFO_V(x) ((x) << TCB_RSS_INFO_S) + #define TCB_TIMESTAMP_W 5 #define TCB_TIMESTAMP_S 0 #define TCB_TIMESTAMP_M 0xffffffffULL From 9d922d4b016d3d7908dd70112aaf46a38313d866 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 1 Nov 2017 08:53:03 +0530 Subject: [PATCH 1689/2177] cxgb4: add support to retrieve stats for hash filters Add support to retrieve packet-count and byte-count for hash-filters by retrieving filter-entry appropriately based on whether the request is for hash-filter or not. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 30 ++++++++++++------- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 6 ++-- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 2 +- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index eb6ba9824501..9b3ff6209eb5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -341,7 +341,7 @@ static int get_filter_steerq(struct net_device *dev, } static int get_filter_count(struct adapter *adapter, unsigned int fidx, - u64 *pkts, u64 *bytes) + u64 *pkts, u64 *bytes, bool hash) { unsigned int tcb_base, tcbaddr; unsigned int word_offset; @@ -350,14 +350,24 @@ static int get_filter_count(struct adapter *adapter, unsigned int fidx, int ret; tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A); - if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids - 1)) && - fidx >= adapter->tids.nftids) - return -E2BIG; - - f = &adapter->tids.ftid_tab[fidx]; - if (!f->valid) - return -EINVAL; + if (is_hashfilter(adapter) && hash) { + if (fidx < adapter->tids.ntids) { + f = adapter->tids.tid_tab[fidx]; + if (!f) + return -EINVAL; + } else { + return -E2BIG; + } + } else { + if ((fidx != (adapter->tids.nftids + + adapter->tids.nsftids - 1)) && + fidx >= adapter->tids.nftids) + return -E2BIG; + f = &adapter->tids.ftid_tab[fidx]; + if (!f->valid) + return -EINVAL; + } tcbaddr = tcb_base + f->tid * TCB_SIZE; spin_lock(&adapter->win0_lock); @@ -409,11 +419,11 @@ out: } int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, - u64 *hitcnt, u64 *bytecnt) + u64 *hitcnt, u64 *bytecnt, bool hash) { struct adapter *adapter = netdev2adap(dev); - return get_filter_count(adapter, fidx, hitcnt, bytecnt); + return get_filter_count(adapter, fidx, hitcnt, bytecnt, hash); } int cxgb4_get_free_ftid(struct net_device *dev, int family) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index ed377e2e9f8a..a26acd183eef 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -755,7 +755,8 @@ static void ch_flower_stats_cb(struct 
timer_list *t) hash_for_each_rcu(adap->flower_anymatch_tbl, i, flower_entry, link) { ret = cxgb4_get_filter_counters(adap->port[0], flower_entry->filter_id, - &packets, &bytes); + &packets, &bytes, + flower_entry->fs.hash); if (!ret) { spin_lock(&flower_entry->lock); ofld_stats = &flower_entry->stats; @@ -788,7 +789,8 @@ int cxgb4_tc_flower_stats(struct net_device *dev, } ret = cxgb4_get_filter_counters(dev, ch_flower->filter_id, - &packets, &bytes); + &packets, &bytes, + ch_flower->fs.hash); if (ret < 0) goto err; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index a1c850861cbf..08e709ab6dd4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -224,7 +224,7 @@ int cxgb4_set_filter(struct net_device *dev, int filter_id, int cxgb4_del_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs); int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, - u64 *hitcnt, u64 *bytecnt); + u64 *hitcnt, u64 *bytecnt, bool hash); static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { From 79e6d46a65abfd721de378bf496833a04ea10afe Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 1 Nov 2017 08:53:04 +0530 Subject: [PATCH 1690/2177] cxgb4: convert flower table to use rhashtable T6 supports ~500K hash filters and can theoretically climb up to ~1 million hash filters. Preallocated hash table is not efficient in terms of memory usage. So, use rhashtable instead which gives the flexibility to grow based on usage. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 4 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 +- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 104 ++++++++++++------ .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 4 +- 4 files changed, 81 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index bb7f0e4c9a81..0c83ceb5a1a6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -918,8 +918,10 @@ struct adapter { struct chcr_stats_debug chcr_stats; /* TC flower offload */ - DECLARE_HASHTABLE(flower_anymatch_tbl, 9); + struct rhashtable flower_tbl; + struct rhashtable_params flower_ht_params; struct timer_list flower_stats_timer; + struct work_struct flower_stats_work; /* Ethtool Dump */ struct ethtool_dump eth_dump; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 35709c7f7c5b..8fd41917c07a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5182,7 +5182,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev_warn(&pdev->dev, "could not offload tc u32, continuing\n"); - cxgb4_init_tc_flower(adapter); + if (cxgb4_init_tc_flower(adapter)) + dev_warn(&pdev->dev, + "could not offload tc flower, continuing\n"); } if (is_offload(adapter) || is_hashfilter(adapter)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index a26acd183eef..3953bc1fdc20 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -74,13 +74,8 @@ static struct ch_tc_flower_entry 
*allocate_flower_entry(void) static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap, unsigned long flower_cookie) { - struct ch_tc_flower_entry *flower_entry; - - hash_for_each_possible_rcu(adap->flower_anymatch_tbl, flower_entry, - link, flower_cookie) - if (flower_entry->tc_flower_cookie == flower_cookie) - return flower_entry; - return NULL; + return rhashtable_lookup_fast(&adap->flower_tbl, &flower_cookie, + adap->flower_ht_params); } static void cxgb4_process_flow_match(struct net_device *dev, @@ -707,12 +702,17 @@ int cxgb4_tc_flower_replace(struct net_device *dev, goto free_entry; } - INIT_HLIST_NODE(&ch_flower->link); ch_flower->tc_flower_cookie = cls->cookie; ch_flower->filter_id = ctx.tid; - hash_add_rcu(adap->flower_anymatch_tbl, &ch_flower->link, cls->cookie); + ret = rhashtable_insert_fast(&adap->flower_tbl, &ch_flower->node, + adap->flower_ht_params); + if (ret) + goto del_filter; - return ret; + return 0; + +del_filter: + cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs); free_entry: kfree(ch_flower); @@ -734,44 +734,66 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, if (ret) goto err; - hash_del_rcu(&ch_flower->link); + ret = rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node, + adap->flower_ht_params); + if (ret) { + netdev_err(dev, "Flow remove from rhashtable failed"); + goto err; + } kfree_rcu(ch_flower, rcu); err: return ret; } -static void ch_flower_stats_cb(struct timer_list *t) +static void ch_flower_stats_handler(struct work_struct *work) { - struct adapter *adap = from_timer(adap, t, flower_stats_timer); + struct adapter *adap = container_of(work, struct adapter, + flower_stats_work); struct ch_tc_flower_entry *flower_entry; struct ch_tc_flower_stats *ofld_stats; - unsigned int i; + struct rhashtable_iter iter; u64 packets; u64 bytes; int ret; - rcu_read_lock(); - hash_for_each_rcu(adap->flower_anymatch_tbl, i, flower_entry, link) { - ret = cxgb4_get_filter_counters(adap->port[0], - flower_entry->filter_id, - &packets, &bytes, - flower_entry->fs.hash); - if (!ret) { - spin_lock(&flower_entry->lock); - ofld_stats = &flower_entry->stats; + rhashtable_walk_enter(&adap->flower_tbl, &iter); + do { + flower_entry = ERR_PTR(rhashtable_walk_start(&iter)); + if (IS_ERR(flower_entry)) + goto walk_stop; - if (ofld_stats->prev_packet_count != packets) { - ofld_stats->prev_packet_count = packets; - ofld_stats->last_used = jiffies; + while ((flower_entry = rhashtable_walk_next(&iter)) && + !IS_ERR(flower_entry)) { + ret = cxgb4_get_filter_counters(adap->port[0], + flower_entry->filter_id, + &packets, &bytes, + flower_entry->fs.hash); + if (!ret) { + spin_lock(&flower_entry->lock); + ofld_stats = &flower_entry->stats; + + if (ofld_stats->prev_packet_count != packets) { + ofld_stats->prev_packet_count = packets; + ofld_stats->last_used = jiffies; + } + spin_unlock(&flower_entry->lock); } - spin_unlock(&flower_entry->lock); } - } - rcu_read_unlock(); +walk_stop: + rhashtable_walk_stop(&iter); + } while (flower_entry == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); } +static void ch_flower_stats_cb(struct timer_list *t) +{ + struct adapter *adap = from_timer(adap, t, flower_stats_timer); + + schedule_work(&adap->flower_stats_work); +} + int cxgb4_tc_flower_stats(struct net_device *dev, struct tc_cls_flower_offload *cls) { @@ -814,15 +836,35 @@ err: return ret; } -void cxgb4_init_tc_flower(struct adapter *adap) +static const struct rhashtable_params 
cxgb4_tc_flower_ht_params = { + .nelem_hint = 384, + .head_offset = offsetof(struct ch_tc_flower_entry, node), + .key_offset = offsetof(struct ch_tc_flower_entry, tc_flower_cookie), + .key_len = sizeof(((struct ch_tc_flower_entry *)0)->tc_flower_cookie), + .max_size = 524288, + .min_size = 512, + .automatic_shrinking = true +}; + +int cxgb4_init_tc_flower(struct adapter *adap) { - hash_init(adap->flower_anymatch_tbl); + int ret; + + adap->flower_ht_params = cxgb4_tc_flower_ht_params; + ret = rhashtable_init(&adap->flower_tbl, &adap->flower_ht_params); + if (ret) + return ret; + + INIT_WORK(&adap->flower_stats_work, ch_flower_stats_handler); timer_setup(&adap->flower_stats_timer, ch_flower_stats_cb, 0); mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); + return 0; } void cxgb4_cleanup_tc_flower(struct adapter *adap) { if (adap->flower_stats_timer.function) del_timer_sync(&adap->flower_stats_timer); + cancel_work_sync(&adap->flower_stats_work); + rhashtable_destroy(&adap->flower_tbl); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index 202d5c9ec303..050c8a50ae41 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -48,7 +48,7 @@ struct ch_tc_flower_entry { struct ch_filter_specification fs; struct ch_tc_flower_stats stats; unsigned long tc_flower_cookie; - struct hlist_node link; + struct rhash_head node; struct rcu_head rcu; spinlock_t lock; /* lock for stats */ u32 filter_id; @@ -115,6 +115,6 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, int cxgb4_tc_flower_stats(struct net_device *dev, struct tc_cls_flower_offload *cls); -void cxgb4_init_tc_flower(struct adapter *adap); +int cxgb4_init_tc_flower(struct adapter *adap); void cxgb4_cleanup_tc_flower(struct adapter *adap); #endif /* __CXGB4_TC_FLOWER_H */ From 3eb8b62d5a260fcd9683b0ce89beb3b28b12a304 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 1 Nov 2017 08:53:05 +0530 Subject: [PATCH 1691/2177] cxgb4: add support to create hash-filters via tc-flower offload Determine whether the flow classifies as exact-match with respect to 4-tuple and configured tuple mask in hw. If successfully classified as exact-match, offload the flow as hash-filter in hw. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 148 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 2 + .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 16 +- 3 files changed, 161 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 9b3ff6209eb5..abab67d52edb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -31,6 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#include #include "cxgb4.h" #include "t4_regs.h" @@ -765,6 +766,153 @@ static void fill_default_mask(struct ch_filter_specification *fs) fs->mask.fport = ~0; } +static bool is_addr_all_mask(u8 *ipmask, int family) +{ + if (family == AF_INET) { + struct in_addr *addr; + + addr = (struct in_addr *)ipmask; + if (addr->s_addr == 0xffffffff) + return true; + } else if (family == AF_INET6) { + struct in6_addr *addr6; + + addr6 = (struct in6_addr *)ipmask; + if (addr6->s6_addr32[0] == 0xffffffff && + addr6->s6_addr32[1] == 0xffffffff && + addr6->s6_addr32[2] == 0xffffffff && + addr6->s6_addr32[3] == 0xffffffff) + return true; + } + return false; +} + +static bool is_inaddr_any(u8 *ip, int family) +{ + int addr_type; + + if (family == AF_INET) { + struct in_addr *addr; + + addr = (struct in_addr *)ip; + if (addr->s_addr == htonl(INADDR_ANY)) + return true; + } else if (family == AF_INET6) { + struct in6_addr *addr6; + + addr6 = (struct in6_addr *)ip; + addr_type = ipv6_addr_type((const struct in6_addr *) + &addr6); + if (addr_type == IPV6_ADDR_ANY) + return true; + } + return false; +} + +bool is_filter_exact_match(struct adapter *adap, + struct ch_filter_specification *fs) +{ + struct tp_params *tp = &adap->params.tp; + u64 hash_filter_mask = tp->hash_filter_mask; + u32 mask; + + if (!is_hashfilter(adap)) + return false; + + if (fs->type) { + if (is_inaddr_any(fs->val.fip, AF_INET6) || + !is_addr_all_mask(fs->mask.fip, AF_INET6)) + return false; + + if (is_inaddr_any(fs->val.lip, AF_INET6) || + !is_addr_all_mask(fs->mask.lip, AF_INET6)) + return false; + } else { + if (is_inaddr_any(fs->val.fip, AF_INET) || + !is_addr_all_mask(fs->mask.fip, AF_INET)) + return false; + + if (is_inaddr_any(fs->val.lip, AF_INET) || + !is_addr_all_mask(fs->mask.lip, AF_INET)) + return false; + } + + if (!fs->val.lport || fs->mask.lport != 0xffff) + return false; + + if (!fs->val.fport || fs->mask.fport != 0xffff) + return false; + + if (tp->fcoe_shift >= 0) { + mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W; + if (mask && !fs->mask.fcoe) + return false; + } + + if (tp->port_shift >= 0) { + mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W; + if (mask && !fs->mask.iport) + return false; + } + + if (tp->vnic_shift >= 0) { + mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W; + + if ((adap->params.tp.ingress_config & VNIC_F)) { + if (mask && !fs->mask.pfvf_vld) + return false; + } else { + if (mask && !fs->mask.ovlan_vld) + return false; + } + } + + if (tp->vlan_shift >= 0) { + mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W; + if (mask && !fs->mask.ivlan) + return false; + } + + if (tp->tos_shift >= 0) { + mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W; + if (mask && !fs->mask.tos) + return false; + } + + if (tp->protocol_shift >= 0) { + mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W; + if (mask && !fs->mask.proto) + return false; + } + + if (tp->ethertype_shift >= 0) { + mask = (hash_filter_mask >> tp->ethertype_shift) & + FT_ETHERTYPE_W; + if (mask && !fs->mask.ethtype) + return false; + } + + if (tp->macmatch_shift >= 0) { + mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W; + if (mask && !fs->mask.macidx) + return false; + } + + if (tp->matchtype_shift >= 0) { + mask = (hash_filter_mask >> tp->matchtype_shift) & + FT_MPSHITTYPE_W; + if (mask && !fs->mask.matchtype) + return false; + } + if (tp->frag_shift >= 0) { + mask = (hash_filter_mask >> tp->frag_shift) & + FT_FRAGMENTATION_W; + if (mask && !fs->mask.frag) + return false; + } + return 
true; +} + static u64 hash_filter_ntuple(struct ch_filter_specification *fs, struct net_device *dev) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index 9475abd3384e..8db5fca6dcc9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -51,4 +51,6 @@ int delete_filter(struct adapter *adapter, unsigned int fidx); int writable_filter(struct filter_entry *f); void clear_all_filters(struct adapter *adapter); int init_hash_filter(struct adapter *adap); +bool is_filter_exact_match(struct adapter *adap, + struct ch_filter_specification *fs); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 3953bc1fdc20..d4a548a6a55c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -38,6 +38,7 @@ #include #include "cxgb4.h" +#include "cxgb4_filter.h" #include "cxgb4_tc_flower.h" #define STATS_CHECK_PERIOD (HZ / 2) @@ -672,11 +673,16 @@ int cxgb4_tc_flower_replace(struct net_device *dev, cxgb4_process_flow_match(dev, cls, fs); cxgb4_process_flow_actions(dev, cls, fs); - fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET); - if (fidx < 0) { - netdev_err(dev, "%s: No fidx for offload.\n", __func__); - ret = -ENOMEM; - goto free_entry; + fs->hash = is_filter_exact_match(adap, fs); + if (fs->hash) { + fidx = 0; + } else { + fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET); + if (fidx < 0) { + netdev_err(dev, "%s: No fidx for offload.\n", __func__); + ret = -ENOMEM; + goto free_entry; + } } init_completion(&ctx.completion); From 1969db47f8d0e800397abd4ee4e8d27d2b578587 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Nov 2017 00:08:04 -0700 Subject: [PATCH 1692/2177] bpf: fix verifier memory leaks fix verifier memory leaks Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5f26f7ad124f..2bb6d6aa7085 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -341,10 +341,12 @@ static int realloc_verifier_state(struct bpf_verifier_state *state, int size, return 0; } -static void free_verifier_state(struct bpf_verifier_state *state) +static void free_verifier_state(struct bpf_verifier_state *state, + bool free_self) { kfree(state->stack); - kfree(state); + if (free_self) + kfree(state); } /* copy verifier state from src to dst growing dst stack space @@ -382,6 +384,7 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, if (prev_insn_idx) *prev_insn_idx = head->prev_insn_idx; elem = head->next; + free_verifier_state(&head->st, false); kfree(head); env->head = elem; env->stack_size--; @@ -399,14 +402,14 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, if (!elem) goto err; - err = copy_verifier_state(&elem->st, cur); - if (err) - return NULL; elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; elem->next = env->head; env->head = elem; env->stack_size++; + err = copy_verifier_state(&elem->st, cur); + if (err) + goto err; if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { verbose(env, "BPF program is too complex\n"); goto err; @@ -3641,7 +3644,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; struct bpf_verifier_state *cur = env->cur_state; - int i; + int i, err; sl = env->explored_states[insn_idx]; if (!sl) @@ -3679,7 +3682,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return -ENOMEM; /* add new state to the head of linked list */ - copy_verifier_state(&new_sl->state, cur); + err = copy_verifier_state(&new_sl->state, cur); + if (err) { + free_verifier_state(&new_sl->state, false); + kfree(new_sl); + return err; + } new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; /* connect new state to parentage chain */ @@ -4424,6 +4432,7 @@ static void free_states(struct bpf_verifier_env *env) if (sl) while (sl != STATE_LIST_MARK) { sln = sl->next; + free_verifier_state(&sl->state, false); kfree(sl); sl = sln; } @@ -4494,7 +4503,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); - free_verifier_state(env->cur_state); + free_verifier_state(env->cur_state, true); env->cur_state = NULL; skip_full_check: @@ -4601,7 +4610,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); - free_verifier_state(env->cur_state); + free_verifier_state(env->cur_state, true); env->cur_state = NULL; skip_full_check: From e6cdfcc581866625980a89391be4e6a8b379d0c5 Mon Sep 17 00:00:00 2001 From: Parvi Kaustubhi Date: Wed, 1 Nov 2017 08:44:46 -0700 Subject: [PATCH 1693/2177] enic: reset fetch index Since we are allowing rx ring size modification, reset fetch index everytime. Otherwise it could have a stale value that can lead to a null pointer dereference. Signed-off-by: Govindarajulu Varadarajan Signed-off-by: Parvi Kaustubhi Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cisco/enic/vnic_rq.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c index 36bc2c71fba9..f8aa326d1d58 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c @@ -139,20 +139,8 @@ void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, unsigned int error_interrupt_enable, unsigned int error_interrupt_offset) { - u32 fetch_index = 0; - - /* Use current fetch_index as the ring starting point */ - fetch_index = ioread32(&rq->ctrl->fetch_index); - - if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ - /* Hardware surprise removal: reset fetch_index */ - fetch_index = 0; - } - - vnic_rq_init_start(rq, cq_index, - fetch_index, fetch_index, - error_interrupt_enable, - error_interrupt_offset); + vnic_rq_init_start(rq, cq_index, 0, 0, error_interrupt_enable, + error_interrupt_offset); } unsigned int vnic_rq_error_status(struct vnic_rq *rq) From ed519b7488a42ce549ef7eae8dd13e043dde10a4 Mon Sep 17 00:00:00 2001 From: Parvi Kaustubhi Date: Wed, 1 Nov 2017 08:44:47 -0700 Subject: [PATCH 1694/2177] enic: Add support for 'ethtool -g/-G' Add support for displaying and modifying rx and tx ring sizes using ethtool. Also, increasing version to 2.3.0.45 Signed-off-by: Parvi Kaustubhi Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic.h | 2 +- .../net/ethernet/cisco/enic/enic_ethtool.c | 77 +++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index ba032ac9ae86..6a9527004cb1 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -33,7 +33,7 @@ #define DRV_NAME "enic" #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" -#define DRV_VERSION "2.3.0.42" +#define DRV_VERSION "2.3.0.45" #define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc" #define ENIC_BARS_MAX 6 diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index fd3980cc1e34..462d0ce51240 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -176,6 +176,81 @@ static void enic_get_strings(struct net_device *netdev, u32 stringset, } } +static void enic_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct enic *enic = netdev_priv(netdev); + struct vnic_enet_config *c = &enic->config; + + ring->rx_max_pending = ENIC_MAX_RQ_DESCS; + ring->rx_pending = c->rq_desc_count; + ring->tx_max_pending = ENIC_MAX_WQ_DESCS; + ring->tx_pending = c->wq_desc_count; +} + +static int enic_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct enic *enic = netdev_priv(netdev); + struct vnic_enet_config *c = &enic->config; + int running = netif_running(netdev); + unsigned int rx_pending; + unsigned int tx_pending; + int err = 0; + + if (ring->rx_mini_max_pending || ring->rx_mini_pending) { + netdev_info(netdev, + "modifying mini ring params is not supported"); + return -EINVAL; + } + if (ring->rx_jumbo_max_pending || ring->rx_jumbo_pending) { + netdev_info(netdev, + "modifying jumbo ring params is not supported"); + return -EINVAL; + } + rx_pending = c->rq_desc_count; + tx_pending = c->wq_desc_count; + if (ring->rx_pending > ENIC_MAX_RQ_DESCS || + ring->rx_pending < ENIC_MIN_RQ_DESCS) { + 
netdev_info(netdev, "rx pending (%u) not in range [%u,%u]", + ring->rx_pending, ENIC_MIN_RQ_DESCS, + ENIC_MAX_RQ_DESCS); + return -EINVAL; + } + if (ring->tx_pending > ENIC_MAX_WQ_DESCS || + ring->tx_pending < ENIC_MIN_WQ_DESCS) { + netdev_info(netdev, "tx pending (%u) not in range [%u,%u]", + ring->tx_pending, ENIC_MIN_WQ_DESCS, + ENIC_MAX_WQ_DESCS); + return -EINVAL; + } + if (running) + dev_close(netdev); + c->rq_desc_count = + ring->rx_pending & 0xffffffe0; /* must be aligned to groups of 32 */ + c->wq_desc_count = + ring->tx_pending & 0xffffffe0; /* must be aligned to groups of 32 */ + enic_free_vnic_resources(enic); + err = enic_alloc_vnic_resources(enic); + if (err) { + netdev_err(netdev, + "Failed to alloc vNIC resources, aborting\n"); + enic_free_vnic_resources(enic); + goto err_out; + } + enic_init_vnic_resources(enic); + if (running) { + err = dev_open(netdev); + if (err) + goto err_out; + } + return 0; +err_out: + c->rq_desc_count = rx_pending; + c->wq_desc_count = tx_pending; + return err; +} + static int enic_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { @@ -509,6 +584,8 @@ static const struct ethtool_ops enic_ethtool_ops = { .set_msglevel = enic_set_msglevel, .get_link = ethtool_op_get_link, .get_strings = enic_get_strings, + .get_ringparam = enic_get_ringparam, + .set_ringparam = enic_set_ringparam, .get_sset_count = enic_get_sset_count, .get_ethtool_stats = enic_get_ethtool_stats, .get_coalesce = enic_get_coalesce, From 8af67d34915d879ea3a2e8ad153c71a65031d174 Mon Sep 17 00:00:00 2001 From: Parvi Kaustubhi Date: Wed, 1 Nov 2017 08:44:48 -0700 Subject: [PATCH 1695/2177] MAINTAINERS: update MAINTAINERS for cisco vic Add myself to list of maintainers for cisco vic ethernet nic driver. Remove Neel as he left. Signed-off-by: Parvi Kaustubhi Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9a24f56e0451..2ab243a11adf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3422,7 +3422,7 @@ F: drivers/scsi/snic/ CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti M: Govindarajulu Varadarajan <_govind@gmx.com> -M: Neel Patel +M: Parvi Kaustubhi S: Supported F: drivers/net/ethernet/cisco/enic/ From 22ac5ad4a7d4e201d19b7f04ce8d79346c80a34b Mon Sep 17 00:00:00 2001 From: Robert Hoo Date: Wed, 1 Nov 2017 11:41:09 +0100 Subject: [PATCH 1696/2177] samples/pktgen: Add some helper functions 1. given a device, get its NUMA belongings 2. given a device, get its queues' irq numbers. 3. given a NUMA node, get its cpu id list. Signed-off-by: Robert Hoo Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- samples/pktgen/functions.sh | 43 +++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index 205e4cde4601..f8bb3cd0f4ce 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -119,3 +119,46 @@ function root_check_run_with_sudo() { err 4 "cannot perform sudo run of $0" fi } + +# Exact input device's NUMA node info +function get_iface_node() +{ + local node=$( Date: Wed, 1 Nov 2017 11:41:14 +0100 Subject: [PATCH 1697/2177] samples/pktgen: add script pktgen_sample06_numa_awared_queue_irq_affinity.sh This script simply does: * Detect $DEV's NUMA node belonging. * Bind each thread (processor of NUMA locality) with each $DEV queue's irq affinity, 1:1 mapping. * How many '-t' threads input determines how many queues will be utilized. 
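In this copy the functions.sh hunk is cut off after "local node=$(", so the bodies of the three helpers the commit message describes are not visible. A minimal sketch of what such helpers typically look like, assuming the usual sysfs/procfs locations — only the function names come from the scripts that call them; the paths, the /proc/interrupts matching and the cpulist expansion below are illustrative assumptions, not the verbatim patch content:

# Sketch only -- assumed data sources, not the original hunk.
function get_iface_node()
{
    # PCI NICs expose their NUMA node; -1 means "no affinity", fall back to 0
    local node=$(cat /sys/class/net/$1/device/numa_node)
    [ "$node" = "-1" ] && node=0
    echo $node
}

function get_iface_irqs()
{
    # Assumption: queue IRQs can be found by matching the ifname in /proc/interrupts
    grep "$1" /proc/interrupts | cut -f1 -d: | sed 's/ //g'
}

function get_node_cpus()
{
    # Expand a cpulist like "0-7,16-23" into individual cpu ids
    local range
    for range in $(cat /sys/devices/system/node/node$1/cpulist | tr ',' ' '); do
        if [[ $range == *-* ]]; then
            seq ${range%-*} ${range#*-}
        else
            echo $range
        fi
    done
}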
If '-f' designates first cpu id, then offset in the NUMA node's cpu list. (Changes by Jesper: allow changing count from cmdline via '-n') Signed-off-by: Robert Hoo Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- ...sample06_numa_awared_queue_irq_affinity.sh | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100755 samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh new file mode 100755 index 000000000000..353adc17205e --- /dev/null +++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# +# Multiqueue: Using pktgen threads for sending on multiple CPUs +# * adding devices to kernel threads which are in the same NUMA node +# * bound devices queue's irq affinity to the threads, 1:1 mapping +# * notice the naming scheme for keeping device names unique +# * nameing scheme: dev@thread_number +# * flow variation via random UDP source port +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" +# +# Required param: -i dev in $DEV +source ${basedir}/parameters.sh + +# Base Config +DELAY="0" # Zero means max speed +[ -z "$COUNT" ] && COUNT="20000000" # Zero means indefinitely +[ -z "$CLONE_SKB" ] && CLONE_SKB="0" + +# Flow variation random source port between min and max +UDP_MIN=9 +UDP_MAX=109 + +node=`get_iface_node $DEV` +irq_array=(`get_iface_irqs $DEV`) +cpu_array=(`get_node_cpus $node`) + +[ $THREADS -gt ${#irq_array[*]} -o $THREADS -gt ${#cpu_array[*]} ] && \ + err 1 "Thread number $THREADS exceeds: min (${#irq_array[*]},${#cpu_array[*]})" + +# (example of setting default params in your script) +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi +[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" + +# General cleanup everything since last run +pg_ctrl "reset" + +# Threads are specified with parameter -t value in $THREADS +for ((i = 0; i < $THREADS; i++)); do + # The device name is extended with @name, using thread number to + # make then unique, but any name will do. 
+ # Set the queue's irq affinity to this $thread (processor) + # if '-f' is designated, offset cpu id + thread=${cpu_array[$((i+F_THREAD))]} + dev=${DEV}@${thread} + echo $thread > /proc/irq/${irq_array[$i]}/smp_affinity_list + info "irq ${irq_array[$i]} is set affinity to `cat /proc/irq/${irq_array[$i]}/smp_affinity_list`" + + # Add remove all other devices and add_device $dev to thread + pg_thread $thread "rem_device_all" + pg_thread $thread "add_device" $dev + + # select queue and bind the queue and $dev in 1:1 relationship + queue_num=$i + info "queue number is $queue_num" + pg_set $dev "queue_map_min $queue_num" + pg_set $dev "queue_map_max $queue_num" + + # Notice config queue to map to cpu (mirrors smp_processor_id()) + # It is beneficial to map IRQ /proc/irq/*/smp_affinity 1:1 to CPU number + pg_set $dev "flag QUEUE_MAP_CPU" + + # Base config of dev + pg_set $dev "count $COUNT" + pg_set $dev "clone_skb $CLONE_SKB" + pg_set $dev "pkt_size $PKT_SIZE" + pg_set $dev "delay $DELAY" + + # Flag example disabling timestamping + pg_set $dev "flag NO_TIMESTAMP" + + # Destination + pg_set $dev "dst_mac $DST_MAC" + pg_set $dev "dst$IP6 $DEST_IP" + + # Setup random UDP port src range + pg_set $dev "flag UDPSRC_RND" + pg_set $dev "udp_src_min $UDP_MIN" + pg_set $dev "udp_src_max $UDP_MAX" +done + +# start_run +echo "Running... ctrl^C to stop" >&2 +pg_ctrl "start" +echo "Done" >&2 + +# Print results +for ((i = 0; i < $THREADS; i++)); do + thread=${cpu_array[$((i+F_THREAD))]} + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" +done From 9efc44d74b586218e923e3dafb3462d21948c5c6 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 1 Nov 2017 11:41:19 +0100 Subject: [PATCH 1698/2177] samples/pktgen: update sample03, no need for clones when bursting Like sample05, don't use pktgen clone_skb feature when using 'burst' feature, it is not really needed. This brings the burst users in sync. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- samples/pktgen/pktgen_sample03_burst_single_flow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index 8d26e0ca683d..8a46daf27966 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -30,7 +30,7 @@ if [ -z "$DEST_IP" ]; then fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=32 -[ -z "$CLONE_SKB" ] && CLONE_SKB="100000" +[ -z "$CLONE_SKB" ] && CLONE_SKB="0" # No need for clones when bursting [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely # Base Config From a4b6ade8359fc265fb4f8691fea33f4eaa66c951 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 1 Nov 2017 11:41:24 +0100 Subject: [PATCH 1699/2177] samples/pktgen: remove remaining old pktgen sample scripts Since commit 0f06a6787e05 ("samples: Add an IPv6 '-6' option to the pktgen scripts") the newer pktgen_sampleXX script does show howto use IPv6 with pktgen. Thus, there is no longer a reason to keep the older sample scripts around. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. 
Miller --- samples/pktgen/pktgen.conf-1-1-ip6 | 60 --------------------- samples/pktgen/pktgen.conf-1-1-ip6-rdos | 63 ---------------------- samples/pktgen/pktgen.conf-1-2 | 69 ------------------------- 3 files changed, 192 deletions(-) delete mode 100755 samples/pktgen/pktgen.conf-1-1-ip6 delete mode 100755 samples/pktgen/pktgen.conf-1-1-ip6-rdos delete mode 100755 samples/pktgen/pktgen.conf-1-2 diff --git a/samples/pktgen/pktgen.conf-1-1-ip6 b/samples/pktgen/pktgen.conf-1-1-ip6 deleted file mode 100755 index 0b9ffd47fd41..000000000000 --- a/samples/pktgen/pktgen.conf-1-1-ip6 +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -#modprobe pktgen - - -function pgset() { - local result - - echo $1 > $PGDEV - - result=`cat $PGDEV | fgrep "Result: OK:"` - if [ "$result" = "" ]; then - cat $PGDEV | fgrep Result: - fi -} - -# Config Start Here ----------------------------------------------------------- - - -# thread config -# Each CPU has its own thread. One CPU example. We add eth1. -# IPv6. Note increase in minimal packet length - -PGDEV=/proc/net/pktgen/kpktgend_0 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth1" - pgset "add_device eth1" - - -# device config -# delay 0 - -CLONE_SKB="clone_skb 1000000" -# NIC adds 4 bytes CRC -PKT_SIZE="pkt_size 66" - -# COUNT 0 means forever -#COUNT="count 0" -COUNT="count 10000000" -DELAY="delay 0" - -PGDEV=/proc/net/pktgen/eth1 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - pgset "dst6 fec0::1" - pgset "src6 fec0::2" - pgset "dst_mac 00:04:23:08:91:dc" - -# Time to run -PGDEV=/proc/net/pktgen/pgctrl - - echo "Running... ctrl^C to stop" - trap true INT - pgset "start" - echo "Done" - cat /proc/net/pktgen/eth1 diff --git a/samples/pktgen/pktgen.conf-1-1-ip6-rdos b/samples/pktgen/pktgen.conf-1-1-ip6-rdos deleted file mode 100755 index ad98e5f40776..000000000000 --- a/samples/pktgen/pktgen.conf-1-1-ip6-rdos +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -#modprobe pktgen - - -function pgset() { - local result - - echo $1 > $PGDEV - - result=`cat $PGDEV | fgrep "Result: OK:"` - if [ "$result" = "" ]; then - cat $PGDEV | fgrep Result: - fi -} - -# Config Start Here ----------------------------------------------------------- - - -# thread config -# Each CPU has its own thread. One CPU example. We add eth1. -# IPv6. Note increase in minimal packet length - -PGDEV=/proc/net/pktgen/kpktgend_0 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth1" - pgset "add_device eth1" - - -# device config -# delay 0 means maximum speed. - -# We need to do alloc for every skb since we cannot clone here. -CLONE_SKB="clone_skb 0" - -# NIC adds 4 bytes CRC -PKT_SIZE="pkt_size 66" - -# COUNT 0 means forever -#COUNT="count 0" -COUNT="count 10000000" -DELAY="delay 0" - -PGDEV=/proc/net/pktgen/eth1 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - pgset "dst6_min fec0::1" - pgset "dst6_max fec0::FFFF:FFFF" - - pgset "dst_mac 00:04:23:08:91:dc" - -# Time to run -PGDEV=/proc/net/pktgen/pgctrl - - echo "Running... 
ctrl^C to stop" - trap true INT - pgset "start" - echo "Done" - cat /proc/net/pktgen/eth1 diff --git a/samples/pktgen/pktgen.conf-1-2 b/samples/pktgen/pktgen.conf-1-2 deleted file mode 100755 index ba4eb26e168d..000000000000 --- a/samples/pktgen/pktgen.conf-1-2 +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash - -#modprobe pktgen - - -function pgset() { - local result - - echo $1 > $PGDEV - - result=`cat $PGDEV | fgrep "Result: OK:"` - if [ "$result" = "" ]; then - cat $PGDEV | fgrep Result: - fi -} - -# Config Start Here ----------------------------------------------------------- - - -# thread config -# One CPU means one thread. One CPU example. We add eth1, eth2 respectivly. - -PGDEV=/proc/net/pktgen/kpktgend_0 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth1" - pgset "add_device eth1" - echo "Adding eth2" - pgset "add_device eth2" - - -# device config -# delay 0 means maximum speed. - -CLONE_SKB="clone_skb 1000000" -# NIC adds 4 bytes CRC -PKT_SIZE="pkt_size 60" - -# COUNT 0 means forever -#COUNT="count 0" -COUNT="count 10000000" -DELAY="delay 0" - -PGDEV=/proc/net/pktgen/eth1 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - pgset "dst 10.10.11.2" - pgset "dst_mac 00:04:23:08:91:dc" - -PGDEV=/proc/net/pktgen/eth2 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - pgset "dst 192.168.2.2" - pgset "dst_mac 00:04:23:08:91:de" - -# Time to run -PGDEV=/proc/net/pktgen/pgctrl - - echo "Running... ctrl^C to stop" - trap true INT - pgset "start" - echo "Done" - cat /proc/net/pktgen/eth1 /proc/net/pktgen/eth2 From 3a1246fcbcb43b33c4540d74c36119d6389a24b4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Nov 2017 08:49:45 +0000 Subject: [PATCH 1700/2177] wan: wanxl: remove redundant assignment to stat stat set to zero and the value is never read, instead stat is set again in the do-loop. Hence the setting to zero is redundant and can be removed. Cleans up clang warning: drivers/net/wan/wanxl.c:737:2: warning: Value stored to 'stat' is never read Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wan/wanxl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index 0c7317520ed3..d573a57bc301 100644 --- a/drivers/net/wan/wanxl.c +++ b/drivers/net/wan/wanxl.c @@ -734,7 +734,6 @@ static int wanxl_pci_init_one(struct pci_dev *pdev, return -ENODEV; } - stat = 0; timeout = jiffies + 5 * HZ; do { if ((stat = readl(card->plx + PLX_MAILBOX_5)) != 0) From 5618c8e24ad6ab09282f6583a228d80c1fd14c65 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Nov 2017 08:57:37 +0000 Subject: [PATCH 1701/2177] net: dl2k: remove redundant re-assignment to np The pointer np is initialized and then re-assigned the same value a few lines later. Remove the redundant duplicated assignment. Cleans up clang warning: drivers/net/ethernet/dlink/dl2k.c:314:25: warning: Value stored to 'np' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/net/ethernet/dlink/dl2k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index a2f6758d38dd..f0536b16b3c3 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -313,7 +313,7 @@ find_miiphy (struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); int i, phy_found = 0; - np = netdev_priv(dev); + np->phy_addr = 1; for (i = 31; i >= 0; i--) { From a666960d182cfb7074640bdb004633ffb2e58f26 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Nov 2017 09:09:13 +0000 Subject: [PATCH 1702/2177] liquidio: remove redundant setting of inst_processed to zero The zero value assigned to inst_processed at the end of each iteration of the do-while loop is overwritten on the next iteration and hence it is a redundant assignment and can be removed. Cleans up clang warning: drivers/net/ethernet/cavium/liquidio/request_manager.c:480:3: warning: Value stored to 'inst_processed' is never read Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/request_manager.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index a10459742ae4..e07d2093b971 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -489,8 +489,6 @@ octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq, } tot_inst_processed += inst_processed; - inst_processed = 0; - } while (tot_inst_processed < napi_budget); if (napi_budget && (tot_inst_processed >= napi_budget)) From ad88d35a6216c54a005480d2693ed0a888ac1b7c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Nov 2017 10:17:15 +0000 Subject: [PATCH 1703/2177] net: hns3: remove a couple of redundant assignments The assignment to kinfo is redundant as this is a duplicate of the initialiation of kinfo a few lines earlier, so it can be removed. The assignment to v_tc_info is never read, so this variable is redundant and can be removed completely. Cleans up two clang warnings: drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c:433:34: warning: Value stored to 'kinfo' during its initialization is never read drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c:775:3: warning: Value stored to 'v_tc_info' is never read Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 1ae6eae82eb3..7bfa2e5497cb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -434,7 +434,6 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) struct hclge_dev *hdev = vport->back; u8 i; - kinfo = &vport->nic.kinfo; vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit; kinfo->num_tc = min_t(u16, kinfo->num_tqps, hdev->tm_info.num_tc); @@ -766,13 +765,11 @@ static int hclge_tm_pri_vnet_base_shaper_qs_cfg(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; struct hclge_dev *hdev = vport->back; - struct hnae3_tc_info *v_tc_info; u8 ir_u, ir_b, ir_s; u32 i; int ret; for (i = 0; i < kinfo->num_tc; i++) { - v_tc_info = &kinfo->tc_info[i]; ret = hclge_shaper_para_calc( hdev->tm_info.tc_info[i].bw_limit, HCLGE_SHAPER_LVL_QSET, From 928990631327cf00a9195e30fa22f7ae5f8d7e67 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 1 Nov 2017 12:18:13 +0200 Subject: [PATCH 1704/2177] net: bridge: add notifications for the bridge dev on vlan change Currently the bridge device doesn't generate any notifications upon vlan modifications on itself because it doesn't use the generic bridge notifications. With the recent changes we know if anything was modified in the vlan config thus we can generate a notification when necessary for the bridge device so add support to br_ifinfo_notify() similar to how other combined functions are done - if port is present it takes precedence, otherwise notify about the bridge. I've explicitly marked the locations where the notification should be always for the port by setting bridge to NULL. I've also taken the liberty to rearrange each modified function's local variables in reverse xmas tree as well. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br.c | 2 +- net/bridge/br_if.c | 4 +-- net/bridge/br_ioctl.c | 2 +- net/bridge/br_netlink.c | 58 +++++++++++++++++++++------------------ net/bridge/br_private.h | 3 +- net/bridge/br_stp.c | 6 ++-- net/bridge/br_stp_if.c | 4 +-- net/bridge/br_stp_timer.c | 2 +- net/bridge/br_sysfs_if.c | 2 +- 9 files changed, 45 insertions(+), 38 deletions(-) diff --git a/net/bridge/br.c b/net/bridge/br.c index 1407d1ba7577..6bf06e756df2 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -112,7 +112,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v /* Events that may cause spanning tree to refresh */ if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || event == NETDEV_CHANGE || event == NETDEV_DOWN) - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); return NOTIFY_DONE; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ae38547bbf91..9ba4ed65c52b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -271,7 +271,7 @@ static void del_nbp(struct net_bridge_port *p) br_stp_disable_port(p); spin_unlock_bh(&br->lock); - br_ifinfo_notify(RTM_DELLINK, p); + br_ifinfo_notify(RTM_DELLINK, NULL, p); list_del_rcu(&p->list); if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom) @@ -589,7 +589,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, br_stp_enable_port(p); spin_unlock_bh(&br->lock); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); if (changed_addr) call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 16d01a3ff33f..73b957fd639d 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -293,7 +293,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ret) { if (p) - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); else netdev_state_change(br->dev); } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 26aeb0b5cf30..67bae0f11c67 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -361,14 +361,14 @@ nla_put_failure: * Contains port and master info as well as carrier and bridge state. */ static int br_fill_ifinfo(struct sk_buff *skb, - struct net_bridge_port *port, + const struct net_bridge_port *port, u32 pid, u32 seq, int event, unsigned int flags, u32 filter_mask, const struct net_device *dev) { + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; - u8 operstate = netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN; if (port) br = port->br; @@ -454,28 +454,36 @@ nla_put_failure: return -EMSGSIZE; } -/* - * Notify listeners of a change in port information - */ -void br_ifinfo_notify(int event, struct net_bridge_port *port) +/* Notify listeners of a change in bridge or port information */ +void br_ifinfo_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port) { - struct net *net; + u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; + struct net_device *dev; struct sk_buff *skb; int err = -ENOBUFS; - u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; + struct net *net; + u16 port_no = 0; - if (!port) + if (WARN_ON(!port && !br)) return; - net = dev_net(port->dev); - br_debug(port->br, "port %u(%s) event %d\n", - (unsigned int)port->port_no, port->dev->name, event); + if (port) { + dev = port->dev; + br = port->br; + port_no = port->port_no; + } else { + dev = br->dev; + } - skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC); + net = dev_net(dev); + br_debug(br, "port %u(%s) event %d\n", port_no, dev->name, event); + + skb = nlmsg_new(br_nlmsg_size(dev, filter), GFP_ATOMIC); if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -488,7 +496,6 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } - /* * Dump information about all ports, in response to GETLINK */ @@ -809,10 +816,11 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) /* Change state and parameters on port. */ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { + struct net_bridge *br = (struct net_bridge *)netdev_priv(dev); + struct nlattr *tb[IFLA_BRPORT_MAX + 1]; + struct net_bridge_port *p; struct nlattr *protinfo; struct nlattr *afspec; - struct net_bridge_port *p; - struct nlattr *tb[IFLA_BRPORT_MAX + 1]; bool changed = false; int err = 0; @@ -852,13 +860,11 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) changed = true; } - if (afspec) { - err = br_afspec((struct net_bridge *)netdev_priv(dev), p, - afspec, RTM_SETLINK, &changed); - } + if (afspec) + err = br_afspec(br, p, afspec, RTM_SETLINK, &changed); if (changed) - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, br, p); out: return err; } @@ -866,8 +872,9 @@ out: /* Delete port information */ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { - struct nlattr *afspec; + struct net_bridge *br = (struct net_bridge *)netdev_priv(dev); struct net_bridge_port *p; + struct nlattr *afspec; bool changed = false; int err = 0; @@ -880,13 +887,12 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) if (!p && !(dev->priv_flags & IFF_EBRIDGE)) return -EINVAL; - err = br_afspec((struct net_bridge *)netdev_priv(dev), p, - afspec, RTM_DELLINK, &changed); + err = br_afspec(br, p, afspec, RTM_DELLINK, &changed); if (changed) /* Send RTM_NEWLINK because userspace * expects RTM_NEWLINK for vlan dels */ - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, br, p); return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 860e4afaf71a..40553d832b6e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1071,7 +1071,8 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); 
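The essential change in this patch is the calling convention of br_ifinfo_notify(): a non-NULL port always takes precedence, while a NULL port with a non-NULL bridge makes the notification describe the bridge device itself, which is what finally generates RTM_NEWLINK on vlan changes of the bridge. A minimal sketch of the two call shapes, assuming hypothetical caller names (example_port_event and example_bridge_event are illustrations, not functions from the patch):

/* Sketch of the new call shapes, not a verbatim hunk from this patch. */

/* Port event: the bridge argument may be NULL, the port takes precedence. */
static void example_port_event(struct net_bridge_port *p)
{
	br_ifinfo_notify(RTM_NEWLINK, NULL, p);		/* describes p->dev */
}

/* Bridge-level event, e.g. a vlan change on the bridge device itself. */
static void example_bridge_event(struct net_bridge *br)
{
	br_ifinfo_notify(RTM_NEWLINK, br, NULL);	/* describes br->dev */
}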
void br_netlink_fini(void); -void br_ifinfo_notify(int event, struct net_bridge_port *port); +void br_ifinfo_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port); int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 8f56c2d1f1a7..b6941961a876 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -123,7 +123,7 @@ static void br_root_port_block(const struct net_bridge *br, (unsigned int) p->port_no, p->dev->name); br_set_state(p, BR_STATE_LISTENING); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); if (br->forward_delay > 0) mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); @@ -403,7 +403,7 @@ static void br_make_blocking(struct net_bridge_port *p) br_topology_change_detection(p->br); br_set_state(p, BR_STATE_BLOCKING); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); del_timer(&p->forward_delay_timer); } @@ -426,7 +426,7 @@ static void br_make_forwarding(struct net_bridge_port *p) else br_set_state(p, BR_STATE_LEARNING); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); if (br->forward_delay != 0) mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 89110319ef0f..808e2b914015 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -96,7 +96,7 @@ void br_stp_enable_port(struct net_bridge_port *p) { br_init_port(p); br_port_state_selection(p->br); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); } /* called under bridge lock */ @@ -111,7 +111,7 @@ void br_stp_disable_port(struct net_bridge_port *p) p->topology_change_ack = 0; p->config_pending = 0; - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); del_timer(&p->message_age_timer); del_timer(&p->forward_delay_timer); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 60b6fe277a8b..b54c1a331450 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -99,7 +99,7 @@ static void br_forward_delay_timer_expired(unsigned long arg) netif_carrier_on(br->dev); } rcu_read_lock(); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); rcu_read_unlock(); spin_unlock(&br->lock); } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 0a1fa9ccd8b7..0254c35b2bf0 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -280,7 +280,7 @@ static ssize_t brport_store(struct kobject *kobj, ret = brport_attr->store(p, val); spin_unlock_bh(&p->br->lock); if (!ret) { - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); ret = count; } } From 0b5a89caee5c9958c18cd933c7f8891e35b21781 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 1 Nov 2017 11:47:38 +0100 Subject: [PATCH 1705/2177] net: sched: remove unused tc_should_offload helper tc_should_offload is no longer used, remove it. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 37c5ef766655..108dcdd96421 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -623,13 +623,6 @@ static inline bool tc_skip_hw(u32 flags) return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; } -static inline bool tc_should_offload(const struct net_device *dev, u32 flags) -{ - if (tc_skip_hw(flags)) - return false; - return tc_can_offload(dev); -} - static inline bool tc_skip_sw(u32 flags) { return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false; From 44ae12a768b7212976a362c590075716a77e8f28 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 1 Nov 2017 11:47:39 +0100 Subject: [PATCH 1706/2177] net: sched: move the can_offload check from binding phase to rule insertion phase This restores the original behaviour before the block callbacks were introduced. Allow the drivers to do binding of block always, no matter if the NETIF_F_HW_TC feature is on or off. Move the check to the block callback which is called for rule insertion. Reported-by: Alexander Duyck Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 +++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 +++ drivers/net/ethernet/netronome/nfp/bpf/main.c | 3 +++ drivers/net/ethernet/netronome/nfp/flower/offload.c | 3 +++ net/dsa/slave.c | 3 +++ net/sched/cls_api.c | 2 +- 11 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5ce950629ce9..c3dfaa5151aa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7347,7 +7347,7 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct bnxt *bp = cb_priv; - if (!bnxt_tc_flower_enabled(bp)) + if (!bnxt_tc_flower_enabled(bp) || !tc_can_offload(bp->dev)) return -EOPNOTSUPP; switch (type) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index c1761ed5785e..b6aa7db99705 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -124,7 +124,7 @@ static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type, struct bnxt *bp = vf_rep->bp; int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid; - if (!bnxt_tc_flower_enabled(vf_rep->bp)) + if (!bnxt_tc_flower_enabled(vf_rep->bp) || !tc_can_offload(bp->dev)) return -EOPNOTSUPP; switch (type) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 8fd41917c07a..6f900ffe25cc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2943,6 +2943,9 @@ static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return -EINVAL; } + if (!tc_can_offload(dev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSU32: return cxgb_setup_tc_cls_u32(dev, type_data); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 38bd2e339e48..507977994a03 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9386,6 +9386,9 @@ static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct ixgbe_adapter *adapter = cb_priv; + if (!tc_can_offload(adapter->netdev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSU32: return ixgbe_setup_tc_cls_u32(adapter, type_data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 560b208c0483..28ae00b3eb88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3106,6 +3106,9 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; + if (!tc_can_offload(priv->netdev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_setup_tc_cls_flower(priv, type_data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0edb7065d811..2c43606c26b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -682,6 +682,9 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; + if (!tc_can_offload(priv->netdev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 021926974da6..3f4be9556e56 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1738,6 +1738,9 @@ static int mlxsw_sp_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct mlxsw_sp_port *mlxsw_sp_port = cb_priv; + if (!tc_can_offload(mlxsw_sp_port->dev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSMATCHALL: return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index f15a186f6c87..04424db24b80 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -121,6 +121,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, struct tc_cls_bpf_offload *cls_bpf = type_data; struct nfp_net *nn = cb_priv; + if (!tc_can_offload(nn->dp.netdev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSBPF: if (!nfp_net_ebpf_capable(nn) || diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index c47753fdb55b..7c6cab176293 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -470,6 +470,9 @@ static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, { struct nfp_net *nn = cb_priv; + if (!tc_can_offload(nn->dp.netdev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSFLOWER: return nfp_flower_repr_offload(nn->app, nn->port->netdev, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 48b954a76b0d..9b75d0ac4092 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -799,6 +799,9 @@ static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct net_device *dev = cb_priv; + if (!tc_can_offload(dev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSMATCHALL: return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress); 
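Every driver hunk in this patch adds the same guard, so after the change a block can always be bound, and rules offered to a device with NETIF_F_HW_TC disabled are rejected only at insertion time with -EOPNOTSUPP. A condensed sketch of the resulting callback shape, assuming a hypothetical driver (the foo_* names and foo_priv are placeholders, not symbols from any of the drivers touched here):

/* Sketch of the common pattern, not a verbatim hunk. */
static int foo_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	struct foo_priv *priv = cb_priv;	/* hypothetical driver private data */

	/* the NETIF_F_HW_TC check now happens per rule, not at block bind time */
	if (!tc_can_offload(priv->netdev))
		return -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return foo_setup_tc_cls_flower(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}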
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2c03fcbc7188..15e3216ef25d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -256,7 +256,7 @@ static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; struct tc_block_offload bo = {}; - if (!tc_can_offload(dev)) + if (!dev->netdev_ops->ndo_setup_tc) return; bo.command = command; bo.binder_type = ei->binder_type; From 7612fb0387d6ffcfc3173527466fe3f596657c58 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 1 Nov 2017 11:47:40 +0100 Subject: [PATCH 1707/2177] net: sched: remove tc_can_offload check from egdev call Since the only user, mlx5 driver does the check in mlx5e_setup_tc_block_cb, no need to check here. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 15e3216ef25d..a26c690b48ac 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1206,7 +1206,7 @@ static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, if (!a->ops->get_dev) continue; dev = a->ops->get_dev(a); - if (!dev || !tc_can_offload(dev)) + if (!dev) continue; ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop); if (ret < 0) From 70b5aee46782208c14d93b715e9f62f7fec844f1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 1 Nov 2017 11:47:41 +0100 Subject: [PATCH 1708/2177] net: sched: remove ndo_setup_tc check from tc_can_offload Since tc_can_offload is always called from block callback or egdev callback, no need to check if ndo_setup_tc exists. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 108dcdd96421..d15c40c7bde7 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -611,11 +611,7 @@ struct tc_cls_u32_offload { static inline bool tc_can_offload(const struct net_device *dev) { - if (!(dev->features & NETIF_F_HW_TC)) - return false; - if (!dev->netdev_ops->ndo_setup_tc) - return false; - return true; + return dev->features & NETIF_F_HW_TC; } static inline bool tc_skip_hw(u32 flags) From 03c4cc385faaa46e5877f499c6b997fef792f8d3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 1 Nov 2017 12:44:45 +0100 Subject: [PATCH 1709/2177] bpf: cpumap micro-optimization in cpu_map_enqueue Discovered that the compiler laid-out asm code in suboptimal way when studying perf report during benchmarking of cpumap. Help the compiler by the marking unlikely code paths. Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/cpumap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 86e29cbf7827..ce5b669003b2 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -208,7 +208,7 @@ static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp) headroom = xdp->data - xdp->data_hard_start; metasize = xdp->data - xdp->data_meta; metasize = metasize > 0 ? 
metasize : 0; - if ((headroom - metasize) < sizeof(*xdp_pkt)) + if (unlikely((headroom - metasize) < sizeof(*xdp_pkt))) return NULL; /* Store info in top of packet */ @@ -656,7 +656,7 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct xdp_pkt *xdp_pkt; xdp_pkt = convert_to_xdp_pkt(xdp); - if (!xdp_pkt) + if (unlikely(!xdp_pkt)) return -EOVERFLOW; /* Info needed when constructing SKB on remote CPU */ From a35c52b71580a1fb29df11270b1461f6d17f5670 Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Wed, 1 Nov 2017 21:10:32 +0800 Subject: [PATCH 1710/2177] net: dpaa: fix maybe uninitialized var in dpaa_open() Signed-off-by: yuan linyu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index a8d0be824149..68f0ac129ba4 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2468,7 +2468,8 @@ static int dpaa_open(struct net_device *net_dev) mac_dev = priv->mac_dev; dpaa_eth_napi_enable(priv); - if (dpaa_phy_init(net_dev)) + err = dpaa_phy_init(net_dev); + if (err) goto phy_init_failed; for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { From d7bcde417b6b2e6f99ed9c2c38a0771b66efb060 Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Wed, 1 Nov 2017 21:11:11 +0800 Subject: [PATCH 1711/2177] net: dpaa: remove init which already done in per-cpu allocation Signed-off-by: yuan linyu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 68f0ac129ba4..969f6b12952e 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2669,7 +2669,6 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl) static int dpaa_eth_probe(struct platform_device *pdev) { struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM] = {NULL}; - struct dpaa_percpu_priv *percpu_priv; struct net_device *net_dev = NULL; struct dpaa_fq *dpaa_fq, *tmp; struct dpaa_priv *priv = NULL; @@ -2815,10 +2814,6 @@ static int dpaa_eth_probe(struct platform_device *pdev) err = -ENOMEM; goto free_dpaa_fqs; } - for_each_possible_cpu(i) { - percpu_priv = per_cpu_ptr(priv->percpu_priv, i); - memset(percpu_priv, 0, sizeof(*percpu_priv)); - } priv->num_tc = 1; netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM); From 5d42ced1950c7a7b5f5aa5c1c7e2c78dde9f8ca4 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Wed, 1 Nov 2017 10:38:24 -0700 Subject: [PATCH 1712/2177] nfp: bpf: rename ALU_OP_NEG to ALU_OP_NOT The current ALU_OP_NEG is Op encoding 0x4 for NPF ALU instruction. It is actually performing "~B" operation which is bitwise NOT. The using naming ALU_OP_NEG is misleading as NEG is -B which is not the same as ~B. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_asm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index e7eeb7a07f81..369173100fcf 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -944,7 +944,7 @@ wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) if (alu_op == ALU_OP_XOR) { if (!~imm) emit_alu(nfp_prog, reg_both(dst), reg_none(), - ALU_OP_NEG, reg_b(dst)); + ALU_OP_NOT, reg_b(dst)); if (!imm || !~imm) return; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index f4d1df3a1925..74d0c11ab2f9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -174,7 +174,7 @@ enum shf_sc { enum alu_op { ALU_OP_NONE = 0x00, ALU_OP_ADD = 0x01, - ALU_OP_NEG = 0x04, + ALU_OP_NOT = 0x04, ALU_OP_AND = 0x08, ALU_OP_SUB_C = 0x0d, ALU_OP_ADD_C = 0x11, From 254ef4d746878162bb095484fc4b53d713620c33 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Wed, 1 Nov 2017 10:38:25 -0700 Subject: [PATCH 1713/2177] nfp: bpf: support [BPF_ALU | BPF_ALU64] | BPF_NEG This patch supports BPF_NEG under both BPF_ALU64 and BPF_ALU. LLVM recently starts to generate it. NOTE: BPF_NEG takes single operand which is an register and serve as both input and output. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 24 ++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 369173100fcf..2609a2487100 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1218,6 +1218,18 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0), + ALU_OP_SUB, reg_b(insn->dst_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0), + ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1)); + + return 0; +} + static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -1338,6 +1350,16 @@ static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); } +static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u8 dst = meta->insn.dst_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -1847,6 +1869,7 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_NEG] = neg_reg64, [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, @@ -1861,6 +1884,7 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU | BPF_ADD | BPF_K] = add_imm, 
[BPF_ALU | BPF_SUB | BPF_X] = sub_reg, [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_NEG] = neg_reg, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, [BPF_ALU | BPF_END | BPF_X] = end_reg32, [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, From 3ded76a8ff53fd2a9b011e86c6f3588a984d432d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 1 Nov 2017 11:29:47 -0700 Subject: [PATCH 1714/2177] net: systemport: Only inspect valid switch port & queues Hesoteric board configurations where port 0 is not available would still make SYSTEMPORT inspect the switch port 0, queue 0, which, not being enabled, would cause transmit timeouts over time. Just ignore those unconfigured rings instead. Fixes: 84ff33eeb23d ("net: systemport: Establish DSA network device queue mapping") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 9 +++++++-- drivers/net/ethernet/broadcom/bcmsysport.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index dcee843d05d7..e6da9b165bbe 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1420,8 +1420,12 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, /* Configure QID and port mapping */ reg = tdma_readl(priv, TDMA_DESC_RING_MAPPING(index)); reg &= ~(RING_QID_MASK | RING_PORT_ID_MASK << RING_PORT_ID_SHIFT); - reg |= ring->switch_queue & RING_QID_MASK; - reg |= ring->switch_port << RING_PORT_ID_SHIFT; + if (ring->inspect) { + reg |= ring->switch_queue & RING_QID_MASK; + reg |= ring->switch_port << RING_PORT_ID_SHIFT; + } else { + reg |= RING_IGNORE_STATUS; + } tdma_writel(priv, reg, TDMA_DESC_RING_MAPPING(index)); tdma_writel(priv, 0, TDMA_DESC_RING_PCP_DEI_VID(index)); @@ -2108,6 +2112,7 @@ static int bcm_sysport_map_queues(struct net_device *dev, */ ring->switch_queue = q; ring->switch_port = port; + ring->inspect = true; priv->ring_map[q + port * num_tx_queues] = ring; /* Set all queues as being used now */ diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 82f70a6783cb..f5a984c1c986 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -714,6 +714,7 @@ struct bcm_sysport_tx_ring { unsigned long bytes; /* bytes statistics */ unsigned int switch_queue; /* switch port queue number */ unsigned int switch_port; /* switch port queue number */ + bool inspect; /* inspect switch port and queue */ }; /* Driver private structure */ From 1495dc9f0a711a54f8fec849ce7f3a8f585a11e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Nov 2017 11:48:00 -0700 Subject: [PATCH 1715/2177] security: bpf: replace include of linux/bpf.h with forward declarations Touching linux/bpf.h makes us rebuild a surprisingly large portion of the kernel. Remove the unnecessary dependency from security.h, it only needs forward declarations. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/security.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/security.h b/include/linux/security.h index 18800b0911e5..73f1ef625d40 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,7 +31,6 @@ #include #include #include -#include struct linux_binprm; struct cred; @@ -1732,6 +1731,10 @@ static inline void securityfs_remove(struct dentry *dentry) #endif #ifdef CONFIG_BPF_SYSCALL +union bpf_attr; +struct bpf_map; +struct bpf_prog; +struct bpf_prog_aux; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); From b06723da824af1e979eb1699623881b5f45a783c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 1 Nov 2017 23:58:09 +0100 Subject: [PATCH 1716/2177] bpf: minor cleanups after merge Two minor cleanups after Dave's recent merge in f8ddadc4db6c ("Merge git://git.kernel.org...") of net into net-next in order to get the code in line with what was done originally in the net tree: i) use max() instead of max_t() since both ranges are u16, ii) don't split the direct access test cases in the middle with bpf_exit test cases from 390ee7e29fc ("bpf: enforce return code for cgroup-bpf programs"). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- tools/testing/selftests/bpf/test_verifier.c | 144 ++++++++++---------- 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2bb6d6aa7085..2cc3e9486a1f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2532,7 +2532,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, continue; reg = &state->stack[i].spilled_ptr; if (reg->type == type && reg->id == dst_reg->id) - reg->range = max_t(u16, reg->range, new_range); + reg->range = max(reg->range, new_range); } } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 1b93941bdfea..3b38a3d2eebd 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -7249,78 +7249,6 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, - { - "bpf_exit with invalid return code. test1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x0; 0xffffffff)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x0; 0x3)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test4", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. 
test5", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x2; 0x0)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test6", - .insns = { - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr = "R0 is not a known value (ctx)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test7", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4), - BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has unknown scalar value", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, { "XDP pkt read, pkt_end >= pkt_data', bad access 1", .insns = { @@ -7470,6 +7398,78 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, + { + "bpf_exit with invalid return code. test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0xffffffff)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0x3)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x2; 0x0)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test6", + .insns = { + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R0 is not a known value (ctx)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4), + BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has unknown scalar value", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, }; static int probe_filter_length(const struct bpf_insn *fp) From 5beca081be9195b4316ac5f32921df0234ee8e0e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 1 Nov 2017 23:58:10 +0100 Subject: [PATCH 1717/2177] bpf: also improve pattern matches for meta access Follow-up to 0fd4759c5515 ("bpf: fix pattern matches for direct packet access") to cover also the remaining data_meta/data matches in the verifier. The matches are also refactored a bit to simplify handling of all the cases. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 165 ++++++++++++++++++++++++------------------ 1 file changed, 96 insertions(+), 69 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2cc3e9486a1f..530b68550fd2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2787,6 +2787,99 @@ static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, } } +static bool try_match_pkt_pointers(const struct bpf_insn *insn, + struct bpf_reg_state *dst_reg, + struct bpf_reg_state *src_reg, + struct bpf_verifier_state *this_branch, + struct bpf_verifier_state *other_branch) +{ + if (BPF_SRC(insn->code) != BPF_X) + return false; + + switch (BPF_OP(insn->code)) { + case BPF_JGT: + if ((dst_reg->type == PTR_TO_PACKET && + src_reg->type == PTR_TO_PACKET_END) || + (dst_reg->type == PTR_TO_PACKET_META && + reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { + /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ + find_good_pkt_pointers(this_branch, dst_reg, + dst_reg->type, false); + } else if ((dst_reg->type == PTR_TO_PACKET_END && + src_reg->type == PTR_TO_PACKET) || + (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && + src_reg->type == PTR_TO_PACKET_META)) { + /* pkt_end > pkt_data', pkt_data > pkt_meta' */ + find_good_pkt_pointers(other_branch, src_reg, + src_reg->type, true); + } else { + return false; + } + break; + case BPF_JLT: + if ((dst_reg->type == PTR_TO_PACKET && + src_reg->type == PTR_TO_PACKET_END) || + (dst_reg->type == PTR_TO_PACKET_META && + reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { + /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ + find_good_pkt_pointers(other_branch, dst_reg, + dst_reg->type, true); + } else if ((dst_reg->type == PTR_TO_PACKET_END && + src_reg->type == PTR_TO_PACKET) || + (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && + src_reg->type == PTR_TO_PACKET_META)) { + /* pkt_end < pkt_data', pkt_data > pkt_meta' */ + find_good_pkt_pointers(this_branch, src_reg, + src_reg->type, false); + } else { + return false; + } + break; + case BPF_JGE: + if ((dst_reg->type == PTR_TO_PACKET && + src_reg->type == PTR_TO_PACKET_END) || + (dst_reg->type == PTR_TO_PACKET_META && + reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { + /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ + find_good_pkt_pointers(this_branch, dst_reg, + dst_reg->type, true); + } else if ((dst_reg->type == PTR_TO_PACKET_END && + src_reg->type == PTR_TO_PACKET) || + (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && + src_reg->type == PTR_TO_PACKET_META)) { + /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ + find_good_pkt_pointers(other_branch, src_reg, + src_reg->type, false); + } else { + return false; + } + break; + case BPF_JLE: + if ((dst_reg->type == PTR_TO_PACKET && + src_reg->type == PTR_TO_PACKET_END) || + (dst_reg->type == PTR_TO_PACKET_META && + reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { + /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ + find_good_pkt_pointers(other_branch, dst_reg, + dst_reg->type, false); + } else if ((dst_reg->type == PTR_TO_PACKET_END && + src_reg->type == PTR_TO_PACKET) || + (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && + src_reg->type == PTR_TO_PACKET_META)) { + /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ + find_good_pkt_pointers(this_branch, src_reg, + src_reg->type, true); + } else { + return false; + } + break; + default: + return false; + } + + return true; +} + static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { @@ -2893,75 +2986,9 @@ static int 
check_cond_jmp_op(struct bpf_verifier_env *env, */ mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE); mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && - dst_reg->type == PTR_TO_PACKET && - regs[insn->src_reg].type == PTR_TO_PACKET_END) { - /* pkt_data' > pkt_end */ - find_good_pkt_pointers(this_branch, dst_reg, - PTR_TO_PACKET, false); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && - dst_reg->type == PTR_TO_PACKET_END && - regs[insn->src_reg].type == PTR_TO_PACKET) { - /* pkt_end > pkt_data' */ - find_good_pkt_pointers(other_branch, ®s[insn->src_reg], - PTR_TO_PACKET, true); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && - dst_reg->type == PTR_TO_PACKET && - regs[insn->src_reg].type == PTR_TO_PACKET_END) { - /* pkt_data' < pkt_end */ - find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET, - true); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && - dst_reg->type == PTR_TO_PACKET_END && - regs[insn->src_reg].type == PTR_TO_PACKET) { - /* pkt_end < pkt_data' */ - find_good_pkt_pointers(this_branch, ®s[insn->src_reg], - PTR_TO_PACKET, false); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && - dst_reg->type == PTR_TO_PACKET && - regs[insn->src_reg].type == PTR_TO_PACKET_END) { - /* pkt_data' >= pkt_end */ - find_good_pkt_pointers(this_branch, dst_reg, - PTR_TO_PACKET, true); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && - dst_reg->type == PTR_TO_PACKET_END && - regs[insn->src_reg].type == PTR_TO_PACKET) { - /* pkt_end >= pkt_data' */ - find_good_pkt_pointers(other_branch, ®s[insn->src_reg], - PTR_TO_PACKET, false); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && - dst_reg->type == PTR_TO_PACKET && - regs[insn->src_reg].type == PTR_TO_PACKET_END) { - /* pkt_data' <= pkt_end */ - find_good_pkt_pointers(other_branch, dst_reg, - PTR_TO_PACKET, false); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && - dst_reg->type == PTR_TO_PACKET_END && - regs[insn->src_reg].type == PTR_TO_PACKET) { - /* pkt_end <= pkt_data' */ - find_good_pkt_pointers(this_branch, ®s[insn->src_reg], - PTR_TO_PACKET, true); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && - dst_reg->type == PTR_TO_PACKET_META && - reg_is_init_pkt_pointer(®s[insn->src_reg], PTR_TO_PACKET)) { - find_good_pkt_pointers(this_branch, dst_reg, - PTR_TO_PACKET_META, false); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && - dst_reg->type == PTR_TO_PACKET_META && - reg_is_init_pkt_pointer(®s[insn->src_reg], PTR_TO_PACKET)) { - find_good_pkt_pointers(other_branch, dst_reg, - PTR_TO_PACKET_META, false); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && - reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && - regs[insn->src_reg].type == PTR_TO_PACKET_META) { - find_good_pkt_pointers(other_branch, ®s[insn->src_reg], - PTR_TO_PACKET_META, false); - } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && - reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && - regs[insn->src_reg].type == PTR_TO_PACKET_META) { - find_good_pkt_pointers(this_branch, ®s[insn->src_reg], - PTR_TO_PACKET_META, false); - } else if (is_pointer_value(env, insn->dst_reg)) { + } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], + this_branch, other_branch) && + is_pointer_value(env, insn->dst_reg)) { verbose(env, "R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; From 
634eab11bd96d60d3a877182baa5b4b8bbed8685 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 1 Nov 2017 23:58:11 +0100 Subject: [PATCH 1718/2177] bpf: add test cases to bpf selftests to cover all meta tests Lets also add test cases to cover all possible data_meta access tests for good/bad access cases so we keep tracking them. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 442 ++++++++++++++++++++ 1 file changed, 442 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3b38a3d2eebd..bb3c4ad8c59f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -7398,6 +7398,448 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, + { + "XDP pkt read, pkt_meta' > pkt_data, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_meta' > pkt_data, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_meta' > pkt_data, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data > pkt_meta', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data > pkt_meta', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 
8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data > pkt_meta', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_meta' < pkt_data, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_meta' < pkt_data, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_meta' < pkt_data, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data < pkt_meta', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data < pkt_meta', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, 
BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data < pkt_meta', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_meta' >= pkt_data, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_meta' >= pkt_data, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data >= pkt_meta', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data >= pkt_meta', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + 
BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data >= pkt_meta', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_meta' <= pkt_data, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_meta' <= pkt_data, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data <= pkt_meta', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data <= pkt_meta', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + 
BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data <= pkt_meta', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, { "bpf_exit with invalid return code. test1", .insns = { From e20f469660ad526fcd9bf865e30ee491ae4e4a86 Mon Sep 17 00:00:00 2001 From: Vijaya Mohan Guvva Date: Wed, 1 Nov 2017 16:19:49 -0700 Subject: [PATCH 1719/2177] liquidio: synchronize VF representor names with NIC firmware LiquidIO firmware supports a vswitch that needs to know the names of the VF representors in the host to maintain compatibility for direct programming using external Openflow agents. So, for each VF representor, send its name to the firmware when it gets registered and when its name changes. Signed-off-by: Vijaya Mohan Guvva Signed-off-by: Raghu Vatsavayi Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 15 ++++ .../net/ethernet/cavium/liquidio/lio_vf_rep.c | 68 +++++++++++++++++++ .../net/ethernet/cavium/liquidio/lio_vf_rep.h | 2 + .../cavium/liquidio/liquidio_common.h | 8 ++- 4 files changed, 92 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index f27f0afd0ecf..f05045a69dcc 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1639,6 +1639,10 @@ static void liquidio_remove(struct pci_dev *pdev) if (oct_dev->watchdog_task) kthread_stop(oct_dev->watchdog_task); + if (!oct_dev->octeon_id && + oct_dev->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP) + lio_vf_rep_modexit(); + if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP)) liquidio_stop_nic_module(oct_dev); @@ -4029,6 +4033,17 @@ static int liquidio_init_nic_module(struct octeon_device *oct) goto octnet_init_failure; } + /* Call vf_rep_modinit if the firmware is switchdev capable + * and do it from the first liquidio function probed. 
+ */ + if (!oct->octeon_id && + oct->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP) { + if (lio_vf_rep_modinit()) { + liquidio_stop_nic_module(oct); + goto octnet_init_failure; + } + } + liquidio_ptp_init(oct); dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n"); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index de0c80d150f3..2adafa366d3f 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -625,3 +625,71 @@ lio_vf_rep_destroy(struct octeon_device *oct) oct->vf_rep_list.num_vfs = 0; } + +static int +lio_vf_rep_netdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct lio_vf_rep_desc *vf_rep; + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + switch (event) { + case NETDEV_REGISTER: + case NETDEV_CHANGENAME: + break; + + default: + return NOTIFY_DONE; + } + + if (ndev->netdev_ops != &lio_vf_rep_ndev_ops) + return NOTIFY_DONE; + + vf_rep = netdev_priv(ndev); + oct = vf_rep->oct; + + if (strlen(ndev->name) > LIO_IF_NAME_SIZE) { + dev_err(&oct->pci_dev->dev, + "Device name change sync failed as the size is > %d\n", + LIO_IF_NAME_SIZE); + return NOTIFY_DONE; + } + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_DEVNAME; + rep_cfg.ifidx = vf_rep->ifidx; + strncpy(rep_cfg.rep_name.name, ndev->name, LIO_IF_NAME_SIZE); + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, + sizeof(rep_cfg), NULL, 0); + if (ret) + dev_err(&oct->pci_dev->dev, + "vf_rep netdev name change failed with err %d\n", ret); + + return NOTIFY_DONE; +} + +static struct notifier_block lio_vf_rep_netdev_notifier = { + .notifier_call = lio_vf_rep_netdev_event, +}; + +int +lio_vf_rep_modinit(void) +{ + if (register_netdevice_notifier(&lio_vf_rep_netdev_notifier)) { + pr_err("netdev notifier registration failed\n"); + return -EFAULT; + } + + return 0; +} + +void +lio_vf_rep_modexit(void) +{ + if (unregister_netdevice_notifier(&lio_vf_rep_netdev_notifier)) + pr_err("netdev notifier unregister failed\n"); +} diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h index 5a9ec9851426..bb3cedc63c63 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h @@ -44,4 +44,6 @@ struct lio_vf_rep_sc_ctx { int lio_vf_rep_create(struct octeon_device *oct); void lio_vf_rep_destroy(struct octeon_device *oct); +int lio_vf_rep_modinit(void); +void lio_vf_rep_modexit(void); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 441cc78faff1..3bcdda85e360 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -928,7 +928,8 @@ enum lio_vf_rep_req_type { LIO_VF_REP_REQ_NONE, LIO_VF_REP_REQ_STATE, LIO_VF_REP_REQ_MTU, - LIO_VF_REP_REQ_STATS + LIO_VF_REP_REQ_STATS, + LIO_VF_REP_REQ_DEVNAME }; enum { @@ -936,12 +937,17 @@ enum { LIO_VF_REP_STATE_UP }; +#define LIO_IF_NAME_SIZE 16 struct lio_vf_rep_req { u8 req_type; u8 ifidx; u8 rsvd[6]; union { + struct lio_vf_rep_name { + char name[LIO_IF_NAME_SIZE]; + } rep_name; + struct lio_vf_rep_mtu { u32 mtu; u32 rsvd; From cdc89c91987bd6efa962b36d4ea86d0c9152eb11 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Thu, 2 Nov 2017 17:09:45 +0900 Subject: 
[PATCH 1720/2177] tools: bpf: handle long path in jit disasm Use PATH_MAX instead of hardcoded array size 256 Signed-off-by: Prashant Bhole Signed-off-by: David S. Miller --- tools/bpf/bpf_jit_disasm.c | 3 ++- tools/bpf/bpftool/jit_disasm.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 422d9abd666a..75bf526a0168 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -27,6 +27,7 @@ #include #include #include +#include #define CMD_ACTION_SIZE_BUFFER 10 #define CMD_ACTION_READ_ALL 3 @@ -51,7 +52,7 @@ static void get_exec_path(char *tpath, size_t size) static void get_asm_insns(uint8_t *image, size_t len, int opcodes) { int count, i, pc = 0; - char tpath[256]; + char tpath[PATH_MAX]; struct disassemble_info info; disassembler_ftype disassemble; bfd *bfdf; diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 5937e134e408..1551d3918d4c 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "json_writer.h" #include "main.h" @@ -80,7 +81,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) disassembler_ftype disassemble; struct disassemble_info info; int count, i, pc = 0; - char tpath[256]; + char tpath[PATH_MAX]; bfd *bfdf; if (!len) From 2df7b2d20622f98bca2f001bb0e8d8bee01782ac Mon Sep 17 00:00:00 2001 From: John Hurley Date: Thu, 2 Nov 2017 01:31:29 -0700 Subject: [PATCH 1721/2177] nfp: flower: app should use struct nfp_repr Ensure priv netdev data in flower app is cast to nfp_repr and not nfp_net as in other apps. Fixes: 363fc53b8b58 ("nfp: flower: Convert ndo_setup_tc offloads to block callbacks") Signed-off-by: John Hurley Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
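The fix in the hunks that follow hinges on how netdev_priv() works: it returns the private area that was sized and laid out when the netdev was allocated, with no type checking, so the cast on the receiving side must match the structure used at allocation time. The sketch below is illustrative only; struct my_repr, my_repr_alloc() and the field names are invented for this example and are not part of the nfp driver.

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

/* Hypothetical private data for a representor netdev */
struct my_repr {
        struct net_device *netdev;
        void *app;
        u32 port_id;
};

static struct net_device *my_repr_alloc(void)
{
        struct net_device *netdev;
        struct my_repr *repr;

        /* The private area is sized for struct my_repr here ... */
        netdev = alloc_etherdev(sizeof(struct my_repr));
        if (!netdev)
                return NULL;

        /* ... so every later netdev_priv()/cb_priv cast must use the same
         * type; casting to an unrelated structure (as the bug fixed below
         * did with struct nfp_net) silently misinterprets that memory.
         */
        repr = netdev_priv(netdev);
        repr->netdev = netdev;

        return netdev;
}
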
Miller --- drivers/net/ethernet/netronome/nfp/flower/offload.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 7c6cab176293..95c2b9284857 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -468,14 +468,14 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { - struct nfp_net *nn = cb_priv; + struct nfp_repr *repr = cb_priv; - if (!tc_can_offload(nn->dp.netdev)) + if (!tc_can_offload(repr->netdev)) return -EOPNOTSUPP; switch (type) { case TC_SETUP_CLSFLOWER: - return nfp_flower_repr_offload(nn->app, nn->port->netdev, + return nfp_flower_repr_offload(repr->app, repr->netdev, type_data); default: return -EOPNOTSUPP; @@ -485,7 +485,7 @@ static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, static int nfp_flower_setup_tc_block(struct net_device *netdev, struct tc_block_offload *f) { - struct nfp_net *nn = netdev_priv(netdev); + struct nfp_repr *repr = netdev_priv(netdev); if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; @@ -494,11 +494,11 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev, case TC_BLOCK_BIND: return tcf_block_cb_register(f->block, nfp_flower_setup_tc_block_cb, - nn, nn); + repr, repr); case TC_BLOCK_UNBIND: tcf_block_cb_unregister(f->block, nfp_flower_setup_tc_block_cb, - nn); + repr); return 0; default: return -EOPNOTSUPP; From dc4646a950153242313ed340f0a404de38b21d5c Mon Sep 17 00:00:00 2001 From: John Hurley Date: Thu, 2 Nov 2017 01:31:30 -0700 Subject: [PATCH 1722/2177] nfp: flower: vxlan - ensure no sleep in atomic context Functions called by the netevent notifier must be in atomic context. Change the mutex to spinlock and ensure mem allocations are done with the atomic flag. Also, remove unnecessary locking after notifiers are unregistered. Signed-off-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: David S. 
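The conversion described above follows from a general rule: netevent notifier callbacks can run in softirq context, where sleeping is forbidden, so a mutex (which may sleep) and GFP_KERNEL allocations (which may sleep to reclaim memory) are both off limits. A rough sketch of the resulting pattern follows; struct addr_cache, addr_cache_add() and the field names are made up for illustration and do not appear in the driver.

#include <linux/types.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical cache that is updated from an atomic notifier callback */
struct addr_cache {
        spinlock_t lock;                /* spinlock instead of a mutex */
        struct list_head entries;
};

struct addr_entry {
        struct list_head list;
        __be32 addr;
};

static void addr_cache_add(struct addr_cache *cache, __be32 addr)
{
        struct addr_entry *entry;

        /* GFP_ATOMIC: the caller may be in softirq context and cannot sleep */
        entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
        if (!entry)
                return;
        entry->addr = addr;

        /* The _bh variant lets softirq callers and process-context callers
         * share the same lock without deadlocking.
         */
        spin_lock_bh(&cache->lock);
        list_add_tail(&entry->list, &cache->entries);
        spin_unlock_bh(&cache->lock);
}
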
Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.c | 9 ++-- .../net/ethernet/netronome/nfp/flower/cmsg.h | 2 +- .../net/ethernet/netronome/nfp/flower/main.h | 2 +- .../ethernet/netronome/nfp/flower/offload.c | 2 +- .../netronome/nfp/flower/tunnel_conf.c | 47 ++++++++----------- 5 files changed, 28 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 6b71c719deba..e98bb9cdb6a3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -50,14 +50,14 @@ nfp_flower_cmsg_get_hdr(struct sk_buff *skb) struct sk_buff * nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, - enum nfp_flower_cmsg_type_port type) + enum nfp_flower_cmsg_type_port type, gfp_t flag) { struct nfp_flower_cmsg_hdr *ch; struct sk_buff *skb; size += NFP_FLOWER_CMSG_HLEN; - skb = nfp_app_ctrl_msg_alloc(app, size, GFP_KERNEL); + skb = nfp_app_ctrl_msg_alloc(app, size, flag); if (!skb) return NULL; @@ -78,7 +78,8 @@ nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports) unsigned int size; size = sizeof(*msg) + num_ports * sizeof(msg->ports[0]); - skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR); + skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR, + GFP_KERNEL); if (!skb) return NULL; @@ -109,7 +110,7 @@ int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok) struct sk_buff *skb; skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg), - NFP_FLOWER_CMSG_TYPE_PORT_MOD); + NFP_FLOWER_CMSG_TYPE_PORT_MOD, GFP_KERNEL); if (!skb) return -ENOMEM; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 64e87f8e7089..66070741d55f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -458,6 +458,6 @@ void nfp_flower_cmsg_process_rx(struct work_struct *work); void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb); struct sk_buff * nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, - enum nfp_flower_cmsg_type_port type); + enum nfp_flower_cmsg_type_port type, gfp_t flag); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 12c319a219d8..c90e72b7ff5a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -115,7 +115,7 @@ struct nfp_flower_priv { struct mutex nfp_mac_off_lock; struct mutex nfp_mac_index_lock; struct mutex nfp_ipv4_off_lock; - struct mutex nfp_neigh_off_lock; + spinlock_t nfp_neigh_off_lock; struct ida nfp_mac_off_ids; int nfp_mac_off_count; struct notifier_block nfp_tun_mac_nb; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 95c2b9284857..cdbb5464b790 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -95,7 +95,7 @@ nfp_flower_xmit_flow(struct net_device *netdev, nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ; nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ; - skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype); + skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype, GFP_KERNEL); if (!skb) return -ENOMEM; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index c495f8f38506..b03f22f29612 
100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -224,12 +224,13 @@ static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) } static int -nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata) +nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, + gfp_t flag) { struct sk_buff *skb; unsigned char *msg; - skb = nfp_flower_cmsg_alloc(app, plen, mtype); + skb = nfp_flower_cmsg_alloc(app, plen, mtype, flag); if (!skb) return -ENOMEM; @@ -246,15 +247,15 @@ static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) struct nfp_ipv4_route_entry *entry; struct list_head *ptr, *storage; - mutex_lock(&priv->nfp_neigh_off_lock); + spin_lock_bh(&priv->nfp_neigh_off_lock); list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); if (entry->ipv4_addr == ipv4_addr) { - mutex_unlock(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->nfp_neigh_off_lock); return true; } } - mutex_unlock(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->nfp_neigh_off_lock); return false; } @@ -264,24 +265,24 @@ static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) struct nfp_ipv4_route_entry *entry; struct list_head *ptr, *storage; - mutex_lock(&priv->nfp_neigh_off_lock); + spin_lock_bh(&priv->nfp_neigh_off_lock); list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); if (entry->ipv4_addr == ipv4_addr) { - mutex_unlock(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->nfp_neigh_off_lock); return; } } - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { - mutex_unlock(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->nfp_neigh_off_lock); nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); return; } entry->ipv4_addr = ipv4_addr; list_add_tail(&entry->list, &priv->nfp_neigh_off_list); - mutex_unlock(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->nfp_neigh_off_lock); } static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) @@ -290,7 +291,7 @@ static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) struct nfp_ipv4_route_entry *entry; struct list_head *ptr, *storage; - mutex_lock(&priv->nfp_neigh_off_lock); + spin_lock_bh(&priv->nfp_neigh_off_lock); list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); if (entry->ipv4_addr == ipv4_addr) { @@ -299,12 +300,12 @@ static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) break; } } - mutex_unlock(&priv->nfp_neigh_off_lock); + spin_unlock_bh(&priv->nfp_neigh_off_lock); } static void nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, - struct flowi4 *flow, struct neighbour *neigh) + struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) { struct nfp_tun_neigh payload; @@ -334,7 +335,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, send_msg: nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, sizeof(struct nfp_tun_neigh), - (unsigned char *)&payload); + (unsigned char *)&payload, flag); } static int @@ -385,7 +386,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, #endif flow.flowi4_proto = IPPROTO_UDP; - nfp_tun_write_neigh(n->dev, app, &flow, n); + nfp_tun_write_neigh(n->dev, app, 
&flow, n, GFP_ATOMIC); return NOTIFY_OK; } @@ -423,7 +424,7 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto route_fail_warning; - nfp_tun_write_neigh(n->dev, app, &flow, n); + nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_KERNEL); neigh_release(n); return; @@ -456,7 +457,7 @@ static void nfp_tun_write_ipv4_list(struct nfp_app *app) nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS, sizeof(struct nfp_tun_ipv4_addr), - &payload); + &payload, GFP_KERNEL); } void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4) @@ -548,7 +549,7 @@ void nfp_tunnel_write_macs(struct nfp_app *app) } err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, - pay_size, payload); + pay_size, payload, GFP_KERNEL); kfree(payload); @@ -729,7 +730,7 @@ int nfp_tunnel_config_start(struct nfp_app *app) INIT_LIST_HEAD(&priv->nfp_ipv4_off_list); /* Initialise priv data for neighbour offloading. */ - mutex_init(&priv->nfp_neigh_off_lock); + spin_lock_init(&priv->nfp_neigh_off_lock); INIT_LIST_HEAD(&priv->nfp_neigh_off_list); priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler; @@ -769,43 +770,35 @@ void nfp_tunnel_config_stop(struct nfp_app *app) unregister_netevent_notifier(&priv->nfp_tun_neigh_nb); /* Free any memory that may be occupied by MAC list. */ - mutex_lock(&priv->nfp_mac_off_lock); list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, list); list_del(&mac_entry->list); kfree(mac_entry); } - mutex_unlock(&priv->nfp_mac_off_lock); /* Free any memory that may be occupied by MAC index list. */ - mutex_lock(&priv->nfp_mac_index_lock); list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); list_del(&mac_idx->list); kfree(mac_idx); } - mutex_unlock(&priv->nfp_mac_index_lock); ida_destroy(&priv->nfp_mac_off_ids); /* Free any memory that may be occupied by ipv4 list. */ - mutex_lock(&priv->nfp_ipv4_off_lock); list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); list_del(&ip_entry->list); kfree(ip_entry); } - mutex_unlock(&priv->nfp_ipv4_off_lock); /* Free any memory that may be occupied by the route list. */ - mutex_lock(&priv->nfp_neigh_off_lock); list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); list_del(&route_entry->list); kfree(route_entry); } - mutex_unlock(&priv->nfp_neigh_off_lock); } From f449657f83532807f388b9b99cf0c3f7be65eda9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Nov 2017 01:31:31 -0700 Subject: [PATCH 1723/2177] nfp: bpf: reject TC offload if XDP loaded Recent TC changes dropped the check protecting us from trying to offload a TC program if XDP programs are already loaded. Fixes: 90d97315b3e7 ("nfp: bpf: Convert ndo_setup_tc offloads to block callbacks") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 04424db24b80..8e3e89cace8d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -130,6 +130,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, cls_bpf->common.protocol != htons(ETH_P_ALL) || cls_bpf->common.chain_index) return -EOPNOTSUPP; + if (nn->dp.bpf_offload_xdp) + return -EBUSY; + return nfp_net_bpf_offload(nn, cls_bpf); default: return -EOPNOTSUPP; From 2c4197a041dfbb5101aaa8be7b378ba69b91e765 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Nov 2017 01:31:32 -0700 Subject: [PATCH 1724/2177] nfp: reorganize the app table The app table is an unordered array right now. We have to search apps by ID. It also makes it harder to fall back to core NIC if advanced functions are not compiled into the kernel (e.g. eBPF). Make the table keyed by app id. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_app.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 5d9e2eba5b49..085c5151c601 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -43,10 +43,10 @@ #include "nfp_net_repr.h" static const struct nfp_app_type *apps[] = { - &app_nic, - &app_bpf, + [NFP_APP_CORE_NIC] = &app_nic, + [NFP_APP_BPF_NIC] = &app_bpf, #ifdef CONFIG_NFP_APP_FLOWER - &app_flower, + [NFP_APP_FLOWER_NIC] = &app_flower, #endif }; @@ -116,17 +116,13 @@ exit_unlock: struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) { struct nfp_app *app; - unsigned int i; - for (i = 0; i < ARRAY_SIZE(apps); i++) - if (apps[i]->id == id) - break; - if (i == ARRAY_SIZE(apps)) { + if (id >= ARRAY_SIZE(apps) || !apps[id]) { nfp_err(pf->cpp, "failed to find app with ID 0x%02hhx\n", id); return ERR_PTR(-EINVAL); } - if (WARN_ON(!apps[i]->name || !apps[i]->vnic_alloc)) + if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc)) return ERR_PTR(-EINVAL); app = kzalloc(sizeof(*app), GFP_KERNEL); @@ -136,7 +132,7 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) app->pf = pf; app->cpp = pf->cpp; app->pdev = pf->pdev; - app->type = apps[i]; + app->type = apps[id]; return app; } From 43b45245e5a6c274f374ecb49e5bca39f28dbfad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Nov 2017 01:31:33 -0700 Subject: [PATCH 1725/2177] nfp: bpf: fall back to core NIC app if BPF not selected If kernel config does not include BPF just replace the BPF app handler with the handler for basic NIC. The BPF app will now be built only if BPF infrastructure is selected in kernel config. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 4 ++-- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 3 --- drivers/net/ethernet/netronome/nfp/nfp_app.c | 4 ++++ 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index bd3b2bd408bc..9e8d30cb1517 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -27,8 +27,6 @@ nfp-objs := \ nfp_net_sriov.o \ nfp_netvf_main.o \ nfp_port.o \ - bpf/main.o \ - bpf/offload.o \ nic/main.o ifeq ($(CONFIG_NFP_APP_FLOWER),y) @@ -44,6 +42,8 @@ endif ifeq ($(CONFIG_BPF_SYSCALL),y) nfp-objs += \ + bpf/main.o \ + bpf/offload.o \ bpf/verifier.o \ bpf/jit.o endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 63c8f7847054..6d576f631392 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -150,9 +150,6 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, unsigned int max_mtu; int ret; - if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) - return -EOPNOTSUPP; - ret = nfp_net_bpf_get_act(nn, cls_bpf); if (ret < 0) return ret; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 085c5151c601..3644d74fe304 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -44,7 +44,11 @@ static const struct nfp_app_type *apps[] = { [NFP_APP_CORE_NIC] = &app_nic, +#ifdef CONFIG_BPF_SYSCALL [NFP_APP_BPF_NIC] = &app_bpf, +#else + [NFP_APP_BPF_NIC] = &app_nic, +#endif #ifdef CONFIG_NFP_APP_FLOWER [NFP_APP_FLOWER_NIC] = &app_flower, #endif From 790a399171831d31c8016a27294ef69130d3e7cc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Nov 2017 01:31:34 -0700 Subject: [PATCH 1726/2177] nfp: switch to dev_alloc_page() Use the dev_alloc_page() networking helper to allocate pages for RX packets. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index eddf850a6a7f..7147335a8b36 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1212,7 +1212,7 @@ static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) } else { struct page *page; - page = alloc_page(GFP_ATOMIC | __GFP_COLD); + page = dev_alloc_page(); frag = page ? page_address(page) : NULL; } if (!frag) { From 16f50cda06ae023cb7beb15c88233fc516c03a2a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Nov 2017 01:31:35 -0700 Subject: [PATCH 1727/2177] nfp: use a counter instead of log message for allocation failures Add a counter incremented when allocation of replacement RX page fails. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
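The rx_replace_buf_alloc_fail counter described above uses the standard u64_stats_sync pattern for per-ring software statistics: the hot path increments a plain u64 inside a writer section, and the occasional reader (ethtool) loops until it sees a consistent snapshot, which is free on 64-bit and prevents torn 64-bit reads on 32-bit systems. A minimal, hypothetical sketch of that pattern; ring_stats and the function names are invented for illustration.

#include <linux/u64_stats_sync.h>

struct ring_stats {
        struct u64_stats_sync sync;
        u64 alloc_fail;
};

/* Datapath side, e.g. called from NAPI poll on a buffer-refill failure */
static void ring_count_alloc_fail(struct ring_stats *st)
{
        u64_stats_update_begin(&st->sync);
        st->alloc_fail++;
        u64_stats_update_end(&st->sync);
}

/* Reader side, e.g. ethtool -S: retry until the snapshot is consistent */
static u64 ring_read_alloc_fail(struct ring_stats *st)
{
        unsigned int start;
        u64 val;

        do {
                start = u64_stats_fetch_begin(&st->sync);
                val = st->alloc_fail;
        } while (u64_stats_fetch_retry(&st->sync, start));

        return val;
}
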
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 3 +++ .../net/ethernet/netronome/nfp/nfp_net_common.c | 15 ++++++++++----- .../net/ethernet/netronome/nfp/nfp_net_ethtool.c | 12 +++++++----- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index d51d8237b984..3d411f0d15b6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -394,6 +394,7 @@ struct nfp_net_rx_ring { * @tx_lso: Counter of LSO packets sent * @tx_errors: How many TX errors were encountered * @tx_busy: How often was TX busy (no space)? + * @rx_replace_buf_alloc_fail: Counter of RX buffer allocation failures * @irq_vector: Interrupt vector number (use for talking to the OS) * @handler: Interrupt handler for this ring vector * @name: Name of the interrupt vector @@ -437,6 +438,8 @@ struct nfp_net_r_vector { u64 hw_csum_tx_inner; u64 tx_gather; u64 tx_lso; + + u64 rx_replace_buf_alloc_fail; u64 tx_errors; u64 tx_busy; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 7147335a8b36..185a3dd35a3f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1209,15 +1209,15 @@ static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) if (!dp->xdp_prog) { frag = napi_alloc_frag(dp->fl_bufsz); + if (unlikely(!frag)) + return NULL; } else { struct page *page; page = dev_alloc_page(); - frag = page ? page_address(page) : NULL; - } - if (!frag) { - nn_dp_warn(dp, "Failed to alloc receive page frag\n"); - return NULL; + if (unlikely(!page)) + return NULL; + frag = page_address(page); } *dma_addr = nfp_net_dma_map_rx(dp, frag); @@ -1514,6 +1514,11 @@ nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, { u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_drops++; + /* If we have both skb and rxbuf the replacement buffer allocation + * must have failed, count this as an alloc failure. 
+ */ + if (skb && rxbuf) + r_vec->rx_replace_buf_alloc_fail++; u64_stats_update_end(&r_vec->rx_sync); /* skb is build based on the frag, free_skb() would free the frag diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index dc016dfec64d..6d5c376f0000 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -181,7 +181,7 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 -#define NN_ET_RVEC_GATHER_STATS 7 +#define NN_ET_RVEC_GATHER_STATS 8 static void nfp_net_get_nspinfo(struct nfp_app *app, char *version) { @@ -444,6 +444,7 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "hw_rx_csum_ok"); data = nfp_pr_et(data, "hw_rx_csum_inner_ok"); data = nfp_pr_et(data, "hw_rx_csum_err"); + data = nfp_pr_et(data, "rx_replace_buf_alloc_fail"); data = nfp_pr_et(data, "hw_tx_csum"); data = nfp_pr_et(data, "hw_tx_inner_csum"); data = nfp_pr_et(data, "tx_gather"); @@ -468,16 +469,17 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; tmp[2] = nn->r_vecs[i].hw_csum_rx_error; + tmp[3] = nn->r_vecs[i].rx_replace_buf_alloc_fail; } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); do { start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); data[1] = nn->r_vecs[i].tx_pkts; data[2] = nn->r_vecs[i].tx_busy; - tmp[3] = nn->r_vecs[i].hw_csum_tx; - tmp[4] = nn->r_vecs[i].hw_csum_tx_inner; - tmp[5] = nn->r_vecs[i].tx_gather; - tmp[6] = nn->r_vecs[i].tx_lso; + tmp[4] = nn->r_vecs[i].hw_csum_tx; + tmp[5] = nn->r_vecs[i].hw_csum_tx_inner; + tmp[6] = nn->r_vecs[i].tx_gather; + tmp[7] = nn->r_vecs[i].tx_lso; } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); data += 3; From 18f76191796ad478e42528cfafcae0b11d4c8db4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Nov 2017 01:31:36 -0700 Subject: [PATCH 1728/2177] nfp: improve defines for constants in ethtool We split rvector stats into two categories - per queue and stats which are added up into one total counter. Improve the defines denoting their number. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- .../net/ethernet/netronome/nfp/nfp_net_ethtool.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 6d5c376f0000..c67b90c8d8b7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -181,7 +181,8 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 -#define NN_ET_RVEC_GATHER_STATS 8 +#define NN_RVEC_GATHER_STATS 8 +#define NN_RVEC_PER_Q_STATS 3 static void nfp_net_get_nspinfo(struct nfp_app *app, char *version) { @@ -427,7 +428,7 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); - return NN_ET_RVEC_GATHER_STATS + nn->dp.num_r_vecs * 3; + return NN_RVEC_GATHER_STATS + nn->dp.num_r_vecs * NN_RVEC_PER_Q_STATS; } static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) @@ -455,9 +456,9 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) { - u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {}; + u64 gathered_stats[NN_RVEC_GATHER_STATS] = {}; struct nfp_net *nn = netdev_priv(netdev); - u64 tmp[NN_ET_RVEC_GATHER_STATS]; + u64 tmp[NN_RVEC_GATHER_STATS]; unsigned int i, j; for (i = 0; i < nn->dp.num_r_vecs; i++) { @@ -482,13 +483,13 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[7] = nn->r_vecs[i].tx_lso; } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); - data += 3; + data += NN_RVEC_PER_Q_STATS; - for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) + for (j = 0; j < NN_RVEC_GATHER_STATS; j++) gathered_stats[j] += tmp[j]; } - for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) + for (j = 0; j < NN_RVEC_GATHER_STATS; j++) *data++ = gathered_stats[j]; return data; From b35be415499ad257954813f8def9c84f49f1ff34 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Thu, 2 Nov 2017 10:20:58 +0100 Subject: [PATCH 1729/2177] net: dsa: lan9303: Added Documentation/networking/dsa/lan9303.txt Provide a rough overview of the state of the driver. And explain that the driver operates in two modes: bridged and port-separated. Signed-off-by: Egil Hjelmeland Signed-off-by: David S. Miller --- Documentation/networking/dsa/lan9303.txt | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 Documentation/networking/dsa/lan9303.txt diff --git a/Documentation/networking/dsa/lan9303.txt b/Documentation/networking/dsa/lan9303.txt new file mode 100644 index 000000000000..ec28683d107d --- /dev/null +++ b/Documentation/networking/dsa/lan9303.txt @@ -0,0 +1,37 @@ +LAN9303 Ethernet switch driver +============================== + +The LAN9303 is a three port 10/100 ethernet switch with integrated phys for the +two external ethernet ports. The third port is an RMII/MII interface to a host +master network interface (e.g. fixed link). + + +Driver details +============== + +The driver is implemented as a DSA driver, see +Documentation/networking/dsa/dsa.txt. + +See Documentation/devicetree/bindings/net/dsa/lan9303.txt for device tree +binding. + +The LAN9303 can be managed both via MDIO and I2C, both supported by this driver. + +At startup the driver configures the device to provide two separate network +interfaces (which is the default state of a DSA device). 
Due to HW limitations, +no HW MAC learning takes place in this mode. + +When both user ports are joined to the same bridge, the normal HW MAC learning +is enabled. This means that unicast traffic is forwarded in HW. Broadcast and +multicast is flooded in HW. STP is also supported in this mode. The driver +support fdb/mdb operations as well, meaning IGMP snooping is supported. + +If one of the user ports leave the bridge, the ports goes back to the initial +separated operation. + + +Driver limitations +================== + + - Support for VLAN filtering is not implemented + - The HW does not support VLAN-specific fdb entries From 9691cea91c9d35f09d6b31a5e8dbfc631d40903a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Nov 2017 12:13:36 +0100 Subject: [PATCH 1730/2177] net: seeq: fix timer conversion One of the timer conversion patches evidently escaped build testing until I ran into in on ARM randconfig builds: drivers/net/ethernet/seeq/ether3.c: In function 'ether3_ledoff': drivers/net/ethernet/seeq/ether3.c:175:40: error: 'priv' undeclared (first use in this function); did you mean 'pid'? drivers/net/ethernet/seeq/ether3.c:176:27: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] This fixes the two small typos that caused the problems. Fixes: 6fd9c53f7186 ("net: seeq: Convert timers to use timer_setup()") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/seeq/ether3.c | 2 +- drivers/net/ethernet/seeq/ether3.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index da4807723a06..c5bc124b41a9 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -172,7 +172,7 @@ ether3_setbuffer(struct net_device *dev, buffer_rw_t read, int start) */ static void ether3_ledoff(struct timer_list *t) { - struct dev_priv *private = from_timer(priv, t, timer); + struct dev_priv *private = from_timer(private, t, timer); struct net_device *dev = private->dev; ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2); diff --git a/drivers/net/ethernet/seeq/ether3.h b/drivers/net/ethernet/seeq/ether3.h index ea2ba286e665..be19e5fa5cf2 100644 --- a/drivers/net/ethernet/seeq/ether3.h +++ b/drivers/net/ethernet/seeq/ether3.h @@ -165,7 +165,7 @@ struct dev_priv { unsigned char tx_tail; /* buffer nr of transmitting packet */ unsigned int rx_head; /* address to fetch next packet from */ struct timer_list timer; - net_device *dev; + struct net_device *dev; int broken; /* 0 = ok, 1 = something went wrong */ }; From 7df7dad633e2c6d43ee9b39c267ee0add9798384 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Thu, 2 Nov 2017 20:45:15 +0800 Subject: [PATCH 1731/2177] net: hns3: Refactor the mapping of tqp to vport This patch refactor the mapping of tqp to vport, making the maping function can be used both in the reset process and initialization process. Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 61 +++++++++++++++---- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 0b95fbe63ac1..404757a6792f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1184,11 +1184,7 @@ static int hclge_assign_tqp(struct hclge_vport *vport, struct hnae3_queue **tqp, u16 num_tqps) { struct hclge_dev *hdev = vport->back; - int i, alloced, func_id, ret; - bool is_pf; - - func_id = vport->vport_id; - is_pf = (vport->vport_id == 0) ? true : false; + int i, alloced; for (i = 0, alloced = 0; i < hdev->num_tqps && alloced < num_tqps; i++) { @@ -1197,12 +1193,6 @@ static int hclge_assign_tqp(struct hclge_vport *vport, hdev->htqp[i].q.tqp_index = alloced; tqp[alloced] = &hdev->htqp[i].q; hdev->htqp[i].alloced = true; - ret = hclge_map_tqps_to_func(hdev, func_id, - hdev->htqp[i].index, - alloced, is_pf); - if (ret) - return ret; - alloced++; } } @@ -1254,6 +1244,49 @@ static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps) return 0; } +static int hclge_map_tqp_to_vport(struct hclge_dev *hdev, + struct hclge_vport *vport) +{ + struct hnae3_handle *nic = &vport->nic; + struct hnae3_knic_private_info *kinfo; + u16 i; + + kinfo = &nic->kinfo; + for (i = 0; i < kinfo->num_tqps; i++) { + struct hclge_tqp *q = + container_of(kinfo->tqp[i], struct hclge_tqp, q); + bool is_pf; + int ret; + + is_pf = !(vport->vport_id); + ret = hclge_map_tqps_to_func(hdev, vport->vport_id, q->index, + i, is_pf); + if (ret) + return ret; + } + + return 0; +} + +static int hclge_map_tqp(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + u16 i, num_vport; + + num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + for (i = 0; i < num_vport; i++) { + int ret; + + ret = hclge_map_tqp_to_vport(hdev, vport); + if (ret) + return ret; + + vport++; + } + + return 0; +} + static void hclge_unic_setup(struct hclge_vport *vport, u16 num_tqps) { /* this would be initialized later */ @@ -4459,6 +4492,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = hclge_map_tqp(hdev); + if (ret) { + dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret); + return ret; + } + ret = hclge_mac_init(hdev); if (ret) { dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret); From cf9cca2dd903b78d04ea7ad4cde0231988944d0f Mon Sep 17 00:00:00 2001 From: qumingguang Date: Thu, 2 Nov 2017 20:45:16 +0800 Subject: [PATCH 1732/2177] net: hns3: Refactor mac_init function It needs initialize mdio in initialization process, but reset process does not reset mdio, so do not initialize mdio in reset process. This patch move out the mdio configuration function from the mac_init. So mac_init can be used both in reset process and initialization process. Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 404757a6792f..5daa8c791010 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2228,13 +2228,6 @@ static int hclge_mac_init(struct hclge_dev *hdev) mac->link = 0; - ret = hclge_mac_mdio_config(hdev); - if (ret) { - dev_warn(&hdev->pdev->dev, - "mdio config fail ret=%d\n", ret); - return ret; - } - /* Initialize the MTA table work mode */ hdev->accept_mta_mc = true; hdev->enable_mta = true; @@ -4498,6 +4491,13 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = hclge_mac_mdio_config(hdev); + if (ret) { + dev_warn(&hdev->pdev->dev, + "mdio config fail ret=%d\n", ret); + return ret; + } + ret = hclge_mac_init(hdev); if (ret) { dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret); From 3efb960f056d855d4b1f07095df1f313c05765f4 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Thu, 2 Nov 2017 20:45:17 +0800 Subject: [PATCH 1733/2177] net: hns3: Refactor the initialization of command queue There is no necessary to reallocate the descriptor and remap the descriptor memory in reset process, But there is still some other action exist in both reset process and initialization process. To reuse the common interface in reset process and initialization process, This patch moves out the descriptor allocate and memory maping from interface cmdq_init. Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.c | 39 ++++++++++++------- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 1 + .../hisilicon/hns3/hns3pf/hclge_main.c | 9 ++++- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 60960e588b5f..ff13d1876d9e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -62,7 +62,7 @@ static void hclge_free_cmd_desc(struct hclge_cmq_ring *ring) ring->desc = NULL; } -static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type) +static int hclge_alloc_cmd_queue(struct hclge_dev *hdev, int ring_type) { struct hclge_hw *hw = &hdev->hw; struct hclge_cmq_ring *ring = @@ -79,9 +79,6 @@ static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type) return ret; } - ring->next_to_clean = 0; - ring->next_to_use = 0; - return 0; } @@ -302,37 +299,52 @@ static enum hclge_cmd_status hclge_cmd_query_firmware_version( return ret; } -int hclge_cmd_init(struct hclge_dev *hdev) +int hclge_cmd_queue_init(struct hclge_dev *hdev) { - u32 version; int ret; /* Setup the queue entries for use cmd queue */ hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; - /* Setup the lock for command queue */ - spin_lock_init(&hdev->hw.cmq.csq.lock); - spin_lock_init(&hdev->hw.cmq.crq.lock); - /* Setup Tx write back timeout */ hdev->hw.cmq.tx_timeout = HCLGE_CMDQ_TX_TIMEOUT; /* Setup queue rings */ - ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CSQ); + ret = hclge_alloc_cmd_queue(hdev, HCLGE_TYPE_CSQ); if (ret) { dev_err(&hdev->pdev->dev, "CSQ ring setup error %d\n", ret); return ret; } - ret = hclge_init_cmd_queue(hdev, 
HCLGE_TYPE_CRQ); + ret = hclge_alloc_cmd_queue(hdev, HCLGE_TYPE_CRQ); if (ret) { dev_err(&hdev->pdev->dev, "CRQ ring setup error %d\n", ret); goto err_csq; } + return 0; +err_csq: + hclge_free_cmd_desc(&hdev->hw.cmq.csq); + return ret; +} + +int hclge_cmd_init(struct hclge_dev *hdev) +{ + u32 version; + int ret; + + hdev->hw.cmq.csq.next_to_clean = 0; + hdev->hw.cmq.csq.next_to_use = 0; + hdev->hw.cmq.crq.next_to_clean = 0; + hdev->hw.cmq.crq.next_to_use = 0; + + /* Setup the lock for command queue */ + spin_lock_init(&hdev->hw.cmq.csq.lock); + spin_lock_init(&hdev->hw.cmq.crq.lock); + hclge_cmd_init_regs(&hdev->hw); ret = hclge_cmd_query_firmware_version(&hdev->hw, &version); @@ -346,9 +358,6 @@ int hclge_cmd_init(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version); return 0; -err_csq: - hclge_free_cmd_desc(&hdev->hw.cmq.csq); - return ret; } static void hclge_destroy_queue(struct hclge_cmq_ring *ring) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index b4373345c2b4..6bdc2167084b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -750,4 +750,5 @@ enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw, struct hclge_desc *desc); void hclge_destroy_cmd_queue(struct hclge_hw *hw); +int hclge_cmd_queue_init(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 5daa8c791010..cf0fafec7954 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4446,7 +4446,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_pci_init; } - /* Command queue initialize */ + /* Firmware command queue initialize */ + ret = hclge_cmd_queue_init(hdev); + if (ret) { + dev_err(&pdev->dev, "Cmd queue init failed, ret = %d.\n", ret); + return ret; + } + + /* Firmware command initialize */ ret = hclge_cmd_init(hdev); if (ret) goto err_cmd_init; From 466b0c00391bf160d1355489e542ecbfc86f4d98 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Thu, 2 Nov 2017 20:45:18 +0800 Subject: [PATCH 1734/2177] net: hns3: Add support for misc interrupt This patch adds initialization and deinitialization for misc interrupt. This interrupt will be used to handle reset message(IRQ). Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 5 ++ .../hisilicon/hns3/hns3pf/hclge_main.c | 74 +++++++++++++++++++ .../hisilicon/hns3/hns3pf/hclge_main.h | 2 + 3 files changed, 81 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 6bdc2167084b..db4d887fd748 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -63,6 +63,11 @@ enum hclge_cmd_status { HCLGE_ERR_CSQ_ERROR = -3, }; +struct hclge_misc_vector { + u8 __iomem *addr; + int vector_irq; +}; + struct hclge_cmq { struct hclge_cmq_ring csq; struct hclge_cmq_ring crq; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index cf0fafec7954..e45842e48865 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2392,11 +2392,71 @@ static void hclge_service_complete(struct hclge_dev *hdev) clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); } +static void hclge_enable_vector(struct hclge_misc_vector *vector, bool enable) +{ + writel(enable ? 1 : 0, vector->addr); +} + +static irqreturn_t hclge_misc_irq_handle(int irq, void *data) +{ + struct hclge_dev *hdev = data; + + hclge_enable_vector(&hdev->misc_vector, false); + if (!test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state)) + schedule_work(&hdev->service_task); + + return IRQ_HANDLED; +} + +static void hclge_free_vector(struct hclge_dev *hdev, int vector_id) +{ + hdev->vector_status[vector_id] = HCLGE_INVALID_VPORT; + hdev->num_msi_left += 1; + hdev->num_msi_used -= 1; +} + +static void hclge_get_misc_vector(struct hclge_dev *hdev) +{ + struct hclge_misc_vector *vector = &hdev->misc_vector; + + vector->vector_irq = pci_irq_vector(hdev->pdev, 0); + + vector->addr = hdev->hw.io_base + HCLGE_MISC_VECTOR_REG_BASE; + hdev->vector_status[0] = 0; + + hdev->num_msi_left -= 1; + hdev->num_msi_used += 1; +} + +static int hclge_misc_irq_init(struct hclge_dev *hdev) +{ + int ret; + + hclge_get_misc_vector(hdev); + + ret = devm_request_irq(&hdev->pdev->dev, + hdev->misc_vector.vector_irq, + hclge_misc_irq_handle, 0, "hclge_misc", hdev); + if (ret) { + hclge_free_vector(hdev, 0); + dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n", + hdev->misc_vector.vector_irq); + } + + return ret; +} + +static void hclge_misc_irq_service_task(struct hclge_dev *hdev) +{ + hclge_enable_vector(&hdev->misc_vector, true); +} + static void hclge_service_task(struct work_struct *work) { struct hclge_dev *hdev = container_of(work, struct hclge_dev, service_task); + hclge_misc_irq_service_task(hdev); hclge_update_speed_duplex(hdev); hclge_update_link_status(hdev); hclge_update_stats_for_all(hdev); @@ -4480,6 +4540,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = hclge_misc_irq_init(hdev); + if (ret) { + dev_err(&pdev->dev, + "Misc IRQ(vector0) init error, ret = %d.\n", + ret); + return ret; + } + ret = hclge_alloc_tqps(hdev); if (ret) { dev_err(&pdev->dev, "Allocate TQPs error, ret = %d.\n", ret); @@ -4545,6 +4613,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) timer_setup(&hdev->service_timer, hclge_service_timer, 0); INIT_WORK(&hdev->service_task, hclge_service_task); + /* Enable MISC vector(vector0) */ + hclge_enable_vector(&hdev->misc_vector, true); + set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state); set_bit(HCLGE_STATE_DOWN, 
&hdev->state); @@ -4577,6 +4648,9 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) if (mac->phydev) mdiobus_unregister(mac->mdio_bus); + /* Disable MISC vector(vector0) */ + hclge_enable_vector(&hdev->misc_vector, false); + hclge_free_vector(hdev, 0); hclge_destroy_cmd_queue(&hdev->hw); hclge_pci_uninit(hdev); ae_dev->priv = NULL; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index bca4430bb7e7..2a1d4d6810bf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -27,6 +27,7 @@ (HCLGE_PF_CFG_BLOCK_SIZE / HCLGE_CFG_RD_LEN_BYTES) #define HCLGE_VECTOR_REG_BASE 0x20000 +#define HCLGE_MISC_VECTOR_REG_BASE 0x20400 #define HCLGE_VECTOR_REG_OFFSET 0x4 #define HCLGE_VECTOR_VF_OFFSET 0x100000 @@ -400,6 +401,7 @@ struct hclge_dev { struct pci_dev *pdev; struct hnae3_ae_dev *ae_dev; struct hclge_hw hw; + struct hclge_misc_vector misc_vector; struct hclge_hw_stats hw_stats; unsigned long state; From 4ed340ab8f49275a83337cb66e8f53e544f34674 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Thu, 2 Nov 2017 20:45:19 +0800 Subject: [PATCH 1735/2177] net: hns3: Add reset process in hclge_main This patch adds reset support for PF,it include : global reset, core reset, IMP reset, PF reset.The core reset will Reset all datapath of all functions except IMP, MAC and PCI interface. Global reset is equal with the core reset plus all MAC reset. IMP reset is caused by watchdog timer expiration, the same with core reset in the reset flow. PF reset will reset whole physical function. Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 19 ++ .../hisilicon/hns3/hns3pf/hclge_cmd.h | 7 + .../hisilicon/hns3/hns3pf/hclge_main.c | 285 ++++++++++++++++++ .../hisilicon/hns3/hns3pf/hclge_main.h | 15 + 4 files changed, 326 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 3acd8db0a794..67c59e1039f2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -110,6 +110,21 @@ enum hnae3_media_type { HNAE3_MEDIA_TYPE_BACKPLANE, }; +enum hnae3_reset_notify_type { + HNAE3_UP_CLIENT, + HNAE3_DOWN_CLIENT, + HNAE3_INIT_CLIENT, + HNAE3_UNINIT_CLIENT, +}; + +enum hnae3_reset_type { + HNAE3_FUNC_RESET, + HNAE3_CORE_RESET, + HNAE3_GLOBAL_RESET, + HNAE3_IMP_RESET, + HNAE3_NONE_RESET, +}; + struct hnae3_vector_info { u8 __iomem *io_addr; int vector; @@ -133,6 +148,8 @@ struct hnae3_client_ops { void (*uninit_instance)(struct hnae3_handle *handle, bool reset); void (*link_status_change)(struct hnae3_handle *handle, bool state); int (*setup_tc)(struct hnae3_handle *handle, u8 tc); + int (*reset_notify)(struct hnae3_handle *handle, + enum hnae3_reset_notify_type type); }; #define HNAE3_CLIENT_NAME_LENGTH 16 @@ -367,6 +384,8 @@ struct hnae3_ae_ops { u16 vlan_id, bool is_kill); int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid, u16 vlan, u8 qos, __be16 proto); + void (*reset_event)(struct hnae3_handle *handle, + enum hnae3_reset_type reset); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index db4d887fd748..844c83ea549e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -697,6 +697,13 @@ struct hclge_reset_tqp_queue_cmd { u8 rsv[20]; }; +#define HCLGE_CFG_RESET_MAC_B 3 +#define HCLGE_CFG_RESET_FUNC_B 7 +struct hclge_reset_cmd { + u8 mac_func_reset; + u8 fun_reset_vfid; + u8 rsv[22]; +}; #define HCLGE_DEFAULT_TX_BUF 0x4000 /* 16k bytes */ #define HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */ #define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e45842e48865..699983a954b2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -35,6 +35,7 @@ static int hclge_set_mta_filter_mode(struct hclge_dev *hdev, enum hclge_mta_dmac_sel_type mta_mac_sel, bool enable); static int hclge_init_vlan_config(struct hclge_dev *hdev); +static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); static struct hnae3_ae_algo ae_algo; @@ -2446,8 +2447,212 @@ static int hclge_misc_irq_init(struct hclge_dev *hdev) return ret; } +static int hclge_notify_client(struct hclge_dev *hdev, + enum hnae3_reset_notify_type type) +{ + struct hnae3_client *client = hdev->nic_client; + u16 i; + + if (!client->ops->reset_notify) + return -EOPNOTSUPP; + + for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { + struct hnae3_handle *handle = &hdev->vport[i].nic; + int ret; + + ret = client->ops->reset_notify(handle, type); + if (ret) + return ret; + } + + return 0; +} + +static int hclge_reset_wait(struct hclge_dev *hdev) +{ +#define HCLGE_RESET_WATI_MS 100 +#define HCLGE_RESET_WAIT_CNT 5 + u32 val, reg, reg_bit; + u32 cnt = 0; + + switch (hdev->reset_type) { + case HNAE3_GLOBAL_RESET: + reg = HCLGE_GLOBAL_RESET_REG; + reg_bit = HCLGE_GLOBAL_RESET_BIT; + break; + case HNAE3_CORE_RESET: + reg = HCLGE_GLOBAL_RESET_REG; + reg_bit = HCLGE_CORE_RESET_BIT; + break; + case HNAE3_FUNC_RESET: + reg = HCLGE_FUN_RST_ING; + reg_bit = HCLGE_FUN_RST_ING_B; + break; + default: + dev_err(&hdev->pdev->dev, + "Wait for unsupported reset type: %d\n", + hdev->reset_type); + return -EINVAL; + } + + val = hclge_read_dev(&hdev->hw, reg); + while (hnae_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) { + msleep(HCLGE_RESET_WATI_MS); + val = hclge_read_dev(&hdev->hw, reg); + cnt++; + } + + /* must clear reset status register to + * prevent driver detect reset interrupt again + */ + reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG); + hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, reg); + + if (cnt >= HCLGE_RESET_WAIT_CNT) { + dev_warn(&hdev->pdev->dev, + "Wait for reset timeout: %d\n", hdev->reset_type); + return -EBUSY; + } + + return 0; +} + +static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id) +{ + struct hclge_desc desc; + struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false); + hnae_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_MAC_B, 0); + hnae_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_FUNC_B, 1); + req->fun_reset_vfid = func_id; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "send function reset cmd fail, status =%d\n", ret); + + return ret; +} + +static void hclge_do_reset(struct hclge_dev *hdev, enum hnae3_reset_type type) +{ + struct pci_dev *pdev = hdev->pdev; + u32 val; + + switch (type) { + case HNAE3_GLOBAL_RESET: + val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG); + 
hnae_set_bit(val, HCLGE_GLOBAL_RESET_BIT, 1); + hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val); + dev_info(&pdev->dev, "Global Reset requested\n"); + break; + case HNAE3_CORE_RESET: + val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG); + hnae_set_bit(val, HCLGE_CORE_RESET_BIT, 1); + hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val); + dev_info(&pdev->dev, "Core Reset requested\n"); + break; + case HNAE3_FUNC_RESET: + dev_info(&pdev->dev, "PF Reset requested\n"); + hclge_func_reset_cmd(hdev, 0); + break; + default: + dev_warn(&pdev->dev, + "Unsupported reset type: %d\n", type); + break; + } +} + +static enum hnae3_reset_type hclge_detected_reset_event(struct hclge_dev *hdev) +{ + enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; + u32 rst_reg_val; + + rst_reg_val = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG); + if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_reg_val) + rst_level = HNAE3_GLOBAL_RESET; + else if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_reg_val) + rst_level = HNAE3_CORE_RESET; + else if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_reg_val) + rst_level = HNAE3_IMP_RESET; + + return rst_level; +} + +static void hclge_reset_event(struct hnae3_handle *handle, + enum hnae3_reset_type reset) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + dev_info(&hdev->pdev->dev, + "Receive reset event , reset_type is %d", reset); + + switch (reset) { + case HNAE3_FUNC_RESET: + case HNAE3_CORE_RESET: + case HNAE3_GLOBAL_RESET: + if (test_bit(HCLGE_STATE_RESET_INT, &hdev->state)) { + dev_err(&hdev->pdev->dev, "Already in reset state"); + return; + } + hdev->reset_type = reset; + set_bit(HCLGE_STATE_RESET_INT, &hdev->state); + set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + schedule_work(&hdev->service_task); + break; + default: + dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset); + break; + } +} + +static void hclge_reset_subtask(struct hclge_dev *hdev) +{ + bool do_reset; + + do_reset = hdev->reset_type != HNAE3_NONE_RESET; + + /* Reset is detected by interrupt */ + if (hdev->reset_type == HNAE3_NONE_RESET) + hdev->reset_type = hclge_detected_reset_event(hdev); + + if (hdev->reset_type == HNAE3_NONE_RESET) + return; + + switch (hdev->reset_type) { + case HNAE3_FUNC_RESET: + case HNAE3_CORE_RESET: + case HNAE3_GLOBAL_RESET: + case HNAE3_IMP_RESET: + hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); + + if (do_reset) + hclge_do_reset(hdev, hdev->reset_type); + else + set_bit(HCLGE_STATE_RESET_INT, &hdev->state); + + if (!hclge_reset_wait(hdev)) { + hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); + hclge_reset_ae_dev(hdev->ae_dev); + hclge_notify_client(hdev, HNAE3_INIT_CLIENT); + clear_bit(HCLGE_STATE_RESET_INT, &hdev->state); + } + hclge_notify_client(hdev, HNAE3_UP_CLIENT); + break; + default: + dev_err(&hdev->pdev->dev, "Unsupported reset type:%d\n", + hdev->reset_type); + break; + } + hdev->reset_type = HNAE3_NONE_RESET; +} + static void hclge_misc_irq_service_task(struct hclge_dev *hdev) { + hclge_reset_subtask(hdev); hclge_enable_vector(&hdev->misc_vector, true); } @@ -4498,6 +4703,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hdev->flag |= HCLGE_FLAG_USE_MSIX; hdev->pdev = pdev; hdev->ae_dev = ae_dev; + hdev->reset_type = HNAE3_NONE_RESET; ae_dev->priv = hdev; ret = hclge_pci_init(hdev); @@ -4630,6 +4836,84 @@ err_hclge_dev: return ret; } +static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) +{ + struct hclge_dev *hdev = ae_dev->priv; + struct pci_dev *pdev = ae_dev->pdev; + 
int ret; + + set_bit(HCLGE_STATE_DOWN, &hdev->state); + + ret = hclge_cmd_init(hdev); + if (ret) { + dev_err(&pdev->dev, "Cmd queue init failed\n"); + return ret; + } + + ret = hclge_get_cap(hdev); + if (ret) { + dev_err(&pdev->dev, "get hw capability error, ret = %d.\n", + ret); + return ret; + } + + ret = hclge_configure(hdev); + if (ret) { + dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret); + return ret; + } + + ret = hclge_map_tqp(hdev); + if (ret) { + dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret); + return ret; + } + + ret = hclge_mac_init(hdev); + if (ret) { + dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret); + return ret; + } + + ret = hclge_buffer_alloc(hdev); + if (ret) { + dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret); + return ret; + } + + ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX); + if (ret) { + dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret); + return ret; + } + + ret = hclge_init_vlan_config(hdev); + if (ret) { + dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret); + return ret; + } + + ret = hclge_tm_schd_init(hdev); + if (ret) { + dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret); + return ret; + } + + ret = hclge_rss_init_hw(hdev); + if (ret) { + dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret); + return ret; + } + + /* Enable MISC vector(vector0) */ + hclge_enable_vector(&hdev->misc_vector, true); + + dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n", + HCLGE_DRIVER_NAME); + + return 0; +} + static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; @@ -4699,6 +4983,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_mdix_mode = hclge_get_mdix_mode, .set_vlan_filter = hclge_set_port_vlan_filter, .set_vf_vlan_filter = hclge_set_vf_vlan_filter, + .reset_event = hclge_reset_event, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 2a1d4d6810bf..742e6ee9efaf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -79,6 +79,19 @@ #define HCLGE_PHY_MDIX_STATUS_B (6) #define HCLGE_PHY_SPEED_DUP_RESOLVE_B (11) +/* Reset related Registers */ +#define HCLGE_MISC_RESET_STS_REG 0x20700 +#define HCLGE_GLOBAL_RESET_REG 0x20A00 +#define HCLGE_GLOBAL_RESET_BIT 0x0 +#define HCLGE_CORE_RESET_BIT 0x1 +#define HCLGE_FUN_RST_ING 0x20C00 +#define HCLGE_FUN_RST_ING_B 0 + +/* Vector0 register bits define */ +#define HCLGE_VECTOR0_GLOBALRESET_INT_B 5 +#define HCLGE_VECTOR0_CORERESET_INT_B 6 +#define HCLGE_VECTOR0_IMPRESET_INT_B 7 + enum HCLGE_DEV_STATE { HCLGE_STATE_REINITING, HCLGE_STATE_DOWN, @@ -88,6 +101,7 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_SERVICE_SCHED, HCLGE_STATE_MBX_HANDLING, HCLGE_STATE_MBX_IRQ, + HCLGE_STATE_RESET_INT, HCLGE_STATE_MAX }; @@ -405,6 +419,7 @@ struct hclge_dev { struct hclge_hw_stats hw_stats; unsigned long state; + enum hnae3_reset_type reset_type; u32 fw_version; u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */ u16 num_tqps; /* Num task queue pairs of this PF */ From f8fa222ca57cccb066d18767010275e9e3a2b9fe Mon Sep 17 00:00:00 2001 From: Lipeng Date: Thu, 2 Nov 2017 20:45:20 +0800 Subject: [PATCH 1736/2177] net: hns3: Add timeout process in hns3_enet This patch add timeout handler in hns3_enet.c to handle TX side timeout event, when TX timeout event occur, it will triger NIC driver into reset process. 
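For context, a minimal sketch of how a netdev driver opts in to the core TX watchdog that ends up invoking a handler like the one added here; the foo_* names are hypothetical and are not taken from hns3. The stack's dev_watchdog calls .ndo_tx_timeout once a stopped TX queue has not advanced trans_start within watchdog_timeo.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
        /* queue the skb to hardware; stop the queue when the ring is full */
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}

static void foo_tx_timeout(struct net_device *ndev)
{
        /* dump queue state, then escalate to the driver's reset path */
}

static const struct net_device_ops foo_netdev_ops = {
        .ndo_start_xmit = foo_start_xmit,
        .ndo_tx_timeout = foo_tx_timeout,       /* called by dev_watchdog */
};

static int foo_register(struct net_device *ndev)
{
        ndev->netdev_ops = &foo_netdev_ops;
        ndev->watchdog_timeo = 5 * HZ;  /* queue stalled longer than 5s */
        return register_netdev(ndev);
}
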
Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hns3_enet.c | 86 +++++++++++++++++++ .../hisilicon/hns3/hns3pf/hns3_enet.h | 2 + 2 files changed, 88 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index c6c5b2a96aaa..f0cb88a07850 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -258,6 +258,7 @@ out_start_err: static int hns3_nic_net_open(struct net_device *netdev) { + struct hns3_nic_priv *priv = netdev_priv(netdev); int ret; netif_carrier_off(netdev); @@ -273,6 +274,7 @@ static int hns3_nic_net_open(struct net_device *netdev) return ret; } + priv->last_reset_time = jiffies; return 0; } @@ -1322,10 +1324,91 @@ static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) return ret; } +static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hns3_enet_ring *tx_ring = NULL; + int timeout_queue = 0; + int hw_head, hw_tail; + int i; + + /* Find the stopped queue the same way the stack does */ + for (i = 0; i < ndev->real_num_tx_queues; i++) { + struct netdev_queue *q; + unsigned long trans_start; + + q = netdev_get_tx_queue(ndev, i); + trans_start = q->trans_start; + if (netif_xmit_stopped(q) && + time_after(jiffies, + (trans_start + ndev->watchdog_timeo))) { + timeout_queue = i; + break; + } + } + + if (i == ndev->num_tx_queues) { + netdev_info(ndev, + "no netdev TX timeout queue found, timeout count: %llu\n", + priv->tx_timeout_count); + return false; + } + + tx_ring = priv->ring_data[timeout_queue].ring; + + hw_head = readl_relaxed(tx_ring->tqp->io_base + + HNS3_RING_TX_RING_HEAD_REG); + hw_tail = readl_relaxed(tx_ring->tqp->io_base + + HNS3_RING_TX_RING_TAIL_REG); + netdev_info(ndev, + "tx_timeout count: %llu, queue id: %d, SW_NTU: 0x%x, SW_NTC: 0x%x, HW_HEAD: 0x%x, HW_TAIL: 0x%x, INT: 0x%x\n", + priv->tx_timeout_count, + timeout_queue, + tx_ring->next_to_use, + tx_ring->next_to_clean, + hw_head, + hw_tail, + readl(tx_ring->tqp_vector->mask_addr)); + + return true; +} + +static void hns3_nic_net_timeout(struct net_device *ndev) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + unsigned long last_reset_time = priv->last_reset_time; + struct hnae3_handle *h = priv->ae_handle; + + if (!hns3_get_tx_timeo_queue_info(ndev)) + return; + + priv->tx_timeout_count++; + + /* This timeout is far away enough from last timeout, + * if timeout again,set the reset type to PF reset + */ + if (time_after(jiffies, (last_reset_time + 20 * HZ))) + priv->reset_level = HNAE3_FUNC_RESET; + + /* Don't do any new action before the next timeout */ + else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo))) + return; + + priv->last_reset_time = jiffies; + + if (h->ae_algo->ops->reset_event) + h->ae_algo->ops->reset_event(h, priv->reset_level); + + priv->reset_level++; + if (priv->reset_level > HNAE3_GLOBAL_RESET) + priv->reset_level = HNAE3_GLOBAL_RESET; +} + static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_open = hns3_nic_net_open, .ndo_stop = hns3_nic_net_stop, .ndo_start_xmit = hns3_nic_net_xmit, + .ndo_tx_timeout = hns3_nic_net_timeout, .ndo_set_mac_address = hns3_nic_net_set_mac_address, .ndo_change_mtu = hns3_nic_change_mtu, .ndo_set_features = hns3_nic_set_features, @@ -2763,6 +2846,9 @@ static int hns3_client_init(struct hnae3_handle 
*handle) priv->dev = &pdev->dev; priv->netdev = netdev; priv->ae_handle = handle; + priv->last_reset_time = jiffies; + priv->reset_level = HNAE3_FUNC_RESET; + priv->tx_timeout_count = 0; handle->kinfo.netdev = netdev; handle->priv = (void *)priv; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h index 58dc30bf893c..8a9de759957b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h @@ -518,6 +518,8 @@ struct hns3_nic_priv { /* The most recently read link state */ int link; u64 tx_timeout_count; + enum hnae3_reset_type reset_level; + unsigned long last_reset_time; unsigned long state; From bb6b94a896d4dd4dcdeccca87c3fd22521c652c0 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Thu, 2 Nov 2017 20:45:21 +0800 Subject: [PATCH 1737/2177] net: hns3: Add reset interface implementation in client This patch implement the interface of reset notification in hns3_enet, it will do resetting business which include shutdown nic device, free and initialize client side resource. Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hns3_enet.c | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index f0cb88a07850..39679fdb83c7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -3009,11 +3009,164 @@ err_out: return ret; } +static void hns3_recover_hw_addr(struct net_device *ndev) +{ + struct netdev_hw_addr_list *list; + struct netdev_hw_addr *ha, *tmp; + + /* go through and sync uc_addr entries to the device */ + list = &ndev->uc; + list_for_each_entry_safe(ha, tmp, &list->list, list) + hns3_nic_uc_sync(ndev, ha->addr); + + /* go through and sync mc_addr entries to the device */ + list = &ndev->mc; + list_for_each_entry_safe(ha, tmp, &list->list, list) + hns3_nic_mc_sync(ndev, ha->addr); +} + +static void hns3_drop_skb_data(struct hns3_enet_ring *ring, struct sk_buff *skb) +{ + dev_kfree_skb_any(skb); +} + +static void hns3_clear_all_ring(struct hnae3_handle *h) +{ + struct net_device *ndev = h->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(ndev); + u32 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) { + struct netdev_queue *dev_queue; + struct hns3_enet_ring *ring; + + ring = priv->ring_data[i].ring; + hns3_clean_tx_ring(ring, ring->desc_num); + dev_queue = netdev_get_tx_queue(ndev, + priv->ring_data[i].queue_index); + netdev_tx_reset_queue(dev_queue); + + ring = priv->ring_data[i + h->kinfo.num_tqps].ring; + hns3_clean_rx_ring(ring, ring->desc_num, hns3_drop_skb_data); + } +} + +static int hns3_reset_notify_down_enet(struct hnae3_handle *handle) +{ + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct net_device *ndev = kinfo->netdev; + + if (!netif_running(ndev)) + return -EIO; + + return hns3_nic_net_stop(ndev); +} + +static int hns3_reset_notify_up_enet(struct hnae3_handle *handle) +{ + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev); + int ret = 0; + + if (netif_running(kinfo->netdev)) { + ret = hns3_nic_net_up(kinfo->netdev); + if (ret) { + netdev_err(kinfo->netdev, + "hns net up fail, ret=%d!\n", ret); + return ret; + } + + priv->last_reset_time = jiffies; + } + + return ret; +} 
+ +static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) +{ + struct net_device *netdev = handle->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(netdev); + int ret; + + priv->reset_level = 1; + hns3_init_mac_addr(netdev); + hns3_nic_set_rx_mode(netdev); + hns3_recover_hw_addr(netdev); + + /* Carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + + ret = hns3_get_ring_config(priv); + if (ret) + return ret; + + ret = hns3_nic_init_vector_data(priv); + if (ret) + return ret; + + ret = hns3_init_all_ring(priv); + if (ret) { + hns3_nic_uninit_vector_data(priv); + priv->ring_data = NULL; + } + + return ret; +} + +static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) +{ + struct net_device *netdev = handle->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(netdev); + int ret; + + hns3_clear_all_ring(handle); + + ret = hns3_nic_uninit_vector_data(priv); + if (ret) { + netdev_err(netdev, "uninit vector error\n"); + return ret; + } + + ret = hns3_uninit_all_ring(priv); + if (ret) + netdev_err(netdev, "uninit ring error\n"); + + priv->ring_data = NULL; + + return ret; +} + +static int hns3_reset_notify(struct hnae3_handle *handle, + enum hnae3_reset_notify_type type) +{ + int ret = 0; + + switch (type) { + case HNAE3_UP_CLIENT: + ret = hns3_reset_notify_up_enet(handle); + break; + case HNAE3_DOWN_CLIENT: + ret = hns3_reset_notify_down_enet(handle); + break; + case HNAE3_INIT_CLIENT: + ret = hns3_reset_notify_init_enet(handle); + break; + case HNAE3_UNINIT_CLIENT: + ret = hns3_reset_notify_uninit_enet(handle); + break; + default: + break; + } + + return ret; +} + static const struct hnae3_client_ops client_ops = { .init_instance = hns3_client_init, .uninit_instance = hns3_client_uninit, .link_status_change = hns3_link_status_change, .setup_tc = hns3_client_setup_tc, + .reset_notify = hns3_reset_notify, }; /* hns3_init_module - Driver registration routine From ae064e6123f89f90af7e4ea190cc0c612643ca93 Mon Sep 17 00:00:00 2001 From: qumingguang Date: Thu, 2 Nov 2017 20:45:22 +0800 Subject: [PATCH 1738/2177] net: hns3: Fix a misuse to devm_free_irq we should use free_irq to free the nic irq during the unloading time. because we use request_irq to apply it when nic up. It will crash if up net device after reset the port. This patch fixes the issue. Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 39679fdb83c7..2a0af11c9b59 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2558,9 +2558,8 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) (void)irq_set_affinity_hint( priv->tqp_vector[i].vector_irq, NULL); - devm_free_irq(&pdev->dev, - priv->tqp_vector[i].vector_irq, - &priv->tqp_vector[i]); + free_irq(priv->tqp_vector[i].vector_irq, + &priv->tqp_vector[i]); } priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED; From c6dc52130536d6d3d750bf1097b3bc897f7ef054 Mon Sep 17 00:00:00 2001 From: qumingguang Date: Thu, 2 Nov 2017 20:45:23 +0800 Subject: [PATCH 1739/2177] net: hns3: hns3:fix a bug about statistic counter in reset process All member of Struct hdev->hw_stats is initialized to 0 as hdev is allocated by devm_kzalloc. But in reset process, hdev will not be allocated again, so need clear hdev->hw_stats in reset process, otherwise the statistic will be wrong after reset. This patch set all of the statistic counters to zero after reset. Signed-off-by: qumingguang Signed-off-by: Lipeng Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 699983a954b2..c6ba89089ef3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4836,6 +4836,11 @@ err_hclge_dev: return ret; } +static void hclge_stats_clear(struct hclge_dev *hdev) +{ + memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); +} + static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; @@ -4844,6 +4849,8 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) set_bit(HCLGE_STATE_DOWN, &hdev->state); + hclge_stats_clear(hdev); + ret = hclge_cmd_init(hdev); if (ret) { dev_err(&pdev->dev, "Cmd queue init failed\n"); From 20acbd9a7aeee0b0af7107f3de791a52c949f3ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:06:08 +0000 Subject: [PATCH 1740/2177] rxrpc: Lock around calling a kernel service Rx notification Place a spinlock around the invocation of call->notify_rx() for a kernel service call and lock again when ending the call and replace the notification pointer with a pointer to a dummy function. This is required because it's possible for rxrpc_notify_socket() to be called after the call has been ended by the kernel service if called from the asynchronous work function rxrpc_process_call(). However, rxrpc_notify_socket() currently only holds the RCU read lock when invoking ->notify_rx(), which means that the afs_call struct would need to be disposed of by call_rcu() rather than by kfree(). But we shouldn't see any notifications from a call after calling rxrpc_kernel_end_call(), so a lock is required in rxrpc code. Without this, we may see the call wait queue as having a corrupt spinlock: BUG: spinlock bad magic on CPU#0, kworker/0:2/1612 general protection fault: 0000 [#1] SMP ... 
Workqueue: krxrpcd rxrpc_process_call task: ffff88040b83c400 task.stack: ffff88040adfc000 RIP: 0010:spin_bug+0x161/0x18f RSP: 0018:ffff88040adffcc0 EFLAGS: 00010002 RAX: 0000000000000032 RBX: 6b6b6b6b6b6b6b6b RCX: ffffffff81ab16cf RDX: ffff88041fa14c01 RSI: ffff88041fa0ccb8 RDI: ffff88041fa0ccb8 RBP: ffff88040adffcd8 R08: 00000000ffffffff R09: 00000000ffffffff R10: ffff88040adffc60 R11: 000000000000022c R12: ffff88040aca2208 R13: ffffffff81a58114 R14: 0000000000000000 R15: 0000000000000000 .... Call Trace: do_raw_spin_lock+0x1d/0x89 _raw_spin_lock_irqsave+0x3d/0x49 ? __wake_up_common_lock+0x4c/0xa7 __wake_up_common_lock+0x4c/0xa7 ? __lock_is_held+0x47/0x7a __wake_up+0xe/0x10 afs_wake_up_call_waiter+0x11b/0x122 [kafs] rxrpc_notify_socket+0x12b/0x258 rxrpc_process_call+0x18e/0x7d0 process_one_work+0x298/0x4de ? rescuer_thread+0x280/0x280 worker_thread+0x1d1/0x2ae ? rescuer_thread+0x280/0x280 kthread+0x12c/0x134 ? kthread_create_on_node+0x3a/0x3a ret_from_fork+0x27/0x40 In this case, note the corrupt data in EBX. The address of the offending afs_call is in R12, plus the offset to the spinlock. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 16 ++++++++++++++++ net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_object.c | 1 + net/rxrpc/recvmsg.c | 2 ++ 4 files changed, 20 insertions(+) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 344b2dcad52d..9b5c46b052fd 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -322,6 +322,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, } EXPORT_SYMBOL(rxrpc_kernel_begin_call); +/* + * Dummy function used to stop the notifier talking to recvmsg(). + */ +static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ +} + /** * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using * @sock: The socket the call is on @@ -336,6 +344,14 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); + + /* Make sure we're not going to call back into a kernel service */ + if (call->notify_rx) { + spin_lock_bh(&call->notify_lock); + call->notify_rx = rxrpc_dummy_notify_rx; + spin_unlock_bh(&call->notify_lock); + } + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_kernel); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ea5600b747cc..b2151993d384 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -525,6 +525,7 @@ struct rxrpc_call { unsigned long flags; unsigned long events; spinlock_t lock; + spinlock_t notify_lock; /* Kernel notification lock */ rwlock_t state_lock; /* lock for state transition */ u32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index fcdd6555a820..4c7fbc6dcce7 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -124,6 +124,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) INIT_LIST_HEAD(&call->sock_link); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); + spin_lock_init(&call->notify_lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); call->debug_id = atomic_inc_return(&rxrpc_debug_id); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index e4937b3f3685..8510a98b87e1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call) sk = &rx->sk; if (rx && sk->sk_state < 
RXRPC_CLOSE) { if (call->notify_rx) { + spin_lock_bh(&call->notify_lock); call->notify_rx(sk, call, call->user_call_ID); + spin_unlock_bh(&call->notify_lock); } else { write_lock_bh(&rx->recvmsg_lock); if (list_empty(&call->recvmsg_link)) { From 1457cc4cfb93511de347d1d0a1c9da3e826b66fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:06:55 +0000 Subject: [PATCH 1741/2177] rxrpc: Fix a null ptr deref in rxrpc_fill_out_ack() rxrpc_fill_out_ack() needs to be passed the connection pointer from its caller rather than using call->conn as the call may be disconnected in parallel with it, clearing call->conn, leading to: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: rxrpc_send_ack_packet+0x231/0x6a4 Signed-off-by: David Howells --- net/rxrpc/output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 71e6f713fbe7..8ee8b2d4a3eb 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -35,7 +35,8 @@ struct rxrpc_abort_buffer { /* * Fill out an ACK packet. */ -static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, +static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, + struct rxrpc_call *call, struct rxrpc_ack_buffer *pkt, rxrpc_seq_t *_hard_ack, rxrpc_seq_t *_top, @@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, } while (before_eq(seq, top)); } - mtu = call->conn->params.peer->if_mtu; - mtu -= call->conn->params.peer->hdrsize; + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); pkt->ackinfo.maxMTU = htonl(mtu); @@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) } call->ackr_reason = 0; } - n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason); + n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); spin_unlock_bh(&call->lock); From dcbefc30fbc2c1926bcecdd62579e3e107653d82 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:06:26 +0000 Subject: [PATCH 1742/2177] rxrpc: Fix call expiry handling Fix call expiry handling in the following ways (1) If all the request data from a client call is acked, don't send a follow up IDLE ACK with firstPacket == 1 and previousPacket == 0 as this appears to fool some servers into thinking everything has been accepted. (2) Never send an abort back to the server once it has ACK'd all the request packets; rather just try to reuse the channel for the next call. The first request DATA packet of the next call on the same channel will implicitly ACK the entire reply of the dead call - even if we haven't transmitted it yet. (3) Don't send RX_CALL_TIMEOUT in an ABORT packet, librx uses abort codes to pass local errors to the caller in addition to remote errors, and this is meant to be local only. The following also need to be addressed in future patches: (4) Service calls should send PING ACKs as 'keep alives' if the server is still processing the call. (5) VERSION REPLY packets should be sent to the peers of service connections to act as keep-alives. This is used to keep firewall routes in place. The AFS CM should enable this. 
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/input.c | 2 -- net/rxrpc/output.c | 10 ++++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7a77844aab16..3574508baf9a 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -386,7 +386,7 @@ recheck_state: now = ktime_get_real(); if (ktime_before(call->expire_at, now)) { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); + rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1e37eb1c0c66..1b592073ec96 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, write_unlock(&call->state_lock); if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true, - rxrpc_propose_ack_client_tx_end); trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); } else { trace_rxrpc_transmit(call, rxrpc_transmit_end); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8ee8b2d4a3eb..f47659c7b224 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -222,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) rxrpc_serial_t serial; int ret; + /* Don't bother sending aborts for a client call once the server has + * hard-ACK'd all of its request data. After that point, we're not + * going to stop the operation proceeding, and whilst we might limit + * the reply, it's not worth it if we can send a new call on the same + * channel instead, thereby closing off this call. + */ + if (rxrpc_is_client_call(call) && + test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + return 0; + spin_lock_bh(&call->lock); if (call->conn) conn = rxrpc_get_connection_maybe(call->conn); From 39f1332c526cd9d6de59a72520e8334e54b62cda Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Wed, 1 Nov 2017 17:42:44 +0530 Subject: [PATCH 1743/2177] rsi: move rsi_sdio_reinit_device() out of CONFIG_PM This function is generic. It doesn't contain wowlan specific code. It should not be under CONFIG_PM. This patch resolves compilation errors observed when CONFIG_PM flag is disabled. 
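A minimal sketch of the failure mode being fixed, using hypothetical foo_* names rather than the actual rsi symbols: a helper that is referenced from unconditionally built code must not be defined under CONFIG_PM, otherwise CONFIG_PM=n configurations stop building.

struct foo_hw;

#ifdef CONFIG_PM                        /* wrong: the helper is not PM-specific */
static int foo_reinit_device(struct foo_hw *hw)
{
        return 0;
}
#endif

int foo_handle_card_reset(struct foo_hw *hw)
{
        return foo_reinit_device(hw);   /* fails to compile when CONFIG_PM=n */
}
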
Reported-by: kbuild test robot Fixes: ef71ed0608c ("rsi: sdio: Add WOWLAN support for S5 shutdown state") Fixes: a24e35fcee0 ("rsi: sdio: Add WOWLAN support for S4 hibernate state") Fixes: e1ced6422a3 ("rsi: sdio: add WOWLAN support for S3 suspend state") Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_sdio.c | 52 ++++++++++++------------- drivers/net/wireless/rsi/rsi_sdio.h | 1 - 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 3288fb6888b9..b0cf41195051 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -871,6 +871,32 @@ fail: return status; } +static int rsi_sdio_reinit_device(struct rsi_hw *adapter) +{ + struct rsi_91x_sdiodev *sdev = adapter->rsi_dev; + struct sdio_func *pfunction = sdev->pfunction; + int ii; + + for (ii = 0; ii < NUM_SOFT_QUEUES; ii++) + skb_queue_purge(&adapter->priv->tx_queue[ii]); + + /* Initialize device again */ + sdio_claim_host(pfunction); + + sdio_release_irq(pfunction); + rsi_reset_card(pfunction); + + sdio_enable_func(pfunction); + rsi_setupcard(adapter); + rsi_init_sdio_slave_regs(adapter); + sdio_claim_irq(pfunction, rsi_handle_interrupt); + rsi_hal_device_init(adapter); + + sdio_release_host(pfunction); + + return 0; +} + static struct rsi_host_intf_ops sdio_host_intf_ops = { .write_pkt = rsi_sdio_host_intf_write_pkt, .read_pkt = rsi_sdio_host_intf_read_pkt, @@ -1281,32 +1307,6 @@ static void rsi_shutdown(struct device *dev) rsi_dbg(INFO_ZONE, "***** RSI module shut down *****\n"); } -int rsi_sdio_reinit_device(struct rsi_hw *adapter) -{ - struct rsi_91x_sdiodev *sdev = adapter->rsi_dev; - struct sdio_func *pfunction = sdev->pfunction; - int ii; - - for (ii = 0; ii < NUM_SOFT_QUEUES; ii++) - skb_queue_purge(&adapter->priv->tx_queue[ii]); - - /* Initialize device again */ - sdio_claim_host(pfunction); - - sdio_release_irq(pfunction); - rsi_reset_card(pfunction); - - sdio_enable_func(pfunction); - rsi_setupcard(adapter); - rsi_init_sdio_slave_regs(adapter); - sdio_claim_irq(pfunction, rsi_handle_interrupt); - rsi_hal_device_init(adapter); - - sdio_release_host(pfunction); - - return 0; -} - static int rsi_restore(struct device *dev) { struct sdio_func *pfunction = dev_to_sdio_func(dev); diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h index 8fbf90eb7b42..49c549ba6682 100644 --- a/drivers/net/wireless/rsi/rsi_sdio.h +++ b/drivers/net/wireless/rsi/rsi_sdio.h @@ -131,5 +131,4 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word); void rsi_sdio_ack_intr(struct rsi_hw *adapter, u8 int_bit); int rsi_sdio_determine_event_timeout(struct rsi_hw *adapter); int rsi_sdio_check_buffer_status(struct rsi_hw *adapter, u8 q_num); -int rsi_sdio_reinit_device(struct rsi_hw *adapter); #endif From e6b3b2ed3d270b3c7080c9cf7d28636dc74b0387 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Wed, 1 Nov 2017 17:42:45 +0530 Subject: [PATCH 1744/2177] rsi: fix kbuild reported build errors with CONFIG_PM off Some wowlan related code was outside CONFIG_PM flag which caused these build errors. They are fixed by moving that code under CONFIG_PM flag. 
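A condensed sketch of the guard discipline applied below, again with hypothetical foo_* names: code that exists only for suspend/wowlan is wrapped in CONFIG_PM consistently at the prototype, the definition and the point of use, so a CONFIG_PM=n build neither defines nor references it, while generic code stays unconditional.

#include <net/cfg80211.h>

struct foo_common;

#ifdef CONFIG_PM
int foo_send_wowlan_request(struct foo_common *common, u16 flags);  /* header */

static const struct wiphy_wowlan_support foo_wowlan_support = {
        .flags = WIPHY_WOWLAN_ANY | WIPHY_WOWLAN_MAGIC_PKT,
};

int foo_send_wowlan_request(struct foo_common *common, u16 flags)
{
        /* build and send the wowlan configuration frame here */
        return 0;
}
#endif

static void foo_wiphy_setup(struct wiphy *wiphy)
{
#ifdef CONFIG_PM
        wiphy->wowlan = &foo_wowlan_support;    /* PM-only hook-up */
#endif
}
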
Reported-by: kbuild test robot Fixes: ef71ed0608c ("rsi: sdio: Add WOWLAN support for S5 shutdown state") Fixes: a24e35fcee0 ("rsi: sdio: Add WOWLAN support for S4 hibernate state") Fixes: e1ced6422a3 ("rsi: sdio: add WOWLAN support for S3 suspend state") Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 5 ++++- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 2 ++ drivers/net/wireless/rsi/rsi_common.h | 2 ++ drivers/net/wireless/rsi/rsi_mgmt.h | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 36c63e953f84..32f5cb46fd4f 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1752,6 +1752,7 @@ static int rsi_mac80211_cancel_roc(struct ieee80211_hw *hw) return 0; } +#ifdef CONFIG_PM static const struct wiphy_wowlan_support rsi_wowlan_support = { .flags = WIPHY_WOWLAN_ANY | WIPHY_WOWLAN_MAGIC_PKT | @@ -1824,7 +1825,6 @@ int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan) } EXPORT_SYMBOL(rsi_config_wowlan); -#ifdef CONFIG_PM static int rsi_mac80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) { @@ -1977,7 +1977,10 @@ int rsi_mac80211_attach(struct rsi_common *common) wiphy->features |= NL80211_FEATURE_INACTIVITY_TIMER; wiphy->reg_notifier = rsi_reg_notify; +#ifdef CONFIG_PM wiphy->wowlan = &rsi_wowlan_support; +#endif + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); /* Wi-Fi direct parameters */ diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index d38a09f15742..46c9d5470dfb 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1597,6 +1597,7 @@ static int rsi_send_beacon(struct rsi_common *common) return 0; } +#ifdef CONFIG_PM int rsi_send_wowlan_request(struct rsi_common *common, u16 flags, u16 sleep_status) { @@ -1630,6 +1631,7 @@ int rsi_send_wowlan_request(struct rsi_common *common, u16 flags, return rsi_send_internal_mgmt_frame(common, skb); } +#endif /** * rsi_handle_ta_confirm_type() - This function handles the confirm frames. 
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index 70b8b4b49d04..d07dbba61727 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -83,7 +83,9 @@ u16 rsi_get_connected_channel(struct ieee80211_vif *vif); struct rsi_hw *rsi_91x_init(void); void rsi_91x_deinit(struct rsi_hw *adapter); int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len); +#ifdef CONFIG_PM int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan); +#endif struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr); struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac); void rsi_roc_timeout(struct timer_list *t); diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index 76337ce817e0..389094a3f91c 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -668,8 +668,10 @@ int rsi_band_check(struct rsi_common *common, struct ieee80211_channel *chan); int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word); int rsi_send_radio_params_update(struct rsi_common *common); int rsi_set_antenna(struct rsi_common *common, u8 antenna); +#ifdef CONFIG_PM int rsi_send_wowlan_request(struct rsi_common *common, u16 flags, u16 sleep_status); +#endif int rsi_send_ps_request(struct rsi_hw *adapter, bool enable, struct ieee80211_vif *vif); #endif From 47d3d7ac656a1ffb9d0f0d3c845663ed6fd7e78d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 30 Oct 2017 14:16:00 -0700 Subject: [PATCH 1745/2177] ipv6: Implement limits on Hop-by-Hop and Destination options RFC 8200 (IPv6) defines Hop-by-Hop options and Destination options extension headers. Both of these carry a list of TLVs which is only limited by the maximum length of the extension header (2048 bytes). By the spec a host must process all the TLVs in these options, however these could be used as a fairly obvious denial of service attack. I think this could in fact be a significant DOS vector on the Internet, one mitigating factor might be that many FWs drop all packets with EH (and obviously this is only IPv6) so an Internet wide attack might not be so effective (yet!). By my calculation, the worse case packet with TLVs in a standard 1500 byte MTU packet that would be processed by the stack contains 1282 invidual TLVs (including pad TLVS) or 724 two byte TLVs. I wrote a quick test program that floods a whole bunch of these packets to a host and sure enough there is substantial time spent in ip6_parse_tlv. These packets contain nothing but unknown TLVS (that are ignored), TLV padding, and bogus UDP header with zero payload length. 25.38% [kernel] [k] __fib6_clean_all 21.63% [kernel] [k] ip6_parse_tlv 4.21% [kernel] [k] __local_bh_enable_ip 2.18% [kernel] [k] ip6_pol_route.isra.39 1.98% [kernel] [k] fib6_walk_continue 1.88% [kernel] [k] _raw_write_lock_bh 1.65% [kernel] [k] dst_release This patch adds configurable limits to Destination and Hop-by-Hop options. There are three limits that may be set: - Limit the number of options in a Hop-by-Hop or Destination options extension header. - Limit the byte length of a Hop-by-Hop or Destination options extension header. - Disallow unrecognized options in a Hop-by-Hop or Destination options extension header. 
The limits are set in corresponding sysctls: ipv6.sysctl.max_dst_opts_cnt ipv6.sysctl.max_hbh_opts_cnt ipv6.sysctl.max_dst_opts_len ipv6.sysctl.max_hbh_opts_len If a max_*_opts_cnt is less than zero then unknown TLVs are disallowed. The number of known TLVs that are allowed is the absolute value of this number. If a limit is exceeded when processing an extension header the packet is dropped. Default values are set to 8 for option counts, and set to INT_MAX for maximum length. Note the choice to limit options to 8 is an arbitrary guess (roughly based on the fact that the stack supports three HBH options and just one destination option). These limits have been proposed in draft-ietf-6man-rfc6434-bis. Tested (by Martin Lau) I tested out 1 thread (i.e. one raw_udp process). I changed the net.ipv6.max_dst_(opts|hbh)_number between 8 and 2048. With the sysctls set to 2048, the softirq% is packed to 100%. With 8, the softirq% is almost unnoticeable from mpstat. v2: - Code and documentation cleanup. - Change references of RFC2460 to be RFC8200. - Add reference to RFC6434-bis where the limits will be in the standard. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 24 +++++++++ include/net/ipv6.h | 40 +++++++++++++++ include/net/netns/ipv6.h | 4 ++ net/ipv6/af_inet6.c | 4 ++ net/ipv6/exthdrs.c | 67 +++++++++++++++++++++----- net/ipv6/sysctl_net_ipv6.c | 32 ++++++++++++ 6 files changed, 159 insertions(+), 12 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 77f4de59dc9c..e6661b205f72 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1385,6 +1385,30 @@ mld_qrv - INTEGER Default: 2 (as specified by RFC3810 9.1) Minimum: 1 (as specified by RFC6636 4.5) +max_dst_opts_cnt - INTEGER + Maximum number of non-padding TLVs allowed in a Destination + options extension header. If this value is less than zero + then unknown options are disallowed and the number of known + TLVs allowed is the absolute value of this number. + Default: 8 + +max_hbh_opts_cnt - INTEGER + Maximum number of non-padding TLVs allowed in a Hop-by-Hop + options extension header. If this value is less than zero + then unknown options are disallowed and the number of known + TLVs allowed is the absolute value of this number. + Default: 8 + +max_dst_opts_len - INTEGER + Maximum length allowed for a Destination options extension + header. + Default: INT_MAX (unlimited) + +max_hbh_opts_len - INTEGER + Maximum length allowed for a Hop-by-Hop options extension + header. + Default: INT_MAX (unlimited) + IPv6 Fragmentation: ip6frag_high_thresh - INTEGER diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3cda3b521c36..fb6d67012de6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -51,6 +51,46 @@ #define IPV6_DEFAULT_HOPLIMIT 64 #define IPV6_DEFAULT_MCASTHOPS 1 +/* Limits on Hop-by-Hop and Destination options. + * + * Per RFC8200 there is no limit on the maximum number or lengths of options in + * Hop-by-Hop or Destination options other than that the packet must fit in an MTU. + * We allow configurable limits in order to mitigate potential denial of + * service attacks.
+ * + * There are three limits that may be set: + * - Limit the number of options in a Hop-by-Hop or Destination options + * extension header + * - Limit the byte length of a Hop-by-Hop or Destination options extension + * header + * - Disallow unknown options + * + * The limits are expressed in corresponding sysctls: + * + * ipv6.sysctl.max_dst_opts_cnt + * ipv6.sysctl.max_hbh_opts_cnt + * ipv6.sysctl.max_dst_opts_len + * ipv6.sysctl.max_hbh_opts_len + * + * max_*_opts_cnt is the number of TLVs that are allowed for Destination + * options or Hop-by-Hop options. If the number is less than zero then unknown + * TLVs are disallowed and the number of known options that are allowed is the + * absolute value. Setting the value to INT_MAX indicates no limit. + * + * max_*_opts_len is the length limit in bytes of a Destination or + * Hop-by-Hop options extension header. Setting the value to INT_MAX + * indicates no length limit. + * + * If a limit is exceeded when processing an extension header the packet is + * silently discarded. + */ + +/* Default limits for Hop-by-Hop and Destination options */ +#define IP6_DEFAULT_MAX_DST_OPTS_CNT 8 +#define IP6_DEFAULT_MAX_HBH_OPTS_CNT 8 +#define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ +#define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ + /* * Addr type * diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2ea1ed341ef8..600ba1c1befc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -37,6 +37,10 @@ struct netns_sysctl_ipv6 { int idgen_delay; int flowlabel_state_ranges; int flowlabel_reflect; + int max_dst_opts_cnt; + int max_hbh_opts_cnt; + int max_dst_opts_len; + int max_hbh_opts_len; }; struct netns_ipv6 { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index fe5262fd6aa5..c26f71234b9c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -810,6 +810,10 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; net->ipv6.sysctl.flowlabel_state_ranges = 0; + net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT; + net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT; + net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN; + net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 9f918a770f87..83bd75713535 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -74,8 +74,20 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) +static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff, + bool disallow_unknowns) { + if (disallow_unknowns) { + /* If unknown TLVs are disallowed by configuration + * then always silently drop packet. Note this also + * means no ICMP parameter problem is sent which + * could be a good property to mitigate a reflection DOS + * attack. 
+ */ + + goto drop; + } + switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return true; @@ -95,20 +107,30 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) return false; } +drop: kfree_skb(skb); return false; } /* Parse tlv encoded option header (hop-by-hop or destination) */ -static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) +static bool ip6_parse_tlv(const struct tlvtype_proc *procs, + struct sk_buff *skb, + int max_count) { - const struct tlvtype_proc *curr; + int len = (skb_transport_header(skb)[1] + 1) << 3; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); - int len = (skb_transport_header(skb)[1] + 1) << 3; + const struct tlvtype_proc *curr; + bool disallow_unknowns = false; + int tlv_count = 0; int padlen = 0; + if (unlikely(max_count < 0)) { + disallow_unknowns = true; + max_count = -max_count; + } + if (skb_transport_offset(skb) + len > skb_headlen(skb)) goto bad; @@ -149,6 +171,11 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) default: /* Other TLV code so scan list */ if (optlen > len) goto bad; + + tlv_count++; + if (tlv_count > max_count) + goto bad; + for (curr = procs; curr->type >= 0; curr++) { if (curr->type == nh[off]) { /* type specific length/alignment @@ -159,10 +186,10 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) break; } } - if (curr->type < 0) { - if (ip6_tlvopt_unknown(skb, off) == 0) - return false; - } + if (curr->type < 0 && + !ip6_tlvopt_unknown(skb, off, disallow_unknowns)) + return false; + padlen = 0; break; } @@ -258,23 +285,31 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) __u16 dstbuf; #endif struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(skb->dev); + int extlen; if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); +fail_and_free: kfree_skb(skb); return -1; } + extlen = (skb_transport_header(skb)[1] + 1) << 3; + if (extlen > net->ipv6.sysctl.max_dst_opts_len) + goto fail_and_free; + opt->lastopt = opt->dst1 = skb_network_header_len(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) dstbuf = opt->dst1; #endif - if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { - skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; + if (ip6_parse_tlv(tlvprocdestopt_lst, skb, + init_net.ipv6.sysctl.max_dst_opts_cnt)) { + skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) opt->nhoff = dstbuf; @@ -803,6 +838,8 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = { int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); + int extlen; /* * skb_network_header(skb) is equal to skb->data, and @@ -813,13 +850,19 @@ int ipv6_parse_hopopts(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || !pskb_may_pull(skb, (sizeof(struct ipv6hdr) + ((skb_transport_header(skb)[1] + 1) << 3)))) { +fail_and_free: kfree_skb(skb); return -1; } + extlen = (skb_transport_header(skb)[1] + 1) << 3; + if (extlen > net->ipv6.sysctl.max_hbh_opts_len) + goto fail_and_free; + opt->flags |= IP6SKB_HOPBYHOP; - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { - skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; + if (ip6_parse_tlv(tlvprochopopt_lst, skb, + 
init_net.ipv6.sysctl.max_hbh_opts_cnt)) { + skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); return 1; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 6fbf8ae5e52c..4a2f0fd870bc 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -97,6 +97,34 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "max_dst_opts_number", + .data = &init_net.ipv6.sysctl.max_dst_opts_cnt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "max_hbh_opts_number", + .data = &init_net.ipv6.sysctl.max_hbh_opts_cnt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "max_dst_opts_length", + .data = &init_net.ipv6.sysctl.max_dst_opts_len, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "max_hbh_length", + .data = &init_net.ipv6.sysctl.max_hbh_opts_len, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -157,6 +185,10 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind; ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect; + ipv6_table[10].data = &net->ipv6.sysctl.max_dst_opts_cnt; + ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; + ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; + ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) From cf34ce3da1e41579296364509266c7dac573822a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 30 Oct 2017 14:41:35 -0700 Subject: [PATCH 1746/2177] tcp: add tracepoint trace_tcp_retransmit_synack() This tracepoint can be used to trace synack retransmits. It maintains pointer to struct request_sock. We cannot simply reuse trace_tcp_retransmit_skb() here, because the sk here is the LISTEN socket. The IP addresses and ports should be extracted from struct request_sock. Note that, like many other tracepoints, this patch uses IS_ENABLED in TP_fast_assign macro, which triggers sparse warning like: ./include/trace/events/tcp.h:274:1: error: directive in argument list ./include/trace/events/tcp.h:281:1: error: directive in argument list However, there is no good solution to avoid these warnings. To the best of our knowledge, these warnings are harmless. Signed-off-by: Song Liu Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- include/trace/events/tcp.h | 56 ++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_output.c | 1 + 2 files changed, 57 insertions(+) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 03699ba71623..07cccca6cbf1 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -237,6 +237,62 @@ TRACE_EVENT(tcp_set_state, show_tcp_state_name(__entry->newstate)) ); +TRACE_EVENT(tcp_retransmit_synack, + + TP_PROTO(const struct sock *sk, const struct request_sock *req), + + TP_ARGS(sk, req), + + TP_STRUCT__entry( + __field(const void *, skaddr) + __field(const void *, req) + __field(__u16, sport) + __field(__u16, dport) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + __array(__u8, saddr_v6, 16) + __array(__u8, daddr_v6, 16) + ), + + TP_fast_assign( + struct inet_request_sock *ireq = inet_rsk(req); + struct in6_addr *pin6; + __be32 *p32; + + __entry->skaddr = sk; + __entry->req = req; + + __entry->sport = ireq->ir_num; + __entry->dport = ntohs(ireq->ir_rmt_port); + + p32 = (__be32 *) __entry->saddr; + *p32 = ireq->ir_loc_addr; + + p32 = (__be32 *) __entry->daddr; + *p32 = ireq->ir_rmt_addr; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + pin6 = (struct in6_addr *)__entry->saddr_v6; + *pin6 = ireq->ir_v6_loc_addr; + pin6 = (struct in6_addr *)__entry->daddr_v6; + *pin6 = ireq->ir_v6_rmt_addr; + } else +#endif + { + pin6 = (struct in6_addr *)__entry->saddr_v6; + ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6); + pin6 = (struct in6_addr *)__entry->daddr_v6; + ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6); + } + ), + + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + __entry->sport, __entry->dport, + __entry->saddr, __entry->daddr, + __entry->saddr_v6, __entry->daddr_v6) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a85e8a282d17..06a0c89ffe40 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3782,6 +3782,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); if (unlikely(tcp_passive_fastopen(sk))) tcp_sk(sk)->total_retrans++; + trace_tcp_retransmit_synack(sk, req); } return res; } From 25c5f715381e0f52993972567fae653b700126fa Mon Sep 17 00:00:00 2001 From: Felix Manlunas Date: Wed, 1 Nov 2017 18:14:49 -0700 Subject: [PATCH 1747/2177] liquidio: bump up driver version to 1.7.0 to match newer NIC firmware Signed-off-by: Felix Manlunas Acked-by: Derek Chickles Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/liquidio_common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 3bcdda85e360..522dcc4dcff7 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -27,8 +27,8 @@ #define LIQUIDIO_PACKAGE "" #define LIQUIDIO_BASE_MAJOR_VERSION 1 -#define LIQUIDIO_BASE_MINOR_VERSION 6 -#define LIQUIDIO_BASE_MICRO_VERSION 1 +#define LIQUIDIO_BASE_MINOR_VERSION 7 +#define LIQUIDIO_BASE_MICRO_VERSION 0 #define LIQUIDIO_BASE_VERSION __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \ __stringify(LIQUIDIO_BASE_MINOR_VERSION) #define LIQUIDIO_MICRO_VERSION "." 
__stringify(LIQUIDIO_BASE_MICRO_VERSION) From 054287295b1132c8742ea55f8e3af9cbd630c932 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Thu, 2 Nov 2017 10:36:48 +0100 Subject: [PATCH 1748/2177] net: Define eth_stp_addr in linux/etherdevice.h The lan9303 driver defines eth_stp_addr as a synonym to eth_reserved_addr_base to get the STP ethernet address 01:80:c2:00:00:00. eth_reserved_addr_base is also used to define the start of Bridge Reserved ethernet address range, which happens to be the STP address. br_dev_setup refers to eth_reserved_addr_base as a definition of the STP address. Clean up by: - Move the eth_stp_addr definition to linux/etherdevice.h - Use eth_stp_addr instead of eth_reserved_addr_base in br_dev_setup. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/dsa/lan9303.h | 2 -- include/linux/etherdevice.h | 1 + net/bridge/br_device.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index b2110e69630f..05d8d136baab 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -34,5 +34,3 @@ struct lan9303 { **/ struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS]; }; - -#define eth_stp_addr eth_reserved_addr_base diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2d9f80848d4b..263dbcad22fc 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -66,6 +66,7 @@ int eth_gro_complete(struct sk_buff *skb, int nhoff); /* Reserved Ethernet Addresses per IEEE 802.1Q */ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +#define eth_stp_addr eth_reserved_addr_base /** * is_link_local_ether_addr - Determine if given Ethernet address is link-local diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 28bb22186fa0..af5b8c87f590 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -421,7 +421,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_id.prio[0] = 0x80; br->bridge_id.prio[1] = 0x00; - ether_addr_copy(br->group_addr, eth_reserved_addr_base); + ether_addr_copy(br->group_addr, eth_stp_addr); br->stp_enabled = BR_NO_STP; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; From 7cce782ef32f5003c18ab8f9f586f2ed5ce2c33e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Nov 2017 12:05:51 +0100 Subject: [PATCH 1749/2177] bpf: fix link error without CONFIG_NET I ran into this link error with the latest net-next plus linux-next trees when networking is disabled: kernel/bpf/verifier.o:(.rodata+0x2958): undefined reference to `tc_cls_act_analyzer_ops' kernel/bpf/verifier.o:(.rodata+0x2970): undefined reference to `xdp_analyzer_ops' It seems that the code was written to deal with varying contents of the array, but the actual #ifdef was missing. Both tc_cls_act_analyzer_ops and xdp_analyzer_ops are defined in the core networking code, so adding a check for CONFIG_NET seems appropriate here, and I've verified this with many randconfig builds. Fixes: 4f9218aaf8a4 ("bpf: move knowledge about post-translation offsets out of verifier") Signed-off-by: Arnd Bergmann Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 530b68550fd2..5f3799dcba01 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4588,8 +4588,10 @@ err_free_env: } static const struct bpf_verifier_ops * const bpf_analyzer_ops[] = { +#ifdef CONFIG_NET [BPF_PROG_TYPE_XDP] = &xdp_analyzer_ops, [BPF_PROG_TYPE_SCHED_CLS] = &tc_cls_act_analyzer_ops, +#endif }; int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, From eba0c929d1d0f16c4b03628b7bf8ce363b9e5c9a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Nov 2017 12:05:52 +0100 Subject: [PATCH 1750/2177] bpf: fix out-of-bounds access warning in bpf_check The bpf_verifier_ops array is generated dynamically and may be empty depending on configuration, which then causes an out-of-bounds access: kernel/bpf/verifier.c: In function 'bpf_check': kernel/bpf/verifier.c:4320:29: error: array subscript is above array bounds [-Werror=array-bounds] This adds a check to the start of the function as a workaround. I would assume that the function is never called in that configuration, so the warning is probably harmless. Fixes: 00176a34d9e2 ("bpf: remove the verifier ops from program structure") Signed-off-by: Arnd Bergmann Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5f3799dcba01..ab5aa5497666 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4474,6 +4474,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) struct bpf_verifer_log *log; int ret = -EINVAL; + /* no program is valid */ + if (ARRAY_SIZE(bpf_verifier_ops) == 0) + return -EINVAL; + /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ From a882d20cdb7775ff8b4aac880255eff6a2c1c85e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 2 Nov 2017 11:15:07 +0000 Subject: [PATCH 1751/2177] cxgb4: fix error return code in cxgb4_set_hash_filter() Fix to return a negative error code from the cxgb4_alloc_atid() error handling case instead of 0. Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters") Signed-off-by: Wei Yongjun Acked-By: Kumar Sanghvi Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index abab67d52edb..5980f308a253 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1110,8 +1110,10 @@ static int cxgb4_set_hash_filter(struct net_device *dev, } atid = cxgb4_alloc_atid(t, f); - if (atid < 0) + if (atid < 0) { + ret = atid; goto free_smt; + } iconf = adapter->params.tp.ingress_config; if (iconf & VNIC_F) { From 3ae6ec08292f01c6782d1a80be0b2cc675e0ecfc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 2 Nov 2017 17:14:05 +0100 Subject: [PATCH 1752/2177] ipv4: Send a netevent whenever multipath hash policy is changed Devices performing IPv4 forwarding need to update their multipath hash policy whenever it is changed. Inform these devices by generating a netevent. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S.
Miller --- include/net/netevent.h | 1 + net/ipv4/sysctl_net_ipv4.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/net/netevent.h b/include/net/netevent.h index f440df172b56..e3f0e8f2f6e8 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -25,6 +25,7 @@ enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ + NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4602af6d5358..8dcc2b185fcc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -25,6 +25,7 @@ #include #include #include +#include static int zero; static int one = 1; @@ -385,6 +386,23 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, return ret; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH +static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_fib_multipath_hash_policy); + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net); + + return ret; +} +#endif + static struct ctl_table ipv4_table[] = { { .procname = "tcp_max_orphans", @@ -907,7 +925,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_fib_multipath_hash_policy, .extra1 = &zero, .extra2 = &one, }, From 48fac8852637f00abcb05b1af3489e3cd45cda58 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 2 Nov 2017 17:14:06 +0100 Subject: [PATCH 1753/2177] mlxsw: spectrum_router: Embed netevent notifier block in router struct We are going to need to respond to netevents notifying us about multipath hash updates by configuring the device's hash parameters. Embed the netevent notifier in the router struct so that we could retrieve it upon notifications and use it to configure the device. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 ------- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 -- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3f4be9556e56..52f38b480669 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4574,10 +4574,6 @@ static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { .notifier_call = mlxsw_sp_inet6addr_event, }; -static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { - .notifier_call = mlxsw_sp_router_netevent_event, -}; - static const struct pci_device_id mlxsw_sp_pci_id_table[] = { {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, {0, }, @@ -4596,7 +4592,6 @@ static int __init mlxsw_sp_module_init(void) register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); - register_netevent_notifier(&mlxsw_sp_router_netevent_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); if (err) @@ -4611,7 +4606,6 @@ static int __init mlxsw_sp_module_init(void) err_pci_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: - unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); @@ -4623,7 +4617,6 @@ static void __exit mlxsw_sp_module_exit(void) { mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); - unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index aa0cefb25e18..b2393bb8cef9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -385,8 +385,6 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) /* spectrum_router.c */ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_router_netevent_event(struct notifier_block *unused, - unsigned long event, void *ptr); int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9fe4cdb23189..d49c1c92a0fa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -96,6 +96,7 @@ struct mlxsw_sp_router { struct list_head ipip_list; bool aborted; struct notifier_block fib_nb; + struct notifier_block netevent_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_ipip_ops **ipip_ops_arr; }; @@ -2076,8 +2077,8 @@ out: kfree(neigh_work); } -int mlxsw_sp_router_netevent_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_router_netevent_event(struct 
notifier_block *unused, + unsigned long event, void *ptr) { struct mlxsw_sp_neigh_event_work *neigh_work; struct mlxsw_sp_port *mlxsw_sp_port; @@ -6720,6 +6721,12 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_neigh_init; + mlxsw_sp->router->netevent_nb.notifier_call = + mlxsw_sp_router_netevent_event; + err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); + if (err) + goto err_register_netevent_notifier; + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; err = register_fib_notifier(&mlxsw_sp->router->fib_nb, mlxsw_sp_router_fib_dump_flush); @@ -6729,6 +6736,8 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return 0; err_register_fib_notifier: + unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); +err_register_netevent_notifier: mlxsw_sp_neigh_fini(mlxsw_sp); err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); @@ -6754,6 +6763,7 @@ err_router_init: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { unregister_fib_notifier(&mlxsw_sp->router->fib_nb); + unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_mr_fini(mlxsw_sp); From ceb8881ddf6d79c257c916763b0e558d053b2560 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 2 Nov 2017 17:14:07 +0100 Subject: [PATCH 1754/2177] mlxsw: spectrum_router: Properly name netevent work struct The struct containing the work item queued from the netevent handler is named after the only event it is currently used for, which is neighbour updates. Use a more appropriate name for the struct, as we are going to use it for more events. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d49c1c92a0fa..d5094b81adbf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2026,7 +2026,7 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true); } -struct mlxsw_sp_neigh_event_work { +struct mlxsw_sp_netevent_work { struct work_struct work; struct mlxsw_sp *mlxsw_sp; struct neighbour *n; @@ -2034,11 +2034,11 @@ struct mlxsw_sp_neigh_event_work { static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) { - struct mlxsw_sp_neigh_event_work *neigh_work = - container_of(work, struct mlxsw_sp_neigh_event_work, work); - struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp; + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; - struct neighbour *n = neigh_work->n; + struct neighbour *n = net_work->n; unsigned char ha[ETH_ALEN]; bool entry_connected; u8 nud_state, dead; @@ -2074,13 +2074,13 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) out: rtnl_unlock(); neigh_release(n); - kfree(neigh_work); + kfree(net_work); } static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct mlxsw_sp_neigh_event_work *neigh_work; + struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp; unsigned long interval; @@ 
-2119,22 +2119,22 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, if (!mlxsw_sp_port) return NOTIFY_DONE; - neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC); - if (!neigh_work) { + net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); + if (!net_work) { mlxsw_sp_port_dev_put(mlxsw_sp_port); return NOTIFY_BAD; } - INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work); - neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - neigh_work->n = n; + INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work); + net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + net_work->n = n; /* Take a reference to ensure the neighbour won't be * destructed until we drop the reference in delayed * work. */ neigh_clone(n); - mlxsw_core_schedule_work(&neigh_work->work); + mlxsw_core_schedule_work(&net_work->work); mlxsw_sp_port_dev_put(mlxsw_sp_port); break; } From e471859b72fc2d9fe0f3163a7c238029271259ec Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 2 Nov 2017 17:14:08 +0100 Subject: [PATCH 1755/2177] mlxsw: reg: Add Router ECMP Configuration Register Version 2 The RECRv2 register is used for setting up the router's ECMP hash configuration. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 132 ++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index db6cd263dd61..5066553dd0b6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5844,6 +5844,137 @@ static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index, mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif); } +/* RECR-V2 - Router ECMP Configuration Version 2 Register + * ------------------------------------------------------ + */ +#define MLXSW_REG_RECR2_ID 0x8025 +#define MLXSW_REG_RECR2_LEN 0x38 + +MLXSW_REG_DEFINE(recr2, MLXSW_REG_RECR2_ID, MLXSW_REG_RECR2_LEN); + +/* reg_recr2_pp + * Per-port configuration + * Access: Index + */ +MLXSW_ITEM32(reg, recr2, pp, 0x00, 24, 1); + +/* reg_recr2_sh + * Symmetric hash + * Access: RW + */ +MLXSW_ITEM32(reg, recr2, sh, 0x00, 8, 1); + +/* reg_recr2_seed + * Seed + * Access: RW + */ +MLXSW_ITEM32(reg, recr2, seed, 0x08, 0, 32); + +enum { + /* Enable IPv4 fields if packet is not TCP and not UDP */ + MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP = 3, + /* Enable IPv4 fields if packet is TCP or UDP */ + MLXSW_REG_RECR2_IPV4_EN_TCP_UDP = 4, + /* Enable IPv6 fields if packet is not TCP and not UDP */ + MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP = 5, + /* Enable IPv6 fields if packet is TCP or UDP */ + MLXSW_REG_RECR2_IPV6_EN_TCP_UDP = 6, + /* Enable TCP/UDP header fields if packet is IPv4 */ + MLXSW_REG_RECR2_TCP_UDP_EN_IPV4 = 7, + /* Enable TCP/UDP header fields if packet is IPv6 */ + MLXSW_REG_RECR2_TCP_UDP_EN_IPV6 = 8, +}; + +/* reg_recr2_outer_header_enables + * Bit mask where each bit enables a specific layer to be included in + * the hash calculation. 
+ * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_enables, 0x10, 0x04, 1); + +enum { + /* IPv4 Source IP */ + MLXSW_REG_RECR2_IPV4_SIP0 = 9, + MLXSW_REG_RECR2_IPV4_SIP3 = 12, + /* IPv4 Destination IP */ + MLXSW_REG_RECR2_IPV4_DIP0 = 13, + MLXSW_REG_RECR2_IPV4_DIP3 = 16, + /* IP Protocol */ + MLXSW_REG_RECR2_IPV4_PROTOCOL = 17, + /* IPv6 Source IP */ + MLXSW_REG_RECR2_IPV6_SIP0_7 = 21, + MLXSW_REG_RECR2_IPV6_SIP8 = 29, + MLXSW_REG_RECR2_IPV6_SIP15 = 36, + /* IPv6 Destination IP */ + MLXSW_REG_RECR2_IPV6_DIP0_7 = 37, + MLXSW_REG_RECR2_IPV6_DIP8 = 45, + MLXSW_REG_RECR2_IPV6_DIP15 = 52, + /* IPv6 Next Header */ + MLXSW_REG_RECR2_IPV6_NEXT_HEADER = 53, + /* IPv6 Flow Label */ + MLXSW_REG_RECR2_IPV6_FLOW_LABEL = 57, + /* TCP/UDP Source Port */ + MLXSW_REG_RECR2_TCP_UDP_SPORT = 74, + /* TCP/UDP Destination Port */ + MLXSW_REG_RECR2_TCP_UDP_DPORT = 75, +}; + +/* reg_recr2_outer_header_fields_enable + * Packet fields to enable for ECMP hash subject to outer_header_enable. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1); + +static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload) +{ + int i; + + for (i = MLXSW_REG_RECR2_IPV4_SIP0; i <= MLXSW_REG_RECR2_IPV4_SIP3; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload) +{ + int i; + + for (i = MLXSW_REG_RECR2_IPV4_DIP0; i <= MLXSW_REG_RECR2_IPV4_DIP3; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload) +{ + int i = MLXSW_REG_RECR2_IPV6_SIP0_7; + + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); + + i = MLXSW_REG_RECR2_IPV6_SIP8; + for (; i <= MLXSW_REG_RECR2_IPV6_SIP15; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload) +{ + int i = MLXSW_REG_RECR2_IPV6_DIP0_7; + + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); + + i = MLXSW_REG_RECR2_IPV6_DIP8; + for (; i <= MLXSW_REG_RECR2_IPV6_DIP15; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed) +{ + MLXSW_REG_ZERO(recr2, payload); + mlxsw_reg_recr2_pp_set(payload, false); + mlxsw_reg_recr2_sh_set(payload, true); + mlxsw_reg_recr2_seed_set(payload, seed); +} + /* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register * -------------------------------------------------------------- * The RMFT_V2 register is used to configure and query the multicast table. @@ -7313,6 +7444,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(raleu), MLXSW_REG(rauhtd), MLXSW_REG(rigr2), + MLXSW_REG(recr2), MLXSW_REG(rmft2), MLXSW_REG(mfcr), MLXSW_REG(mfsc), From af658b6a0e6da7a9d9b82fa536d610a7457f37fd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 2 Nov 2017 17:14:09 +0100 Subject: [PATCH 1756/2177] mlxsw: spectrum_router: Align multipath hash parameters with kernel's Up until now we used the hardware's defaults for multipath hash computation. This patch aligns the hardware's multipath parameters with the kernel's. For IPv4 packets, the parameters are determined according to the 'fib_multipath_hash_policy' sysctl during module initialization. In case L3-mode is requested, only the source and destination IP addresses are used. There is no special handling of ICMP error packets. 
In case L4-mode is requested, a 5-tuple is used: source and destination IP addresses, source and destination ports and IP protocol. Note that the layer 4 fields are not considered for fragmented packets. For IPv6 packets, the source and destination IP addresses are used, as well as the flow label and the next header fields. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d5094b81adbf..fe99d245dd91 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -6644,6 +6645,64 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) mlxsw_sp_router_fib_flush(router->mlxsw_sp); } +#ifdef CONFIG_IP_ROUTE_MULTIPATH +static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header) +{ + mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true); +} + +static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field) +{ + mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true); +} + +static void mlxsw_sp_mp4_hash_init(char *recr2_pl) +{ + bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy; + + mlxsw_sp_mp_hash_header_set(recr2_pl, + MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP); + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP); + mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl); + mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl); + if (only_l3) + return; + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT); +} + +static void mlxsw_sp_mp6_hash_init(char *recr2_pl) +{ + mlxsw_sp_mp_hash_header_set(recr2_pl, + MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP); + mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl); + mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER); +} + +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) +{ + char recr2_pl[MLXSW_REG_RECR2_LEN]; + u32 seed; + + get_random_bytes(&seed, sizeof(seed)); + mlxsw_reg_recr2_pack(recr2_pl, seed); + mlxsw_sp_mp4_hash_init(recr2_pl); + mlxsw_sp_mp6_hash_init(recr2_pl); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); +} +#else +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) +{ + return 0; +} +#endif + static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; @@ -6727,6 +6786,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_register_netevent_notifier; + err = mlxsw_sp_mp_hash_init(mlxsw_sp); + if (err) + goto err_mp_hash_init; + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; err = register_fib_notifier(&mlxsw_sp->router->fib_nb, mlxsw_sp_router_fib_dump_flush); @@ -6736,6 +6799,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return 0; err_register_fib_notifier: +err_mp_hash_init: 
unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: mlxsw_sp_neigh_fini(mlxsw_sp); From 28678f07f127d151354ff12b0d05557ae897e972 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 2 Nov 2017 17:14:10 +0100 Subject: [PATCH 1757/2177] mlxsw: spectrum_router: Update multipath hash parameters upon netevents Make sure the device and the kernel are performing the multipath hash according to the same parameters by updating the device whenever the relevant netevent is generated. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index fe99d245dd91..d657f01f2d79 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2078,15 +2078,29 @@ out: kfree(net_work); } -static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp); + +static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work) +{ + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; + + mlxsw_sp_mp_hash_init(mlxsw_sp); + kfree(net_work); +} + +static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp_router *router; struct mlxsw_sp *mlxsw_sp; unsigned long interval; struct neigh_parms *p; struct neighbour *n; + struct net *net; switch (event) { case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -2138,6 +2152,21 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, mlxsw_core_schedule_work(&net_work->work); mlxsw_sp_port_dev_put(mlxsw_sp_port); break; + case NETEVENT_MULTIPATH_HASH_UPDATE: + net = ptr; + + if (!net_eq(net, &init_net)) + return NOTIFY_DONE; + + net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); + if (!net_work) + return NOTIFY_BAD; + + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work); + net_work->mlxsw_sp = router->mlxsw_sp; + mlxsw_core_schedule_work(&net_work->work); + break; } return NOTIFY_DONE; From 88c1f37f05f30d74f56aec1f3bc10d238c9ba152 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Fri, 3 Nov 2017 11:51:10 +0530 Subject: [PATCH 1758/2177] net: bridge: Convert timers to use timer_setup() switch to using the new timer_setup() and from_timer() api's. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 79 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 7947e0436e18..5f7f0e9d446c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -239,9 +239,9 @@ static void br_multicast_free_group(struct rcu_head *head) kfree(mp); } -static void br_multicast_group_expired(unsigned long data) +static void br_multicast_group_expired(struct timer_list *t) { - struct net_bridge_mdb_entry *mp = (void *)data; + struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer); struct net_bridge *br = mp->br; struct net_bridge_mdb_htable *mdb; @@ -302,9 +302,9 @@ static void br_multicast_del_pg(struct net_bridge *br, WARN_ON(1); } -static void br_multicast_port_group_expired(unsigned long data) +static void br_multicast_port_group_expired(struct timer_list *t) { - struct net_bridge_port_group *pg = (void *)data; + struct net_bridge_port_group *pg = from_timer(pg, t, timer); struct net_bridge *br = pg->port->br; spin_lock(&br->multicast_lock); @@ -701,8 +701,7 @@ rehash: mp->br = br; mp->addr = *group; - setup_timer(&mp->timer, br_multicast_group_expired, - (unsigned long)mp); + timer_setup(&mp->timer, br_multicast_group_expired, 0); hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -729,8 +728,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->flags = flags; rcu_assign_pointer(p->next, next); hlist_add_head(&p->mglist, &port->mglist); - setup_timer(&p->timer, br_multicast_port_group_expired, - (unsigned long)p); + timer_setup(&p->timer, br_multicast_port_group_expired, 0); if (src) memcpy(p->eth_addr, src, ETH_ALEN); @@ -843,9 +841,10 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, } #endif -static void br_multicast_router_expired(unsigned long data) +static void br_multicast_router_expired(struct timer_list *t) { - struct net_bridge_port *port = (void *)data; + struct net_bridge_port *port = + from_timer(port, t, multicast_router_timer); struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); @@ -872,9 +871,9 @@ static void br_mc_router_state_change(struct net_bridge *p, switchdev_port_attr_set(p->dev, &attr); } -static void br_multicast_local_router_expired(unsigned long data) +static void br_multicast_local_router_expired(struct timer_list *t) { - struct net_bridge *br = (struct net_bridge *)data; + struct net_bridge *br = from_timer(br, t, multicast_router_timer); spin_lock(&br->multicast_lock); if (br->multicast_router == MDB_RTR_TYPE_DISABLED || @@ -900,17 +899,17 @@ out: spin_unlock(&br->multicast_lock); } -static void br_ip4_multicast_querier_expired(unsigned long data) +static void br_ip4_multicast_querier_expired(struct timer_list *t) { - struct net_bridge *br = (void *)data; + struct net_bridge *br = from_timer(br, t, ip4_other_query.timer); br_multicast_querier_expired(br, &br->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_querier_expired(unsigned long data) +static void br_ip6_multicast_querier_expired(struct timer_list *t) { - struct net_bridge *br = (void *)data; + struct net_bridge *br = from_timer(br, t, ip6_other_query.timer); br_multicast_querier_expired(br, &br->ip6_own_query); } @@ -1011,17 +1010,17 @@ out: spin_unlock(&br->multicast_lock); } -static void br_ip4_multicast_port_query_expired(unsigned long data) +static void br_ip4_multicast_port_query_expired(struct timer_list *t) { - struct net_bridge_port *port = (void 
*)data; + struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer); br_multicast_port_query_expired(port, &port->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_port_query_expired(unsigned long data) +static void br_ip6_multicast_port_query_expired(struct timer_list *t) { - struct net_bridge_port *port = (void *)data; + struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer); br_multicast_port_query_expired(port, &port->ip6_own_query); } @@ -1043,13 +1042,13 @@ int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - setup_timer(&port->multicast_router_timer, br_multicast_router_expired, - (unsigned long)port); - setup_timer(&port->ip4_own_query.timer, - br_ip4_multicast_port_query_expired, (unsigned long)port); + timer_setup(&port->multicast_router_timer, + br_multicast_router_expired, 0); + timer_setup(&port->ip4_own_query.timer, + br_ip4_multicast_port_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) - setup_timer(&port->ip6_own_query.timer, - br_ip6_multicast_port_query_expired, (unsigned long)port); + timer_setup(&port->ip6_own_query.timer, + br_ip6_multicast_port_query_expired, 0); #endif br_mc_disabled_update(port->dev, port->br->multicast_disabled); @@ -1933,17 +1932,17 @@ static void br_multicast_query_expired(struct net_bridge *br, spin_unlock(&br->multicast_lock); } -static void br_ip4_multicast_query_expired(unsigned long data) +static void br_ip4_multicast_query_expired(struct timer_list *t) { - struct net_bridge *br = (void *)data; + struct net_bridge *br = from_timer(br, t, ip4_own_query.timer); br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_query_expired(unsigned long data) +static void br_ip6_multicast_query_expired(struct timer_list *t) { - struct net_bridge *br = (void *)data; + struct net_bridge *br = from_timer(br, t, ip6_own_query.timer); br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); } @@ -1978,17 +1977,17 @@ void br_multicast_init(struct net_bridge *br) br->has_ipv6_addr = 1; spin_lock_init(&br->multicast_lock); - setup_timer(&br->multicast_router_timer, - br_multicast_local_router_expired, (unsigned long)br); - setup_timer(&br->ip4_other_query.timer, - br_ip4_multicast_querier_expired, (unsigned long)br); - setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired, - (unsigned long)br); + timer_setup(&br->multicast_router_timer, + br_multicast_local_router_expired, 0); + timer_setup(&br->ip4_other_query.timer, + br_ip4_multicast_querier_expired, 0); + timer_setup(&br->ip4_own_query.timer, + br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) - setup_timer(&br->ip6_other_query.timer, - br_ip6_multicast_querier_expired, (unsigned long)br); - setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired, - (unsigned long)br); + timer_setup(&br->ip6_other_query.timer, + br_ip6_multicast_querier_expired, 0); + timer_setup(&br->ip6_own_query.timer, + br_ip6_multicast_query_expired, 0); #endif } From 1a3deb11d66ec4509462bbe9f162fc4bb991b061 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Fri, 3 Nov 2017 11:51:11 +0530 Subject: [PATCH 1759/2177] net: bridge: Convert timers to use timer_setup() switch to using the new timer_setup() and from_timer() api's. Signed-off-by: Allen Pais Signed-off-by: David S. 
Miller --- net/bridge/br_stp_timer.c | 48 ++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index b54c1a331450..e7739de5f0e1 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -31,9 +31,9 @@ static int br_is_designated_for_some_port(const struct net_bridge *br) return 0; } -static void br_hello_timer_expired(unsigned long arg) +static void br_hello_timer_expired(struct timer_list *t) { - struct net_bridge *br = (struct net_bridge *)arg; + struct net_bridge *br = from_timer(br, t, hello_timer); br_debug(br, "hello timer expired\n"); spin_lock(&br->lock); @@ -47,9 +47,9 @@ static void br_hello_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_message_age_timer_expired(unsigned long arg) +static void br_message_age_timer_expired(struct timer_list *t) { - struct net_bridge_port *p = (struct net_bridge_port *) arg; + struct net_bridge_port *p = from_timer(p, t, message_age_timer); struct net_bridge *br = p->br; const bridge_id *id = &p->designated_bridge; int was_root; @@ -80,9 +80,9 @@ static void br_message_age_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_forward_delay_timer_expired(unsigned long arg) +static void br_forward_delay_timer_expired(struct timer_list *t) { - struct net_bridge_port *p = (struct net_bridge_port *) arg; + struct net_bridge_port *p = from_timer(p, t, forward_delay_timer); struct net_bridge *br = p->br; br_debug(br, "port %u(%s) forward delay timer\n", @@ -104,9 +104,9 @@ static void br_forward_delay_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_tcn_timer_expired(unsigned long arg) +static void br_tcn_timer_expired(struct timer_list *t) { - struct net_bridge *br = (struct net_bridge *) arg; + struct net_bridge *br = from_timer(br, t, tcn_timer); br_debug(br, "tcn timer expired\n"); spin_lock(&br->lock); @@ -118,9 +118,9 @@ static void br_tcn_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_topology_change_timer_expired(unsigned long arg) +static void br_topology_change_timer_expired(struct timer_list *t) { - struct net_bridge *br = (struct net_bridge *) arg; + struct net_bridge *br = from_timer(br, t, topology_change_timer); br_debug(br, "topo change timer expired\n"); spin_lock(&br->lock); @@ -129,9 +129,9 @@ static void br_topology_change_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_hold_timer_expired(unsigned long arg) +static void br_hold_timer_expired(struct timer_list *t) { - struct net_bridge_port *p = (struct net_bridge_port *) arg; + struct net_bridge_port *p = from_timer(p, t, hold_timer); br_debug(p->br, "port %u(%s) hold timer expired\n", (unsigned int) p->port_no, p->dev->name); @@ -144,27 +144,17 @@ static void br_hold_timer_expired(unsigned long arg) void br_stp_timer_init(struct net_bridge *br) { - setup_timer(&br->hello_timer, br_hello_timer_expired, - (unsigned long) br); - - setup_timer(&br->tcn_timer, br_tcn_timer_expired, - (unsigned long) br); - - setup_timer(&br->topology_change_timer, - br_topology_change_timer_expired, - (unsigned long) br); + timer_setup(&br->hello_timer, br_hello_timer_expired, 0); + timer_setup(&br->tcn_timer, br_tcn_timer_expired, 0); + timer_setup(&br->topology_change_timer, + br_topology_change_timer_expired, 0); } void br_stp_port_timer_init(struct net_bridge_port *p) { - setup_timer(&p->message_age_timer, br_message_age_timer_expired, - (unsigned long) 
p); - - setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired, - (unsigned long) p); - - setup_timer(&p->hold_timer, br_hold_timer_expired, - (unsigned long) p); + timer_setup(&p->message_age_timer, br_message_age_timer_expired, 0); + timer_setup(&p->forward_delay_timer, br_forward_delay_timer_expired, 0); + timer_setup(&p->hold_timer, br_hold_timer_expired, 0); } /* Report ticks left (in USER_HZ) used for API */ From 5a6d80034471d4407052c4bf3758071df5cadf33 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 2 Nov 2017 11:15:28 +0000 Subject: [PATCH 1760/2177] net/ncsi: Make local function ncsi_get_filter() static Fixes the following sparse warnings: net/ncsi/ncsi-manage.c:41:5: warning: symbol 'ncsi_get_filter' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 28c42b22b748..47baf914eec2 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table) return sizes[table]; } -u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) +static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) { struct ncsi_channel_filter *ncf; int size; From 36bf994a80571aeee2549db1bc93e34342f40c24 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 2 Nov 2017 19:26:58 +0530 Subject: [PATCH 1761/2177] cxgb4: add new T6 pci device id's Add 0x6086 T6 device id. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 8c22bb8c9fbf..60cf9e02de5d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -205,6 +205,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6083), /* Custom T62100-CR QSFP28 */ CH_PCI_ID_TABLE_FENTRY(0x6084), /* Custom T64100-CR QSFP28 */ CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */ + CH_PCI_ID_TABLE_FENTRY(0x6086), /* Custom T6225-SO-CR */ CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ From 9a7b96b3462679a2fcf7205d396dbf1f8f28454c Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 2 Nov 2017 19:28:20 +0530 Subject: [PATCH 1762/2177] cxgb4vf: define get_fecparam ethtool callback Add support to new ethtool operation get_fecparam to fetch FEC parameters. Original Work by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 8996ebbd222e..b48361cfdc78 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1401,6 +1401,63 @@ static int cxgb4vf_get_link_ksettings(struct net_device *dev, return 0; } +/* Translate the Firmware FEC value into the ethtool value. 
*/ +static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec) +{ + unsigned int eth_fec = 0; + + if (fw_fec & FW_PORT_CAP32_FEC_RS) + eth_fec |= ETHTOOL_FEC_RS; + if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS) + eth_fec |= ETHTOOL_FEC_BASER; + + /* if nothing is set, then FEC is off */ + if (!eth_fec) + eth_fec = ETHTOOL_FEC_OFF; + + return eth_fec; +} + +/* Translate Common Code FEC value into ethtool value. */ +static inline unsigned int cc_to_eth_fec(unsigned int cc_fec) +{ + unsigned int eth_fec = 0; + + if (cc_fec & FEC_AUTO) + eth_fec |= ETHTOOL_FEC_AUTO; + if (cc_fec & FEC_RS) + eth_fec |= ETHTOOL_FEC_RS; + if (cc_fec & FEC_BASER_RS) + eth_fec |= ETHTOOL_FEC_BASER; + + /* if nothing is set, then FEC is off */ + if (!eth_fec) + eth_fec = ETHTOOL_FEC_OFF; + + return eth_fec; +} + +static int cxgb4vf_get_fecparam(struct net_device *dev, + struct ethtool_fecparam *fec) +{ + const struct port_info *pi = netdev_priv(dev); + const struct link_config *lc = &pi->link_cfg; + + /* Translate the Firmware FEC Support into the ethtool value. We + * always support IEEE 802.3 "automatic" selection of Link FEC type if + * any FEC is supported. + */ + fec->fec = fwcap_to_eth_fec(lc->pcaps); + if (fec->fec != ETHTOOL_FEC_OFF) + fec->fec |= ETHTOOL_FEC_AUTO; + + /* Translate the current internal FEC parameters into the + * ethtool values. + */ + fec->active_fec = cc_to_eth_fec(lc->fec); + return 0; +} + /* * Return our driver information. */ @@ -1774,6 +1831,7 @@ static void cxgb4vf_get_wol(struct net_device *dev, static const struct ethtool_ops cxgb4vf_ethtool_ops = { .get_link_ksettings = cxgb4vf_get_link_ksettings, + .get_fecparam = cxgb4vf_get_fecparam, .get_drvinfo = cxgb4vf_get_drvinfo, .get_msglevel = cxgb4vf_get_msglevel, .set_msglevel = cxgb4vf_set_msglevel, From 4bb1b116b7f37a64c08d28213a2e6f87fcef2d8b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 2 Nov 2017 15:07:01 +0100 Subject: [PATCH 1763/2177] net: sched: move block offload unbind after all chains are flushed Currently, the offload unbind is done before the chains are flushed. That causes driver to unregister block callback before it can get all the callback calls done during flush, leaving the offloaded tps inside the HW. So fix the order to prevent this situation and restore the original behaviour. Reported-by: Alexander Duyck Reported-by: Jakub Kicinski Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a26c690b48ac..3364347bc699 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -343,11 +343,11 @@ void tcf_block_put_ext(struct tcf_block *block, if (!block) return; - tcf_block_offload_unbind(block, q, ei); - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) tcf_chain_flush(chain); + tcf_block_offload_unbind(block, q, ei); + INIT_WORK(&block->work, tcf_block_put_final); /* Wait for existing RCU callbacks to cool down, make sure their works * have been queued before this. We can not flush pending works here From fa36882682db0692ecbea20f859180f978923d72 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 2 Nov 2017 15:44:15 +0100 Subject: [PATCH 1764/2177] tipc: eliminate unnecessary probing The neighbor monitor employs a threshold, default set to 32 peer nodes, where it activates the "Overlapping Neighbor Monitoring" algorithm. Below that threshold, monitoring is full-mesh, and no "domain records" are passed between the nodes. 
Because of this, a node never received a peer's ack that it has received the most recent update of the own domain. Hence, the field 'acked_gen' in struct tipc_monitor_state remains permanently at zero, whereas the own domain generation is incremented for each added or removed peer. This has the effect that the function tipc_mon_get_state() always sets the field 'probing' in struct tipc_monitor_state true, again leading the tipc_link_timeout() of the link in question to always send out a probe, even when link->silent_intv_count is zero. This is functionally harmless, but leads to some unnecessary probing, which can easily be eliminated by setting the 'probing' field of the said struct correctly in such cases. At the same time, we explicitly invalidate the sent domain records when the algorithm is not activated. This will eliminate any risk that an invalid domain record might be inadvertently accepted by the peer. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/monitor.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index b9c32557d73c..8e884ed06d4b 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -530,8 +530,11 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen, u16 gen = mon->dom_gen; u16 len; - if (!tipc_mon_is_active(net, mon)) + /* Send invalid record if not active */ + if (!tipc_mon_is_active(net, mon)) { + dom->len = 0; return; + } /* Send only a dummy record with ack if peer has acked our last sent */ if (likely(state->acked_gen == gen)) { @@ -559,6 +562,12 @@ void tipc_mon_get_state(struct net *net, u32 addr, struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *peer; + if (!tipc_mon_is_active(net, mon)) { + state->probing = false; + state->monitoring = true; + return; + } + /* Used cached state if table has not changed */ if (!state->probing && (state->list_gen == mon->list_gen) && From 8c01c4f896aa3404af948880dcb29a2d51c833dc Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 2 Nov 2017 11:18:01 -0400 Subject: [PATCH 1765/2177] bpf: fix verifier NULL pointer dereference do_check() can fail early without allocating env->cur_state under memory pressure. Syzkaller found the stack below on the linux-next tree because of this.
kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 27062 Comm: syz-executor5 Not tainted 4.14.0-rc7+ #106 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801c2c74700 task.stack: ffff8801c3e28000 RIP: 0010:free_verifier_state kernel/bpf/verifier.c:347 [inline] RIP: 0010:bpf_check+0xcf4/0x19c0 kernel/bpf/verifier.c:4533 RSP: 0018:ffff8801c3e2f5c8 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 00000000fffffff4 RCX: 0000000000000000 RDX: 0000000000000070 RSI: ffffffff817d5aa9 RDI: 0000000000000380 RBP: ffff8801c3e2f668 R08: 0000000000000000 R09: 1ffff100387c5d9f R10: 00000000218c4e80 R11: ffffffff85b34380 R12: ffff8801c4dc6a28 R13: 0000000000000000 R14: ffff8801c4dc6a00 R15: ffff8801c4dc6a20 FS: 00007f311079b700(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004d4a24 CR3: 00000001cbcd0000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bpf_prog_load+0xcbb/0x18e0 kernel/bpf/syscall.c:1166 SYSC_bpf kernel/bpf/syscall.c:1690 [inline] SyS_bpf+0xae9/0x4620 kernel/bpf/syscall.c:1652 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x452869 RSP: 002b:00007f311079abe8 EFLAGS: 00000212 ORIG_RAX: 0000000000000141 RAX: ffffffffffffffda RBX: 0000000000758020 RCX: 0000000000452869 RDX: 0000000000000030 RSI: 0000000020168000 RDI: 0000000000000005 RBP: 00007f311079aa20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000212 R12: 00000000004b7550 R13: 00007f311079ab58 R14: 00000000004b7560 R15: 0000000000000000 Code: df 48 c1 ea 03 80 3c 02 00 0f 85 e6 0b 00 00 4d 8b 6e 20 48 b8 00 00 00 00 00 fc ff df 49 8d bd 80 03 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 b6 0b 00 00 49 8b bd 80 03 00 00 e8 d6 0c 26 RIP: free_verifier_state kernel/bpf/verifier.c:347 [inline] RSP: ffff8801c3e2f5c8 RIP: bpf_check+0xcf4/0x19c0 kernel/bpf/verifier.c:4533 RSP: ffff8801c3e2f5c8 ---[ end trace c8d37f339dc64004 ]--- Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") Fixes: 1969db47f8d0 ("bpf: fix verifier memory leaks") Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ab5aa5497666..04357ad5a812 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4534,8 +4534,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; + if (env->cur_state) { + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + } skip_full_check: while (!pop_stack(env, NULL, NULL)); @@ -4643,8 +4645,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; + if (env->cur_state) { + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + } skip_full_check: while (!pop_stack(env, NULL, NULL)); From 9eba9353388da7bfac9694dfe94c15365d49ebd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Nov 2017 11:53:04 -0700 Subject: [PATCH 1766/2177] tcp: fix a lockdep issue in tcp_fastopen_reset_cipher() icsk_accept_queue.fastopenq.lock is only fully initialized at listen() time. LOCKDEP is not happy if we attempt a spin_lock_bh() on it, because of a missing annotation (although the kernel runs just fine). Let's use net->ipv4.tcp_fastopen_ctx_lock to protect ctx access. Fixes: 1fba70e5b6be ("tcp: socket option to set TCP fast open key") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Christoph Paasch Reviewed-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_fastopen.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index e0a4b56644aa..91762be58acc 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -92,20 +92,18 @@ error: kfree(ctx); memcpy(ctx->key, key, len); + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; - spin_lock_bh(&q->lock); octx = rcu_dereference_protected(q->ctx, - lockdep_is_held(&q->lock)); + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); rcu_assign_pointer(q->ctx, ctx); - spin_unlock_bh(&q->lock); } else { - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); - spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); } + spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); From 53b3847be5cbdf819cf366ea9160369ff00dcf8e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 2 Nov 2017 20:04:12 +0000 Subject: [PATCH 1767/2177] net: sched: cls_bpf: use bitwise & rather than logical && on gen_flags Currently gen_flags is being operated on by a logical && operator rather than a bitwise & operator. This looks incorrect as these should be bit flag operations. Fix this. Detected by CoverityScan, CID#1460305 ("Logical vs. bitwise operator") Fixes: 3f7889c4c79b ("net: sched: cls_bpf: call block callbacks for offload") Signed-off-by: Colin Ian King Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/sched/cls_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5f701c8670a2..bc3edde1b9d7 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -174,7 +174,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, } } - if (addorrep && skip_sw && !(prog->gen_flags && TCA_CLS_FLAGS_IN_HW)) + if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) return -EINVAL; return 0; From 5212dfa3ea591df2dc1520675622856ac0bb63e4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 2 Nov 2017 16:14:14 -0500 Subject: [PATCH 1768/2177] ISDN: eicon: message: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 114780 Addresses-Coverity-ID: 114781 Addresses-Coverity-ID: 114782 Addresses-Coverity-ID: 114783 Addresses-Coverity-ID: 114784 Addresses-Coverity-ID: 114785 Addresses-Coverity-ID: 114786 Addresses-Coverity-ID: 114787 Addresses-Coverity-ID: 114788 Addresses-Coverity-ID: 114789 Addresses-Coverity-ID: 114790 Addresses-Coverity-ID: 114791 Addresses-Coverity-ID: 114792 Addresses-Coverity-ID: 114793 Addresses-Coverity-ID: 114794 Addresses-Coverity-ID: 114795 Addresses-Coverity-ID: 200521 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/isdn/hardware/eicon/message.c | 70 ++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c index eadd1ed1e014..def7992a38e6 100644 --- a/drivers/isdn/hardware/eicon/message.c +++ b/drivers/isdn/hardware/eicon/message.c @@ -4501,6 +4501,7 @@ static void control_rc(PLCI *plci, byte req, byte rc, byte ch, byte global_req, plci->channels++; a->ncci_state[ncci] = OUTG_CON_PENDING; } + /* fall through */ default: if (plci->internal_command_queue[0]) @@ -7020,6 +7021,7 @@ static void nl_ind(PLCI *plci) plci->NL.RNum = 1; return; } + /* fall through */ case N_BDATA: case N_DATA: if (((a->ncci_state[ncci] != CONNECTED) && (plci->B2_prot == 1)) /* transparent */ @@ -9626,9 +9628,9 @@ static void dtmf_command(dword Id, PLCI *plci, byte Rc) { case DTMF_LISTEN_TONE_START: - mask <<= 1; + mask <<= 1; /* fall through */ case DTMF_LISTEN_MF_START: - mask <<= 1; + mask <<= 1; /* fall through */ case DTMF_LISTEN_START: switch (internal_command) @@ -9636,6 +9638,7 @@ static void dtmf_command(dword Id, PLCI *plci, byte Rc) default: adjust_b1_resource(Id, plci, NULL, (word)(plci->B1_facilities | B1_FACILITY_DTMFR), DTMF_COMMAND_1); + /* fall through */ case DTMF_COMMAND_1: if (adjust_b_process(Id, plci, Rc) != GOOD) { @@ -9646,6 +9649,7 @@ static void dtmf_command(dword Id, PLCI *plci, byte Rc) } if (plci->internal_command) return; + /* fall through */ case DTMF_COMMAND_2: if (plci_nl_busy(plci)) { @@ -9673,9 +9677,9 @@ static void dtmf_command(dword Id, PLCI *plci, byte Rc) case DTMF_LISTEN_TONE_STOP: - mask <<= 1; + mask <<= 1; /* fall through */ case DTMF_LISTEN_MF_STOP: - mask <<= 1; + mask <<= 1; /* fall through */ case DTMF_LISTEN_STOP: switch (internal_command) @@ -9710,6 +9714,7 @@ static void dtmf_command(dword Id, PLCI *plci, byte Rc) */ adjust_b1_resource(Id, plci, NULL, (word)(plci->B1_facilities & ~(B1_FACILITY_DTMFX | B1_FACILITY_DTMFR)), DTMF_COMMAND_3); + /* fall through */ case DTMF_COMMAND_3: if (adjust_b_process(Id, plci, Rc) != GOOD) { @@ -9726,9 +9731,9 @@ static void 
dtmf_command(dword Id, PLCI *plci, byte Rc) case DTMF_SEND_TONE: - mask <<= 1; + mask <<= 1; /* fall through */ case DTMF_SEND_MF: - mask <<= 1; + mask <<= 1; /* fall through */ case DTMF_DIGITS_SEND: switch (internal_command) @@ -9737,6 +9742,7 @@ static void dtmf_command(dword Id, PLCI *plci, byte Rc) adjust_b1_resource(Id, plci, NULL, (word)(plci->B1_facilities | ((plci->dtmf_parameter_length != 0) ? B1_FACILITY_DTMFX | B1_FACILITY_DTMFR : B1_FACILITY_DTMFX)), DTMF_COMMAND_1); + /* fall through */ case DTMF_COMMAND_1: if (adjust_b_process(Id, plci, Rc) != GOOD) { @@ -9747,6 +9753,7 @@ static void dtmf_command(dword Id, PLCI *plci, byte Rc) } if (plci->internal_command) return; + /* fall through */ case DTMF_COMMAND_2: if (plci_nl_busy(plci)) { @@ -9863,7 +9870,7 @@ static byte dtmf_request(dword Id, word Number, DIVA_CAPI_ADAPTER *a, PLCI *plci case DTMF_LISTEN_TONE_START: case DTMF_LISTEN_TONE_STOP: - mask <<= 1; + mask <<= 1; /* fall through */ case DTMF_LISTEN_MF_START: case DTMF_LISTEN_MF_STOP: mask <<= 1; @@ -9875,6 +9882,7 @@ static byte dtmf_request(dword Id, word Number, DIVA_CAPI_ADAPTER *a, PLCI *plci PUT_WORD(&result[1], DTMF_UNKNOWN_REQUEST); break; } + /* fall through */ case DTMF_LISTEN_START: case DTMF_LISTEN_STOP: @@ -9904,7 +9912,7 @@ static byte dtmf_request(dword Id, word Number, DIVA_CAPI_ADAPTER *a, PLCI *plci case DTMF_SEND_TONE: - mask <<= 1; + mask <<= 1; /* fall through */ case DTMF_SEND_MF: mask <<= 1; if (!((plci->requested_options_conn | plci->requested_options | plci->adapter->requested_options_table[appl->Id - 1]) @@ -9915,6 +9923,7 @@ static byte dtmf_request(dword Id, word Number, DIVA_CAPI_ADAPTER *a, PLCI *plci PUT_WORD(&result[1], DTMF_UNKNOWN_REQUEST); break; } + /* fall through */ case DTMF_DIGITS_SEND: if (api_parse(&msg[1].info[1], msg[1].length, "wwws", dtmf_parms)) @@ -11315,6 +11324,7 @@ static word mixer_restore_config(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_RESTORE_MIXER_5; Rc = OK; + /* fall through */ case ADJUST_B_RESTORE_MIXER_2: case ADJUST_B_RESTORE_MIXER_3: case ADJUST_B_RESTORE_MIXER_4: @@ -11344,10 +11354,12 @@ static word mixer_restore_config(dword Id, PLCI *plci, byte Rc) plci->internal_command = plci->adjust_b_command; break; } + /* fall through */ case ADJUST_B_RESTORE_MIXER_5: xconnect_write_coefs(plci, plci->adjust_b_command); plci->adjust_b_state = ADJUST_B_RESTORE_MIXER_6; Rc = OK; + /* fall through */ case ADJUST_B_RESTORE_MIXER_6: if (!xconnect_write_coefs_process(Id, plci, Rc)) { @@ -11392,6 +11404,7 @@ static void mixer_command(dword Id, PLCI *plci, byte Rc) adjust_b1_resource(Id, plci, NULL, (word)(plci->B1_facilities | B1_FACILITY_MIXER), MIXER_COMMAND_1); } + /* fall through */ case MIXER_COMMAND_1: if (plci->li_channel_bits & LI_CHANNEL_INVOLVED) { @@ -11419,6 +11432,7 @@ static void mixer_command(dword Id, PLCI *plci, byte Rc) mixer_indication_coefs_set(Id, plci); } while (plci->li_plci_b_read_pos != plci->li_plci_b_req_pos); } + /* fall through */ case MIXER_COMMAND_2: if ((plci->li_channel_bits & LI_CHANNEL_INVOLVED) || ((get_b1_facilities(plci, plci->B1_resource) & B1_FACILITY_MIXER) @@ -11450,6 +11464,7 @@ static void mixer_command(dword Id, PLCI *plci, byte Rc) adjust_b1_resource(Id, plci, NULL, (word)(plci->B1_facilities & ~B1_FACILITY_MIXER), MIXER_COMMAND_3); } + /* fall through */ case MIXER_COMMAND_3: if (!(plci->li_channel_bits & LI_CHANNEL_INVOLVED)) { @@ -12602,6 +12617,7 @@ static void ec_command(dword Id, PLCI *plci, byte Rc) default: adjust_b1_resource(Id, plci, NULL, 
(word)(plci->B1_facilities | B1_FACILITY_EC), EC_COMMAND_1); + /* fall through */ case EC_COMMAND_1: if (adjust_b_process(Id, plci, Rc) != GOOD) { @@ -12612,6 +12628,7 @@ static void ec_command(dword Id, PLCI *plci, byte Rc) } if (plci->internal_command) return; + /* fall through */ case EC_COMMAND_2: if (plci->sig_req) { @@ -12650,6 +12667,7 @@ static void ec_command(dword Id, PLCI *plci, byte Rc) return; } Rc = OK; + /* fall through */ case EC_COMMAND_2: if ((Rc != OK) && (Rc != OK_FC)) { @@ -12660,6 +12678,7 @@ static void ec_command(dword Id, PLCI *plci, byte Rc) } adjust_b1_resource(Id, plci, NULL, (word)(plci->B1_facilities & ~B1_FACILITY_EC), EC_COMMAND_3); + /* fall through */ case EC_COMMAND_3: if (adjust_b_process(Id, plci, Rc) != GOOD) { @@ -13485,6 +13504,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_SAVE_MIXER_1; Rc = OK; + /* fall through */ case ADJUST_B_SAVE_MIXER_1: if (plci->adjust_b_mode & ADJUST_B_MODE_SAVE) { @@ -13496,6 +13516,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_SAVE_DTMF_1; Rc = OK; + /* fall through */ case ADJUST_B_SAVE_DTMF_1: if (plci->adjust_b_mode & ADJUST_B_MODE_SAVE) { @@ -13506,6 +13527,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_REMOVE_L23_1; + /* fall through */ case ADJUST_B_REMOVE_L23_1: if ((plci->adjust_b_mode & ADJUST_B_MODE_REMOVE_L23) && plci->NL.Id && !plci->nl_remove_id) @@ -13530,6 +13552,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_REMOVE_L23_2; Rc = OK; + /* fall through */ case ADJUST_B_REMOVE_L23_2: if ((Rc != OK) && (Rc != OK_FC)) { @@ -13548,6 +13571,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_SAVE_EC_1; Rc = OK; + /* fall through */ case ADJUST_B_SAVE_EC_1: if (plci->adjust_b_mode & ADJUST_B_MODE_SAVE) { @@ -13559,6 +13583,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_SAVE_DTMF_PARAMETER_1; Rc = OK; + /* fall through */ case ADJUST_B_SAVE_DTMF_PARAMETER_1: if (plci->adjust_b_mode & ADJUST_B_MODE_SAVE) { @@ -13570,6 +13595,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_SAVE_VOICE_1; Rc = OK; + /* fall through */ case ADJUST_B_SAVE_VOICE_1: if (plci->adjust_b_mode & ADJUST_B_MODE_SAVE) { @@ -13578,6 +13604,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) break; } plci->adjust_b_state = ADJUST_B_SWITCH_L1_1; + /* fall through */ case ADJUST_B_SWITCH_L1_1: if (plci->adjust_b_mode & ADJUST_B_MODE_SWITCH_L1) { @@ -13608,6 +13635,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_SWITCH_L1_2; Rc = OK; + /* fall through */ case ADJUST_B_SWITCH_L1_2: if ((Rc != OK) && (Rc != OK_FC)) { @@ -13619,6 +13647,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_RESTORE_VOICE_1; Rc = OK; + /* fall through */ case ADJUST_B_RESTORE_VOICE_1: case ADJUST_B_RESTORE_VOICE_2: if (plci->adjust_b_mode & ADJUST_B_MODE_RESTORE) @@ -13629,6 +13658,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_RESTORE_DTMF_PARAMETER_1; Rc = OK; + /* fall through */ case ADJUST_B_RESTORE_DTMF_PARAMETER_1: case ADJUST_B_RESTORE_DTMF_PARAMETER_2: if (plci->adjust_b_mode & ADJUST_B_MODE_RESTORE) @@ -13641,6 +13671,7 @@ static word 
adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_RESTORE_EC_1; Rc = OK; + /* fall through */ case ADJUST_B_RESTORE_EC_1: case ADJUST_B_RESTORE_EC_2: if (plci->adjust_b_mode & ADJUST_B_MODE_RESTORE) @@ -13652,6 +13683,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_ASSIGN_L23_1; + /* fall through */ case ADJUST_B_ASSIGN_L23_1: if (plci->adjust_b_mode & ADJUST_B_MODE_ASSIGN_L23) { @@ -13681,6 +13713,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_ASSIGN_L23_2; Rc = ASSIGN_OK; + /* fall through */ case ADJUST_B_ASSIGN_L23_2: if ((Rc != OK) && (Rc != OK_FC) && (Rc != ASSIGN_OK)) { @@ -13703,6 +13736,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) break; } plci->adjust_b_state = ADJUST_B_CONNECT_1; + /* fall through */ case ADJUST_B_CONNECT_1: if (plci->adjust_b_mode & ADJUST_B_MODE_CONNECT) { @@ -13716,6 +13750,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_RESTORE_DTMF_1; Rc = OK; + /* fall through */ case ADJUST_B_CONNECT_2: case ADJUST_B_CONNECT_3: case ADJUST_B_CONNECT_4: @@ -13751,6 +13786,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) break; } Rc = OK; + /* fall through */ case ADJUST_B_RESTORE_DTMF_1: case ADJUST_B_RESTORE_DTMF_2: if (plci->adjust_b_mode & ADJUST_B_MODE_RESTORE) @@ -13763,6 +13799,7 @@ static word adjust_b_process(dword Id, PLCI *plci, byte Rc) } plci->adjust_b_state = ADJUST_B_RESTORE_MIXER_1; Rc = OK; + /* fall through */ case ADJUST_B_RESTORE_MIXER_1: case ADJUST_B_RESTORE_MIXER_2: case ADJUST_B_RESTORE_MIXER_3: @@ -13827,6 +13864,7 @@ static void adjust_b_restore(dword Id, PLCI *plci, byte Rc) break; } Rc = OK; + /* fall through */ case ADJUST_B_RESTORE_1: if ((Rc != OK) && (Rc != OK_FC)) { @@ -13841,6 +13879,7 @@ static void adjust_b_restore(dword Id, PLCI *plci, byte Rc) plci->adjust_b_state = ADJUST_B_START; dbug(1, dprintf("[%06lx] %s,%d: Adjust B restore...", UnMapId(Id), (char *)(FILE_), __LINE__)); + /* fall through */ case ADJUST_B_RESTORE_2: if (adjust_b_process(Id, plci, Rc) != GOOD) { @@ -13877,6 +13916,7 @@ static void reset_b3_command(dword Id, PLCI *plci, byte Rc) plci->adjust_b_state = ADJUST_B_START; dbug(1, dprintf("[%06lx] %s,%d: Reset B3...", UnMapId(Id), (char *)(FILE_), __LINE__)); + /* fall through */ case RESET_B3_COMMAND_1: Info = adjust_b_process(Id, plci, Rc); if (Info != GOOD) @@ -13930,6 +13970,7 @@ static void select_b_command(dword Id, PLCI *plci, byte Rc) plci->adjust_b_state = ADJUST_B_START; dbug(1, dprintf("[%06lx] %s,%d: Select B protocol...", UnMapId(Id), (char *)(FILE_), __LINE__)); + /* fall through */ case SELECT_B_COMMAND_1: Info = adjust_b_process(Id, plci, Rc); if (Info != GOOD) @@ -13965,7 +14006,7 @@ static void fax_connect_ack_command(dword Id, PLCI *plci, byte Rc) switch (internal_command) { default: - plci->command = 0; + plci->command = 0; /* fall through */ case FAX_CONNECT_ACK_COMMAND_1: if (plci_nl_busy(plci)) { @@ -14013,6 +14054,7 @@ static void fax_edata_ack_command(dword Id, PLCI *plci, byte Rc) { default: plci->command = 0; + /* fall through */ case FAX_EDATA_ACK_COMMAND_1: if (plci_nl_busy(plci)) { @@ -14052,7 +14094,7 @@ static void fax_connect_info_command(dword Id, PLCI *plci, byte Rc) switch (internal_command) { default: - plci->command = 0; + plci->command = 0; /* fall through */ case FAX_CONNECT_INFO_COMMAND_1: if (plci_nl_busy(plci)) { @@ -14112,6 +14154,7 @@ static void 
fax_adjust_b23_command(dword Id, PLCI *plci, byte Rc) plci->adjust_b_state = ADJUST_B_START; dbug(1, dprintf("[%06lx] %s,%d: FAX adjust B23...", UnMapId(Id), (char *)(FILE_), __LINE__)); + /* fall through */ case FAX_ADJUST_B23_COMMAND_1: Info = adjust_b_process(Id, plci, Rc); if (Info != GOOD) @@ -14122,6 +14165,7 @@ static void fax_adjust_b23_command(dword Id, PLCI *plci, byte Rc) } if (plci->internal_command) return; + /* fall through */ case FAX_ADJUST_B23_COMMAND_2: if (plci_nl_busy(plci)) { @@ -14194,7 +14238,7 @@ static void rtp_connect_b3_req_command(dword Id, PLCI *plci, byte Rc) switch (internal_command) { default: - plci->command = 0; + plci->command = 0; /* fall through */ case RTP_CONNECT_B3_REQ_COMMAND_1: if (plci_nl_busy(plci)) { @@ -14245,7 +14289,7 @@ static void rtp_connect_b3_res_command(dword Id, PLCI *plci, byte Rc) switch (internal_command) { default: - plci->command = 0; + plci->command = 0; /* fall through */ case RTP_CONNECT_B3_RES_COMMAND_1: if (plci_nl_busy(plci)) { @@ -14310,6 +14354,7 @@ static void hold_save_command(dword Id, PLCI *plci, byte Rc) plci->adjust_b_state = ADJUST_B_START; dbug(1, dprintf("[%06lx] %s,%d: HOLD save...", UnMapId(Id), (char *)(FILE_), __LINE__)); + /* fall through */ case HOLD_SAVE_COMMAND_1: Info = adjust_b_process(Id, plci, Rc); if (Info != GOOD) @@ -14349,6 +14394,7 @@ static void retrieve_restore_command(dword Id, PLCI *plci, byte Rc) plci->adjust_b_state = ADJUST_B_START; dbug(1, dprintf("[%06lx] %s,%d: RETRIEVE restore...", UnMapId(Id), (char *)(FILE_), __LINE__)); + /* fall through */ case RETRIEVE_RESTORE_COMMAND_1: Info = adjust_b_process(Id, plci, Rc); if (Info != GOOD) From c509a8229d8df29c8308c4b03a1f6d69eb287acd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Nov 2017 16:18:07 -0700 Subject: [PATCH 1769/2177] mISDN: hfcpci: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Karsten Keil Cc: "David S. Miller" Cc: Arvind Yadav Cc: Geliang Tang Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcpci.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index d2e401a8090e..e4ebbee863a1 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -2265,7 +2265,7 @@ static struct pci_driver hfc_driver = { }; static int -_hfcpci_softirq(struct device *dev, void *arg) +_hfcpci_softirq(struct device *dev, void *unused) { struct hfc_pci *hc = dev_get_drvdata(dev); struct bchannel *bch; @@ -2290,9 +2290,9 @@ _hfcpci_softirq(struct device *dev, void *arg) } static void -hfcpci_softirq(void *arg) +hfcpci_softirq(struct timer_list *unused) { - WARN_ON_ONCE(driver_for_each_device(&hfc_driver.driver, NULL, arg, + WARN_ON_ONCE(driver_for_each_device(&hfc_driver.driver, NULL, NULL, _hfcpci_softirq) != 0); /* if next event would be in the past ... 
*/ @@ -2327,9 +2327,7 @@ HFC_init(void) if (poll != HFCPCI_BTRANS_THRESHOLD) { printk(KERN_INFO "%s: Using alternative poll value of %d\n", __func__, poll); - hfc_tl.function = (void *)hfcpci_softirq; - hfc_tl.data = 0; - init_timer(&hfc_tl); + timer_setup(&hfc_tl, hfcpci_softirq, 0); hfc_tl.expires = jiffies + tics; hfc_jiffies = hfc_tl.expires; add_timer(&hfc_tl); From f67971e683e81d7ba4739728511ae6e52a1b6321 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Nov 2017 18:10:03 -0700 Subject: [PATCH 1770/2177] tcp: tcp_fragment() should not assume rtx skbs While stress testing MTU probing, we had crashes in list_del() that we root-caused to the fact that tcp_fragment() is unconditionally inserting the freshly allocated skb into tsorted_sent_queue list. But this list is supposed to contain skbs that were sent. This was mostly harmless until MTU probing was enabled. Fortunately we can use the tcp_queue enum added later (but in same linux version) for rtx-rb-tree to fix the bug. Fixes: e2080072ed2d ("tcp: new list for sent but unacked skbs for RACK recovery") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Soheil Hassas Yeganeh Cc: Alexei Starovoitov Cc: Priyaranjan Jha Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 06a0c89ffe40..822962ece284 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1395,7 +1395,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, /* Link BUFF into the send queue. */ __skb_header_release(buff); tcp_insert_write_queue_after(skb, buff, sk, tcp_queue); - list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor); + if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE) + list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor); return 0; } From b67ce55dbd964f1e80652007bfb83d09c393300d Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 15 Oct 2017 13:52:54 +0300 Subject: [PATCH 1771/2177] iwlwifi: mvm: use RS macro instead of duplicating the code There is a macro for converting TX response rate to a rate scale value, use it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 00a0efab20e3..d88c3685a6dd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1594,8 +1594,7 @@ static void iwl_mvm_rx_tx_cmd_agg(struct iwl_mvm *mvm, mvmsta->tid_data[tid].tx_time = le16_to_cpu(tx_resp->wireless_media_time); mvmsta->tid_data[tid].lq_color = - (tx_resp->tlc_info & TX_RES_RATE_TABLE_COLOR_MSK) >> - TX_RES_RATE_TABLE_COLOR_POS; + TX_RES_RATE_TABLE_COL_GET(tx_resp->tlc_info); } rcu_read_unlock(); From 0ec9257b0a2ceb4eeec552684b390b06705bbad7 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 16 Oct 2017 09:45:10 +0300 Subject: [PATCH 1772/2177] iwlwifi: mvm: cleanup references to aggregation count limit Currently the code is mixing defines and is inconsistent. When enabling a queue, we usually configure the scheduler with IWL_FRAME_LIMIT - 64. When sending to firmware the rate scaling, we limit aggregation to LINK_QUAL_AGG_FRAME_LIMIT_DEF - 63, due to a scheduler bug. Given that, clean up the following: - Fix a stray queue enablement with LINK_QUAL_AGG_FRAME_LIMIT_DEF. 
- Change the comparison that tests if queue needs to be reconfigured to be compared directly to how it was configured. This also saves the redundant round down of the buffer size just for the sake of comparing it, making the code more readable. - Better document gen2 logic Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 23787cc9c89e..c8febde2dffb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -644,8 +644,7 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, /* Redirect to lower AC */ iwl_mvm_reconfig_scd(mvm, queue, iwl_mvm_ac_to_tx_fifo[ac], - cmd.sta_id, tid, LINK_QUAL_AGG_FRAME_LIMIT_DEF, - ssn); + cmd.sta_id, tid, IWL_FRAME_LIMIT, ssn); /* Update AC marking of the queue */ spin_lock_bh(&mvm->queue_info_lock); @@ -2544,12 +2543,6 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, BUILD_BUG_ON((sizeof(mvmsta->agg_tids) * BITS_PER_BYTE) != IWL_MAX_TID_COUNT); - if (!mvm->trans->cfg->gen2) - buf_size = min_t(int, buf_size, LINK_QUAL_AGG_FRAME_LIMIT_DEF); - else - buf_size = min_t(int, buf_size, - LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF); - spin_lock_bh(&mvmsta->lock); ssn = tid_data->ssn; queue = tid_data->txq_id; @@ -2561,10 +2554,17 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (iwl_mvm_has_new_tx_api(mvm)) { /* - * If no queue iwl_mvm_sta_tx_agg_start() would have failed so - * no need to check queue's status + * If there is no queue for this tid, iwl_mvm_sta_tx_agg_start() + * would have failed, so if we are here there is no need to + * allocate a queue. + * However, if aggregation size is different than the default + * size, the scheduler should be reconfigured. + * We cannot do this with the new TX API, so return unsupported + * for now, until it will be offloaded to firmware.. + * Note that if SCD default value changes - this condition + * should be updated as well. */ - if (buf_size < mvmsta->max_agg_bufsize) + if (buf_size < IWL_FRAME_LIMIT) return -ENOTSUPP; ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true); @@ -2587,7 +2587,7 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, * Only reconfig the SCD for the queue if the window size has * changed from current (become smaller) */ - if (!alloc_queue && buf_size < mvmsta->max_agg_bufsize) { + if (!alloc_queue && buf_size < IWL_FRAME_LIMIT) { /* * If reconfiguring an existing queue, it first must be * drained From 5d39051a328c0508b0fb0247227197059edd2da9 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 17 Oct 2017 16:26:00 +0300 Subject: [PATCH 1773/2177] iwlwifi: mvm: reset seq num after restart After a FW reset on A000 NICs, the driver doesn't set the seq number when re-allocating the queues. This in turn leads to a mismatch between the seq number the driver thinks each frame has, and the actual seq num given by the HW. This especially causes issues with aggregations, since the driver could be waiting to start an aggregation and queue traffic from the mac80211 until then, when actually it shouldn't be waiting. 
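To make the described mismatch concrete, here is a minimal standalone sketch (illustrative only, not iwlwifi driver code; SCTL_SEQ and SEQ_TO_SN simply mirror the mac80211 macros IEEE80211_SCTL_SEQ and IEEE80211_SEQ_TO_SN that appear in the diff below): the driver derives its start sequence number from a cached sequence-control value, so a stale value from before the reset no longer matches the firmware, which restarts numbering at zero.

#include <stdio.h>

/* The 802.11 sequence number occupies bits 4..15 of the sequence-control
 * field; these defines mirror IEEE80211_SCTL_SEQ / IEEE80211_SEQ_TO_SN. */
#define SCTL_SEQ 0xFFF0u
#define SEQ_TO_SN(seq) (((seq) & SCTL_SEQ) >> 4)

int main(void)
{
    /* sequence-control value cached by the driver before the FW restart */
    unsigned int stale_seq_ctrl = 0x0AB0;

    /* the driver would derive SSN 171 here, while the restarted firmware
     * numbers its next frame from 0 - hence the mismatch */
    printf("driver-derived SSN: %u, firmware restarts at 0\n",
           SEQ_TO_SN(stale_seq_ctrl));
    return 0;
}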
Fixes: 310181ec34e2 ("iwlwifi: move to TVQM mode") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index c8febde2dffb..039efcd2735d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1257,6 +1257,14 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, mvm_sta->sta_id, i, wdg_timeout); tid_data->txq_id = txq_id; + + /* + * Since we don't set the seq number after reset, and HW + * sets it now, FW reset will cause the seq num to start + * at 0 again, so driver will need to update it + * internally as well, so it keeps in sync with real val + */ + tid_data->seq_number = 0; } else { u16 seq = IEEE80211_SEQ_TO_SN(tid_data->seq_number); From 82d2b9a62699f3d6a61323774c41cebe920e5dc5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 18 Oct 2017 13:09:38 +0300 Subject: [PATCH 1774/2177] iwlwifi: mvm: rs: remove the ANT C from the toggle antenna logic We don't plan to have products with 3 antennas in the near future. All the rest of the code follows the same assumption as well. Remove the support for antenna C from rs_toggle_ant. When trying to toggle from ANT_B, this avoids going through ANT_C, discovering that it doesn't exist and continuing to ANT_A. In MIMO, this avoids doing ANT_AB -> ANT_BC -> ANT_AC and back to ANT_AB. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 0fe723ca844e..c69515ed72df 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -67,12 +67,8 @@ static u8 rs_ht_to_legacy[] = { static const u8 ant_toggle_lookup[] = { [ANT_NONE] = ANT_NONE, [ANT_A] = ANT_B, - [ANT_B] = ANT_C, - [ANT_AB] = ANT_BC, - [ANT_C] = ANT_A, - [ANT_AC] = ANT_AB, - [ANT_BC] = ANT_AC, - [ANT_ABC] = ANT_ABC, + [ANT_B] = ANT_A, + [ANT_AB] = ANT_AB, }; #define IWL_DECLARE_RATE_INFO(r, s, rp, rn) \ @@ -975,7 +971,7 @@ static int rs_toggle_antenna(u32 valid_ant, struct rs_rate *rate) { u8 new_ant_type; - if (!rate->ant || rate->ant > ANT_ABC) + if (!rate->ant || WARN_ON_ONCE(rate->ant & ANT_C)) return 0; if (!rs_is_valid_ant(valid_ant, rate->ant)) From 14a1f85bdc27283898358af09ee506d7f3344d3c Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 17 Oct 2017 12:06:52 +0300 Subject: [PATCH 1775/2177] iwlwifi: mvm: improve latency when there is a reorder timeout When there is a reorder timeout, we may get to a situation where we have the timeout latency for all the next 64 frames. This happens since NSSN is behind for a while, and the driver won't release the frames, since it is not allowed by NSSN. As a result the frame is stored in the reorder buffer although there is no hole, and released 100 ms later. Add a direct comparison to the reorder buffer head, and release immediately if possible. For example: Frame 0 is missed. We receive frame 1, and store it in the buffer. After 100 ms, frame 1 is released and reorder buffer head is 2. We then receive frame 2, with NSSN 0, and store it instead of releasing it.
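The scenario above boils down to one extra check before falling back to the NSSN rules. The sketch below is illustrative only (standalone, assumed names, not the mvm reorder code): when nothing is buffered and the incoming SN equals head_sn there is no hole, so the frame can be released immediately even though the stale NSSN of 0 would otherwise force it into the buffer.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the per-queue reorder state; field names only loosely
 * follow the driver's reorder buffer. */
struct reorder_buf {
    unsigned int head_sn;       /* next SN expected to be released */
    unsigned int num_stored;    /* frames currently held */
};

/* Returns true if the frame must be buffered, false if it can be passed up
 * right away.  Only the new shortcut is modelled; the regular NSSN-driven
 * path is left out. */
static bool needs_buffering(struct reorder_buf *b, unsigned int sn)
{
    if (b->num_stored == 0 && sn == b->head_sn) {
        b->head_sn = sn + 1;    /* no hole: advance the head and release */
        return false;
    }
    return true;                /* fall back to normal NSSN handling */
}

int main(void)
{
    /* after the reorder timer released frame 1, head_sn is 2 */
    struct reorder_buf b = { .head_sn = 2, .num_stored = 0 };

    /* frame 2 arrives carrying a stale NSSN of 0: released at once */
    printf("buffer frame 2? %s\n", needs_buffering(&b, 2) ? "yes" : "no");
    return 0;
}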
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index b84756dc9d6c..9852a4d62337 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -719,6 +719,22 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, return false; } + /* + * release immediately if there are no stored frames, and the sn is + * equal to the head. + * This can happen due to reorder timer, where NSSN is behind head_sn. + * When we released everything, and we got the next frame in the + * sequence, according to the NSSN we can't release immediately, + * while technically there is no hole and we can move forward. + */ + if (!buffer->num_stored && sn == buffer->head_sn) { + if (!amsdu || last_subframe) + buffer->head_sn = ieee80211_sn_inc(buffer->head_sn); + /* No need to update AMSDU last SN - we are moving the head */ + spin_unlock_bh(&buffer->lock); + return false; + } + index = sn % buffer->buf_size; /* From fb7eba711d2169fbd40bc487c191f360332e8b22 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 17 Oct 2017 10:30:24 +0300 Subject: [PATCH 1776/2177] iwlwifi: remove dead code for internal devices only We had a bunch of code that was relevant for internal devices only. Those devices are now being deprecated. Kill all the now unneeded code. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/8000.c | 51 ------------------- drivers/net/wireless/intel/iwlwifi/fw/file.h | 2 +- drivers/net/wireless/intel/iwlwifi/fw/img.h | 8 --- .../net/wireless/intel/iwlwifi/iwl-config.h | 3 -- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 2 - drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 6 --- .../net/wireless/intel/iwlwifi/iwl-trans.h | 17 ------- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 15 ------ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 - drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 - 10 files changed, 1 insertion(+), 105 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c index 1dce74afcd75..9bb7c19d48eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c @@ -100,14 +100,6 @@ #define NVM_HW_SECTION_NUM_FAMILY_8000 10 #define DEFAULT_NVM_FILE_FAMILY_8000C "nvmData-8000C" -/* Max SDIO RX/TX aggregation sizes of the ADDBA request/response */ -#define MAX_RX_AGG_SIZE_8260_SDIO 21 -#define MAX_TX_AGG_SIZE_8260_SDIO 40 - -/* Max A-MPDU exponent for HT and VHT */ -#define MAX_HT_AMPDU_EXPONENT_8260_SDIO IEEE80211_HT_MAX_AMPDU_32K -#define MAX_VHT_AMPDU_EXPONENT_8260_SDIO IEEE80211_VHT_MAX_AMPDU_32K - static const struct iwl_base_params iwl8000_base_params = { .eeprom_size = OTP_LOW_IMAGE_SIZE_FAMILY_8000, .num_of_queues = 31, @@ -234,48 +226,5 @@ const struct iwl_cfg iwl4165_2ac_cfg = { .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; -const struct iwl_cfg iwl8260_2ac_sdio_cfg = { - .name = "Intel(R) Dual Band Wireless-AC 8260", - .fw_name_pre = IWL8000_FW_PRE, - IWL_DEVICE_8260, - .ht_params = &iwl8000_ht_params, - .nvm_ver = IWL8000_NVM_VERSION, - .nvm_calib_ver = IWL8000_TX_POWER_VERSION, - .max_rx_agg_size = MAX_RX_AGG_SIZE_8260_SDIO, - .max_tx_agg_size = MAX_TX_AGG_SIZE_8260_SDIO, - .disable_dummy_notification = true, - .max_ht_ampdu_exponent = 
MAX_HT_AMPDU_EXPONENT_8260_SDIO, - .max_vht_ampdu_exponent = MAX_VHT_AMPDU_EXPONENT_8260_SDIO, -}; - -const struct iwl_cfg iwl8265_2ac_sdio_cfg = { - .name = "Intel(R) Dual Band Wireless-AC 8265", - .fw_name_pre = IWL8265_FW_PRE, - IWL_DEVICE_8265, - .ht_params = &iwl8000_ht_params, - .nvm_ver = IWL8000_NVM_VERSION, - .nvm_calib_ver = IWL8000_TX_POWER_VERSION, - .max_rx_agg_size = MAX_RX_AGG_SIZE_8260_SDIO, - .max_tx_agg_size = MAX_TX_AGG_SIZE_8260_SDIO, - .disable_dummy_notification = true, - .max_ht_ampdu_exponent = MAX_HT_AMPDU_EXPONENT_8260_SDIO, - .max_vht_ampdu_exponent = MAX_VHT_AMPDU_EXPONENT_8260_SDIO, -}; - -const struct iwl_cfg iwl4165_2ac_sdio_cfg = { - .name = "Intel(R) Dual Band Wireless-AC 4165", - .fw_name_pre = IWL8000_FW_PRE, - IWL_DEVICE_8000, - .ht_params = &iwl8000_ht_params, - .nvm_ver = IWL8000_NVM_VERSION, - .nvm_calib_ver = IWL8000_TX_POWER_VERSION, - .max_rx_agg_size = MAX_RX_AGG_SIZE_8260_SDIO, - .max_tx_agg_size = MAX_TX_AGG_SIZE_8260_SDIO, - .bt_shared_single_ant = true, - .disable_dummy_notification = true, - .max_ht_ampdu_exponent = MAX_HT_AMPDU_EXPONENT_8260_SDIO, - .max_vht_ampdu_exponent = MAX_VHT_AMPDU_EXPONENT_8260_SDIO, -}; - MODULE_FIRMWARE(IWL8000_MODULE_FIRMWARE(IWL8000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL8265_MODULE_FIRMWARE(IWL8265_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index efd7fb65de8b..740d97093d1c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -136,7 +136,7 @@ enum iwl_ucode_tlv_type { IWL_UCODE_TLV_N_SCAN_CHANNELS = 31, IWL_UCODE_TLV_PAGING = 32, IWL_UCODE_TLV_SEC_RT_USNIFFER = 34, - IWL_UCODE_TLV_SDIO_ADMA_ADDR = 35, + /* 35 is unused */ IWL_UCODE_TLV_FW_VERSION = 36, IWL_UCODE_TLV_FW_DBG_DEST = 38, IWL_UCODE_TLV_FW_DBG_CONF = 39, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.h b/drivers/net/wireless/intel/iwlwifi/fw/img.h index e6bc9cb60700..985496cc01d0 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/img.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/img.h @@ -138,11 +138,6 @@ struct fw_img { u32 paging_mem_size; }; -struct iwl_sf_region { - u32 addr; - u32 size; -}; - /* * Block paging calculations */ @@ -257,7 +252,6 @@ enum iwl_fw_type { * @type: firmware type (&enum iwl_fw_type) * @cipher_scheme: optional external cipher scheme. 
* @human_readable: human readable version - * @sdio_adma_addr: the default address to set for the ADMA in SDIO mode until * we get the ALIVE from the uCode * @dbg_dest_tlv: points to the destination TLV for debug * @dbg_conf_tlv: array of pointers to configuration TLVs for debug @@ -290,8 +284,6 @@ struct iwl_fw { struct iwl_fw_cipher_scheme cs[IWL_UCODE_MAX_CS]; u8 human_readable[FW_VER_HUMAN_READABLE_SZ]; - u32 sdio_adma_addr; - struct iwl_fw_dbg_dest_tlv *dbg_dest_tlv; struct iwl_fw_dbg_conf_tlv *dbg_conf_tlv[FW_DBG_CONF_MAX]; size_t dbg_conf_tlv_len[FW_DBG_CONF_MAX]; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 86a796025750..d1263a554420 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -467,9 +467,6 @@ extern const struct iwl_cfg iwl8260_2ac_cfg; extern const struct iwl_cfg iwl8265_2ac_cfg; extern const struct iwl_cfg iwl8275_2ac_cfg; extern const struct iwl_cfg iwl4165_2ac_cfg; -extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; -extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; -extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; extern const struct iwl_cfg iwl9270_2ac_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index b03e0f975b5a..4f0d070eda54 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -218,7 +218,6 @@ #define CSR_INT_BIT_FH_TX (1 << 27) /* Tx DMA FH_INT[1:0] */ #define CSR_INT_BIT_SCD (1 << 26) /* TXQ pointer advanced */ #define CSR_INT_BIT_SW_ERR (1 << 25) /* uCode error */ -#define CSR_INT_BIT_PAGING (1 << 24) /* SDIO PAGING */ #define CSR_INT_BIT_RF_KILL (1 << 7) /* HW RFKILL switch GP_CNTRL[27] toggled */ #define CSR_INT_BIT_CT_KILL (1 << 6) /* Critical temp (chip too hot) rfkill */ #define CSR_INT_BIT_SW_RX (1 << 3) /* Rx, command responses */ @@ -229,7 +228,6 @@ CSR_INT_BIT_HW_ERR | \ CSR_INT_BIT_FH_TX | \ CSR_INT_BIT_SW_ERR | \ - CSR_INT_BIT_PAGING | \ CSR_INT_BIT_RF_KILL | \ CSR_INT_BIT_SW_RX | \ CSR_INT_BIT_WAKEUP | \ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index ccdb247d68c5..4b224d7d967c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1039,12 +1039,6 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, drv->fw.img[usniffer_img].paging_mem_size = paging_mem_size; break; - case IWL_UCODE_TLV_SDIO_ADMA_ADDR: - if (tlv_len != sizeof(u32)) - goto invalid_tlv_len; - drv->fw.sdio_adma_addr = - le32_to_cpup((__le32 *)tlv_data); - break; case IWL_UCODE_TLV_FW_GSCAN_CAPA: /* * Don't return an error in case of a shorter tlv_len diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index bba4f54cbbbb..10d330603886 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -398,8 +398,6 @@ struct iwl_hcmd_arr { * @command_groups: array of command groups, each member is an array of the * commands in the group; for debugging only * @command_groups_size: number of command groups, to avoid illegal access - * @sdio_adma_addr: the default address to set for the ADMA in SDIO mode until - * we get the ALIVE from the uCode * @cb_data_offs: offset inside skb->cb to store transport data at, must 
have * space for at least two pointers */ @@ -419,8 +417,6 @@ struct iwl_trans_config { const struct iwl_hcmd_arr *command_groups; int command_groups_size; - u32 sdio_adma_addr; - u8 cb_data_offs; }; @@ -531,8 +527,6 @@ struct iwl_trans_ops { void (*op_mode_leave)(struct iwl_trans *iwl_trans); int (*start_fw)(struct iwl_trans *trans, const struct fw_img *fw, bool run_in_rfkill); - int (*update_sf)(struct iwl_trans *trans, - struct iwl_sf_region *st_fwrd_space); void (*fw_alive)(struct iwl_trans *trans, u32 scd_addr); void (*stop_device)(struct iwl_trans *trans, bool low_power); @@ -828,17 +822,6 @@ static inline int iwl_trans_start_fw(struct iwl_trans *trans, return trans->ops->start_fw(trans, fw, run_in_rfkill); } -static inline int iwl_trans_update_sf(struct iwl_trans *trans, - struct iwl_sf_region *st_fwrd_space) -{ - might_sleep(); - - if (trans->ops->update_sf) - return trans->ops->update_sf(trans, st_fwrd_space); - - return 0; -} - static inline void _iwl_trans_stop_device(struct iwl_trans *trans, bool low_power) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 0296df625cd5..c0de7bb86cf7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -196,8 +196,6 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, mvm->error_event_table[1] = le32_to_cpu(lmac2->error_event_table_ptr); mvm->log_event_table = le32_to_cpu(lmac1->log_event_table_ptr); - mvm->sf_space.addr = le32_to_cpu(lmac1->st_fwrd_addr); - mvm->sf_space.size = le32_to_cpu(lmac1->st_fwrd_size); umac_error_event_table = le32_to_cpu(umac->error_info_addr); @@ -266,7 +264,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, int ret, i; enum iwl_ucode_type old_type = mvm->fwrt.cur_fw_img; static const u16 alive_cmd[] = { MVM_ALIVE }; - struct iwl_sf_region st_fwrd_space; if (ucode_type == IWL_UCODE_REGULAR && iwl_fw_dbg_conf_usniffer(mvm->fw, FW_DBG_START_FROM_ALIVE) && @@ -320,18 +317,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, return -EIO; } - /* - * update the sdio allocation according to the pointer we get in the - * alive notification. - */ - st_fwrd_space.addr = mvm->sf_space.addr; - st_fwrd_space.size = mvm->sf_space.size; - ret = iwl_trans_update_sf(mvm->trans, &st_fwrd_space); - if (ret) { - IWL_ERR(mvm, "Failed to update SF size. 
ret %d\n", ret); - return ret; - } - iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); /* diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index e34b3eb8e08b..2f3d5bef4b9e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -754,7 +754,6 @@ struct iwl_mvm { u32 log_event_table; u32 umac_error_event_table; bool support_umac_log; - struct iwl_sf_region sf_space; u32 ampdu_ref; bool ampdu_toggle; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index e82b4462722b..ce718e9c63ec 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -703,7 +703,6 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, trans_cfg.cb_data_offs = offsetof(struct ieee80211_tx_info, driver_data[2]); - trans_cfg.sdio_adma_addr = fw->sdio_adma_addr; trans_cfg.sw_csum_tx = IWL_MVM_SW_TX_CSUM_OFFLOAD; /* Set a short watchdog for the command queue */ From 4ae827ffc173dc969c67e4f8b35d17a44f76ed40 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 17 Oct 2017 14:11:16 +0300 Subject: [PATCH 1777/2177] iwlwifi: remove host assisted paging This was used for internal devices that are now deprecated. All the currently existing devices can do paging without any help from the host. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/fw/api/paging.h | 24 ---- .../net/wireless/intel/iwlwifi/fw/paging.c | 103 ++---------------- .../net/wireless/intel/iwlwifi/iwl-trans.h | 14 --- 3 files changed, 12 insertions(+), 129 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/paging.h b/drivers/net/wireless/intel/iwlwifi/fw/api/paging.h index e76f9cd4473d..721b9fed7201 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/paging.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/paging.h @@ -81,28 +81,4 @@ struct iwl_fw_paging_cmd { __le32 device_phy_addr[NUM_OF_FW_PAGING_BLOCKS]; } __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_1 */ -/** - * enum iwl_fw_item_id - FW item IDs - * - * @IWL_FW_ITEM_ID_PAGING: Address of the pages that the FW will upload - * download - */ -enum iwl_fw_item_id { - IWL_FW_ITEM_ID_PAGING = 3, -}; - -/** - * struct iwl_fw_get_item_cmd - get an item from the fw - * @item_id: ID of item to obtain, see &enum iwl_fw_item_id - */ -struct iwl_fw_get_item_cmd { - __le32 item_id; -} __packed; /* FW_GET_ITEM_CMD_API_S_VER_1 */ - -struct iwl_fw_get_item_resp { - __le32 item_id; - __le32 item_byte_cnt; - __le32 item_val; -} __packed; /* FW_GET_ITEM_RSP_S_VER_1 */ - #endif /* __iwl_fw_api_paging_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/paging.c b/drivers/net/wireless/intel/iwlwifi/fw/paging.c index 1610722b8099..1fec8e3a6b35 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/paging.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/paging.c @@ -87,9 +87,6 @@ void iwl_free_fw_paging(struct iwl_fw_runtime *fwrt) get_order(paging->fw_paging_size)); paging->fw_paging_block = NULL; } - kfree(fwrt->trans->paging_download_buf); - fwrt->trans->paging_download_buf = NULL; - fwrt->trans->paging_db = NULL; memset(fwrt->fw_paging_db, 0, sizeof(fwrt->fw_paging_db)); } @@ -100,13 +97,11 @@ static int iwl_alloc_fw_paging_mem(struct iwl_fw_runtime *fwrt, { struct page *block; dma_addr_t phys = 0; - int blk_idx, order, num_of_pages, size, dma_enabled; + int blk_idx, order, num_of_pages, size; if 
(fwrt->fw_paging_db[0].fw_paging_block) return 0; - dma_enabled = is_device_dma_capable(fwrt->trans->dev); - /* ensure BLOCK_2_EXP_SIZE is power of 2 of PAGING_BLOCK_SIZE */ BUILD_BUG_ON(BIT(BLOCK_2_EXP_SIZE) != PAGING_BLOCK_SIZE); @@ -139,24 +134,18 @@ static int iwl_alloc_fw_paging_mem(struct iwl_fw_runtime *fwrt, fwrt->fw_paging_db[blk_idx].fw_paging_block = block; fwrt->fw_paging_db[blk_idx].fw_paging_size = size; - if (dma_enabled) { - phys = dma_map_page(fwrt->trans->dev, block, 0, - PAGE_SIZE << order, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(fwrt->trans->dev, phys)) { - /* - * free the previous pages and the current one - * since we failed to map_page. - */ - iwl_free_fw_paging(fwrt); - return -ENOMEM; - } - fwrt->fw_paging_db[blk_idx].fw_paging_phys = phys; - } else { - fwrt->fw_paging_db[blk_idx].fw_paging_phys = - PAGING_ADDR_SIG | - blk_idx << BLOCK_2_EXP_SIZE; + phys = dma_map_page(fwrt->trans->dev, block, 0, + PAGE_SIZE << order, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(fwrt->trans->dev, phys)) { + /* + * free the previous pages and the current one + * since we failed to map_page. + */ + iwl_free_fw_paging(fwrt); + return -ENOMEM; } + fwrt->fw_paging_db[blk_idx].fw_paging_phys = phys; if (!blk_idx) IWL_DEBUG_FW(fwrt, @@ -312,60 +301,6 @@ static int iwl_send_paging_cmd(struct iwl_fw_runtime *fwrt, return iwl_trans_send_cmd(fwrt->trans, &hcmd); } -/* - * Send paging item cmd to FW in case CPU2 has paging image - */ -static int iwl_trans_get_paging_item(struct iwl_fw_runtime *fwrt) -{ - int ret; - struct iwl_fw_get_item_cmd fw_get_item_cmd = { - .item_id = cpu_to_le32(IWL_FW_ITEM_ID_PAGING), - }; - struct iwl_fw_get_item_resp *item_resp; - struct iwl_host_cmd cmd = { - .id = iwl_cmd_id(FW_GET_ITEM_CMD, IWL_ALWAYS_LONG_GROUP, 0), - .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL, - .data = { &fw_get_item_cmd, }, - .len = { sizeof(fw_get_item_cmd), }, - }; - - ret = iwl_trans_send_cmd(fwrt->trans, &cmd); - if (ret) { - IWL_ERR(fwrt, - "Paging: Failed to send FW_GET_ITEM_CMD cmd (err = %d)\n", - ret); - return ret; - } - - item_resp = (void *)((struct iwl_rx_packet *)cmd.resp_pkt)->data; - if (item_resp->item_id != cpu_to_le32(IWL_FW_ITEM_ID_PAGING)) { - IWL_ERR(fwrt, - "Paging: got wrong item in FW_GET_ITEM_CMD resp (item_id = %u)\n", - le32_to_cpu(item_resp->item_id)); - ret = -EIO; - goto exit; - } - - /* Add an extra page for headers */ - fwrt->trans->paging_download_buf = kzalloc(PAGING_BLOCK_SIZE + - FW_PAGING_SIZE, - GFP_KERNEL); - if (!fwrt->trans->paging_download_buf) { - ret = -ENOMEM; - goto exit; - } - fwrt->trans->paging_req_addr = le32_to_cpu(item_resp->item_val); - fwrt->trans->paging_db = fwrt->fw_paging_db; - IWL_DEBUG_FW(fwrt, - "Paging: got paging request address (paging_req_addr 0x%08x)\n", - fwrt->trans->paging_req_addr); - -exit: - iwl_free_resp(&cmd); - - return ret; -} - int iwl_init_paging(struct iwl_fw_runtime *fwrt, enum iwl_ucode_type type) { const struct fw_img *fw = &fwrt->fw->img[type]; @@ -382,20 +317,6 @@ int iwl_init_paging(struct iwl_fw_runtime *fwrt, enum iwl_ucode_type type) if (!fw->paging_mem_size) return 0; - /* - * When dma is not enabled, the driver needs to copy / write - * the downloaded / uploaded page to / from the smem. - * This gets the location of the place were the pages are - * stored. 
- */ - if (!is_device_dma_capable(fwrt->trans->dev)) { - ret = iwl_trans_get_paging_item(fwrt); - if (ret) { - IWL_ERR(fwrt, "failed to get FW paging item\n"); - return ret; - } - } - ret = iwl_save_fw_paging(fwrt, fw); if (ret) { IWL_ERR(fwrt, "failed to save the FW paging image\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 10d330603886..3138f637ab2a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -694,12 +694,6 @@ enum iwl_plat_pm_mode { * @dbg_conf_tlv: array of pointers to configuration TLVs for debug * @dbg_trigger_tlv: array of pointers to triggers TLVs for debug * @dbg_dest_reg_num: num of reg_ops in %dbg_dest_tlv - * @paging_req_addr: The location were the FW will upload / download the pages - * from. The address is set by the opmode - * @paging_db: Pointer to the opmode paging data base, the pointer is set by - * the opmode. - * @paging_download_buf: Buffer used for copying all of the pages before - * downloading them to the FW. The buffer is allocated in the opmode * @system_pm_mode: the system-wide power management mode in use. * This mode is set dynamically, depending on the WoWLAN values * configured from the userspace at runtime. @@ -748,14 +742,6 @@ struct iwl_trans { struct iwl_fw_dbg_trigger_tlv * const *dbg_trigger_tlv; u8 dbg_dest_reg_num; - /* - * Paging parameters - All of the parameters should be set by the - * opmode when paging is enabled - */ - u32 paging_req_addr; - struct iwl_fw_paging *paging_db; - void *paging_download_buf; - enum iwl_plat_pm_mode system_pm_mode; enum iwl_plat_pm_mode runtime_pm_mode; bool suspending; From fb12777ab59b4c8319c931970e28a5406d1aa702 Mon Sep 17 00:00:00 2001 From: Kirtika Ruchandani Date: Sun, 8 Oct 2017 14:20:42 -0700 Subject: [PATCH 1778/2177] iwlwifi: Add more call-sites for pcie reg dumper Commit a6d24fad00d9 ("iwlwifi: pcie: dump registers when HW becomes inaccessible") added a function to dump pcie config registers and memory mapped registers on a failure. It is currently only accessible within trans.c. Add it to struct iwl_trans_ops, so that failure cases in other files can call it. While there, add a call to this function from iwl_pcie_load_firmware_chunk in pcie/tx.c, since this is a common failure case seen on some platforms. Signed-off-by: Kirtika Ruchandani [modified the commit message slightly] Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 7 ++++--- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 3138f637ab2a..ca0b5536a8a6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -520,6 +520,9 @@ struct iwl_trans_txq_scd_cfg { * @dump_data: return a vmalloc'ed buffer with debug data, maybe containing last * TX'ed commands and similar. The buffer will be vfree'd by the caller. * Note that the transport must fill in the proper file headers. + * @dump_regs: dump using IWL_ERR configuration space and memory mapped + * registers of the device to diagnose failure, e.g., when HW becomes + * inaccessible. 
*/ struct iwl_trans_ops { @@ -587,6 +590,8 @@ struct iwl_trans_ops { struct iwl_trans_dump_data *(*dump_data)(struct iwl_trans *trans, const struct iwl_fw_dbg_trigger_tlv *trigger); + + void (*dump_regs)(struct iwl_trans *trans); }; /** @@ -865,6 +870,12 @@ iwl_trans_dump_data(struct iwl_trans *trans, return trans->ops->dump_data(trans, trigger); } +static inline void iwl_trans_dump_regs(struct iwl_trans *trans) +{ + if (trans->ops->dump_regs) + trans->ops->dump_regs(trans); +} + static inline struct iwl_device_cmd * iwl_trans_alloc_tx_cmd(struct iwl_trans *trans) { diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 0008ea323be3..8d992d5ba064 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -88,7 +88,7 @@ #define IWL_FW_MEM_EXTENDED_START 0x40000 #define IWL_FW_MEM_EXTENDED_END 0x57FFF -static void iwl_trans_pcie_err_dump(struct iwl_trans *trans) +static void iwl_trans_pcie_dump_regs(struct iwl_trans *trans) { #define PCI_DUMP_SIZE 64 #define PREFIX_LEN 32 @@ -736,7 +736,7 @@ static int iwl_pcie_load_firmware_chunk(struct iwl_trans *trans, trans_pcie->ucode_write_complete, 5 * HZ); if (!ret) { IWL_ERR(trans, "Failed to load firmware chunk!\n"); - iwl_trans_pcie_err_dump(trans); + iwl_trans_pcie_dump_regs(trans); return -ETIMEDOUT; } @@ -1956,7 +1956,7 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY | CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000); if (unlikely(ret < 0)) { - iwl_trans_pcie_err_dump(trans); + iwl_trans_pcie_dump_regs(trans); iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI); WARN_ONCE(1, "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n", @@ -3021,6 +3021,7 @@ static void iwl_trans_pcie_resume(struct iwl_trans *trans) .ref = iwl_trans_pcie_ref, \ .unref = iwl_trans_pcie_unref, \ .dump_data = iwl_trans_pcie_dump_data, \ + .dump_regs = iwl_trans_pcie_dump_regs, \ .d3_suspend = iwl_trans_pcie_d3_suspend, \ .d3_resume = iwl_trans_pcie_d3_resume diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index e93c471ef9bf..b5c459cd70ce 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1909,6 +1909,7 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans, } if (test_bit(STATUS_FW_ERROR, &trans->status)) { + iwl_trans_dump_regs(trans); IWL_ERR(trans, "FW error in SYNC CMD %s\n", iwl_get_cmd_string(trans, cmd->id)); dump_stack(); From cb8550e15bd1c90ebce9444769a856b799340320 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 19 Oct 2017 08:46:50 +0300 Subject: [PATCH 1779/2177] iwlwifi: fix multi queue notification for a000 devices Currently we return early from sync_rx_queues for a000 devices. This may cause, in case of a non-empty reorder buffer, a warning later on since the RX queue isn't getting the notification to empty it. A better approach would be to send the notification for the default queue only. Do this hard coded for now, until we will have the API to enable multi queue for a000 devices. 
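In rough terms (identifiers taken from the diff below; this is only a sketch of the approach, not the full iwl_mvm_sync_rx_queues_internal() function), the a000 special case boils down to restricting the sync to the default RX queue and expecting a single response:

    /* No RXQ config API on a000 yet: notify only the default queue
     * and wait for a single response.
     */
    if (mvm->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000) {
            qmask = BIT(0);
            if (notif->sync)
                    atomic_set(&mvm->queue_sync_counter, 1);
    }
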
Fixes: bc0294696456 ("iwlwifi: mvm: disable RX queue notification for a000 devices") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 0b3d275fe177..26caea62df9b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4294,9 +4294,7 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); - /* TODO - remove a000 disablement when we have RXQ config API */ - if (!iwl_mvm_has_new_rx_api(mvm) || - mvm->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000) + if (!iwl_mvm_has_new_rx_api(mvm)) return; notif->cookie = mvm->queue_sync_cookie; @@ -4305,6 +4303,13 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, atomic_set(&mvm->queue_sync_counter, mvm->trans->num_rx_queues); + /* TODO - remove this when we have RXQ config API */ + if (mvm->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000) { + qmask = BIT(0); + if (notif->sync) + atomic_set(&mvm->queue_sync_counter, 1); + } + ret = iwl_mvm_notify_rx_queue(mvm, qmask, (u8 *)notif, size); if (ret) { IWL_ERR(mvm, "Failed to trigger RX queues sync (%d)\n", ret); From 435d0827fe1f3eb7c47f3090c51d55f9dc729d94 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 12 Sep 2017 11:06:24 +0300 Subject: [PATCH 1780/2177] iwlwifi: mvm: refactor iwl_mvm_flush_no_vif This function is very indented and hard to read. Refactor it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 48 +++++++++---------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 26caea62df9b..941dbdebe143 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4002,6 +4002,8 @@ out_unlock: static void iwl_mvm_flush_no_vif(struct iwl_mvm *mvm, u32 queues, bool drop) { + int i; + if (drop) { if (iwl_mvm_has_new_tx_api(mvm)) /* TODO new tx api */ @@ -4009,32 +4011,28 @@ static void iwl_mvm_flush_no_vif(struct iwl_mvm *mvm, u32 queues, bool drop) "Need to implement flush TX queue\n"); else iwl_mvm_flush_tx_path(mvm, - iwl_mvm_flushable_queues(mvm) & queues, - 0); - } else { - if (iwl_mvm_has_new_tx_api(mvm)) { - struct ieee80211_sta *sta; - int i; - - mutex_lock(&mvm->mutex); - - for (i = 0; i < ARRAY_SIZE(mvm->fw_id_to_mac_id); i++) { - sta = rcu_dereference_protected( - mvm->fw_id_to_mac_id[i], - lockdep_is_held(&mvm->mutex)); - if (IS_ERR_OR_NULL(sta)) - continue; - - iwl_mvm_wait_sta_queues_empty(mvm, - iwl_mvm_sta_from_mac80211(sta)); - } - - mutex_unlock(&mvm->mutex); - } else { - iwl_trans_wait_tx_queues_empty(mvm->trans, - queues); - } + iwl_mvm_flushable_queues(mvm) & queues, 0); + return; } + + if (!iwl_mvm_has_new_tx_api(mvm)) { + iwl_trans_wait_tx_queues_empty(mvm->trans, queues); + return; + } + + mutex_lock(&mvm->mutex); + for (i = 0; i < ARRAY_SIZE(mvm->fw_id_to_mac_id); i++) { + struct ieee80211_sta *sta; + + sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[i], + lockdep_is_held(&mvm->mutex)); + if (IS_ERR_OR_NULL(sta)) + continue; + + iwl_mvm_wait_sta_queues_empty(mvm, + iwl_mvm_sta_from_mac80211(sta)); + } + mutex_unlock(&mvm->mutex); } static void iwl_mvm_mac_flush(struct 
ieee80211_hw *hw, From 06195639c66d80d21146d0e982f7fc94406ed331 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 12 Sep 2017 10:52:53 +0300 Subject: [PATCH 1781/2177] iwlwifi: mvm: add missing implementation of flush for a000 devices In the mac flush flow, we should flush all existing queues. Since FW API for a000 devices is flush per RA-TID, simply flush all stations with all tids. From FW perspective, asking to flush a TID that doesn't have a queue is valid, so we can just set all bits in the TID mask. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 941dbdebe143..ae2eb0c8cbeb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4004,19 +4004,12 @@ static void iwl_mvm_flush_no_vif(struct iwl_mvm *mvm, u32 queues, bool drop) { int i; - if (drop) { - if (iwl_mvm_has_new_tx_api(mvm)) - /* TODO new tx api */ - WARN_ONCE(1, - "Need to implement flush TX queue\n"); - else + if (!iwl_mvm_has_new_tx_api(mvm)) { + if (drop) iwl_mvm_flush_tx_path(mvm, iwl_mvm_flushable_queues(mvm) & queues, 0); - return; - } - - if (!iwl_mvm_has_new_tx_api(mvm)) { - iwl_trans_wait_tx_queues_empty(mvm->trans, queues); + else + iwl_trans_wait_tx_queues_empty(mvm->trans, queues); return; } @@ -4029,8 +4022,11 @@ static void iwl_mvm_flush_no_vif(struct iwl_mvm *mvm, u32 queues, bool drop) if (IS_ERR_OR_NULL(sta)) continue; - iwl_mvm_wait_sta_queues_empty(mvm, - iwl_mvm_sta_from_mac80211(sta)); + if (drop) + iwl_mvm_flush_sta_tids(mvm, i, 0xFF, 0); + else + iwl_mvm_wait_sta_queues_empty(mvm, + iwl_mvm_sta_from_mac80211(sta)); } mutex_unlock(&mvm->mutex); } From 309c4848c0481f71f4cb20290e6f5bded1e54131 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 24 Oct 2017 22:04:15 +0300 Subject: [PATCH 1782/2177] iwlwifi: mvm: hold mutex when flushing in iwl_mvm_flush_no_vif() The iwl_mvm_flush_tx_path() function sends a synchronous command to the firmware. When doing that, we must hold the mutex. The iwl_mvm_flush_no_vif() function was mistakenly not holding the mutex. Fix it. Fixes: 6110d9e5bdd1 ("iwlwifi: mvm: Flush non STA TX queues") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index ae2eb0c8cbeb..3e92a117c0b8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4005,11 +4005,14 @@ static void iwl_mvm_flush_no_vif(struct iwl_mvm *mvm, u32 queues, bool drop) int i; if (!iwl_mvm_has_new_tx_api(mvm)) { - if (drop) + if (drop) { + mutex_lock(&mvm->mutex); iwl_mvm_flush_tx_path(mvm, iwl_mvm_flushable_queues(mvm) & queues, 0); - else + mutex_unlock(&mvm->mutex); + } else { iwl_trans_wait_tx_queues_empty(mvm->trans, queues); + } return; } From 8cef5344b5f24883c97180c15e17b35d46fc4f37 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 02:29:37 -0700 Subject: [PATCH 1783/2177] iwlwifi: mvm: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
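For reference, the general shape of this conversion looks as follows; struct my_dev and my_timer_fn are hypothetical names used only for illustration, not part of the iwlwifi code:

    struct my_dev {
            struct timer_list timer;
            /* ... */
    };

    /* Old style: the callback receives an opaque unsigned long. */
    static void my_timer_fn(unsigned long data)
    {
            struct my_dev *dev = (struct my_dev *)data;
            /* ... handle expiry ... */
    }
    /* in init code: */
    setup_timer(&dev->timer, my_timer_fn, (unsigned long)dev);

    /* New style: the callback receives the timer itself and recovers
     * the containing object with from_timer(), a container_of() helper.
     */
    static void my_timer_fn(struct timer_list *t)
    {
            struct my_dev *dev = from_timer(dev, t, timer);
            /* ... handle expiry ... */
    }
    /* in init code: */
    timer_setup(&dev->timer, my_timer_fn, 0);

The diff below applies this pattern to both session_timer and reorder_timer.
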
The RCU lifetime on baid_data is unclear, so this adds a direct copy of the rcu_ptr passed to the original callback. It may be possible to improve this to just use baid_data->mvm->baid_map[baid_data->baid] instead. Cc: Johannes Berg Cc: Emmanuel Grumbach Cc: Luca Coelho Cc: Intel Linux Wireless Cc: Kalle Valo Cc: Sara Sharon Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 18 +++++++++--------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 2f3d5bef4b9e..0e18c5066f04 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -652,6 +652,7 @@ struct iwl_mvm_baid_data { u16 entries_per_queue; unsigned long last_rx; struct timer_list session_timer; + struct iwl_mvm_baid_data __rcu **rcu_ptr; struct iwl_mvm *mvm; struct iwl_mvm_reorder_buffer reorder_buf[IWL_MAX_RX_HW_QUEUES]; struct iwl_mvm_reorder_buf_entry entries[]; @@ -1853,7 +1854,7 @@ void iwl_mvm_tdls_ch_switch_work(struct work_struct *work); void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, struct iwl_mvm_internal_rxq_notif *notif, u32 size); -void iwl_mvm_reorder_timer_expired(unsigned long data); +void iwl_mvm_reorder_timer_expired(struct timer_list *t); struct ieee80211_vif *iwl_mvm_get_bss_vif(struct iwl_mvm *mvm); bool iwl_mvm_is_vif_assoc(struct iwl_mvm *mvm); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 9852a4d62337..343bdc4266cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -460,9 +460,9 @@ set_timer: } } -void iwl_mvm_reorder_timer_expired(unsigned long data) +void iwl_mvm_reorder_timer_expired(struct timer_list *t) { - struct iwl_mvm_reorder_buffer *buf = (void *)data; + struct iwl_mvm_reorder_buffer *buf = from_timer(buf, t, reorder_timer); struct iwl_mvm_baid_data *baid_data = iwl_mvm_baid_data_from_reorder_buf(buf); struct iwl_mvm_reorder_buf_entry *entries = diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 039efcd2735d..c19f98489d4e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -252,9 +252,11 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, return ret; } -static void iwl_mvm_rx_agg_session_expired(unsigned long data) +static void iwl_mvm_rx_agg_session_expired(struct timer_list *t) { - struct iwl_mvm_baid_data __rcu **rcu_ptr = (void *)data; + struct iwl_mvm_baid_data *data = + from_timer(data, t, session_timer); + struct iwl_mvm_baid_data __rcu **rcu_ptr = data->rcu_ptr; struct iwl_mvm_baid_data *ba_data; struct ieee80211_sta *sta; struct iwl_mvm_sta *mvm_sta; @@ -2160,10 +2162,8 @@ static void iwl_mvm_init_reorder_buffer(struct iwl_mvm *mvm, reorder_buf->head_sn = ssn; reorder_buf->buf_size = buf_size; /* rx reorder timer */ - reorder_buf->reorder_timer.function = - iwl_mvm_reorder_timer_expired; - reorder_buf->reorder_timer.data = (unsigned long)reorder_buf; - init_timer(&reorder_buf->reorder_timer); + timer_setup(&reorder_buf->reorder_timer, + iwl_mvm_reorder_timer_expired, 0); spin_lock_init(&reorder_buf->lock); reorder_buf->mvm = mvm; 
reorder_buf->queue = i; @@ -2286,9 +2286,9 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, baid_data->baid = baid; baid_data->timeout = timeout; baid_data->last_rx = jiffies; - setup_timer(&baid_data->session_timer, - iwl_mvm_rx_agg_session_expired, - (unsigned long)&mvm->baid_map[baid]); + baid_data->rcu_ptr = &mvm->baid_map[baid]; + timer_setup(&baid_data->session_timer, + iwl_mvm_rx_agg_session_expired, 0); baid_data->mvm = mvm; baid_data->tid = tid; baid_data->sta_id = mvm_sta->sta_id; From 364a1ab91df160f96da5f8d9f778cfbafd5f6d81 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Wed, 25 Oct 2017 11:40:24 +0300 Subject: [PATCH 1784/2177] iwlwifi: drop RX frames during hardware restart In case of a hardware restart the BA session data in HW is lost so the reorder buffer simply passes the frames to mac80211 as is as there is no NSSN set. Instead, we will drop these frames before they reach the reorder buffer. mac80211 drops such frames anyway, but we shouldn't rely on that. In addition it saves some processing time Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 343bdc4266cd..76dc58381e1c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -834,6 +834,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, struct sk_buff *skb; u8 crypt_len = 0; + if (unlikely(test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))) + return; + /* Dont use dev_alloc_skb(), we'll have enough headroom once * ieee80211_hdr pulled. */ From d669fc2d42a43ee0abcf2396df6e9c5a124aa984 Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Tue, 24 Oct 2017 17:36:43 +0300 Subject: [PATCH 1785/2177] iwlwifi: add new cards for 8260 series add three new PCI ID'S for 8260 series Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index aa3c07192624..a988f41125e0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -467,6 +467,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F4, 0xC030, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F4, 0xD030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)}, @@ -485,6 +487,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0000, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x4010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0010, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0110, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x1110, iwl8265_2ac_cfg)}, From 7cddbef445631109bd530ce7cdacaa04ff0a62d1 Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Tue, 24 Oct 2017 17:38:12 +0300 Subject: [PATCH 1786/2177] iwlwifi: add new cards for 8265 series add two new PCI ID'S for 8265 series 
Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index a988f41125e0..efa2fbb926cc 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -513,6 +513,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x3E01, iwl8275_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x1012, iwl8275_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0012, iwl8275_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x0014, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)}, /* 9000 Series */ {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, From 57b36f7fcb39c5eae8c1f463699f747af69643ba Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Tue, 24 Oct 2017 17:04:24 +0300 Subject: [PATCH 1787/2177] iwlwifi: add new cards for a000 series add four new PCI ID'S for a000 series Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index efa2fbb926cc..4a21c12276d7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -586,6 +586,11 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x0070, iwla000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x0030, iwla000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x1080, iwla000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x0090, iwla000_2ac_cfg_hr_cdb)}, + {IWL_PCI_DEVICE(0x2720, 0x0310, iwla000_2ac_cfg_hr_cdb)}, + {IWL_PCI_DEVICE(0x40C0, 0x0000, iwla000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x40C0, 0x0A10, iwla000_2ax_cfg_hr)}, + #endif /* CONFIG_IWLMVM */ {0} From 8f918d3ff4a1283a1693afe2b4c8e1844674ca15 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 2 Nov 2017 17:32:08 -0700 Subject: [PATCH 1788/2177] net_sched: check NULL in tcf_block_put() Callers of tcf_block_put() could pass NULL so we can't use block->q before checking if block is NULL or not. tcf_block_put_ext() callers are fine, it is always non-NULL. Fixes: 8c4083b30e56 ("net: sched: add block bind/unbind notif. and extended block_get/put") Reported-by: Dave Taht Cc: Jiri Pirko Signed-off-by: Cong Wang Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3364347bc699..8d1885abee83 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -340,9 +340,6 @@ void tcf_block_put_ext(struct tcf_block *block, { struct tcf_chain *chain, *tmp; - if (!block) - return; - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) tcf_chain_flush(chain); @@ -362,6 +359,8 @@ void tcf_block_put(struct tcf_block *block) { struct tcf_block_ext_info ei = {0, }; + if (!block) + return; tcf_block_put_ext(block, NULL, block->q, &ei); } From de4a10ef6eff0eb0ced97a39dc3edd0d3101b6ed Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Fri, 3 Nov 2017 03:32:38 -0400 Subject: [PATCH 1789/2177] bnxt_en: fix typo in bnxt_set_coalesce Recent refactoring of coalesce settings contained a typo that prevents receive settings from being set properly. 
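In other words (sketch only, identifiers as in the driver diff below): bnxt_set_coalesce() first writes the RX coalescing parameters through hw_coal pointing at bp->rx_coal, and must then repoint hw_coal at bp->tx_coal before writing the TX values. With the typo it was reassigned to &bp->rx_coal again, so the TX values overwrote the just-written RX settings and bp->tx_coal was never updated:

    hw_coal = &bp->rx_coal;                         /* RX parameters */
    hw_coal->coal_ticks = coal->rx_coalesce_usecs;
    /* ... */
    hw_coal = &bp->tx_coal;                         /* the typo kept this at
                                                     * &bp->rx_coal */
    hw_coal->coal_ticks = coal->tx_coalesce_usecs;
    /* ... */
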
Fixes: 18775aa8a91f ("bnxt_en: Reorganize the coalescing parameters.") Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 5cd1a501c62b..7ce1d4b7e67d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -84,7 +84,7 @@ static int bnxt_set_coalesce(struct net_device *dev, hw_coal->coal_ticks_irq = coal->rx_coalesce_usecs_irq; hw_coal->coal_bufs_irq = coal->rx_max_coalesced_frames_irq * mult; - hw_coal = &bp->rx_coal; + hw_coal = &bp->tx_coal; mult = hw_coal->bufs_per_record; hw_coal->coal_ticks = coal->tx_coalesce_usecs; hw_coal->coal_bufs = coal->tx_max_coalesced_frames * mult; From b153cbc507946f52d5aa687fd64f45d82cb36a3b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 3 Nov 2017 03:32:39 -0400 Subject: [PATCH 1790/2177] bnxt_en: Fix IRQ coalescing regression. Recent IRQ coalescing clean up has removed a guard-rail for the max DMA buffer coalescing value. This is a 6-bit value and must not be 0. We already have a check for 0 but 64 is equivalent to 0 and will cause non-stop interrupts. Fix it by adding the proper check. Fixes: f8503969d27b ("bnxt_en: Refactor and simplify coalescing code.") Reported-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c3dfaa5151aa..4e3d569bf32e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4548,9 +4548,13 @@ static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal, val = clamp_t(u16, hw_coal->coal_bufs, 1, max); req->num_cmpl_aggr_int = cpu_to_le16(val); + + /* This is a 6-bit value and must not be 0, or we'll get non stop IRQ */ + val = min_t(u16, val, 63); req->num_cmpl_dma_aggr = cpu_to_le16(val); - val = clamp_t(u16, hw_coal->coal_bufs_irq, 1, max); + /* This is a 6-bit value and must not be 0, or we'll get non stop IRQ */ + val = clamp_t(u16, hw_coal->coal_bufs_irq, 1, 63); req->num_cmpl_dma_aggr_during_int = cpu_to_le16(val); tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks); From 16b5e50147c21edef7133f204c43465f0c03c3f5 Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Fri, 3 Nov 2017 12:18:25 +0800 Subject: [PATCH 1791/2177] net: hns3: fix for getting autoneg in hns3_get_link_ksettings This patch fixes a bug for ethtool's get_link_ksettings(). When phy exists, we should get autoneg from phy rather than from mac. Because the value of mac.autoneg is invalid when phy exists. Fixes: 496d03e (net: hns3: Add Ethtool support to HNS3 driver) Signed-off-by: Fuyun Liang Signed-off-by: Lipeng Signed-off-by: David S. 
Miller --- .../hisilicon/hns3/hns3pf/hns3_ethtool.c | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index 5cd163bdbf14..367b20cef294 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -9,6 +9,7 @@ #include #include +#include #include "hns3_enet.h" @@ -571,26 +572,25 @@ static int hns3_get_link_ksettings(struct net_device *netdev, u32 advertised_caps; u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN; u8 link_stat; - u8 auto_neg; - u8 duplex; - u32 speed; if (!h->ae_algo || !h->ae_algo->ops) return -EOPNOTSUPP; /* 1.auto_neg & speed & duplex from cmd */ - if (h->ae_algo->ops->get_ksettings_an_result) { - h->ae_algo->ops->get_ksettings_an_result(h, &auto_neg, - &speed, &duplex); - cmd->base.autoneg = auto_neg; - cmd->base.speed = speed; - cmd->base.duplex = duplex; + if (netdev->phydev) + phy_ethtool_ksettings_get(netdev->phydev, cmd); + else if (h->ae_algo->ops->get_ksettings_an_result) + h->ae_algo->ops->get_ksettings_an_result(h, + &cmd->base.autoneg, + &cmd->base.speed, + &cmd->base.duplex); + else + return -EOPNOTSUPP; - link_stat = hns3_get_link(netdev); - if (!link_stat) { - cmd->base.speed = (u32)SPEED_UNKNOWN; - cmd->base.duplex = DUPLEX_UNKNOWN; - } + link_stat = hns3_get_link(netdev); + if (!link_stat) { + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } /* 2.media_type get from bios parameter block */ From 2b39cabb2a283cea0c3d96d9370575371726164f Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Fri, 3 Nov 2017 12:18:26 +0800 Subject: [PATCH 1792/2177] net: hns3: fix for getting advertised_caps in hns3_get_link_ksettings This patch fixes a bug for ethtool's get_link_ksettings(). The advertising for autoneg is always added to advertised_caps whether autoneg is enable or disable. This patch fixes it. Fixes: 496d03e (net: hns3: Add Ethtool support to HNS3 driver) Signed-off-by: Fuyun Liang Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index 367b20cef294..0e10a43e29b3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -640,6 +640,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev, break; } + if (!cmd->base.autoneg) + advertised_caps &= ~HNS3_LM_AUTONEG_BIT; + /* now, map driver link modes to ethtool link modes */ hns3_driv_to_eth_caps(supported_caps, cmd, false); hns3_driv_to_eth_caps(advertised_caps, cmd, true); From 3e1a8f10a1375133ea4a943f21138f00b4d06dc2 Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Fri, 3 Nov 2017 12:18:27 +0800 Subject: [PATCH 1793/2177] net: hns3: fix a bug in hns3_driv_to_eth_caps The value of link_modes.advertising and the value of link_modes.supported is initialized to zero every time in for loop in hns3_driv_to_eth_caps(). But we just want to set specified bit for them. Initialization is unnecessary. This patch fixes it. Fixes: 496d03e (net: hns3: Add Ethtool support to HNS3 driver) Signed-off-by: Fuyun Liang Signed-off-by: Lipeng Signed-off-by: David S. 
Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index 0e10a43e29b3..c7b8ebd14f33 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -359,17 +359,12 @@ static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd, if (!(caps & hns3_lm_map[i].hns3_link_mode)) continue; - if (is_advertised) { - ethtool_link_ksettings_zero_link_mode(cmd, - advertising); + if (is_advertised) __set_bit(hns3_lm_map[i].ethtool_link_mode, cmd->link_modes.advertising); - } else { - ethtool_link_ksettings_zero_link_mode(cmd, - supported); + else __set_bit(hns3_lm_map[i].ethtool_link_mode, cmd->link_modes.supported); - } } } From 80cb5f3d97aa22ce3aac2737da03d4679722c60f Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Fri, 3 Nov 2017 12:18:28 +0800 Subject: [PATCH 1794/2177] net: hns3: add support for set_link_ksettings This patch adds set_link_ksettings support for ethtool cmd. Signed-off-by: Fuyun Liang Signed-off-by: Lipeng Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index c7b8ebd14f33..7fe193b1ccaf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -653,6 +653,16 @@ static int hns3_get_link_ksettings(struct net_device *netdev, return 0; } +static int hns3_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + /* Only support ksettings_set for netdev with phy attached for now */ + if (netdev->phydev) + return phy_ethtool_ksettings_set(netdev->phydev, cmd); + + return -EOPNOTSUPP; +} + static u32 hns3_get_rss_key_size(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); @@ -839,6 +849,7 @@ static const struct ethtool_ops hns3_ethtool_ops = { .get_rxfh = hns3_get_rss, .set_rxfh = hns3_set_rss, .get_link_ksettings = hns3_get_link_ksettings, + .set_link_ksettings = hns3_set_link_ksettings, }; void hns3_ethtool_set_ops(struct net_device *netdev) From d63671d27cd1cc1f93f5fcb86eaeee57c8190d46 Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Fri, 3 Nov 2017 12:18:29 +0800 Subject: [PATCH 1795/2177] net: hns3: add support for nway_reset This patch adds nway_reset support for ethtool cmd. Signed-off-by: Fuyun Liang Signed-off-by: Lipeng Signed-off-by: David S. 
Miller --- .../hisilicon/hns3/hns3pf/hns3_ethtool.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index 7fe193b1ccaf..a21470c72da3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -832,6 +832,23 @@ static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) } } +static int hns3_nway_reset(struct net_device *netdev) +{ + struct phy_device *phy = netdev->phydev; + + if (!netif_running(netdev)) + return 0; + + /* Only support nway_reset for netdev with phy attached for now */ + if (!phy) + return -EOPNOTSUPP; + + if (phy->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + return genphy_restart_aneg(phy); +} + static const struct ethtool_ops hns3_ethtool_ops = { .self_test = hns3_self_test, .get_drvinfo = hns3_get_drvinfo, @@ -850,6 +867,7 @@ static const struct ethtool_ops hns3_ethtool_ops = { .set_rxfh = hns3_set_rss, .get_link_ksettings = hns3_get_link_ksettings, .set_link_ksettings = hns3_set_link_ksettings, + .nway_reset = hns3_nway_reset, }; void hns3_ethtool_set_ops(struct net_device *netdev) From 439adf885e6dd3b2a64941a167b4c18d3728c6dc Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Fri, 3 Nov 2017 12:18:30 +0800 Subject: [PATCH 1796/2177] net: hns3: fix a bug for phy supported feature initialization This patch fixes a bug for phy supported feature initialization. Currently, the value of phydev->supported is initialized by kernel. So it includes many features that we do not support, such as SUPPORTED_FIBRE and SUPPORTED_BNC. This patch fixes it. Fixes: 256727d (net: hns3: Add MDIO support to HNS3 Ethernet driver for hip08 SoC) Signed-off-by: Fuyun Liang Signed-off-by: Lipeng Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index f32d719c4f77..7069e9408d7d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -14,6 +14,13 @@ #include "hclge_main.h" #include "hclge_mdio.h" +#define HCLGE_PHY_SUPPORTED_FEATURES (SUPPORTED_Autoneg | \ + SUPPORTED_TP | \ + SUPPORTED_Pause | \ + PHY_10BT_FEATURES | \ + PHY_100BT_FEATURES | \ + PHY_1000BT_FEATURES) + enum hclge_mdio_c22_op_seq { HCLGE_MDIO_C22_WRITE = 1, HCLGE_MDIO_C22_READ = 2 @@ -195,6 +202,9 @@ int hclge_mac_start_phy(struct hclge_dev *hdev) return ret; } + phydev->supported &= HCLGE_PHY_SUPPORTED_FEATURES; + phydev->advertising = phydev->supported; + phy_start(phydev); return 0; From c7eb7d7230509ec862d4144f7a831f995bc5d028 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 3 Nov 2017 11:46:24 +0100 Subject: [PATCH 1797/2177] net: sched: introduce chain_head_change callback Add a callback that is to be called whenever head of the chain changes. Also provide a callback for the default case when the caller gets a block using non-extended getter. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 14 +++++----- include/net/sch_generic.h | 5 +++- net/sched/cls_api.c | 54 +++++++++++++++++++++++---------------- net/sched/sch_ingress.c | 36 ++++++++++++++------------ 4 files changed, 62 insertions(+), 47 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d15c40c7bde7..505d4b71975f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -26,6 +26,8 @@ enum tcf_block_binder_type { struct tcf_block_ext_info { enum tcf_block_binder_type binder_type; + tcf_chain_head_change_t *chain_head_change; + void *chain_head_change_priv; }; struct tcf_block_cb; @@ -37,12 +39,10 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, void tcf_chain_put(struct tcf_chain *chain); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); -int tcf_block_get_ext(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei); void tcf_block_put(struct tcf_block *block); -void tcf_block_put_ext(struct tcf_block *block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei); static inline struct Qdisc *tcf_block_q(struct tcf_block *block) @@ -82,8 +82,7 @@ int tcf_block_get(struct tcf_block **p_block, } static inline -int tcf_block_get_ext(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei) { return 0; @@ -94,8 +93,7 @@ static inline void tcf_block_put(struct tcf_block *block) } static inline -void tcf_block_put_ext(struct tcf_block *block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c23e938f5b19..f230269e0bfb 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -260,9 +260,12 @@ struct qdisc_skb_cb { unsigned char data[QDISC_CB_PRIV_LEN]; }; +typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); + struct tcf_chain { struct tcf_proto __rcu *filter_chain; - struct tcf_proto __rcu **p_filter_chain; + tcf_chain_head_change_t *chain_head_change; + void *chain_head_change_priv; struct list_head list; struct tcf_block *block; u32 index; /* chain index */ diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8d1885abee83..206e19f4fc01 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -195,12 +195,19 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, return chain; } +static void tcf_chain_head_change(struct tcf_chain *chain, + struct tcf_proto *tp_head) +{ + if (chain->chain_head_change) + chain->chain_head_change(tp_head, + chain->chain_head_change_priv); +} + static void tcf_chain_flush(struct tcf_chain *chain) { struct tcf_proto *tp; - if (chain->p_filter_chain) - RCU_INIT_POINTER(*chain->p_filter_chain, NULL); + tcf_chain_head_change(chain, NULL); while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { RCU_INIT_POINTER(chain->filter_chain, tp->next); tcf_chain_put(chain); @@ -242,13 +249,6 @@ void tcf_chain_put(struct tcf_chain *chain) } EXPORT_SYMBOL(tcf_chain_put); -static void -tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, - struct 
tcf_proto __rcu **p_filter_chain) -{ - chain->p_filter_chain = p_filter_chain; -} - static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei, enum tc_block_command command) @@ -276,8 +276,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND); } -int tcf_block_get_ext(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei) { struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); @@ -295,7 +294,9 @@ int tcf_block_get_ext(struct tcf_block **p_block, err = -ENOMEM; goto err_chain_create; } - tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); + WARN_ON(!ei->chain_head_change); + chain->chain_head_change = ei->chain_head_change; + chain->chain_head_change_priv = ei->chain_head_change_priv; block->net = qdisc_net(q); block->q = q; tcf_block_offload_bind(block, q, ei); @@ -308,12 +309,23 @@ err_chain_create: } EXPORT_SYMBOL(tcf_block_get_ext); +static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) +{ + struct tcf_proto __rcu **p_filter_chain = priv; + + rcu_assign_pointer(*p_filter_chain, tp_head); +} + int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) { - struct tcf_block_ext_info ei = {0, }; + struct tcf_block_ext_info ei = { + .chain_head_change = tcf_chain_head_change_dflt, + .chain_head_change_priv = p_filter_chain, + }; - return tcf_block_get_ext(p_block, p_filter_chain, q, &ei); + WARN_ON(!p_filter_chain); + return tcf_block_get_ext(p_block, q, &ei); } EXPORT_SYMBOL(tcf_block_get); @@ -334,8 +346,7 @@ static void tcf_block_put_final(struct work_struct *work) * actions should be all removed after flushing. However, filters are now * destroyed in tc filter workqueue with RTNL lock, they can not race here. 
*/ -void tcf_block_put_ext(struct tcf_block *block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { struct tcf_chain *chain, *tmp; @@ -361,7 +372,7 @@ void tcf_block_put(struct tcf_block *block) if (!block) return; - tcf_block_put_ext(block, NULL, block->q, &ei); + tcf_block_put_ext(block, block->q, &ei); } EXPORT_SYMBOL(tcf_block_put); @@ -537,9 +548,8 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { - if (chain->p_filter_chain && - *chain_info->pprev == chain->filter_chain) - rcu_assign_pointer(*chain->p_filter_chain, tp); + if (*chain_info->pprev == chain->filter_chain) + tcf_chain_head_change(chain, tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); tcf_chain_hold(chain); @@ -551,8 +561,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, { struct tcf_proto *next = rtnl_dereference(chain_info->next); - if (chain->p_filter_chain && tp == chain->filter_chain) - RCU_INIT_POINTER(*chain->p_filter_chain, next); + if (tp == chain->filter_chain) + tcf_chain_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); tcf_chain_put(chain); } diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index b599db26d34b..811845815b8c 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -54,6 +54,13 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) return q->block; } +static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv) +{ + struct tcf_proto __rcu **p_filter_chain = priv; + + rcu_assign_pointer(*p_filter_chain, tp_head); +} + static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { struct ingress_sched_data *q = qdisc_priv(sch); @@ -61,9 +68,10 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) int err; q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + q->block_info.chain_head_change = clsact_chain_head_change; + q->block_info.chain_head_change_priv = &dev->ingress_cl_list; - err = tcf_block_get_ext(&q->block, &dev->ingress_cl_list, - sch, &q->block_info); + err = tcf_block_get_ext(&q->block, sch, &q->block_info); if (err) return err; @@ -76,10 +84,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); - struct net_device *dev = qdisc_dev(sch); - tcf_block_put_ext(q->block, &dev->ingress_cl_list, - sch, &q->block_info); + tcf_block_put_ext(q->block, sch, &q->block_info); net_dec_ingress_queue(); } @@ -162,16 +168,18 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) int err; q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + q->ingress_block_info.chain_head_change = clsact_chain_head_change; + q->ingress_block_info.chain_head_change_priv = &dev->ingress_cl_list; - err = tcf_block_get_ext(&q->ingress_block, &dev->ingress_cl_list, - sch, &q->ingress_block_info); + err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info); if (err) return err; q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS; + q->egress_block_info.chain_head_change = clsact_chain_head_change; + q->egress_block_info.chain_head_change_priv = &dev->egress_cl_list; - err = tcf_block_get_ext(&q->egress_block, &dev->egress_cl_list, - sch, &q->egress_block_info); + err = 
tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); if (err) goto err_egress_block_get; @@ -183,20 +191,16 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) return 0; err_egress_block_get: - tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list, - sch, &q->ingress_block_info); + tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); return err; } static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); - struct net_device *dev = qdisc_dev(sch); - tcf_block_put_ext(q->egress_block, &dev->egress_cl_list, - sch, &q->egress_block_info); - tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list, - sch, &q->ingress_block_info); + tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); + tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); net_dec_ingress_queue(); net_dec_egress_queue(); From 46209401f8f6116bd0b2c2d14a63958e83ffca0b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 3 Nov 2017 11:46:25 +0100 Subject: [PATCH 1798/2177] net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath In sch_handle_egress and sch_handle_ingress tp->q is used only in order to update stats. So stats and filter list are the only things that are needed in clsact qdisc fastpath processing. Introduce new mini_Qdisc struct to hold those items. Also, introduce a helper to swap the mini_Qdisc structures in case filter list head changes. This removes need for tp->q usage without added overhead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++--- include/net/sch_generic.h | 32 +++++++++++++++++++++++++++ net/core/dev.c | 21 +++++++++--------- net/sched/sch_generic.c | 46 +++++++++++++++++++++++++++++++++++++++ net/sched/sch_ingress.c | 19 +++++++++++----- 5 files changed, 109 insertions(+), 18 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e02f79b2110..7de7656550c2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1559,6 +1559,8 @@ enum netdev_priv_flags { * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one + * @miniq_ingress: ingress/clsact qdisc specific data for + * ingress processing * @ingress_queue: XXX: need comments on this one * @broadcast: hw bcast address * @@ -1576,7 +1578,8 @@ enum netdev_priv_flags { * @tx_global_lock: XXX: need comments on this one * * @xps_maps: XXX: need comments on this one - * + * @miniq_egress: clsact qdisc specific data for + * egress processing * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1795,7 +1798,7 @@ struct net_device { void __rcu *rx_handler_data; #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto __rcu *ingress_cl_list; + struct mini_Qdisc __rcu *miniq_ingress; #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS @@ -1826,7 +1829,7 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto __rcu *egress_cl_list; + struct mini_Qdisc __rcu *miniq_egress; #endif /* These may be needed for future network-power-down code. 
*/ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f230269e0bfb..c64e62c9450a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -904,4 +904,36 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } +/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc. + * The fast path only needs to access filter list and to update stats + */ +struct mini_Qdisc { + struct tcf_proto *filter_list; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; + struct rcu_head rcu; +}; + +static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, + const struct sk_buff *skb) +{ + bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); +} + +static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) +{ + this_cpu_inc(miniq->cpu_qstats->drops); +} + +struct mini_Qdisc_pair { + struct mini_Qdisc miniq1; + struct mini_Qdisc miniq2; + struct mini_Qdisc __rcu **p_miniq; +}; + +void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, + struct tcf_proto *tp_head); +void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, + struct mini_Qdisc __rcu **p_miniq); + #endif diff --git a/net/core/dev.c b/net/core/dev.c index 24ac9083bc13..1423cf4d695c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3274,22 +3274,22 @@ EXPORT_SYMBOL(dev_loopback_xmit); static struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { - struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); + struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); struct tcf_result cl_res; - if (!cl) + if (!miniq) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ - qdisc_bstats_cpu_update(cl->q, skb); + mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_cpu_drop(cl->q); + mini_qdisc_qstats_cpu_drop(miniq); *ret = NET_XMIT_DROP; kfree_skb(skb); return NULL; @@ -4189,7 +4189,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); + struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); struct tcf_result cl_res; /* If there's at least one ingress present somewhere (so @@ -4197,8 +4197,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, * that are not configured with an ingress qdisc will bail * out here. 
*/ - if (!cl) + if (!miniq) return skb; + if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; @@ -4206,15 +4207,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, qdisc_skb_cb(skb)->pkt_len = skb->len; skb->tc_at_ingress = 1; - qdisc_bstats_cpu_update(cl->q, skb); + mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_cpu_drop(cl->q); + mini_qdisc_qstats_cpu_drop(miniq); kfree_skb(skb); return NULL; case TC_ACT_STOLEN: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index aa74aa42b5d7..3839cbbdc32b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1024,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, } } EXPORT_SYMBOL(psched_ratecfg_precompute); + +static void mini_qdisc_rcu_func(struct rcu_head *head) +{ +} + +void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, + struct tcf_proto *tp_head) +{ + struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq); + struct mini_Qdisc *miniq; + + if (!tp_head) { + RCU_INIT_POINTER(*miniqp->p_miniq, NULL); + return; + } + + miniq = !miniq_old || miniq_old == &miniqp->miniq2 ? + &miniqp->miniq1 : &miniqp->miniq2; + + /* We need to make sure that readers won't see the miniq + * we are about to modify. So wait until previous call_rcu_bh callback + * is done. + */ + rcu_barrier_bh(); + miniq->filter_list = tp_head; + rcu_assign_pointer(*miniqp->p_miniq, miniq); + + if (miniq_old) + /* This is counterpart of the rcu barrier above. We need to + * block potential new user of miniq_old until all readers + * are not seeing it. 
+ */ + call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func); +} +EXPORT_SYMBOL(mini_qdisc_pair_swap); + +void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, + struct mini_Qdisc __rcu **p_miniq) +{ + miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats; + miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats; + miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats; + miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats; + miniqp->p_miniq = p_miniq; +} +EXPORT_SYMBOL(mini_qdisc_pair_init); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 811845815b8c..5ecc38f35d47 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -21,6 +21,7 @@ struct ingress_sched_data { struct tcf_block *block; struct tcf_block_ext_info block_info; + struct mini_Qdisc_pair miniqp; }; static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) @@ -56,9 +57,9 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv) { - struct tcf_proto __rcu **p_filter_chain = priv; + struct mini_Qdisc_pair *miniqp = priv; - rcu_assign_pointer(*p_filter_chain, tp_head); + mini_qdisc_pair_swap(miniqp, tp_head); } static int ingress_init(struct Qdisc *sch, struct nlattr *opt) @@ -67,9 +68,11 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; + mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); + q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; q->block_info.chain_head_change = clsact_chain_head_change; - q->block_info.chain_head_change_priv = &dev->ingress_cl_list; + q->block_info.chain_head_change_priv = &q->miniqp; err = tcf_block_get_ext(&q->block, sch, &q->block_info); if (err) @@ -128,6 +131,8 @@ struct clsact_sched_data { struct tcf_block *egress_block; struct tcf_block_ext_info ingress_block_info; struct tcf_block_ext_info egress_block_info; + struct mini_Qdisc_pair miniqp_ingress; + struct mini_Qdisc_pair miniqp_egress; }; static unsigned long clsact_find(struct Qdisc *sch, u32 classid) @@ -167,17 +172,21 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; + mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); + q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; q->ingress_block_info.chain_head_change = clsact_chain_head_change; - q->ingress_block_info.chain_head_change_priv = &dev->ingress_cl_list; + q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress; err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info); if (err) return err; + mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); + q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS; q->egress_block_info.chain_head_change = clsact_chain_head_change; - q->egress_block_info.chain_head_change_priv = &dev->egress_cl_list; + q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); if (err) From 796ec7769d452fd75a3afca3d768f25c120b6c50 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:29 +0100 Subject: [PATCH 1799/2177] mlxsw: spectrum: Rename IPIP-related netdevice handlers To distinguish between events related to tunnel device itself and its bound device, rename a number of functions related to handling tunneling netdevice events to include _ol_ (for "overlay") in the name. 
That leaves room in the namespace for underlay-related functions, which would have _ul_ in the name. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 5 ++- .../net/ethernet/mellanox/mlxsw/spectrum.h | 12 ++--- .../ethernet/mellanox/mlxsw/spectrum_router.c | 45 ++++++++++--------- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 52f38b480669..55bb3669bbcc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4542,8 +4542,9 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, int err = 0; mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); - if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev)) - err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event, ptr); + if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, + event, ptr); else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); else if (mlxsw_sp_is_vrf_event(event, ptr)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index b2393bb8cef9..f01b5cb04963 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -396,13 +396,13 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); -bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); +bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); int -mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - unsigned long event, - struct netdev_notifier_changeupper_info *info); +mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event, + struct netdev_notifier_changeupper_info *info); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d657f01f2d79..96729331eb21 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1306,8 +1306,8 @@ static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, return false; } -bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) +bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) { return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); } @@ -1326,8 +1326,8 @@ mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp, return NULL; } -static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev) +static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) { struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_ipip_entry *ipip_entry; @@ -1347,8 +1347,8 @@ static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp, 
return 0; } -static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev) +static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) { struct mlxsw_sp_ipip_entry *ipip_entry; @@ -1357,8 +1357,8 @@ static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); } -static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev) +static int mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) { struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; @@ -1375,8 +1375,8 @@ static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev) +static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) { struct mlxsw_sp_ipip_entry *ipip_entry; @@ -1385,8 +1385,8 @@ static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); } -static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev) +static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) { struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; @@ -1424,26 +1424,27 @@ static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp, return 0; } -int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev, - unsigned long event, - struct netdev_notifier_changeupper_info *info) +int +mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + unsigned long event, + struct netdev_notifier_changeupper_info *info) { switch (event) { case NETDEV_REGISTER: - return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev); + return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); case NETDEV_UNREGISTER: - mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev); + mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev); return 0; case NETDEV_UP: - return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev); + return mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev); case NETDEV_DOWN: - mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev); + mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev); return 0; case NETDEV_CHANGEUPPER: if (netif_is_l3_master(info->upper_dev)) - return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp, - ol_dev); + return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, + ol_dev); return 0; } return 0; From cafdb2a0d4216c694971a06edf26029a08026ba4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:30 +0100 Subject: [PATCH 1800/2177] mlxsw: spectrum_router: Extract mlxsw_sp_netdevice_ipip_can_offload() Some of the code down the road needs this logic as well. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 96729331eb21..97f062a4ca64 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1326,18 +1326,28 @@ mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp, return NULL; } +static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_ipip_type ipipt) +{ + const struct mlxsw_sp_ipip_ops *ops + = mlxsw_sp->router->ipip_ops_arr[ipipt]; + + /* For deciding whether decap should be offloaded, we don't care about + * overlay protocol, so ask whether either one is supported. + */ + return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) || + ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6); +} + static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev) { - struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_ipip_entry *ipip_entry; enum mlxsw_sp_ipip_type ipipt; mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt); - if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, - MLXSW_SP_L3_PROTO_IPV4) || - router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, - MLXSW_SP_L3_PROTO_IPV6)) { + if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) { ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt, ol_dev); if (IS_ERR(ipip_entry)) From 474f0ff618ae4305637e972746b42fabe2245b99 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:31 +0100 Subject: [PATCH 1801/2177] mlxsw: spectrum: Move mlxsw_sp_ipip_netdev_{s, d}addr{, 4}() These functions ideologically belong to the IPIP module, and some follow-up work will benefit from their presence there. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_ipip.c | 53 +++++++++++++++++++ .../ethernet/mellanox/mlxsw/spectrum_ipip.h | 4 ++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 53 ------------------- .../ethernet/mellanox/mlxsw/spectrum_router.h | 7 --- 4 files changed, 57 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 702fe945227c..8a9fbb64f4ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -68,6 +68,59 @@ static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) be32_to_cpu(tun->parms.o_key) : 0; } +static __be32 +mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms.iph.saddr; +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms.iph.daddr; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + static int mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, struct mlxsw_sp_ipip_entry *ipip_entry) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 6fb49129ce87..87becd152a5c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -38,6 +38,10 @@ #include "spectrum_router.h" #include +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); + enum mlxsw_sp_ipip_type { MLXSW_SP_IPIP_TYPE_GRE4, MLXSW_SP_IPIP_TYPE_MAX, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 97f062a4ca64..ec90c6b6d126 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1016,59 +1016,6 @@ mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry) kfree(ipip_entry); } -static __be32 -mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev) -{ - struct ip_tunnel *tun = netdev_priv(ol_dev); - - return tun->parms.iph.saddr; -} - -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev) -{ - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev), - }; - case MLXSW_SP_L3_PROTO_IPV6: - break; - }; - - WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; -} - -__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) -{ - struct ip_tunnel *tun = netdev_priv(ol_dev); - - return tun->parms.iph.daddr; -} - -union 
mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev) -{ - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev), - }; - case MLXSW_SP_L3_PROTO_IPV6: - break; - }; - - WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; -} - static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, const union mlxsw_sp_l3addr *addr2) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 39e5811ed263..8120b01a9c36 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -103,13 +103,6 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding); bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev); -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev); -__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev); struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, struct mlxsw_sp_nexthop *nh); bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); From 9fb7bd77d11ab03b4a969279de9f54d8fd6fe988 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:32 +0100 Subject: [PATCH 1802/2177] mlxsw: spectrum_ipip: Split accessor functions To implement NETDEV_CHANGE notifications on IP-in-IP tunnels, the handler needs to figure out what actually changed, to understand how exactly to update the offloads. It will do so by storing struct ip_tunnel_parm with previous configuration, and comparing that to the new version. To facilitate these comparisons, extract the code that operates on struct ip_tunnel_parm from the existing accessor functions, and make those a thin wrapper that extracts tunnel parameters and dispatches. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_ipip.c | 138 +++++++++++------- .../ethernet/mellanox/mlxsw/spectrum_ipip.h | 3 + 2 files changed, 89 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 8a9fbb64f4ad..1850080aacbc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -36,89 +36,123 @@ #include "spectrum_ipip.h" -static bool -mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) +struct ip_tunnel_parm +mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev) { struct ip_tunnel *tun = netdev_priv(ol_dev); - return !!(tun->parms.i_flags & TUNNEL_KEY); + return tun->parms; } -static bool -mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) +static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms) { - struct ip_tunnel *tun = netdev_priv(ol_dev); + return !!(parms.i_flags & TUNNEL_KEY); +} - return !!(tun->parms.o_flags & TUNNEL_KEY); +static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms) +{ + return !!(parms.o_flags & TUNNEL_KEY); +} + +static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms) +{ + return mlxsw_sp_ipip_parms_has_ikey(parms) ? 
+ be32_to_cpu(parms.i_key) : 0; +} + +static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms) +{ + return mlxsw_sp_ipip_parms_has_okey(parms) ? + be32_to_cpu(parms.o_key) : 0; +} + +static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms) +{ + return parms.iph.saddr; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto, + struct ip_tunnel_parm parms) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_parms_saddr4(parms), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms) +{ + return parms.iph.daddr; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto, + struct ip_tunnel_parm parms) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_parms_daddr4(parms), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) +{ + return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev)); +} + +static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) +{ + return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev)); } static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) { - struct ip_tunnel *tun = netdev_priv(ol_dev); - - return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ? - be32_to_cpu(tun->parms.i_key) : 0; + return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev)); } static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) { - struct ip_tunnel *tun = netdev_priv(ol_dev); - - return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? 
- be32_to_cpu(tun->parms.o_key) : 0; -} - -static __be32 -mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev) -{ - struct ip_tunnel *tun = netdev_priv(ol_dev); - - return tun->parms.iph.saddr; + return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev)); } union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev) { - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev), - }; - case MLXSW_SP_L3_PROTO_IPV6: - break; - } - - WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; + return mlxsw_sp_ipip_parms_saddr(proto, + mlxsw_sp_ipip_netdev_parms(ol_dev)); } static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) { - struct ip_tunnel *tun = netdev_priv(ol_dev); - - return tun->parms.iph.daddr; + return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev)); } static union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev) { - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev), - }; - case MLXSW_SP_L3_PROTO_IPV6: - break; - } - - WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; + return mlxsw_sp_ipip_parms_daddr(proto, + mlxsw_sp_ipip_netdev_parms(ol_dev)); } static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 87becd152a5c..918d74b4e8d7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -38,6 +38,9 @@ #include "spectrum_router.h" #include +struct ip_tunnel_parm +mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev); + union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev); From a3fe198ecda678e7360c9a08942f0a0e43b6bb2c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:33 +0100 Subject: [PATCH 1803/2177] mlxsw: spectrum_router: Extract mlxsw_sp_ipip_entry_ol_down_event() Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ec90c6b6d126..da8fe7ef8f1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1332,14 +1332,22 @@ static int mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp, return 0; } +static void +mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev) { struct mlxsw_sp_ipip_entry *ipip_entry; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); - if (ipip_entry && ipip_entry->decap_fib_entry) - mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + if (ipip_entry) + mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, From 6d4de44550a8a434b89666088a8f98850e6348c2 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:34 +0100 Subject: [PATCH 1804/2177] mlxsw: spectrum_router: Make mlxsw_sp_netdevice_ipip_ol_up_event() void This function only ever returns 0, so don't pretend it returns anything useful and just make it void. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index da8fe7ef8f1d..2b05f9ff7ff4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1314,8 +1314,8 @@ static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); } -static int mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev) +static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) { struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; @@ -1329,7 +1329,6 @@ static int mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp, decap_fib_entry); } - return 0; } static void @@ -1402,7 +1401,8 @@ mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev); return 0; case NETDEV_UP: - return mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev); + mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev); + return 0; case NETDEV_DOWN: mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev); return 0; From 47518ca5d293dd62ca428581941ee51271a4e468 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:35 +0100 Subject: [PATCH 1805/2177] mlxsw: spectrum_router: Extract mlxsw_sp_ipip_entry_ol_up_event() The piece of logic to promote decap route, if any, is useful for generic tunnel updates, not just for handling of NETDEV_UP events on tunnel interfaces. Extract it to a separate function. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2b05f9ff7ff4..ce0d4625c996 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1314,21 +1314,26 @@ static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); } +static void +mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); +} + static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev) { - struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); - if (ipip_entry) { - decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, - ipip_entry); - if (decap_fib_entry) - mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, - decap_fib_entry); - } - + if (ipip_entry) + mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry); } static void From 7e75af6366b90bbd0cfb62c9c5aeb5e3ec37bcd4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:36 +0100 Subject: [PATCH 1806/2177] mlxsw: spectrum: Propagate extack for tunnel events The function mlxsw_sp_rif_create() takes an extack parameter. So far, for creation of loopback interfaces, NULL was passed. For some events however the extack can be extracted and passed along. So do that for NETDEV_CHANGEUPPER handler. Use the opportunity to update the type of info argument that mlxsw_sp_netdevice_ipip_ol_event() takes. Follow-up patches will introduce handling of more changes, and some of them carry an extack as well, but in an info structure of a different type. Though not strictly erroneous (the pointer could be cast whichever way), it makes no sense to pretend the value is always of a certain type, when in fact it isn't. So change the prototype of the above-mentioned function as well. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 9 +++--- .../ethernet/mellanox/mlxsw/spectrum_router.c | 31 ++++++++++++------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index f01b5cb04963..07cba529b8a9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -398,11 +398,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); -int -mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - unsigned long event, - struct netdev_notifier_changeupper_info *info); +int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event, + struct netdev_notifier_info *info); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ce0d4625c996..c4f1881cfedf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -961,7 +961,8 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_rif_ipip_lb * mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_ipip_type ipipt, - struct net_device *ol_dev) + struct net_device *ol_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_params_ipip_lb lb_params; const struct mlxsw_sp_ipip_ops *ipip_ops; @@ -974,7 +975,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), }; - rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, NULL); + rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack); if (IS_ERR(rif)) return ERR_CAST(rif); return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); @@ -993,7 +994,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt, - ol_dev); + ol_dev, NULL); if (IS_ERR(ipip_entry->ol_lb)) { ret = ERR_CAST(ipip_entry->ol_lb); goto err_ol_ipip_lb_create; @@ -1355,7 +1356,8 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev) + struct net_device *ol_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; @@ -1376,7 +1378,7 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt, - ol_dev); + ol_dev, extack); if (IS_ERR(lb_rif)) return PTR_ERR(lb_rif); mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); @@ -1393,12 +1395,14 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, return 0; } -int -mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev, - unsigned long event, - struct netdev_notifier_changeupper_info *info) +int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + 
unsigned long event, + struct netdev_notifier_info *info) { + struct netdev_notifier_changeupper_info *chup; + struct netlink_ext_ack *extack; + switch (event) { case NETDEV_REGISTER: return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); @@ -1412,9 +1416,12 @@ mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev); return 0; case NETDEV_CHANGEUPPER: - if (netif_is_l3_master(info->upper_dev)) + chup = container_of(info, typeof(*chup), info); + extack = info->extack; + if (netif_is_l3_master(chup->upper_dev)) return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, - ol_dev); + ol_dev, + extack); return 0; } return 0; From 65a6121b30a65bb4b61322c895bf835fedd6e315 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:37 +0100 Subject: [PATCH 1807/2177] mlxsw: spectrum_router: Extract __mlxsw_sp_ipip_entry_update_tunnel() The work that's done by mlxsw_sp_netdevice_ipip_ol_vrf_event() is a good basis for a more versatile function that would take care of all sorts of tunnel updates requests: __mlxsw_sp_ipip_entry_update_tunnel(). Extract that function. Factor out a helper mlxsw_sp_ipip_entry_ol_lb_update() as well. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 72 ++++++++++++------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c4f1881cfedf..e2795b889068 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1355,46 +1355,64 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev, - struct netlink_ext_ack *extack) +static int +mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack) { - struct mlxsw_sp_fib_entry *decap_fib_entry; - struct mlxsw_sp_ipip_entry *ipip_entry; - struct mlxsw_sp_rif_ipip_lb *lb_rif; + struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb; + struct mlxsw_sp_rif_ipip_lb *new_lb_rif; - ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); - if (!ipip_entry) - return 0; + new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, + ipip_entry->ipipt, + ipip_entry->ol_dev, + extack); + if (IS_ERR(new_lb_rif)) + return PTR_ERR(new_lb_rif); + ipip_entry->ol_lb = new_lb_rif; + mlxsw_sp_rif_destroy(&old_lb_rif->common); - /* When a tunneling device is moved to a different VRF, we need to - * update the backing loopback. Since RIFs can't be edited, we need to - * destroy and recreate it. That might create a window of opportunity - * where RALUE and RATR registers end up referencing a RIF that's - * already gone. RATRs are handled by the RIF destroy, and to take care + return 0; +} + +int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack) +{ + int err; + + /* RIFs can't be edited, so to update loopback, we need to destroy and + * recreate it. That creates a window of opportunity where RALUE and + * RATR registers end up referencing a RIF that's already gone. 
RATRs + * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care * of RALUE, demote the decap route back. */ if (ipip_entry->decap_fib_entry) mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); - lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt, - ol_dev, extack); - if (IS_ERR(lb_rif)) - return PTR_ERR(lb_rif); - mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); - ipip_entry->ol_lb = lb_rif; + err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry, extack); + if (err) + return err; - if (ol_dev->flags & IFF_UP) { - decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, - ipip_entry); - if (decap_fib_entry) - mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, - decap_fib_entry); - } + if (ipip_entry->ol_dev->flags & IFF_UP) + mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry); return 0; } +static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = + mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + + if (!ipip_entry) + return 0; + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + extack); +} + int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev, unsigned long event, From 0c5f1cd5ba8c03567c67910816a7a0fb9fee5746 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:38 +0100 Subject: [PATCH 1808/2177] mlxsw: spectrum_router: Generalize __mlxsw_sp_ipip_entry_update_tunnel() The work that needs to be done to update HW configuration in response to changes is similar to what __mlxsw_sp_ipip_entry_update_tunnel() already does, but with a number of twists: each change requires a different subset of things to happen. Extend the function to support all these uses, and allow finely-grained configuration of what should happen at each call through a suite of function arguments. Publish the updated function to allow use from the spectrum_ipip module. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 47 +++++++++++++++++-- .../ethernet/mellanox/mlxsw/spectrum_router.h | 7 +++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e2795b889068..1376a9738b3c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1355,9 +1355,12 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); static int mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, + bool keep_encap, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb; @@ -1370,13 +1373,32 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(new_lb_rif)) return PTR_ERR(new_lb_rif); ipip_entry->ol_lb = new_lb_rif; + + if (keep_encap) { + list_splice_init(&old_lb_rif->common.nexthop_list, + &new_lb_rif->common.nexthop_list); + mlxsw_sp_nexthop_rif_update(mlxsw_sp, &new_lb_rif->common); + } + mlxsw_sp_rif_destroy(&old_lb_rif->common); return 0; } +/** + * Update the offload related to an IPIP entry. 
This always updates decap, and + * in addition to that it also: + * @recreate_loopback: recreates the associated loopback RIF + * @keep_encap: updates next hops that use the tunnel netdevice. This is only + * relevant when recreate_loopback is true. + * @update_nexthops: updates next hops, keeping the current loopback RIF. This + * is only relevant when recreate_loopback is false. + */ int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, + bool recreate_loopback, + bool keep_encap, + bool update_nexthops, struct netlink_ext_ack *extack) { int err; @@ -1390,9 +1412,15 @@ int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, if (ipip_entry->decap_fib_entry) mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); - err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry, extack); - if (err) - return err; + if (recreate_loopback) { + err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry, + keep_encap, extack); + if (err) + return err; + } else if (update_nexthops) { + mlxsw_sp_nexthop_rif_update(mlxsw_sp, + &ipip_entry->ol_lb->common); + } if (ipip_entry->ol_dev->flags & IFF_UP) mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry); @@ -1410,7 +1438,7 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, if (!ipip_entry) return 0; return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, - extack); + true, false, false, extack); } int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, @@ -3285,6 +3313,17 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); } +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_nexthop *nh; + + list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { + __mlxsw_sp_nexthop_neigh_update(nh, false); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } +} + static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 8120b01a9c36..4b8a12a4f493 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -63,6 +63,7 @@ enum mlxsw_sp_rif_counter_dir { struct mlxsw_sp_neigh_entry; struct mlxsw_sp_nexthop; +struct mlxsw_sp_ipip_entry; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); @@ -103,6 +104,12 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding); bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); +int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool recreate_loopback, + bool keep_encap, + bool update_nexthops, + struct netlink_ext_ack *extack); struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, struct mlxsw_sp_nexthop *nh); bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); From 4526cc8aed2b4bf481709911fc1fee9f040ccda1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:39 +0100 Subject: [PATCH 1809/2177] mlxsw: spectrum_router: Fix saddr deduction in mlxsw_sp_ipip_entry_create() When trying to determine whether there are other offloaded tunnels with the same local address, mlxsw_sp_ipip_entry_create() should look for a tunnel 
with matching UL protocol, matching saddr, in the same VRF. However instead of taking into account the UL protocol of the tunnel netdevice (which mlxsw_sp_ipip_entry_saddr_matches() then compares to the UL protocol of inspected IPIP entry), it deduces the UL protocol from the inspected IPIP entry (and that's compared to itself). This is currently immaterial, because only one tunnel type is offloaded, and therefore the UL protocol always matches, but introducing support for a tunnel with IPv6 underlay would uncover this error. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1376a9738b3c..897a3841e52f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1169,10 +1169,10 @@ mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp, * in the same underlay table needs special treatment in the HW. That is * currently not implemented in the driver. */ + ul_proto = router->ipip_ops_arr[ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) { - ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; - saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, ul_tb_id, ipip_entry)) return ERR_PTR(-EEXIST); From af641713e97da4126439c3fb1dee031f7e497654 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:40 +0100 Subject: [PATCH 1810/2177] mlxsw: spectrum_router: Onload conflicting tunnels The approach for offloading IP tunnels implemented currently by mlxsw doesn't allow two tunnels that have the same local IP address in the same (underlay) VRF. Previously, offloads were introduced on demand as encap routes were formed. When such a route was created that would cause offload of a conflicting tunnel, mlxsw_sp_ipip_entry_create() would detect it and return -EEXIST, which would propagate up and cause FIB abort. Now however IPIP entries are created as soon as an offloadable netdevice is created, and the failure prevents creation of such device. Furthermore, if the driver is installed at the point where such conflicting tunnels exist, the failure actually prevents successful modprobe. Furthermore, follow-up patches implement handling of NETDEV_CHANGE due to the local address change. However, NETDEV_CHANGE can't be vetoed. The failure merely means that the offloads weren't updated, but the change in Linux configuration is not rolled back. It is thus desirable to have a robust way of handling these conflicts, which can later be reused for handling NETDEV_CHANGE as well. To fix this, when a conflicting tunnel is created, instead of failing, simply pull the old tunnel to slow path and reject offloading the new one. Introduce two functions: mlxsw_sp_ipip_entry_demote_tunnel() and mlxsw_sp_ipip_demote_tunnel_by_saddr() to handle this. Make them both public, because they will be useful later on in this patchset. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 73 +++++++++++++------ .../ethernet/mellanox/mlxsw/spectrum_router.h | 8 ++ 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 897a3841e52f..832bfa125512 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1159,24 +1159,7 @@ mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_ipip_type ipipt, struct net_device *ol_dev) { - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); - struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_ipip_entry *ipip_entry; - enum mlxsw_sp_l3proto ul_proto; - union mlxsw_sp_l3addr saddr; - - /* The configuration where several tunnels have the same local address - * in the same underlay table needs special treatment in the HW. That is - * currently not implemented in the driver. - */ - ul_proto = router->ipip_ops_arr[ipipt]->ul_proto; - saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); - list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, - ipip_list_node) { - if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, - ul_tb_id, ipip_entry)) - return ERR_PTR(-EEXIST); - } ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev); if (IS_ERR(ipip_entry)) @@ -1292,14 +1275,24 @@ static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev) { struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_l3proto ul_proto; enum mlxsw_sp_ipip_type ipipt; + union mlxsw_sp_l3addr saddr; + u32 ul_tb_id; mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt); if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) { - ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt, - ol_dev); - if (IS_ERR(ipip_entry)) - return PTR_ERR(ipip_entry); + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + NULL)) { + ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt, + ol_dev); + if (IS_ERR(ipip_entry)) + return PTR_ERR(ipip_entry); + } } return 0; @@ -1441,6 +1434,44 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, true, false, false, extack); } +void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct net_device *ol_dev = ipip_entry->ol_dev; + + if (ol_dev->flags & IFF_UP) + mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); + mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); +} + +/* The configuration where several tunnels have the same local address in the + * same underlay table needs special treatment in the HW. That is currently not + * implemented in the driver. This function finds and demotes the first tunnel + * with a given source address, except the one passed in in the argument + * `except'. 
+ */ +bool +mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + const struct mlxsw_sp_ipip_entry *except) +{ + struct mlxsw_sp_ipip_entry *ipip_entry, *tmp; + + list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + if (ipip_entry != except && + mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, + ul_tb_id, ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return true; + } + } + + return false; +} + int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev, unsigned long event, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 4b8a12a4f493..5dd650bdcff6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -110,6 +110,14 @@ int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, bool keep_encap, bool update_nexthops, struct netlink_ext_ack *extack); +void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry); +bool +mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + const struct mlxsw_sp_ipip_entry *except); struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, struct mlxsw_sp_nexthop *nh); bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); From 61481f2fcea9112944330b34767192d7f1696fca Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:41 +0100 Subject: [PATCH 1811/2177] mlxsw: spectrum: Support IPIP underlay VRF migration When a bound device of a tunnel netdevice changes VRF, the loopback RIF that backs the tunnel needs to be updated and existing encapsulating routes need to be refreshed. Note that several tunnels can share the same bound device, in which case all the impacted tunnels need to be updated. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 3 + .../net/ethernet/mellanox/mlxsw/spectrum.h | 7 ++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 100 ++++++++++++++++++ 3 files changed, 110 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 55bb3669bbcc..63e50877796b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4545,6 +4545,9 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, event, ptr); + else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, + event, ptr); else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); else if (mlxsw_sp_is_vrf_event(event, ptr)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 07cba529b8a9..47dd7e06fd29 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -398,10 +398,17 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); +bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, unsigned long event, struct netdev_notifier_info *info); +int +mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event, + struct netdev_notifier_info *info); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 832bfa125512..aa7b820e8408 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1257,6 +1257,33 @@ mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp, return NULL; } +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + struct mlxsw_sp_ipip_entry *start) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list, + ipip_list_node); + list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + struct net_device *ipip_ul_dev = + __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + + if (ipip_ul_dev == ul_dev) + return ipip_entry; + } + + return NULL; +} + +bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); +} + static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev, enum mlxsw_sp_ipip_type ipipt) @@ -1434,6 +1461,16 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, true, false, false, extack); } +static int +mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev, + struct 
netlink_ext_ack *extack) +{ + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + true, true, false, extack); +} + void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry) { @@ -1472,6 +1509,21 @@ mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp, return false; } +static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp, + struct net_device *ul_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry, *tmp; + + list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + struct net_device *ipip_ul_dev = + __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + + if (ipip_ul_dev == ul_dev) + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + } +} + int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev, unsigned long event, @@ -1504,6 +1556,54 @@ int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, return 0; } +static int +__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev, + unsigned long event, + struct netdev_notifier_info *info) +{ + struct netdev_notifier_changeupper_info *chup; + struct netlink_ext_ack *extack; + + switch (event) { + case NETDEV_CHANGEUPPER: + chup = container_of(info, typeof(*chup), info); + extack = info->extack; + if (netif_is_l3_master(chup->upper_dev)) + return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp, + ipip_entry, + ul_dev, + extack); + break; + } + return 0; +} + +int +mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ul_dev, + unsigned long event, + struct netdev_notifier_info *info) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = NULL; + int err; + + while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, + ul_dev, + ipip_entry))) { + err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry, + ul_dev, event, info); + if (err) { + mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp, + ul_dev); + return err; + } + } + + return 0; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; From 4cf04f3ff4da9dd536d9f70127868908a03aaf0a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:42 +0100 Subject: [PATCH 1812/2177] mlxsw: spectrum: Handle NETDEV_CHANGE on L3 tunnels Changes to L3 tunnel netdevices (through `ip tunnel change' as well as `ip link set') lead to NETDEV_CHANGE being generated on the tunnel device. Because what is relevant for the tunnel in question depends on the tunnel type, handling of the event is dispatched to the IPIP module through a newly-added interface mlxsw_sp_ipip_ops.ol_netdev_change(). IPIP tunnels now remember the last set of tunnel parameters in struct mlxsw_sp_ipip_entry.parms, and use it to figure out what exactly has changed. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_ipip.c | 67 +++++++++++++++++++ .../ethernet/mellanox/mlxsw/spectrum_ipip.h | 5 ++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 42 ++++++++++-- .../ethernet/mellanox/mlxsw/spectrum_router.h | 7 ++ 4 files changed, 114 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 1850080aacbc..5f78fc5e7724 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -287,6 +287,72 @@ mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, }; } +static int +mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack) +{ + union mlxsw_sp_l3addr old_saddr, new_saddr; + union mlxsw_sp_l3addr old_daddr, new_daddr; + struct ip_tunnel_parm new_parms; + bool update_tunnel = false; + bool update_decap = false; + bool update_nhs = false; + int err = 0; + + new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev); + + new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, + new_parms); + old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, + ipip_entry->parms); + new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4, + new_parms); + old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4, + ipip_entry->parms); + + if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) { + u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + + /* Since the local address has changed, if there is another + * tunnel with a matching saddr, both need to be demoted. + */ + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, + MLXSW_SP_L3_PROTO_IPV4, + new_saddr, ul_tb_id, + ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + + update_tunnel = true; + } else if (mlxsw_sp_ipip_parms_okey(ipip_entry->parms) != + mlxsw_sp_ipip_parms_okey(new_parms)) { + update_tunnel = true; + } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) { + update_nhs = true; + } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) != + mlxsw_sp_ipip_parms_ikey(new_parms)) { + update_decap = true; + } + + if (update_tunnel) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + true, true, true, + extack); + else if (update_nhs) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, + extack); + else if (update_decap) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, false, + extack); + + ipip_entry->parms = new_parms; + return err; +} + static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { .dev_type = ARPHRD_IPGRE, .ul_proto = MLXSW_SP_L3_PROTO_IPV4, @@ -294,6 +360,7 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { .fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4, .can_offload = mlxsw_sp_ipip_can_offload_gre4, .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4, + .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre4, }; const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 918d74b4e8d7..04b08d9d76e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -56,6 +56,7 @@ struct mlxsw_sp_ipip_entry { struct mlxsw_sp_rif_ipip_lb *ol_lb; struct mlxsw_sp_fib_entry 
*decap_fib_entry; struct list_head ipip_list_node; + struct ip_tunnel_parm parms; }; struct mlxsw_sp_ipip_ops { @@ -78,6 +79,10 @@ struct mlxsw_sp_ipip_ops { struct mlxsw_sp_ipip_entry *ipip_entry, enum mlxsw_reg_ralue_op op, u32 tunnel_index); + + int (*ol_netdev_change)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack); }; extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index aa7b820e8408..c1928561c412 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -943,7 +943,7 @@ __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) return __dev_get_by_index(net, tun->parms.link); } -static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) +u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) { struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); @@ -1002,6 +1002,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, ipip_entry->ipipt = ipipt; ipip_entry->ol_dev = ol_dev; + ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev); return ipip_entry; @@ -1017,12 +1018,6 @@ mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry) kfree(ipip_entry); } -static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, - const union mlxsw_sp_l3addr *addr2) -{ - return !memcmp(addr1, addr2, sizeof(*addr1)); -} - static bool mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp, const enum mlxsw_sp_l3proto ul_proto, @@ -1471,6 +1466,35 @@ mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp, true, true, false, extack); } +static int +mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + struct netlink_ext_ack *extack) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_ipip_entry *ipip_entry; + int err; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!ipip_entry) + /* A change might make a tunnel eligible for offloading, but + * that is currently not implemented. What falls to slow path + * stays there. + */ + return 0; + + /* A change might make a tunnel not eligible for offloading. 
*/ + if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, + ipip_entry->ipipt)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack); + return err; +} + void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry) { @@ -1552,6 +1576,10 @@ int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, ol_dev, extack); return 0; + case NETDEV_CHANGE: + extack = info->extack; + return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, + ol_dev, extack); } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 5dd650bdcff6..1fb82246ce96 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -70,6 +70,7 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); +u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); @@ -138,4 +139,10 @@ void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh); +static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, + const union mlxsw_sp_l3addr *addr2) +{ + return !memcmp(addr1, addr2, sizeof(*addr1)); +} + #endif /* _MLXSW_ROUTER_H_*/ From 89c2b7dabaafee2220e516d314c9b7757fc8176e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:43 +0100 Subject: [PATCH 1813/2177] mlxsw: spectrum_ipip: Handle underlay device change When a bound device of an IP-in-IP tunnel changes, such as through 'ip tunnel change name $name dev $dev', the loopback backing the tunnel needs to be recreated. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 5f78fc5e7724..7502e53447bd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -326,8 +326,9 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, } update_tunnel = true; - } else if (mlxsw_sp_ipip_parms_okey(ipip_entry->parms) != - mlxsw_sp_ipip_parms_okey(new_parms)) { + } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) != + mlxsw_sp_ipip_parms_okey(new_parms)) || + ipip_entry->parms.link != new_parms.link) { update_tunnel = true; } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) { update_nhs = true; From 44b0fff1d8a461a5cd66cfc3a15ff05959d77df5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 3 Nov 2017 10:03:44 +0100 Subject: [PATCH 1814/2177] mlxsw: spectrum_router: Handle down of tunnel underlay When the bound device of a tunnel device is down, encapsulated packets are not egressed anymore, but tunnel decap still works. 
Extend mlxsw_sp_nexthop_rif_update() to take IFF_UP into consideration when deciding whether a given next hop should be offloaded. Because the new logic was added to mlxsw_sp_nexthop_rif_update(), this fixes the case where a newly-added tunnel has a down bound device, which would previously be fully offloaded. Now the down state of the bound device is noted and next hops forwarding to such tunnel are not offloaded. In addition to that, notice NETDEV_UP and NETDEV_DOWN of a bound device to force refresh of tunnel encap route offloads. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 57 ++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c1928561c412..e9187841d82a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1466,6 +1466,28 @@ mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp, true, true, false, extack); } +static int +mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev) +{ + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, NULL); +} + +static int +mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev) +{ + /* A down underlay device causes encapsulated packets to not be + * forwarded, but decap still works. So refresh next hops without + * touching anything else. + */ + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, NULL); +} + static int mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev, @@ -1604,6 +1626,14 @@ __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, ul_dev, extack); break; + + case NETDEV_UP: + return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry, + ul_dev); + case NETDEV_DOWN: + return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp, + ipip_entry, + ul_dev); } return 0; } @@ -3297,10 +3327,19 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } +static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) +{ + struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + + return ul_dev ? 
(ul_dev->flags & IFF_UP) : true; +} + static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct net_device *ol_dev) { + bool removing; + if (!nh->nh_grp->gateway || nh->ipip_entry) return 0; @@ -3308,7 +3347,8 @@ static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, if (!nh->ipip_entry) return -ENOENT; - __mlxsw_sp_nexthop_neigh_update(nh, false); + removing = !mlxsw_sp_ipip_netdev_ul_up(ol_dev); + __mlxsw_sp_nexthop_neigh_update(nh, removing); return 0; } @@ -3476,9 +3516,22 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_nexthop *nh; + bool removing; list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { - __mlxsw_sp_nexthop_neigh_update(nh, false); + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + removing = false; + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev); + break; + default: + WARN_ON(1); + continue; + } + + __mlxsw_sp_nexthop_neigh_update(nh, removing); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); } } From bf5345882bd18bc1b4966d170c0491ebe5c9a7d6 Mon Sep 17 00:00:00 2001 From: Vijaya Mohan Guvva Date: Fri, 3 Nov 2017 12:17:44 -0700 Subject: [PATCH 1815/2177] liquidio: Fix an issue with multiple switchdev enable disables Return success if the same dispatch function is being registered for a given opcode and subcode, there by allow multiple switchdev enable and disables. Signed-off-by: Vijaya Mohan Guvva Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/octeon_device.c | 4 ++++ drivers/net/ethernet/cavium/liquidio/octeon_droq.c | 4 ++-- drivers/net/ethernet/cavium/liquidio/octeon_droq.h | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index e4aa3395a578..2c615ab09e64 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1180,6 +1180,10 @@ octeon_register_dispatch_fn(struct octeon_device *oct, spin_unlock_bh(&oct->dispatch.lock); } else { + if (pfn == fn && + octeon_get_dispatch_arg(oct, opcode, subcode) == fn_arg) + return 0; + dev_err(&oct->pci_dev->dev, "Found previously registered dispatch fn for opcode/subcode: %x/%x\n", opcode, subcode); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 9372d4ce9954..3461d65ff4eb 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -52,8 +52,8 @@ struct __dispatch { * @return Failure: NULL * */ -static inline void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev, - u16 opcode, u16 subcode) +void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev, + u16 opcode, u16 subcode) { int idx; struct list_head *dispatch; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index f91bc84d1719..815a9f56fd59 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -400,6 +400,9 @@ int octeon_register_dispatch_fn(struct octeon_device *oct, u16 subcode, octeon_dispatch_fn_t fn, void *fn_arg); +void *octeon_get_dispatch_arg(struct octeon_device *oct, + u16 opcode, u16 subcode); + void 
octeon_droq_print_stats(void); u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq); From 1ec6e53029e7fddd73bb92500a2dca05fc393e8d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 2 Nov 2017 11:14:48 +0000 Subject: [PATCH 1816/2177] phylink: make local function phylink_phy_change() static Fixes the following sparse warnings: drivers/net/phy/phylink.c:570:6: warning: symbol 'phylink_phy_change' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 05c8f1c10e36..e3bbc70372d3 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -567,7 +567,8 @@ void phylink_destroy(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_destroy); -void phylink_phy_change(struct phy_device *phydev, bool up, bool do_carrier) +static void phylink_phy_change(struct phy_device *phydev, bool up, + bool do_carrier) { struct phylink *pl = phydev->phylink; From 9d917c207d6b212eb5fb4d8ca31826e5b75b4d28 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 2 Nov 2017 15:49:08 +0300 Subject: [PATCH 1817/2177] add support of IFF_XMIT_DST_RELEASE bit in vlan Some time ago Eric Dumazet suggested a "hack the IFF_XMIT_DST_RELEASE flag on the vlan netdev". But the last comment was "does not support properly bonding/team.(If the real_dev->privflags IFF_XMIT_DST_RELEASE bit changes, we want to update all the vlans at the same time )" I've extended that patch to support changes of IFF_XMIT_DST_RELEASE in bonding/team. Both bonding and team call netdev_change_features() after recalculation of features including priv_flags IFF_XMIT_DST_RELEASE bit. So the only thing needed to support is to recheck this bit in vlan_transfer_features(). Suggested-by: Eric Dumazet Signed-off-by: Vadim Fedorenko Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan.c | 3 +++ net/8021q/vlan_netlink.c | 1 + 2 files changed, 4 insertions(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 71c3e045505b..a0103500cc6d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -328,6 +328,9 @@ static void vlan_transfer_features(struct net_device *dev, vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif + vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE); + netdev_update_features(vlandev); } diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 6e7c5a6a7930..6689c0b272a7 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -143,6 +143,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, vlan->vlan_proto = proto; vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); vlan->real_dev = real_dev; + dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE); vlan->flags = VLAN_FLAG_REORDER_HDR; err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id); From 991a26af2e65e0422b2a899e373c0146f2436cae Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 2 Nov 2017 17:07:05 +0300 Subject: [PATCH 1818/2177] tcp_nv: use do_div() instead of expensive div64_u64() Average RTT is 32-bit thus full 64-bit division is redundant. Signed-off-by: Konstantin Khlebnikov Suggested-by: Stephen Hemminger Suggested-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_nv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 0529e29e2941..0b5a05bd82e3 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -242,7 +242,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) struct tcp_sock *tp = tcp_sk(sk); struct tcpnv *ca = inet_csk_ca(sk); unsigned long now = jiffies; - s64 rate64 = 0; + u64 rate64; u32 rate, max_win, cwnd_by_slope; u32 avg_rtt; u32 bytes_acked = 0; @@ -284,8 +284,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) } /* rate in 100's bits per second */ - rate64 = ((u64)sample->in_flight) * 8000000; - rate = (u32)div64_u64(rate64, (u64)(avg_rtt ?: 1) * 100); + rate64 = ((u64)sample->in_flight) * 80000; + do_div(rate64, avg_rtt ?: 1); + rate = (u32)rate64; /* Remember the maximum rate seen during this RTT * Note: It may be more than one RTT. This function should be From 0f04d057515275099c6e4f767b95a278be4681bf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 3 Nov 2017 08:09:45 +0000 Subject: [PATCH 1819/2177] net: sched: cls_u32: use bitwise & rather than logical && on n->flags Currently n->flags is being operated on by a logical && operator rather than a bitwise & operator. This looks incorrect as these should be bit flag operations. Fix this. Detected by CoverityScan, CID#1460398 ("Logical vs. bitwise operator") Fixes: 245dc5121a9b ("net: sched: cls_u32: call block callbacks for offload") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 86145867b424..2737b71854c9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -572,7 +572,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, n->flags |= TCA_CLS_FLAGS_IN_HW; } - if (skip_sw && !(n->flags && TCA_CLS_FLAGS_IN_HW)) + if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) return -EINVAL; return 0; From df7e8e2e3e59fe29c24f09d7c3b68e732d661af3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Nov 2017 08:27:14 -0700 Subject: [PATCH 1820/2177] pktgen: do not abuse IN6_ADDR_HSIZE pktgen accidentally used IN6_ADDR_HSIZE, instead of using the size of an IPv6 address. Since IN6_ADDR_HSIZE recently was increased from 16 to 256, this old bug is hitting us. Fixes: 3f27fb23219e ("ipv6: addrconf: add per netns perturbation in inet6_addr_hash()") Signed-off-by: Eric Dumazet Reported-by: Dan Carpenter Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6e1e10ff433a..e3fa53a07d34 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2165,7 +2165,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) + pkt_dev->pkt_overhead; } - for (i = 0; i < IN6_ADDR_HSIZE; i++) + for (i = 0; i < sizeof(struct in6_addr); i++) if (pkt_dev->cur_in6_saddr.s6_addr[i]) { set = 1; break; From 27c565ae9d554fa1c00c799754cff43476c8d3b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Nov 2017 08:53:27 -0700 Subject: [PATCH 1821/2177] ipv6: remove IN6_ADDR_HSIZE from addrconf.h IN6_ADDR_HSIZE is private to addrconf.c, move it here to avoid confusion. Signed-off-by: Eric Dumazet Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 3 --- net/ipv6/addrconf.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 3357332ea375..b623b65a79d1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -59,9 +59,6 @@ struct in6_validator_info { struct netlink_ext_ack *extack; }; -#define IN6_ADDR_HSIZE_SHIFT 8 -#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) - int addrconf_init(void); void addrconf_cleanup(void); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 69b8cdb43aa2..66d8c3d912fd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -157,6 +157,8 @@ static int ipv6_generate_stable_address(struct in6_addr *addr, u8 dad_count, const struct inet6_dev *idev); +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) /* * Configured unicast address hash table */ From ee20598194500e82c477cf13e52b58e569446ed0 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 18 Jul 2017 15:42:15 -0500 Subject: [PATCH 1822/2177] net/dcb: Add dscp to priority selector type IEEE specification P802.1Qcd/D2.1 defines priority selector 5. This APP TLV selector defines DSCP to priority map. This patch defines such DSCP selector. Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/uapi/linux/dcbnl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index b6170a6af7c2..2c0c6453c3f4 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -206,6 +206,7 @@ struct cee_pfc { #define IEEE_8021QAZ_APP_SEL_STREAM 2 #define IEEE_8021QAZ_APP_SEL_DGRAM 3 #define IEEE_8021QAZ_APP_SEL_ANY 4 +#define IEEE_8021QAZ_APP_SEL_DSCP 5 /* This structure contains the IEEE 802.1Qaz APP managed object. This * object is also used for the CEE std as well. From c02762eb20cb57ec5b7c037b056c37d5838c803f Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 18 Jul 2017 16:03:17 -0500 Subject: [PATCH 1823/2177] net/mlx5: QCAM register firmware command support The QCAM register provides capability bit for all the QoS registers using ACCESS_REG command. 
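As a rough illustration of how a consumer would key off the new capability bits (a sketch only: the helper name is hypothetical, while MLX5_CAP_GEN(), the MLX5_CAP_QCAM_REG() macro and the qpts/qpdpm bits in the QCAM layout are the ones this patch adds):

	/* Sketch: gate QoS register access on QCAM-reported capabilities. */
	static bool example_qos_dscp_regs_supported(struct mlx5_core_dev *mdev)
	{
		/* dev->caps.qcam is only populated when qcam_reg is set */
		if (!MLX5_CAP_GEN(mdev, qcam_reg))
			return false;

		return MLX5_CAP_QCAM_REG(mdev, qpts) &&
		       MLX5_CAP_QCAM_REG(mdev, qpdpm);
	}
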
Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 10 +++++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + .../net/ethernet/mellanox/mlx5/core/port.c | 12 ++++++ include/linux/mlx5/device.h | 14 +++++++ include/linux/mlx5/driver.h | 2 + include/linux/mlx5/mlx5_ifc.h | 40 ++++++++++++++++++- 6 files changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 2c71557d1cee..5ef1b56b6a96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -106,6 +106,13 @@ static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev) MLX5_MCAM_REGS_FIRST_128); } +static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_qcam_reg(dev, dev->caps.qcam, + MLX5_QCAM_FEATURE_ENHANCED_FEATURES, + MLX5_QCAM_REGS_FIRST_128); +} + int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { int err; @@ -182,6 +189,9 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, mcam_reg)) mlx5_get_mcam_reg(dev); + if (MLX5_CAP_GEN(dev, qcam_reg)) + mlx5_get_qcam_reg(dev); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 8f00de2fe283..ff4a0b889a6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -122,6 +122,8 @@ int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, u8 access_reg_group); int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group, u8 access_reg_group); +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index e07061f565d6..b6553be841f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -98,6 +98,18 @@ int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group, return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0); } +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(qcam_reg); + + MLX5_SET(qcam_reg, in, feature_group, feature_group); + MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0); +} + struct mlx5_reg_pcap { u8 rsvd0; u8 port_num; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e32dbc4934db..6d79b3f79458 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1000,6 +1000,14 @@ enum mlx5_mcam_feature_groups { MLX5_MCAM_FEATURE_ENHANCED_FEATURES = 0x0, }; +enum mlx5_qcam_reg_groups { + MLX5_QCAM_REGS_FIRST_128 = 0x0, +}; + +enum mlx5_qcam_feature_groups { + MLX5_QCAM_FEATURE_ENHANCED_FEATURES = 0x0, +}; + /* GET Dev Caps macros */ #define MLX5_CAP_GEN(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap) @@ -1108,6 +1116,12 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, 
mng_feature_cap_mask.enhanced_features.fld) +#define MLX5_CAP_QCAM_REG(mdev, fld) \ + MLX5_GET(qcam_reg, (mdev)->caps.qcam, qos_access_reg_cap_mask.reg_cap.fld) + +#define MLX5_CAP_QCAM_FEATURE(mdev, fld) \ + MLX5_GET(qcam_reg, (mdev)->caps.qcam, qos_feature_cap_mask.feature_cap.fld) + #define MLX5_CAP_FPGA(mdev, cap) \ MLX5_GET(fpga_cap, (mdev)->caps.fpga, cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 08c77b7e59cb..ed5be52282ea 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -109,6 +109,7 @@ enum { enum { MLX5_REG_QETCR = 0x4005, MLX5_REG_QTCT = 0x400a, + MLX5_REG_QCAM = 0x4019, MLX5_REG_DCBX_PARAM = 0x4020, MLX5_REG_DCBX_APP = 0x4021, MLX5_REG_FPGA_CAP = 0x4022, @@ -798,6 +799,7 @@ struct mlx5_core_dev { u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; + u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; } caps; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 69772347f866..f127c5b310c5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -838,7 +838,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cc_modify_allowed[0x1]; u8 start_pad[0x1]; u8 cache_line_128byte[0x1]; - u8 reserved_at_165[0xb]; + u8 reserved_at_165[0xa]; + u8 qcam_reg[0x1]; u8 gid_table_size[0x10]; u8 out_of_seq_cnt[0x1]; @@ -7890,6 +7891,43 @@ struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_1c0[0x80]; }; +struct mlx5_ifc_qcam_access_reg_cap_mask { + u8 qcam_access_reg_cap_mask_127_to_20[0x6C]; + u8 qpdpm[0x1]; + u8 qcam_access_reg_cap_mask_18_to_4[0x0F]; + u8 qdpm[0x1]; + u8 qpts[0x1]; + u8 qcap[0x1]; + u8 qcam_access_reg_cap_mask_0[0x1]; +}; + +struct mlx5_ifc_qcam_qos_feature_cap_mask { + u8 qcam_qos_feature_cap_mask_127_to_1[0x7F]; + u8 qpts_trust_both[0x1]; +}; + +struct mlx5_ifc_qcam_reg_bits { + u8 reserved_at_0[0x8]; + u8 feature_group[0x8]; + u8 reserved_at_10[0x8]; + u8 access_reg_group[0x8]; + u8 reserved_at_20[0x20]; + + union { + struct mlx5_ifc_qcam_access_reg_cap_mask reg_cap; + u8 reserved_at_0[0x80]; + } qos_access_reg_cap_mask; + + u8 reserved_at_c0[0x80]; + + union { + struct mlx5_ifc_qcam_qos_feature_cap_mask feature_cap; + u8 reserved_at_0[0x80]; + } qos_feature_cap_mask; + + u8 reserved_at_1c0[0x80]; +}; + struct mlx5_ifc_pcap_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; From 71c70eb21c33c60433b95e72a59d40bb128db649 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 2 Aug 2017 21:36:23 -0500 Subject: [PATCH 1824/2177] net/mlx5: Add MLX5_SET16 and MLX5_GET16 Add MLX5_SET16 and MLX5_GET16 for 16bit structure field in firmware command. 
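A minimal usage sketch (not part of this patch): the qpdpm_dscp_reg layout and this access pattern come from the QPDPM patch later in the series, and the payload/dscp_index/prio names are illustrative only.

	void *ent = MLX5_ADDR_OF(qpdpm_reg, payload, dscp[dscp_index]);

	MLX5_SET16(qpdpm_dscp_reg, ent, prio, prio);	/* 4-bit priority field */
	MLX5_SET16(qpdpm_dscp_reg, ent, e, 1);		/* mark this entry as modified */
	prio = MLX5_GET16(qpdpm_dscp_reg, ent, prio);	/* read the field back */
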
Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 6d79b3f79458..409ffb14298a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -49,11 +49,15 @@ #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) #define __mlx5_bit_off(typ, fld) (offsetof(struct mlx5_ifc_##typ##_bits, fld)) +#define __mlx5_16_off(typ, fld) (__mlx5_bit_off(typ, fld) / 16) #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32) #define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64) +#define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0xf)) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f)) #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << __mlx5_dw_bit_off(typ, fld)) +#define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) +#define __mlx5_16_mask(typ, fld) (__mlx5_mask16(typ, fld) << __mlx5_16_bit_off(typ, fld)) #define __mlx5_st_sz_bits(typ) sizeof(struct mlx5_ifc_##typ##_bits) #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) @@ -116,6 +120,19 @@ __mlx5_mask(typ, fld)) ___t; \ }) +#define MLX5_GET16(typ, p, fld) ((be16_to_cpu(*((__be16 *)(p) +\ +__mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \ +__mlx5_mask16(typ, fld)) + +#define MLX5_SET16(typ, p, fld, v) do { \ + u16 _v = v; \ + BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 16); \ + *((__be16 *)(p) + __mlx5_16_off(typ, fld)) = \ + cpu_to_be16((be16_to_cpu(*((__be16 *)(p) + __mlx5_16_off(typ, fld))) & \ + (~__mlx5_16_mask(typ, fld))) | (((_v) & __mlx5_mask16(typ, fld)) \ + << __mlx5_16_bit_off(typ, fld))); \ +} while (0) + /* Big endian getters */ #define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ __mlx5_64_off(typ, fld))) From 415a64aa8dc6b4fc478609c549ca652d95a12f13 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 18 Jul 2017 16:08:46 -0500 Subject: [PATCH 1825/2177] net/mlx5: QPTS and QPDPM register firmware command support The QPTS register allows changing the priority trust state between pcp and dscp. Add support to get/set trust state from device. When the port is in pcp/dscp trust state, a packet is routed by hardware to the matching priority based on its pcp/dscp value, respectively. The QPDPM register allows changing the dscp to priority mapping. Add support to get/set dscp to priority mapping from device. Note that to change a dscp mapping, the "e" bit of this dscp structure must be set in the QPDPM firmware command. 
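A short usage sketch of the new helpers (error handling trimmed; the function name and the DSCP 46 to priority 5 mapping are purely illustrative):

	static int example_trust_dscp(struct mlx5_core_dev *mdev)
	{
		int err;

		err = mlx5_set_trust_state(mdev, MLX5_QPTS_TRUST_DSCP);
		if (err)
			return err;

		/* e.g. steer DSCP 46 (EF) traffic to priority 5 */
		return mlx5_set_dscp2prio(mdev, 46, 5);
	}
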
Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/port.c | 99 +++++++++++++++++++ include/linux/mlx5/driver.h | 7 ++ include/linux/mlx5/mlx5_ifc.h | 20 ++++ include/linux/mlx5/port.h | 5 + 4 files changed, 131 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index b6553be841f9..c37d00cd472a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -971,3 +971,102 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode) return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_MTPPSE, 0, 1); } + +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + MLX5_SET(qpts_reg, in, trust_state, trust_state); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 1); + return err; +} + +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 0); + if (!err) + *trust_state = MLX5_GET(qpts_reg, out, trust_state); + + return err; +} + +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(qpdpm_reg, in, local_port, 1); + + /* Update the corresponding dscp entry */ + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1); + +out: + kfree(in); + kfree(out); + return err; +} + +/* dscp2prio[i]: priority that dscp i mapped to */ +#define MLX5E_SUPPORTED_DSCP 64 +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + int i; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) { + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]); + dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio); + } + +out: + kfree(in); + kfree(out); + return err; +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ed5be52282ea..a886b51511ab 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -107,8 +107,10 @@ enum { }; enum { + MLX5_REG_QPTS = 0x4002, MLX5_REG_QETCR = 0x4005, MLX5_REG_QTCT = 0x400a, + MLX5_REG_QPDPM = 0x4013, MLX5_REG_QCAM = 0x4019, MLX5_REG_DCBX_PARAM = 0x4020, MLX5_REG_DCBX_APP = 0x4021, @@ -142,6 +144,11 @@ enum { 
MLX5_REG_MCAM = 0x907f, }; +enum mlx5_qpts_trust_state { + MLX5_QPTS_TRUST_PCP = 1, + MLX5_QPTS_TRUST_DSCP = 2, +}; + enum mlx5_dcbx_oper_mode { MLX5E_DCBX_PARAM_VER_OPER_HOST = 0x0, MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f127c5b310c5..3e5363f760dd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8578,6 +8578,26 @@ struct mlx5_ifc_qetc_reg_bits { struct mlx5_ifc_ets_global_config_reg_bits global_configuration; }; +struct mlx5_ifc_qpdpm_dscp_reg_bits { + u8 e[0x1]; + u8 reserved_at_01[0x0b]; + u8 prio[0x04]; +}; + +struct mlx5_ifc_qpdpm_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; + struct mlx5_ifc_qpdpm_dscp_reg_bits dscp[64]; +}; + +struct mlx5_ifc_qpts_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x2d]; + u8 trust_state[0x3]; +}; + struct mlx5_ifc_qtct_reg_bits { u8 reserved_at_0[0x8]; u8 port_number[0x8]; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index c59af8ab753a..035f0d4dc9fe 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -179,4 +179,9 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); + +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); #endif /* __MLX5_PORT_H__ */ From 2a5e7a1344f4dff71bb921ee0c9ecf7f5932e570 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 18 Jul 2017 16:23:36 -0500 Subject: [PATCH 1826/2177] net/mlx5e: Add dcbnl dscp to priority support This patch implements dcbnl hooks to set and delete DSCP to priority map as defined by the DCB subsystem. Device maintains internal trust state which needs to be set to DSCP state for performing DSCP to priority mapping. When the first dscp to priority APP entry is added by the user, the trust state is changed to dscp. When the last dscp to priority APP entry is deleted by the user, the trust state is changed to pcp. If user sends multiple dscp to priority APP entries on the same dscp, the last sent one will take effect. All the previous sent will be deleted. The dscp to priority APP entries are added and deleted in the net/dcb APP database using dcb_ieee_setapp/getapp. 
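For reference, the APP entry the new hooks consume looks roughly like this (values illustrative; selector 5 is the DSCP selector added earlier in this series, protocol carries the DSCP value 0..63 and priority the mapped priority):

	struct dcb_app app = {
		.selector = IEEE_8021QAZ_APP_SEL_DSCP,	/* 5 */
		.protocol = 46,				/* DSCP value */
		.priority = 5,				/* mapped priority */
	};
	/* handed to the driver through the dcbnl ieee_setapp()/ieee_delapp() hooks */
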
Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 15 +- .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 204 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 15 +- 3 files changed, 232 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e613ce02216d..ab6f0c18850f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -57,6 +57,7 @@ #define MLX5E_HW2SW_MTU(priv, hwmtu) ((hwmtu) - ((priv)->hard_mtu)) #define MLX5E_SW2HW_MTU(priv, swmtu) ((swmtu) + ((priv)->hard_mtu)) +#define MLX5E_MAX_DSCP 64 #define MLX5E_MAX_NUM_TC 8 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 @@ -260,11 +261,17 @@ enum { struct mlx5e_dcbx { enum mlx5_dcbx_oper_mode mode; struct mlx5e_cee_config cee_cfg; /* pending configuration */ + u8 dscp_app_cnt; /* The only setting that cannot be read from FW */ u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; u8 cap; }; + +struct mlx5e_dcbx_dp { + u8 dscp2prio[MLX5E_MAX_DSCP]; + u8 trust_state; +}; #endif enum { @@ -742,6 +749,9 @@ struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct mlx5e_dcbx_dp dcbx_dp; +#endif /* priv data path fields - end */ unsigned long state; @@ -800,6 +810,8 @@ struct mlx5e_profile { mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; } rx_handlers; + void (*netdev_registered_init)(struct mlx5e_priv *priv); + void (*netdev_registered_remove)(struct mlx5e_priv *priv); int max_tc; }; @@ -968,6 +980,8 @@ extern const struct ethtool_ops mlx5e_ethtool_ops; extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); #endif #ifndef CONFIG_RFS_ACCEL @@ -1069,5 +1083,4 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 max_channels); - #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 51c4cc00a186..aa59c4324159 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -46,6 +46,13 @@ enum { MLX5E_LOWEST_PRIO_GROUP = 0, }; +#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \ + MLX5_CAP_QCAM_REG(mdev, qpts) && \ + MLX5_CAP_QCAM_REG(mdev, qpdpm)) + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state); +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio); + /* If dcbx mode is non-host set the dcbx mode to host. 
*/ static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv, @@ -381,6 +388,113 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) return 0; } +static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct dcb_app temp; + bool is_new; + int err; + + if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP) + return -EINVAL; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return -EINVAL; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EINVAL; + + if (app->protocol >= MLX5E_MAX_DSCP) + return -EINVAL; + + /* Save the old entry info */ + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + temp.protocol = app->protocol; + temp.priority = priv->dcbx_dp.dscp2prio[app->protocol]; + + /* Check if need to switch to dscp trust state */ + if (!priv->dcbx.dscp_app_cnt) { + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP); + if (err) + return err; + } + + /* Skip the fw command if new and old mapping are the same */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) { + err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority); + if (err) + goto fw_err; + } + + /* Delete the old entry if exists */ + is_new = false; + err = dcb_ieee_delapp(dev, &temp); + if (err) + is_new = true; + + /* Add new entry and update counter */ + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + if (is_new) + priv->dcbx.dscp_app_cnt++; + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + +static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP) + return -EINVAL; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return -EINVAL; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EINVAL; + + if (app->protocol >= MLX5E_MAX_DSCP) + return -EINVAL; + + /* Skip if no dscp app entry */ + if (!priv->dcbx.dscp_app_cnt) + return -ENOENT; + + /* Check if the entry matches fw setting */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) + return -ENOENT; + + /* Delete the app entry */ + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + /* Reset the priority mapping back to zero */ + err = mlx5e_set_dscp2prio(priv, app->protocol, 0); + if (err) + goto fw_err; + + priv->dcbx.dscp_app_cnt--; + + /* Check if need to switch to pcp trust state */ + if (!priv->dcbx.dscp_app_cnt) + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev, struct ieee_maxrate *maxrate) { @@ -740,6 +854,8 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate, .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc, .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc, + .ieee_setapp = mlx5e_dcbnl_ieee_setapp, + .ieee_delapp = mlx5e_dcbnl_ieee_delapp, .getdcbx = mlx5e_dcbnl_getdcbx, .setdcbx = mlx5e_dcbnl_setdcbx, @@ -801,10 +917,98 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) mlx5e_dcbnl_ieee_setets_core(priv, &ets); } +enum { + INIT, + DELETE, +}; + +static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action) +{ + struct dcb_app temp; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return; + + /* No SEL_DSCP entry in non DSCP state */ + if (priv->dcbx_dp.trust_state != 
MLX5_QPTS_TRUST_DSCP) + return; + + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + for (i = 0; i < MLX5E_MAX_DSCP; i++) { + temp.protocol = i; + temp.priority = priv->dcbx_dp.dscp2prio[i]; + if (action == INIT) + dcb_ieee_setapp(priv->netdev, &temp); + else + dcb_ieee_delapp(priv->netdev, &temp); + } + + priv->dcbx.dscp_app_cnt = (action == INIT) ? MLX5E_MAX_DSCP : 0; +} + +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, INIT); +} + +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, DELETE); +} + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) +{ + int err; + + err = mlx5_set_trust_state(priv->mdev, trust_state); + if (err) + return err; + priv->dcbx_dp.trust_state = trust_state; + + return err; +} + +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio) +{ + int err; + + err = mlx5_set_dscp2prio(priv->mdev, dscp, prio); + if (err) + return err; + + priv->dcbx_dp.dscp2prio[dscp] = prio; + return err; +} + +static int mlx5e_trust_initialize(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!MLX5_DSCP_SUPPORTED(mdev)) + return 0; + + err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state); + if (err) + return err; + + err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio); + if (err) + return err; + + return 0; +} + void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) { struct mlx5e_dcbx *dcbx = &priv->dcbx; + mlx5e_trust_initialize(priv); + if (!MLX5_CAP_GEN(priv->mdev, qos)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 28ae00b3eb88..8633476fb536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4374,7 +4374,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) if (netdev->reg_state != NETREG_REGISTERED) return; - +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_init_app(priv); +#endif /* Device already registered: sync netdev system state */ if (mlx5e_vxlan_allowed(mdev)) { rtnl_lock(); @@ -4395,6 +4397,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->netdev->reg_state == NETREG_REGISTERED) + mlx5e_dcbnl_delete_app(priv); +#endif + rtnl_lock(); if (netif_running(priv->netdev)) mlx5e_close(priv->netdev); @@ -4615,6 +4622,9 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) goto err_detach; } +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_init_app(priv); +#endif return priv; err_detach: @@ -4631,6 +4641,9 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) struct mlx5e_priv *priv = vpriv; void *ppriv = priv->ppriv; +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_delete_app(priv); +#endif unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); From fbcb127e89ba8a4ccbec609a27f8d110474044c8 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 18 Jul 2017 16:34:51 -0500 Subject: [PATCH 1827/2177] net/mlx5e: Support DSCP trust state to Ethernet's IP packet on SQ If the port is in DSCP trust state, packets are placed in the right priority queue based on the dscp value. This is done by selecting the transmit queue based on the dscp of the skb. Until now select_queue honors priority only from the vlan header. 
However that is not sufficient in cases where port trust state is DSCP mode as packet might not even contain vlan header. Therefore if the port is in dscp trust state and vport's min inline mode is not NONE, copy the IP header to the eseg's inline header if the skb has it. This is done by changing the transmit queue sq's min inline mode to L3. Note that the min inline mode of sqs that belong to other features such as xdpsq, icosq are not modified. Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../ethernet/mellanox/mlx5/core/en_common.c | 12 ++++++ .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 37 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 5 +-- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 24 +++++++++++- 5 files changed, 73 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ab6f0c18850f..fae7b62d173f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1083,4 +1083,5 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 max_channels); +u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 157d02917237..784e282803db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -171,3 +171,15 @@ out: return err; } + +u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev) +{ + u8 min_inline_mode; + + mlx5_query_min_inline(mdev, &min_inline_mode); + if (min_inline_mode == MLX5_INLINE_MODE_NONE && + !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + min_inline_mode = MLX5_INLINE_MODE_L2; + + return min_inline_mode; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index aa59c4324159..b402d69a701b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -960,6 +960,40 @@ void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) mlx5e_dcbnl_dscp_app(priv, DELETE); } +static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev); + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP && + params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) + params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; +} + +static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) +{ + struct mlx5e_channels new_channels = {}; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + new_channels.params = priv->channels.params; + mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params); + + /* Skip if tx_min_inline is the same */ + if (new_channels.params.tx_min_inline_mode == + priv->channels.params.tx_min_inline_mode) + goto out; + + if (mlx5e_open_channels(priv, &new_channels)) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + +out: + mutex_unlock(&priv->state_lock); +} + static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) { int err; @@ -968,6 +1002,7 @@ 
static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) if (err) return err; priv->dcbx_dp.trust_state = trust_state; + mlx5e_trust_update_sq_inline_mode(priv); return err; } @@ -996,6 +1031,8 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) if (err) return err; + mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params); + err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8633476fb536..a97ee38143aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4071,10 +4071,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* TX inline */ params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); - mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); - if (params->tx_min_inline_mode == MLX5_INLINE_MODE_NONE && - !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - params->tx_min_inline_mode = MLX5_INLINE_MODE_L2; + params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); /* RSS */ params->rss_hfunc = ETH_RSS_HASH_XOR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index a7c208a1ad83..de651de35c9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -32,6 +32,7 @@ #include #include +#include #include "en.h" #include "ipoib/ipoib.h" #include "en_accel/ipsec_rxtx.h" @@ -86,6 +87,20 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) } } +#ifdef CONFIG_MLX5_CORE_EN_DCB +static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -97,8 +112,13 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, if (!netdev_get_num_tc(dev)) return channel_ix; - if (skb_vlan_tag_present(skb)) - up = skb->vlan_tci >> VLAN_PRIO_SHIFT; +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) + up = mlx5e_get_dscp_up(priv, skb); + else +#endif + if (skb_vlan_tag_present(skb)) + up = skb->vlan_tci >> VLAN_PRIO_SHIFT; /* channel_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc From 79c48764e1da40341b0e8149417c00efc9849b43 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 28 Jul 2015 09:35:31 +0300 Subject: [PATCH 1828/2177] net/mlx5e: Add support for ethtool msglvl support Use ethtool -s msglvl on/off to toggle debug messages. 
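A sketch of the intended use (the message text below is made up; mlx5e_dbg() and the per-netdev msglevel are what this patch adds): a driver path prints through mlx5e_dbg() and the output only appears once the matching message type is enabled, e.g. via `ethtool -s <dev> msglvl hw on'.

	/* Emitted only when NETIF_MSG_HW is set in priv->msglevel */
	mlx5e_dbg(HW, priv, "%s: example debug print\n", __func__);
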
Signed-off-by: Gal Pressman Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 11 +++++++++++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 13 +++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + 3 files changed, 25 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index fae7b62d173f..8c872e2e1aa0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -127,6 +127,16 @@ #define MLX5E_NUM_MAIN_GROUPS 9 +#define MLX5E_MSG_LEVEL NETIF_MSG_LINK + +#define mlx5e_dbg(mlevel, priv, format, ...) \ +do { \ + if (NETIF_MSG_##mlevel & (priv)->msglevel) \ + netdev_warn(priv->netdev, format, \ + ##__VA_ARGS__); \ +} while (0) + + static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { switch (wq_type) { @@ -754,6 +764,7 @@ struct mlx5e_priv { #endif /* priv data path fields - end */ + u32 msglevel; unsigned long state; struct mutex state_lock; /* Protects Interface state */ struct mlx5e_rq drop_rq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b34aa8efb036..63d1ac695a75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1340,6 +1340,16 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static u32 mlx5e_get_msglevel(struct net_device *dev) +{ + return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; +} + +static void mlx5e_set_msglevel(struct net_device *dev, u32 val) +{ + ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val; +} + static int mlx5e_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state) { @@ -1672,4 +1682,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, + .get_msglevel = mlx5e_get_msglevel, + .set_msglevel = mlx5e_set_msglevel, + }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a97ee38143aa..73d7c672c4ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4091,6 +4091,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->netdev = netdev; priv->profile = profile; priv->ppriv = ppriv; + priv->msglevel = MLX5E_MSG_LEVEL; priv->hard_mtu = MLX5E_ETH_HARD_MTU; mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); From 5da8bc3effb61f0f165ca45c80f4818a234c9f91 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Sun, 18 Jun 2017 09:47:35 +0300 Subject: [PATCH 1829/2177] net/mlx5e: DCBNL, Add debug messages log Add debug print when changing the configuration of QoS through dcbnl. Use ethtool -s msglvl hw on/off to toggle debug messages. 
Signed-off-by: Inbar Karmy Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index b402d69a701b..c6d90b6dd80e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -241,7 +241,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; u8 tc_group[IEEE_8021QAZ_MAX_TCS]; int max_tc = mlx5_max_tc(mdev); - int err; + int err, i; mlx5e_build_tc_group(ets, tc_group, max_tc); mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc); @@ -260,6 +260,14 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) return err; memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n", + __func__, i, ets->prio_tc[i]); + mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", + __func__, i, tc_tx_bw[i], tc_group[i]); + } + return err; } @@ -345,6 +353,11 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); mlx5_toggle_port_link(mdev); + if (!ret) { + mlx5e_dbg(HW, priv, + "%s: PFC per priority bit mask: 0x%x\n", + __func__, pfc->pfc_en); + } return ret; } @@ -560,6 +573,11 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, } } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n", + __func__, i, max_bw_value[i]); + } + return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); } @@ -585,6 +603,10 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i]; ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; + mlx5e_dbg(HW, priv, + "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", + __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], + ets.prio_tc[i]); } err = mlx5e_dbcnl_validate_ets(netdev, &ets); From 21b9c1449d21f347e57ba5e69eec460066e5182a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 12 Jan 2017 16:19:29 +0200 Subject: [PATCH 1830/2177] net/mlx5: Enlarge the NIC TC offload table size The NIC TC offload table size was hard coded to 1k. Change it to be min(max NIC RX table size, min(max flow counters, 64k) * num flow groups) where the max values are read from the firmware and the number of flow groups is hard-coded as before this change. We don't know upfront the division of flows to groups (== different masks). This setup allows each group to be of size up to the where we want to go (when supported, all offloaded flows use counters). Thus, we don't expect multiple occurences for a group which in turn would add steering hops. 
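A worked example of the new sizing (numbers illustrative, not taken from any particular device): with max_flow_counter = 32768 and log_max_ft_size = 16, the group size is min(32768, 65536) = 32768 and the table size is min(32768 * 4 groups, 2^16) = 65536 entries, i.e. the NIC RX table capability is what caps the result here.
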
Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9ba1f72060aa..55979ec2e88a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -90,8 +90,8 @@ enum { MLX5_HEADER_TYPE_NVGRE = 0x1, }; -#define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_MAX_GROUP_SIZE (1 << 16) struct mod_hdr_key { int num_actions; @@ -263,10 +263,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (IS_ERR_OR_NULL(priv->fs.tc.t)) { + int tc_grp_size, tc_tbl_size; + u32 max_flow_counter; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); + + tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, + BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, MLX5E_TC_PRIO, - MLX5E_TC_TABLE_NUM_ENTRIES, + tc_tbl_size, MLX5E_TC_TABLE_NUM_GROUPS, 0, 0); if (IS_ERR(priv->fs.tc.t)) { From 4c5009c5256d065696d280f3a8f16af090bea3e2 Mon Sep 17 00:00:00 2001 From: Rabie Loulou Date: Wed, 18 Oct 2017 17:58:42 +0300 Subject: [PATCH 1831/2177] net/mlx5: Initialize destination_flow struct to 0 This is needed in order to enlarge it with more members that will get value of 0 when not set. Signed-off-by: Rabie Loulou Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 12d3ced61114..610d485c4b03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -92,7 +92,7 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) static int arfs_disable(struct mlx5e_priv *priv) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5e_tir *tir = priv->indir_tir; int err = 0; int tt; @@ -126,7 +126,7 @@ int mlx5e_arfs_disable(struct mlx5e_priv *priv) int mlx5e_arfs_enable(struct mlx5e_priv *priv) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; int err = 0; int tt; int i; @@ -175,7 +175,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; struct mlx5e_tir *tir = priv->indir_tir; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; enum mlx5e_traffic_types tt; @@ -466,7 +466,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct 
arfs_table *arfs_table; struct mlx5_flow_spec *spec; @@ -557,7 +557,7 @@ out: static void arfs_modify_rule_rq(struct mlx5e_priv *priv, struct mlx5_flow_handle *rule, u16 rxq) { - struct mlx5_flow_destination dst; + struct mlx5_flow_destination dst = {}; int err = 0; dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 850cdc980ab5..8016c8aa946d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -162,7 +162,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, u16 vid, struct mlx5_flow_spec *spec) { struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rule_p; MLX5_DECLARE_FLOW_ACT(flow_act); int err = 0; @@ -738,7 +738,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5e_ttc_table *ttc; struct mlx5_flow_handle **rules; struct mlx5_flow_table *ft; @@ -909,7 +909,7 @@ mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rules; struct mlx5e_ttc_table *ttc; struct mlx5_flow_table *ft; @@ -1106,7 +1106,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type) { struct mlx5_flow_table *ft = priv->fs.l2.ft.t; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; int err = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c77f4c0c7769..bbb140f517c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -157,7 +157,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, MLX5_MATCH_OUTER_HEADERS); struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_spec *spec; void *mv_misc = NULL; void *mc_misc = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d9fd8570b07c..1143d80119bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -306,7 +306,7 @@ static struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; @@ -395,7 +395,7 @@ out_err: static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_spec *spec; int err = 0; @@ -670,7 +670,7 @@ struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) { struct mlx5_flow_act flow_act = {0}; - 
struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c7fa1389bace..c70fd663a633 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -912,7 +912,7 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, struct mlx5_flow_table *new_next_ft, struct mlx5_flow_table *old_next_ft) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_rule *iter; int err = 0; @@ -1820,7 +1820,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, int dest_num) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); - struct mlx5_flow_destination gen_dest; + struct mlx5_flow_destination gen_dest = {}; struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_handle *handle = NULL; u32 sw_action = flow_act->action; From 458821c72bd02fcd484b9e46526c55e4ab6f57a4 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Thu, 7 Sep 2017 16:05:10 +0300 Subject: [PATCH 1832/2177] net/mlx5e: IPoIB, Add inner TTC table to IPoIB flow steering For supported platforms, add inner TTC flow table to enhanced IPoIB flow steering. Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 4 ++-- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 14 ++++++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8c872e2e1aa0..95facdf62c77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1045,6 +1045,9 @@ void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); int mlx5e_create_ttc_table(struct mlx5e_priv *priv); void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv); +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv); + int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, u32 underlay_qpn, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 8016c8aa946d..f0d11ad05ed2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1005,7 +1005,7 @@ err: return err; } -static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; struct mlx5_flow_table_attr ft_attr = {}; @@ -1041,7 +1041,7 @@ err: return err; } -static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index abf270d7f556..d2a66dc4adc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -255,15 +255,24 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv); + 
err = mlx5e_create_inner_ttc_table(priv); if (err) { - netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", err); goto err_destroy_arfs_tables; } + err = mlx5e_create_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_inner_ttc_table; + } + return 0; +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -273,6 +282,7 @@ err_destroy_arfs_tables: static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); } From 0088cbbc4b66b287132a8a04b3e2509d44a6387c Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Tue, 26 Sep 2017 16:20:43 +0300 Subject: [PATCH 1833/2177] net/mlx5e: Enable CQE based moderation on TX CQ By using CQE based moderation on the TX CQ we can reduce the TX interrupt rate. Besides the benefit of fewer interrupts, this also allows the kernel to better utilize TSO. Since TSO has some CPU overhead, it might not aggregate when the CPU is under high stress. By reducing the interrupt rate and the CPU utilization, we can get better aggregation and better overall throughput. The feature is enabled by default and has a private flag in ethtool for control. Throughput, interrupt rate and TSO utilization improvements: (ConnectX-4Lx 40GbE, unidirectional, 1/16 TCP streams, 64B packets)
---------------------------------------------------------
Metric   | Streams | CQE Based | EQE Based | improvement
---------------------------------------------------------
BW       | 1       | 2.4Gb/s   | 2.15Gb/s  | +11.6%
IR       | 1       | 27Kips    | 50.6Kips  | -46.7%
TSO Util | 1       | 74.6%     | 71%       | +5%
BW       | 16      | 29Gb/s    | 25.85Gb/s | +12.2%
IR       | 16      | 482Kips   | 745Kips   | -35.3%
TSO Util | 16      | 69.1%     | 49%       | +41.1%
*BW = Bandwidth, IR = Interrupt rate, ips = interrupts per second. 
TSO Util = bytes in TSO sessions / all bytes transferred Signed-off-by: Tal Gilboa Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 ++++- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 39 +++++++++++++++---- .../net/ethernet/mellanox/mlx5/core/en_main.c | 38 ++++++++++++------ .../ethernet/mellanox/mlx5/core/en_rx_am.c | 8 +++- 4 files changed, 71 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 95facdf62c77..751f62cae969 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -106,6 +106,7 @@ #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 @@ -198,12 +199,14 @@ extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { "rx_cqe_moder", + "tx_cqe_moder", "rx_cqe_compress", }; enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), - MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1), + MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), + MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), }; #define MLX5E_SET_PFLAG(params, pflag, enable) \ @@ -223,6 +226,7 @@ enum mlx5e_priv_flag { struct mlx5e_cq_moder { u16 usec; u16 pkts; + u8 cq_period_mode; }; struct mlx5e_params { @@ -234,7 +238,6 @@ struct mlx5e_params { u8 log_rq_size; u16 num_channels; u8 num_tc; - u8 rx_cq_period_mode; bool rx_cqe_compress_def; struct mlx5e_cq_moder rx_cq_moderation; struct mlx5e_cq_moder tx_cq_moderation; @@ -926,6 +929,8 @@ void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, + u8 cq_period_mode); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 63d1ac695a75..23425f028405 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1454,29 +1454,36 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); -static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, + bool is_rx_cq) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; - bool rx_mode_changed; - u8 rx_cq_period_mode; + bool mode_changed; + u8 cq_period_mode, current_cq_period_mode; int err = 0; - rx_cq_period_mode = enable ? + cq_period_mode = enable ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - rx_mode_changed = rx_cq_period_mode != priv->channels.params.rx_cq_period_mode; + current_cq_period_mode = is_rx_cq ? 
+ priv->channels.params.rx_cq_moderation.cq_period_mode : + priv->channels.params.tx_cq_moderation.cq_period_mode; + mode_changed = cq_period_mode != current_cq_period_mode; - if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) return -EOPNOTSUPP; - if (!rx_mode_changed) + if (!mode_changed) return 0; new_channels.params = priv->channels.params; - mlx5e_set_rx_cq_mode_params(&new_channels.params, rx_cq_period_mode); + if (is_rx_cq) + mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode); + else + mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -1491,6 +1498,16 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) return 0; } +static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, false); +} + +static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, true); +} + int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) { bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); @@ -1578,6 +1595,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) if (err) goto out; + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_TX_CQE_BASED_MODER, + set_pflag_tx_cqe_based_moder); + if (err) + goto out; + err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_RX_CQE_COMPRESS, set_pflag_rx_cqe_compress); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 73d7c672c4ff..d1c3dc946486 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -681,7 +681,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, } INIT_WORK(&rq->am.work, mlx5e_rx_am_work); - rq->am.mode = params->rx_cq_period_mode; + rq->am.mode = params->rx_cq_moderation.cq_period_mode; rq->page_cache.head = 0; rq->page_cache.tail = 0; @@ -1974,7 +1974,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, } mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->rx_cq_period_mode; + param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, @@ -1986,8 +1986,7 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; } static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, @@ -3987,14 +3986,32 @@ static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw) (pci_bw <= 16000) && (pci_bw < link_speed)); } +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + params->tx_cq_moderation.cq_period_mode = cq_period_mode; + + params->tx_cq_moderation.pkts = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + params->tx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + params->tx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + MLX5E_SET_PFLAG(params, 
MLX5E_PFLAG_TX_CQE_BASED_MODER, + params->tx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) { - params->rx_cq_period_mode = cq_period_mode; + params->rx_cq_moderation.cq_period_mode = cq_period_mode; params->rx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; params->rx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) params->rx_cq_moderation.usec = @@ -4002,10 +4019,11 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) if (params->rx_am_enabled) params->rx_cq_moderation = - mlx5e_am_get_def_profile(params->rx_cq_period_mode); + mlx5e_am_get_def_profile(cq_period_mode); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, - params->rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + params->rx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); } u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) @@ -4065,9 +4083,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - - params->tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - params->tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + mlx5e_set_tx_cq_mode_params(params, cq_period_mode); /* TX inline */ params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index acf32fe952cd..e401d9d245f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -63,7 +63,11 @@ profile[MLX5_CQ_PERIOD_NUM_MODES][MLX5E_PARAMS_AM_NUM_PROFILES] = { static inline struct mlx5e_cq_moder mlx5e_am_get_profile(u8 cq_period_mode, int ix) { - return profile[cq_period_mode][ix]; + struct mlx5e_cq_moder cq_moder; + + cq_moder = profile[cq_period_mode][ix]; + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; } struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode) @@ -75,7 +79,7 @@ struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode) else /* MLX5_CQ_PERIOD_MODE_START_FROM_EQE */ default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_EQE; - return profile[rx_cq_period_mode][default_profile_ix]; + return mlx5e_am_get_profile(rx_cq_period_mode, default_profile_ix); } /* Adaptive moderation logic */ From 7cbebc8a142238da3c2966f87b81ace491c8f089 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 2 Nov 2017 17:04:36 -0200 Subject: [PATCH 1834/2177] net: export peernet2id_alloc It will be used by openvswitch. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/core/net_namespace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6cfdc7c84c48..b797832565d3 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; } +EXPORT_SYMBOL_GPL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. 
*/ int peernet2id(struct net *net, struct net *peer) From 9354d452034273a50a4fd703bea31e5d6b1fc20b Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 2 Nov 2017 17:04:37 -0200 Subject: [PATCH 1835/2177] openvswitch: reliable interface indentification in port dumps This patch allows reliable identification of netdevice interfaces connected to openvswitch bridges. In particular, user space queries the netdev interfaces belonging to the ports for statistics, up/down state, etc. Datapath dump needs to provide enough information for the user space to be able to do that. Currently, only interface names are returned. This is not sufficient, as openvswitch allows its ports to be in different name spaces and the interface name is valid only in its name space. What is needed and generally used in other netlink APIs, is the pair ifindex+netnsid. The solution is addition of the ifindex+netnsid pair (or only ifindex if in the same name space) to vport get/dump operation. On request side, ideally the ifindex+netnsid pair could be used to get/set/del the corresponding vport. This is not implemented by this patch and can be added later if needed. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 ++ net/openvswitch/datapath.c | 47 +++++++++++++++++++++++--------- net/openvswitch/datapath.h | 4 +-- net/openvswitch/dp_notify.c | 4 +-- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index ffe397daad49..501e4c4e2a03 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -258,6 +258,8 @@ enum ovs_vport_attr { /* receiving upcalls */ OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ OVS_VPORT_ATTR_PAD, + OVS_VPORT_ATTR_IFINDEX, + OVS_VPORT_ATTR_NETNSID, __OVS_VPORT_ATTR_MAX }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index c3aec6227c91..4d38ac044cee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1848,7 +1848,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { /* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 portid, u32 seq, u32 flags, u8 cmd) + struct net *net, u32 portid, u32 seq, + u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; @@ -1864,9 +1865,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, - ovs_vport_name(vport))) + ovs_vport_name(vport)) || + nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; + if (!net_eq(net, dev_net(vport->dev))) { + int id = peernet2id_alloc(net, dev_net(vport->dev)); + + if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) + goto nla_put_failure; + } + ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), &vport_stats, @@ -1896,8 +1905,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void) } /* Called with ovs_mutex, only via ovs_dp_notify_wq(). 
*/ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, - u32 seq, u8 cmd) +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -1906,7 +1915,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; @@ -1920,6 +1929,8 @@ static struct vport *lookup_vport(struct net *net, struct datapath *dp; struct vport *vport; + if (a[OVS_VPORT_ATTR_IFINDEX]) + return ERR_PTR(-EOPNOTSUPP); if (a[OVS_VPORT_ATTR_NAME]) { vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); if (!vport) @@ -1944,6 +1955,7 @@ static struct vport *lookup_vport(struct net *net, return vport; } else return ERR_PTR(-EINVAL); + } /* Called with ovs_mutex */ @@ -1983,6 +1995,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) return -EINVAL; + if (a[OVS_VPORT_ATTR_IFINDEX]) + return -EOPNOTSUPP; port_no = a[OVS_VPORT_ATTR_PORT_NO] ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; @@ -2032,8 +2046,9 @@ restart: goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) update_headroom(dp); @@ -2090,8 +2105,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); BUG_ON(err < 0); ovs_unlock(); @@ -2128,8 +2144,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_DEL); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_DEL); BUG_ON(err < 0); /* the vport deletion may trigger dp headroom update */ @@ -2169,8 +2186,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock_free; - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); BUG_ON(err < 0); rcu_read_unlock(); @@ -2202,6 +2220,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, + sock_net(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -2228,6 +2247,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, + [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_NETNSID] = { .type 
= NLA_S32 }, }; static const struct genl_ops dp_vport_genl_ops[] = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 480600649d0b..4a104ef9e12c 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -200,8 +200,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *, uint32_t cutlen); const char *ovs_dp_name(const struct datapath *dp); -struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, - u8 cmd); +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, + u32 portid, u32 seq, u8 cmd); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_actions *, struct sw_flow_key *); diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index 653d073bae45..f3ee2f2825c0 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -30,8 +30,8 @@ static void dp_detach_port_notify(struct vport *vport) struct datapath *dp; dp = vport->dp; - notify = ovs_vport_cmd_build_info(vport, 0, 0, - OVS_VPORT_CMD_DEL); + notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp), + 0, 0, OVS_VPORT_CMD_DEL); ovs_dp_detach_port(vport); if (IS_ERR(notify)) { genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0, From 79e1ad148c844f5c8b9d76b36b26e3886dca95ae Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 2 Nov 2017 17:04:38 -0200 Subject: [PATCH 1836/2177] rtnetlink: use netnsid to query interface Currently, when an application gets a netnsid from the kernel (for example as the result of an RTM_GETLINK call on one end of a veth pair), it is not very useful. There's no reliable way to get to the netns fd from the netnsid, nor does any kernel API accept netnsid. Extend the RTM_GETLINK call to also accept netnsid. In that case, it will operate on the netns with the given netnsid. Of course, the calling process needs to have enough capabilities in the target name space; for now, require CAP_NET_ADMIN. This can be relaxed in the future. To signal to the calling process that the kernel understood the new IFLA_IF_NETNSID attribute in the query, it will include it in the response. This is needed to detect older kernels, as they will just ignore IFLA_IF_NETNSID and query in the current name space. This patch implements IFLA_IF_NETNSID only for get and dump. For set operations, this can be extended later. Signed-off-by: Jiri Benc Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 1 + net/core/rtnetlink.c | 103 +++++++++++++++++++++++++++++------ 2 files changed, 86 insertions(+), 18 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b3cf5639ac8f..19fc02660e0c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -160,6 +160,7 @@ enum { IFLA_XDP, IFLA_EVENT, IFLA_NEW_NETNSID, + IFLA_IF_NETNSID, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index de24d394c69e..8a8c51937edf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -921,7 +921,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ - + + nla_total_size(4) /* IFLA_IF_NETNSID */ + + 0; } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -1370,13 +1371,14 @@ static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, } static int rtnl_fill_link_netnsid(struct sk_buff *skb, - const struct net_device *dev) + const struct net_device *dev, + struct net *src_net) { if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(dev_net(dev), link_net); + int id = peernet2id_alloc(src_net, link_net); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) return -EMSGSIZE; @@ -1427,10 +1429,11 @@ static int rtnl_fill_link_af(struct sk_buff *skb, return 0; } -static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, +static int rtnl_fill_ifinfo(struct sk_buff *skb, + struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, - u32 event, int *new_nsid) + u32 event, int *new_nsid, int tgt_netnsid) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1448,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = change; + if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid)) + goto nla_put_failure; + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || nla_put_u8(skb, IFLA_OPERSTATE, @@ -1513,7 +1519,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } - if (rtnl_fill_link_netnsid(skb, dev)) + if (rtnl_fill_link_netnsid(skb, dev, src_net)) goto nla_put_failure; if (new_nsid && @@ -1571,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_XDP] = { .type = NLA_NESTED }, [IFLA_EVENT] = { .type = NLA_U32 }, [IFLA_GROUP] = { .type = NLA_U32 }, + [IFLA_IF_NETNSID] = { .type = NLA_S32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1674,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev, return false; } +static struct net *get_target_net(struct sk_buff *skb, int netnsid) +{ + struct net *net; + + net = get_net_ns_by_id(sock_net(skb->sk), netnsid); + if (!net) + return ERR_PTR(-EINVAL); + + /* For now, the caller is required to have CAP_NET_ADMIN in + * the user namespace owning the target net ns. 
+ */ + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return ERR_PTR(-EACCES); + } + return net; +} + static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct net *tgt_net = net; int h, s_h; int idx = 0, s_idx; struct net_device *dev; @@ -1686,6 +1712,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) const struct rtnl_link_ops *kind_ops = NULL; unsigned int flags = NLM_F_MULTI; int master_idx = 0; + int netnsid = -1; int err; int hdrlen; @@ -1704,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { + if (tb[IFLA_IF_NETNSID]) { + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); + tgt_net = get_target_net(skb, netnsid); + if (IS_ERR(tgt_net)) { + tgt_net = net; + netnsid = -1; + } + } + if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1719,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; - head = &net->dev_index_head[h]; + head = &tgt_net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (link_dump_filtered(dev, master_idx, kind_ops)) goto cont; if (idx < s_idx) goto cont; - err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + err = rtnl_fill_ifinfo(skb, dev, net, + RTM_NEWLINK, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0, NULL); + ext_filter_mask, 0, NULL, + netnsid); if (err < 0) { if (likely(skb->len)) @@ -1748,6 +1786,8 @@ out_err: cb->args[0] = h; cb->seq = net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + if (netnsid >= 0) + put_net(tgt_net); return err; } @@ -2360,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -2454,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); @@ -2585,6 +2631,9 @@ replay: if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -2818,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct net *tgt_net = net; struct ifinfomsg *ifm; char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; + int netnsid = -1; int err; u32 ext_filter_mask = 0; @@ -2830,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) { + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); + tgt_net = get_target_net(skb, netnsid); + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); + } + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(net, ifm->ifi_index); + dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(net, ifname); + dev = 
__dev_get_by_name(tgt_net, ifname); else - return -EINVAL; + goto out; + err = -ENODEV; if (dev == NULL) - return -ENODEV; + goto out; + err = -ENOBUFS; nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); if (nskb == NULL) - return -ENOBUFS; + goto out; - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL); + err = rtnl_fill_ifinfo(nskb, dev, net, + RTM_NEWLINK, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, 0, 0, ext_filter_mask, + 0, NULL, netnsid); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); +out: + if (netnsid >= 0) + put_net(tgt_net); return err; } @@ -2948,8 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event, - new_nsid); + err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), + type, 0, 0, change, 0, 0, event, + new_nsid, -1); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); From f21506cb42112b1c0b391dae7a700e69a42128e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Nov 2017 13:52:24 +0100 Subject: [PATCH 1837/2177] dpaa_eth: avoid uninitialized variable false-positive warning We can now build this driver on ARM, so I ran into a randconfig build warning that presumably had existed on powerpc already. drivers/net/ethernet/freescale/dpaa/dpaa_eth.c: In function 'sg_fd_to_skb': drivers/net/ethernet/freescale/dpaa/dpaa_eth.c:1712:18: error: 'skb' may be used uninitialized in this function [-Werror=maybe-uninitialized] I'm slightly changing the logic here, to make it obvious to the compiler that 'skb' is always initialized. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 969f6b12952e..ebc55b6a6349 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1721,6 +1721,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, /* Iterate through the SGT entries and add data buffers to the skb */ sgt = vaddr + fd_off; + skb = NULL; for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) { /* Extension bit is not supported */ WARN_ON(qm_sg_entry_is_ext(&sgt[i])); @@ -1738,7 +1739,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size, DMA_FROM_DEVICE); - if (i == 0) { + if (!skb) { sz = dpaa_bp->size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); skb = build_skb(sg_vaddr, sz); From d0f36847016276920d860d5c089934ff3fea7e30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Nov 2017 06:09:17 -0700 Subject: [PATCH 1838/2177] tcp: tcp_mtu_probing() cleanup Reduce one indentation level to make code more readable. tcp_sync_mss() can be factorized. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_timer.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 035a1ef1f2d8..16df6dd44b98 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -107,26 +107,23 @@ static int tcp_orphan_retries(struct sock *sk, bool alive) static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { - struct net *net = sock_net(sk); + const struct net *net = sock_net(sk); + int mss; /* Black hole detection */ - if (net->ipv4.sysctl_tcp_mtu_probing) { - if (!icsk->icsk_mtup.enabled) { - icsk->icsk_mtup.enabled = 1; - icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - } else { - struct net *net = sock_net(sk); - struct tcp_sock *tp = tcp_sk(sk); - int mss; + if (!net->ipv4.sysctl_tcp_mtu_probing) + return; - mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, 68 - tp->tcp_header_len); - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - } + if (!icsk->icsk_mtup.enabled) { + icsk->icsk_mtup.enabled = 1; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; + } else { + mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; + mss = min(net->ipv4.sysctl_tcp_base_mss, mss); + mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } From 35e00da36cf42d54270ce25110ce304d12b18586 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Nov 2017 06:18:59 -0700 Subject: [PATCH 1839/2177] tcp: do not clear again skb->csum in tcp_init_nondata_skb() tcp_init_nondata_skb() is fed with freshly allocated skbs. They already have a cleared csum field, no need to clear it again. This is based on Neal review on commit 3b11775033dc ("tcp: do not mangle skb->cb[] in tcp_make_synack()"), noticing I did not clear skb->csum. Signed-off-by: Eric Dumazet Reported-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b98b2f7f07e7..a9d917e4dad5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -381,7 +381,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum = 0; TCP_SKB_CB(skb)->tcp_flags = flags; TCP_SKB_CB(skb)->sacked = 0; From 9525d69a36672f9505006b461fb5f7e8f580dfdd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 3 Nov 2017 09:31:53 -0500 Subject: [PATCH 1840/2177] net: plip: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 114893 Addresses-Coverity-ID: 114894 Addresses-Coverity-ID: 114895 Addresses-Coverity-ID: 114896 Addresses-Coverity-ID: 114897 Addresses-Coverity-ID: 114898 Addresses-Coverity-ID: 114899 Addresses-Coverity-ID: 114900 Addresses-Coverity-ID: 114901 Addresses-Coverity-ID: 114902 Addresses-Coverity-ID: 114903 Addresses-Coverity-ID: 114904 Addresses-Coverity-ID: 114905 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- drivers/net/plip/plip.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index 3c55ea357f35..feb92ecd1880 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -502,6 +502,7 @@ plip_receive(unsigned short nibble_timeout, struct net_device *dev, *data_p = (c0 >> 3) & 0x0f; write_data (dev, 0x10); /* send ACK */ *ns_p = PLIP_NB_1; + /* fall through */ case PLIP_NB_1: cx = nibble_timeout; @@ -597,6 +598,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, printk(KERN_DEBUG "%s: receive start\n", dev->name); rcv->state = PLIP_PK_LENGTH_LSB; rcv->nibble = PLIP_NB_BEGIN; + /* fall through */ case PLIP_PK_LENGTH_LSB: if (snd->state != PLIP_PK_DONE) { @@ -617,6 +619,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, return TIMEOUT; } rcv->state = PLIP_PK_LENGTH_MSB; + /* fall through */ case PLIP_PK_LENGTH_MSB: if (plip_receive(nibble_timeout, dev, @@ -639,6 +642,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, rcv->state = PLIP_PK_DATA; rcv->byte = 0; rcv->checksum = 0; + /* fall through */ case PLIP_PK_DATA: lbuf = rcv->skb->data; @@ -651,6 +655,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, rcv->checksum += lbuf[--rcv->byte]; } while (rcv->byte); rcv->state = PLIP_PK_CHECKSUM; + /* fall through */ case PLIP_PK_CHECKSUM: if (plip_receive(nibble_timeout, dev, @@ -663,6 +668,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, return ERROR; } rcv->state = PLIP_PK_DONE; + /* fall through */ case PLIP_PK_DONE: /* Inform the upper layer for the arrival of a packet. */ @@ -708,6 +714,7 @@ plip_send(unsigned short nibble_timeout, struct net_device *dev, case PLIP_NB_BEGIN: write_data (dev, data & 0x0f); *ns_p = PLIP_NB_1; + /* fall through */ case PLIP_NB_1: write_data (dev, 0x10 | (data & 0x0f)); @@ -722,6 +729,7 @@ plip_send(unsigned short nibble_timeout, struct net_device *dev, } write_data (dev, 0x10 | (data >> 4)); *ns_p = PLIP_NB_2; + /* fall through */ case PLIP_NB_2: write_data (dev, (data >> 4)); @@ -810,6 +818,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, &snd->nibble, snd->length.b.lsb)) return TIMEOUT; snd->state = PLIP_PK_LENGTH_MSB; + /* fall through */ case PLIP_PK_LENGTH_MSB: if (plip_send(nibble_timeout, dev, @@ -818,6 +827,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, snd->state = PLIP_PK_DATA; snd->byte = 0; snd->checksum = 0; + /* fall through */ case PLIP_PK_DATA: do { @@ -829,6 +839,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, snd->checksum += lbuf[--snd->byte]; } while (snd->byte); snd->state = PLIP_PK_CHECKSUM; + /* fall through */ case PLIP_PK_CHECKSUM: if (plip_send(nibble_timeout, dev, @@ -839,6 +850,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, dev_kfree_skb(snd->skb); dev->stats.tx_packets++; snd->state = PLIP_PK_DONE; + /* fall through */ case PLIP_PK_DONE: /* Close the connection */ @@ -927,6 +939,7 @@ plip_interrupt(void *dev_id) switch (nl->connection) { case PLIP_CN_CLOSING: netif_wake_queue (dev); + /* fall through */ case PLIP_CN_NONE: case PLIP_CN_SEND: rcv->state = PLIP_PK_TRIGGER; From 28e8c1914a20d020893978b67a6d2c618756bc3f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 3 Nov 2017 11:52:45 -0500 Subject: [PATCH 1841/2177] mISDN: l1oip_core: replace _manual_ swap with swap macro Make use of the swap macro and remove unnecessary variables skb and cnt. 
This makes the code easier to read and maintain. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/isdn/mISDN/l1oip_core.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index b5d590e378ac..e3654782a3e2 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -440,14 +440,8 @@ l1oip_socket_recv(struct l1oip *hc, u8 remotecodec, u8 channel, u16 timebase, #ifdef REORDER_DEBUG if (hc->chan[channel].disorder_flag) { - struct sk_buff *skb; - int cnt; - skb = hc->chan[channel].disorder_skb; - hc->chan[channel].disorder_skb = nskb; - nskb = skb; - cnt = hc->chan[channel].disorder_cnt; - hc->chan[channel].disorder_cnt = rx_counter; - rx_counter = cnt; + swap(hc->chan[channel].disorder_skb, nskb); + swap(hc->chan[channel].disorder_cnt, rx_counter); } hc->chan[channel].disorder_flag ^= 1; if (nskb) From f4e63525ee35f9c02e9f51f90571718363e9a9a9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:16 -0700 Subject: [PATCH 1842/2177] net: bpf: rename ndo_xdp to ndo_bpf ndo_xdp is a control path callback for setting up XDP in the driver. We can reuse it for other forms of communication between the eBPF stack and the drivers. Rename the callback and associated structures and definitions. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 2 +- .../net/ethernet/cavium/thunder/nicvf_main.c | 4 +-- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +-- .../net/ethernet/mellanox/mlx4/en_netdev.c | 6 ++-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 +-- .../ethernet/netronome/nfp/nfp_net_common.c | 4 +-- drivers/net/ethernet/qlogic/qede/qede.h | 2 +- .../net/ethernet/qlogic/qede/qede_filter.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 +-- drivers/net/tun.c | 4 +-- drivers/net/virtio_net.c | 4 +-- include/linux/netdevice.h | 23 +++++++------ net/core/dev.c | 34 +++++++++---------- net/core/rtnetlink.c | 4 +-- 17 files changed, 56 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4e3d569bf32e..96416f5d97f3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7775,7 +7775,7 @@ static const struct net_device_ops bnxt_netdev_ops = { #endif .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, - .ndo_xdp = bnxt_xdp, + .ndo_bpf = bnxt_xdp, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, .ndo_get_phys_port_name = bnxt_get_phys_port_name diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 06ce63c00821..261e5847557a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -208,7 +208,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) return 0; } -int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp) +int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct bnxt *bp = netdev_priv(dev); int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 12a5ad66b564..414b748038ca 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -16,6 +16,6 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event); -int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp); +int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); #endif diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 71989e180289..a063c36c4c58 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1741,7 +1741,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) return 0; } -static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) { struct nicvf *nic = netdev_priv(netdev); @@ -1774,7 +1774,7 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_tx_timeout = nicvf_tx_timeout, .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, - .ndo_xdp = nicvf_xdp, + .ndo_bpf = nicvf_xdp, }; static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index dfecaeda0654..05b94d87a6c3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11648,12 +11648,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, } /** - * i40e_xdp - implements ndo_xdp for i40e + * i40e_xdp - implements ndo_bpf for i40e * @dev: netdevice * @xdp: XDP command **/ static int i40e_xdp(struct net_device *dev, - struct netdev_xdp *xdp) + struct netdev_bpf *xdp) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; @@ -11705,7 +11705,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_features_check = i40e_features_check, .ndo_bridge_getlink = i40e_ndo_bridge_getlink, .ndo_bridge_setlink = i40e_ndo_bridge_setlink, - .ndo_xdp = i40e_xdp, + .ndo_bpf = i40e_xdp, }; /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 507977994a03..e5dcb25be398 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10004,7 +10004,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) return 0; } -static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct ixgbe_adapter *adapter = netdev_priv(dev); @@ -10113,7 +10113,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port, .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, .ndo_features_check = ixgbe_features_check, - .ndo_xdp = ixgbe_xdp, + .ndo_bpf = ixgbe_xdp, .ndo_xdp_xmit = ixgbe_xdp_xmit, .ndo_xdp_flush = ixgbe_xdp_flush, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d611df2f274d..736a6ccaf05e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2916,7 +2916,7 @@ static u32 mlx4_xdp_query(struct net_device *dev) 
return prog_id; } -static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -2958,7 +2958,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, - .ndo_xdp = mlx4_xdp, + .ndo_bpf = mlx4_xdp, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2995,7 +2995,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, - .ndo_xdp = mlx4_xdp, + .ndo_bpf = mlx4_xdp, }; struct mlx4_en_bond { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 28ae00b3eb88..3b7b7bb84eb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3831,7 +3831,7 @@ static u32 mlx5e_xdp_query(struct net_device *dev) return prog_id; } -static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -3883,7 +3883,7 @@ static const struct net_device_ops mlx5e_netdev_ops = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, - .ndo_xdp = mlx5e_xdp, + .ndo_bpf = mlx5e_xdp, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx5e_netpoll, #endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 185a3dd35a3f..f6c6ad4e8a59 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3378,7 +3378,7 @@ nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags, return 0; } -static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) { struct nfp_net *nn = netdev_priv(netdev); @@ -3441,7 +3441,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_get_phys_port_name = nfp_port_get_phys_port_name, .ndo_udp_tunnel_add = nfp_net_add_vxlan_port, .ndo_udp_tunnel_del = nfp_net_del_vxlan_port, - .ndo_xdp = nfp_net_xdp, + .ndo_bpf = nfp_net_xdp, }; /** diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index adb700512baa..a3a70ade411f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -503,7 +503,7 @@ void qede_fill_rss_params(struct qede_dev *edev, void qede_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti); void qede_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti); -int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp); +int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp); #ifdef CONFIG_DCB void qede_set_dcbnl_ops(struct net_device *ndev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f79e36e4060a..c1a0708a7d7c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1065,7 +1065,7 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog) return 0; } -int qede_xdp(struct net_device *dev, 
struct netdev_xdp *xdp) +int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct qede_dev *edev = netdev_priv(dev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index e5ee9f274a71..8f9b3eb82137 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -556,7 +556,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_udp_tunnel_add = qede_udp_tunnel_add, .ndo_udp_tunnel_del = qede_udp_tunnel_del, .ndo_features_check = qede_features_check, - .ndo_xdp = qede_xdp, + .ndo_bpf = qede_xdp, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = qede_rx_flow_steer, #endif @@ -594,7 +594,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = { .ndo_udp_tunnel_add = qede_udp_tunnel_add, .ndo_udp_tunnel_del = qede_udp_tunnel_del, .ndo_features_check = qede_features_check, - .ndo_xdp = qede_xdp, + .ndo_bpf = qede_xdp, }; /* ------------------------------------------------------------------------- diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 8125956f62a1..1a326b697221 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1141,7 +1141,7 @@ static u32 tun_xdp_query(struct net_device *dev) return 0; } -static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -1185,7 +1185,7 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = tun_net_get_stats64, - .ndo_xdp = tun_xdp, + .ndo_bpf = tun_xdp, }; static void tun_flow_init(struct tun_struct *tun) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fc059f193e7d..edf984406ba0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2088,7 +2088,7 @@ static u32 virtnet_xdp_query(struct net_device *dev) return 0; } -static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -2115,7 +2115,7 @@ static const struct net_device_ops virtnet_netdev = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = virtnet_netpoll, #endif - .ndo_xdp = virtnet_xdp, + .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_xdp_flush = virtnet_xdp_flush, .ndo_features_check = passthru_features_check, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7de7656550c2..9af9feaaeb64 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,10 +779,10 @@ enum tc_setup_type { TC_SETUP_CBS, }; -/* These structures hold the attributes of xdp state that are being passed - * to the netdevice through the xdp op. +/* These structures hold the attributes of bpf state that are being passed + * to the netdevice through the bpf op. */ -enum xdp_netdev_command { +enum bpf_netdev_command { /* Set or clear a bpf program used in the earliest stages of packet * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. 
The callee * is responsible for calling bpf_prog_put on any old progs that are @@ -801,8 +801,8 @@ enum xdp_netdev_command { struct netlink_ext_ack; -struct netdev_xdp { - enum xdp_netdev_command command; +struct netdev_bpf { + enum bpf_netdev_command command; union { /* XDP_SETUP_PROG */ struct { @@ -1124,9 +1124,10 @@ struct dev_ifalias { * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. - * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + * int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); * This function is used to set or query state related to XDP on the - * netdevice. See definition of enum xdp_netdev_command for details. + * netdevice and manage BPF offload. See definition of + * enum bpf_netdev_command for details. * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); * This function is used to submit a XDP packet for transmit on a * netdevice. @@ -1315,8 +1316,8 @@ struct net_device_ops { struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); - int (*ndo_xdp)(struct net_device *dev, - struct netdev_xdp *xdp); + int (*ndo_bpf)(struct net_device *dev, + struct netdev_bpf *bpf); int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); void (*ndo_xdp_flush)(struct net_device *dev); @@ -3311,10 +3312,10 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id); +u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t xdp_op, u32 *prog_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 1423cf4d695c..10cde58d3275 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4545,7 +4545,7 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) +static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) { struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); struct bpf_prog *new = xdp->prog; @@ -7090,26 +7090,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); -u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id) +u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id) { - struct netdev_xdp xdp; + struct netdev_bpf xdp; memset(&xdp, 0, sizeof(xdp)); xdp.command = XDP_QUERY_PROG; /* Query must always succeed. 
*/ - WARN_ON(xdp_op(dev, &xdp) < 0); + WARN_ON(bpf_op(dev, &xdp) < 0); if (prog_id) *prog_id = xdp.prog_id; return xdp.prog_attached; } -static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, +static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { - struct netdev_xdp xdp; + struct netdev_bpf xdp; memset(&xdp, 0, sizeof(xdp)); if (flags & XDP_FLAGS_HW_MODE) @@ -7120,7 +7120,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, xdp.flags = flags; xdp.prog = prog; - return xdp_op(dev, &xdp); + return bpf_op(dev, &xdp); } /** @@ -7137,24 +7137,24 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, { const struct net_device_ops *ops = dev->netdev_ops; struct bpf_prog *prog = NULL; - xdp_op_t xdp_op, xdp_chk; + bpf_op_t bpf_op, bpf_chk; int err; ASSERT_RTNL(); - xdp_op = xdp_chk = ops->ndo_xdp; - if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) + bpf_op = bpf_chk = ops->ndo_bpf; + if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) return -EOPNOTSUPP; - if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) - xdp_op = generic_xdp_install; - if (xdp_op == xdp_chk) - xdp_chk = generic_xdp_install; + if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) + bpf_op = generic_xdp_install; + if (bpf_op == bpf_chk) + bpf_chk = generic_xdp_install; if (fd >= 0) { - if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL)) + if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL)) return -EEXIST; if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_attached(dev, xdp_op, NULL)) + __dev_xdp_attached(dev, bpf_op, NULL)) return -EBUSY; prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); @@ -7162,7 +7162,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return PTR_ERR(prog); } - err = dev_xdp_install(dev, xdp_op, extack, flags, prog); + err = dev_xdp_install(dev, bpf_op, extack, flags, prog); if (err < 0 && prog) bpf_prog_put(prog); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8a8c51937edf..dc5ad84ac096 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1270,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) *prog_id = generic_xdp_prog->aux->id; return XDP_ATTACHED_SKB; } - if (!ops->ndo_xdp) + if (!ops->ndo_bpf) return XDP_ATTACHED_NONE; - return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id); + return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id); } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) From ab3f0063c48c26c927851b6767824e35a716d878 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:17 -0700 Subject: [PATCH 1843/2177] bpf: offload: add infrastructure for loading programs for a specific netdev The fact that we don't know which device the program is going to be used on is quite limiting in current eBPF infrastructure. We have to reverse or limit the changes which kernel makes to the loaded bytecode if we want it to be offloaded to a networking device. We also have to invent new APIs for debugging and troubleshooting support. Make it possible to load programs for a specific netdev. This helps us to bring the debug information closer to the core eBPF infrastructure (e.g. we will be able to reuse the verifier log in device JIT). It allows device JITs to perform translation on the original bytecode.
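Purely as an illustrative sketch (not part of the patch itself): a userspace loader could request device binding by filling the new prog_target_ifindex field of union bpf_attr at BPF_PROG_LOAD time. The field name comes from the uapi change below; the trivial return-XDP_PASS program and the helper name are made up for the example.

  /* Hypothetical usage sketch: load a minimal XDP program bound to one
   * netdev via the new prog_target_ifindex field.  Error handling is
   * kept minimal for brevity.
   */
  #include <unistd.h>
  #include <net/if.h>
  #include <sys/syscall.h>
  #include <linux/bpf.h>

  static int load_prog_for_netdev(const char *ifname)
  {
  	struct bpf_insn insns[] = {
  		/* r0 = XDP_PASS (2); exit */
  		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 2 },
  		{ .code = BPF_JMP | BPF_EXIT },
  	};
  	union bpf_attr attr = {};

  	attr.prog_type = BPF_PROG_TYPE_XDP;
  	attr.insns = (__u64)(unsigned long)insns;
  	attr.insn_cnt = sizeof(insns) / sizeof(insns[0]);
  	attr.license = (__u64)(unsigned long)"GPL";
  	/* new in this patch: prepare/offload the program for this device */
  	attr.prog_target_ifindex = if_nametoindex(ifname);

  	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  }
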
__bpf_prog_get() when called to get a reference for an attachment point will now refuse to give it if program has a device assigned. Following patches will add a version of that function which passes the expected netdev in. @type argument in __bpf_prog_get() is renamed to attach_type to make it clearer that it's only set on attachment. All calls to ndo_bpf are protected by rtnl, only verifier callbacks are not. We need a wait queue to make sure netdev doesn't get destroyed while verifier is still running and calling its driver. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/linux/bpf.h | 36 +++++++ include/linux/bpf_verifier.h | 10 ++ include/linux/netdevice.h | 14 +++ include/uapi/linux/bpf.h | 1 + kernel/bpf/Makefile | 1 + kernel/bpf/core.c | 10 +- kernel/bpf/offload.c | 182 +++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 17 +++- kernel/bpf/verifier.c | 15 ++- 9 files changed, 278 insertions(+), 8 deletions(-) create mode 100644 kernel/bpf/offload.c diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 520aeebe0d93..e45d43f9ec92 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -15,6 +15,7 @@ #include #include #include +#include struct perf_event; struct bpf_prog; @@ -182,6 +183,16 @@ struct bpf_verifier_ops { struct bpf_prog *prog, u32 *target_size); }; +struct bpf_dev_offload { + struct bpf_prog *prog; + struct net_device *netdev; + void *dev_priv; + struct list_head offloads; + bool dev_state; + bool verifier_running; + wait_queue_head_t verifier_done; +}; + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -199,6 +210,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_dev_offload *offload; union { struct work_struct work; struct rcu_head rcu; @@ -317,6 +329,7 @@ extern const struct file_operations bpf_prog_fops; #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; extern const struct bpf_verifier_ops xdp_analyzer_ops; @@ -491,6 +504,29 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, } #endif /* CONFIG_BPF_SYSCALL */ +int bpf_prog_offload_compile(struct bpf_prog *prog); +void bpf_prog_offload_destroy(struct bpf_prog *prog); + +#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) +int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); + +static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +{ + return aux->offload; +} +#else +static inline int bpf_prog_offload_init(struct bpf_prog *prog, + union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} + +static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +{ + return false; +} +#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ + #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3b0976aaac75..e45011dbc02d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -153,6 +153,7 @@ struct bpf_verifier_env { struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + const struct bpf_ext_analyzer_ops *dev_ops; /* 
device analyzer ops */ void *analyzer_priv; /* pointer to external analyzer's private data */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ @@ -169,6 +170,15 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) return env->cur_state->regs; } +#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) +int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); +#else +int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +{ + return -EOPNOTSUPP; +} +#endif + int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, void *priv); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9af9feaaeb64..fda527ccb263 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -797,8 +797,13 @@ enum bpf_netdev_command { * is equivalent to XDP_ATTACHED_DRV. */ XDP_QUERY_PROG, + /* BPF program for offload callbacks, invoked at program load time. */ + BPF_OFFLOAD_VERIFIER_PREP, + BPF_OFFLOAD_TRANSLATE, + BPF_OFFLOAD_DESTROY, }; +struct bpf_ext_analyzer_ops; struct netlink_ext_ack; struct netdev_bpf { @@ -815,6 +820,15 @@ struct netdev_bpf { u8 prog_attached; u32 prog_id; }; + /* BPF_OFFLOAD_VERIFIER_PREP */ + struct { + struct bpf_prog *prog; + const struct bpf_ext_analyzer_ops *ops; /* callee set */ + } verifier; + /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ + struct { + struct bpf_prog *prog; + } offload; }; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9820677c2ff..80d191a93fb0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -260,6 +260,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 16e95c8e749e..e691da0b3bab 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_BPF_SYSCALL) += disasm.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o obj-$(CONFIG_BPF_SYSCALL) += cpumap.o +obj-$(CONFIG_BPF_SYSCALL) += offload.o ifeq ($(CONFIG_STREAM_PARSER),y) obj-$(CONFIG_BPF_SYSCALL) += sockmap.o endif diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7fe448799d76..8a6c37762330 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1380,7 +1380,13 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * valid program, which in this case would simply not * be JITed, but falls back to the interpreter. 
*/ - fp = bpf_int_jit_compile(fp); + if (!bpf_prog_is_dev_bound(fp->aux)) { + fp = bpf_int_jit_compile(fp); + } else { + *err = bpf_prog_offload_compile(fp); + if (*err) + return fp; + } bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at @@ -1549,6 +1555,8 @@ static void bpf_prog_free_deferred(struct work_struct *work) struct bpf_prog_aux *aux; aux = container_of(work, struct bpf_prog_aux, work); + if (bpf_prog_is_dev_bound(aux)) + bpf_prog_offload_destroy(aux->prog); bpf_jit_free(aux->prog); } diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c new file mode 100644 index 000000000000..5553e0e2f8b1 --- /dev/null +++ b/kernel/bpf/offload.c @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include +#include +#include + +/* protected by RTNL */ +static LIST_HEAD(bpf_prog_offload_devs); + +int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) +{ + struct net *net = current->nsproxy->net_ns; + struct bpf_dev_offload *offload; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (attr->prog_flags) + return -EINVAL; + + offload = kzalloc(sizeof(*offload), GFP_USER); + if (!offload) + return -ENOMEM; + + offload->prog = prog; + init_waitqueue_head(&offload->verifier_done); + + rtnl_lock(); + offload->netdev = __dev_get_by_index(net, attr->prog_target_ifindex); + if (!offload->netdev) { + rtnl_unlock(); + kfree(offload); + return -EINVAL; + } + + prog->aux->offload = offload; + list_add_tail(&offload->offloads, &bpf_prog_offload_devs); + rtnl_unlock(); + + return 0; +} + +static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, + struct netdev_bpf *data) +{ + struct net_device *netdev = prog->aux->offload->netdev; + + ASSERT_RTNL(); + + if (!netdev) + return -ENODEV; + if (!netdev->netdev_ops->ndo_bpf) + return -EOPNOTSUPP; + + data->command = cmd; + + return netdev->netdev_ops->ndo_bpf(netdev, data); +} + +int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +{ + struct netdev_bpf data = {}; + int err; + + data.verifier.prog = env->prog; + + rtnl_lock(); + err = __bpf_offload_ndo(env->prog, BPF_OFFLOAD_VERIFIER_PREP, &data); + if (err) + goto exit_unlock; + + env->dev_ops = data.verifier.ops; + + env->prog->aux->offload->dev_state = true; + env->prog->aux->offload->verifier_running = true; +exit_unlock: + rtnl_unlock(); + return err; +} + +static void __bpf_prog_offload_destroy(struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + struct netdev_bpf data = {}; + + data.offload.prog = prog; + + if (offload->verifier_running) + wait_event(offload->verifier_done, !offload->verifier_running); + + if (offload->dev_state) + WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); + + offload->dev_state = false; + list_del_init(&offload->offloads); + offload->netdev = NULL; +} + +void bpf_prog_offload_destroy(struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + + offload->verifier_running = false; + wake_up(&offload->verifier_done); + + rtnl_lock(); + __bpf_prog_offload_destroy(prog); + rtnl_unlock(); + + kfree(offload); +} + +static int bpf_prog_offload_translate(struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + struct netdev_bpf data = {}; + int ret; + + data.offload.prog = prog; + + offload->verifier_running = false; + wake_up(&offload->verifier_done); + + rtnl_lock(); + ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data); + rtnl_unlock(); + + return ret; +} + +static unsigned int 
bpf_prog_warn_on_exec(const void *ctx, + const struct bpf_insn *insn) +{ + WARN(1, "attempt to execute device eBPF program on the host!"); + return 0; +} + +int bpf_prog_offload_compile(struct bpf_prog *prog) +{ + prog->bpf_func = bpf_prog_warn_on_exec; + + return bpf_prog_offload_translate(prog); +} + +const struct bpf_prog_ops bpf_offload_prog_ops = { +}; + +static int bpf_offload_notification(struct notifier_block *notifier, + ulong event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct bpf_dev_offload *offload, *tmp; + + ASSERT_RTNL(); + + switch (event) { + case NETDEV_UNREGISTER: + list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, + offloads) { + if (offload->netdev == netdev) + __bpf_prog_offload_destroy(offload->prog); + } + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block bpf_offload_notifier = { + .notifier_call = bpf_offload_notification, +}; + +static int __init bpf_offload_init(void) +{ + register_netdevice_notifier(&bpf_offload_notifier); + return 0; +} + +subsys_initcall(bpf_offload_init); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 323be2473c4b..1574b9f0f24e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -824,7 +824,10 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type]) return -EINVAL; - prog->aux->ops = bpf_prog_types[type]; + if (!bpf_prog_is_dev_bound(prog->aux)) + prog->aux->ops = bpf_prog_types[type]; + else + prog->aux->ops = &bpf_offload_prog_ops; prog->type = type; return 0; } @@ -1054,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); -static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) +static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type) { struct fd f = fdget(ufd); struct bpf_prog *prog; @@ -1062,7 +1065,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) prog = ____bpf_prog_get(f); if (IS_ERR(prog)) return prog; - if (type && prog->type != *type) { + if (attach_type && (prog->type != *attach_type || prog->aux->offload)) { prog = ERR_PTR(-EINVAL); goto out; } @@ -1089,7 +1092,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) EXPORT_SYMBOL_GPL(bpf_prog_get_type); /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_name +#define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex static int bpf_prog_load(union bpf_attr *attr) { @@ -1152,6 +1155,12 @@ static int bpf_prog_load(union bpf_attr *attr) atomic_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 
1 : 0; + if (attr->prog_target_ifindex) { + err = bpf_prog_offload_init(prog, attr); + if (err) + goto free_prog; + } + /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 04357ad5a812..51aabb32ad67 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3736,10 +3736,13 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { - if (!env->analyzer_ops || !env->analyzer_ops->insn_hook) - return 0; + if (env->analyzer_ops && env->analyzer_ops->insn_hook) + return env->analyzer_ops->insn_hook(env, insn_idx, + prev_insn_idx); + if (env->dev_ops && env->dev_ops->insn_hook) + return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); - return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx); + return 0; } static int do_check(struct bpf_verifier_env *env) @@ -4516,6 +4519,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; + if (env->prog->aux->offload) { + ret = bpf_prog_offload_verifier_prep(env); + if (ret) + goto err_unlock; + } + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; From bd601b6ada11fdfb9e277f24ad2eb54bc599156b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:18 -0700 Subject: [PATCH 1844/2177] bpf: report offload info to user space Extend struct bpf_prog_info to contain information about program being bound to a device. Since the netdev may get destroyed while program still exists we need a flag to indicate the program is loaded for a device, even if the device is gone. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 6 ++++++ kernel/bpf/offload.c | 12 ++++++++++++ kernel/bpf/syscall.c | 5 +++++ 4 files changed, 24 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e45d43f9ec92..98bacd0fa5cc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -506,6 +506,7 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); +u32 bpf_prog_offload_ifindex(struct bpf_prog *prog); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 80d191a93fb0..4455dd195201 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -895,6 +895,10 @@ enum sk_action { #define BPF_TAG_SIZE 8 +enum bpf_prog_status { + BPF_PROG_STATUS_DEV_BOUND = (1 << 0), +}; + struct bpf_prog_info { __u32 type; __u32 id; @@ -908,6 +912,8 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 5553e0e2f8b1..2816feb38be1 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -144,6 +144,18 @@ int bpf_prog_offload_compile(struct bpf_prog *prog) return bpf_prog_offload_translate(prog); } +u32 bpf_prog_offload_ifindex(struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + u32 ifindex; + + rtnl_lock(); + ifindex = offload->netdev ? offload->netdev->ifindex : 0; + rtnl_unlock(); + + return ifindex; +} + const struct bpf_prog_ops bpf_offload_prog_ops = { }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1574b9f0f24e..3217c20ea91b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1592,6 +1592,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return -EFAULT; } + if (bpf_prog_is_dev_bound(prog->aux)) { + info.status |= BPF_PROG_STATUS_DEV_BOUND; + info.ifindex = bpf_prog_offload_ifindex(prog); + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) From 928631e05495fa1f0e9775f555b94dbcbb4e2fb5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:19 -0700 Subject: [PATCH 1845/2177] bpftool: print program device bound info If program is bound to a device, print the name of the relevant interface or unknown if the netdev has since been removed. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/prog.c | 31 +++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++++ 2 files changed, 38 insertions(+) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 250f80fd46aa..d3ab808dc882 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,21 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + if (info->status & BPF_PROG_STATUS_DEV_BOUND) { + jsonw_name(json_wtr, "dev"); + if (info->ifindex) { + char name[IF_NAMESIZE]; + + if (!if_indextoname(info->ifindex, name)) + jsonw_printf(json_wtr, "\"ifindex:%d\"", + info->ifindex); + else + jsonw_printf(json_wtr, "\"%s\"", name); + } else { + jsonw_printf(json_wtr, "\"unknown\""); + } + } + if (info->load_time) { char buf[32]; @@ -274,6 +290,21 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); + printf(" "); + + if (info->status & BPF_PROG_STATUS_DEV_BOUND) { + printf("dev "); + if (info->ifindex) { + char name[IF_NAMESIZE]; + + if (!if_indextoname(info->ifindex, name)) + printf("ifindex:%d ", info->ifindex); + else + printf("%s ", name); + } else { + printf("unknown "); + } + } printf("\n"); if (info->load_time) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7cebba491011..e92f62cf933a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -259,6 +259,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -893,6 +894,10 @@ enum sk_action { #define BPF_TAG_SIZE 8 +enum bpf_prog_status { + BPF_PROG_STATUS_DEV_BOUND = (1 << 0), +}; + struct bpf_prog_info { __u32 type; __u32 id; @@ -906,6 +911,8 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { From 248f346ffe9508dee0039db4ac839cb31ba3bdec Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:20 -0700 Subject: [PATCH 1846/2177] xdp: allow attaching programs loaded for specific device Pass the netdev pointer to bpf_prog_get_type(). This way BPF code can decide whether the device matches what the code was loaded/translated for. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 10 ++++++++++ kernel/bpf/syscall.c | 33 +++++++++++++++++++++++++++++---- net/core/dev.c | 6 +++++- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 98bacd0fa5cc..c397934f91dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -335,6 +335,8 @@ extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); +struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, + struct net_device *netdev); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); @@ -428,6 +430,14 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, + enum bpf_prog_type type, + struct net_device *netdev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3217c20ea91b..68f9123acd39 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1057,7 +1057,22 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); -static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type) +static bool bpf_prog_can_attach(struct bpf_prog *prog, + enum bpf_prog_type *attach_type, + struct net_device *netdev) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + + if (prog->type != *attach_type) + return false; + if (offload && offload->netdev != netdev) + return false; + + return true; +} + +static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, + struct net_device *netdev) { struct fd f = fdget(ufd); struct bpf_prog *prog; @@ -1065,7 +1080,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type) prog = ____bpf_prog_get(f); if (IS_ERR(prog)) return prog; - if (attach_type && (prog->type != *attach_type || prog->aux->offload)) { + if (attach_type && !bpf_prog_can_attach(prog, attach_type, netdev)) { prog = ERR_PTR(-EINVAL); goto out; } @@ -1078,12 +1093,12 @@ out: struct bpf_prog *bpf_prog_get(u32 ufd) { - return __bpf_prog_get(ufd, NULL); + return __bpf_prog_get(ufd, NULL, NULL); } struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { - struct bpf_prog *prog = __bpf_prog_get(ufd, &type); + struct bpf_prog *prog = __bpf_prog_get(ufd, &type, NULL); if (!IS_ERR(prog)) trace_bpf_prog_get_type(prog); @@ -1091,6 +1106,16 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) } EXPORT_SYMBOL_GPL(bpf_prog_get_type); +struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, + struct net_device *netdev) +{ + struct bpf_prog *prog = __bpf_prog_get(ufd, &type, netdev); + + if (!IS_ERR(prog)) + trace_bpf_prog_get_type(prog); + return prog; +} + /* last field in 'union bpf_attr' used by this command */ #define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex diff --git a/net/core/dev.c b/net/core/dev.c index 10cde58d3275..30b5fe32c525 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7157,7 +7157,11 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, __dev_xdp_attached(dev, bpf_op, NULL)) return -EBUSY; - prog = bpf_prog_get_type(fd, 
BPF_PROG_TYPE_XDP); + if (bpf_op == ops->ndo_bpf) + prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + dev); + else + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); if (IS_ERR(prog)) return PTR_ERR(prog); } From 6c8dfe21c435cf2953e3cee43e12180cbc4f0820 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:21 -0700 Subject: [PATCH 1847/2177] cls_bpf: allow attaching programs loaded for specific device If TC program is loaded with skip_sw flag, we should allow the device-specific programs to be accepted. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 1 + net/sched/cls_bpf.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 68f9123acd39..416d70cdfc76 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1115,6 +1115,7 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, trace_bpf_prog_get_type(prog); return prog; } +EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); /* last field in 'union bpf_attr' used by this command */ #define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index bc3edde1b9d7..dc9bd9a0070b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -374,7 +374,7 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) } static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, - const struct tcf_proto *tp) + u32 gen_flags, const struct tcf_proto *tp) { struct bpf_prog *fp; char *name = NULL; @@ -382,7 +382,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); - fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); + if (gen_flags & TCA_CLS_FLAGS_SKIP_SW) + fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, + qdisc_dev(tp->q)); + else + fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -440,7 +444,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, prog->gen_flags = gen_flags; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : - cls_bpf_prog_from_efd(tb, prog, tp); + cls_bpf_prog_from_efd(tb, prog, gen_flags, tp); if (ret < 0) return ret; From 012bb8a8b5a2688590f829884acc83697d68a96d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:22 -0700 Subject: [PATCH 1848/2177] nfp: bpf: drop support for cls_bpf with legacy actions Only support BPF_PROG_TYPE_SCHED_CLS programs in direct action mode. This simplifies preparing the offload since there will now be only one mode of operation for that type of program. We need to know the attachment mode type of cls_bpf programs, because exit codes are interpreted differently for legacy vs DA mode. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 87 ++------------ drivers/net/ethernet/netronome/nfp/bpf/main.c | 33 ++---- drivers/net/ethernet/netronome/nfp/bpf/main.h | 30 +---- .../net/ethernet/netronome/nfp/bpf/offload.c | 108 +----------------- .../net/ethernet/netronome/nfp/bpf/verifier.c | 11 +- 5 files changed, 22 insertions(+), 247 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 2609a2487100..e1907a1d269e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -201,47 +201,6 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) BR_CSS_NONE, addr, defer); } -static void -__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, - u8 byte, bool equal, u16 addr, u8 defer, bool src_lmextn) -{ - u16 addr_lo, addr_hi; - u64 insn; - - addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO)); - addr_hi = addr != addr_lo; - - insn = OP_BBYTE_BASE | - FIELD_PREP(OP_BB_A_SRC, areg) | - FIELD_PREP(OP_BB_BYTE, byte) | - FIELD_PREP(OP_BB_B_SRC, breg) | - FIELD_PREP(OP_BB_I8, imm8) | - FIELD_PREP(OP_BB_EQ, equal) | - FIELD_PREP(OP_BB_DEFBR, defer) | - FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | - FIELD_PREP(OP_BB_ADDR_HI, addr_hi) | - FIELD_PREP(OP_BB_SRC_LMEXTN, src_lmextn); - - nfp_prog_push(nfp_prog, insn); -} - -static void -emit_br_byte_neq(struct nfp_prog *nfp_prog, - swreg src, u8 imm, u8 byte, u16 addr, u8 defer) -{ - struct nfp_insn_re_regs reg; - int err; - - err = swreg_to_restricted(reg_none(), src, reg_imm(imm), ®, true); - if (err) { - nfp_prog->error = err; - return; - } - - __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, - defer, reg.src_lmextn); -} - static void __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, enum immed_width width, bool invert, @@ -1547,7 +1506,7 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size) { if (meta->ptr.type == PTR_TO_CTX) { - if (nfp_prog->act == NN_ACT_XDP) + if (nfp_prog->type == BPF_PROG_TYPE_XDP) return mem_ldx_xdp(nfp_prog, meta, size); else return mem_ldx_skb(nfp_prog, meta, size); @@ -2022,34 +1981,6 @@ static void nfp_intro(struct nfp_prog *nfp_prog) plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); } -static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) -{ - const u8 act2code[] = { - [NN_ACT_TC_DROP] = 0x22, - [NN_ACT_TC_REDIR] = 0x24 - }; - /* Target for aborts */ - nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - wrp_immed(nfp_prog, reg_both(0), 0); - - /* Target for normal exits */ - nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); - /* Legacy TC mode: - * 0 0x11 -> pass, count as stat0 - * -1 drop 0x22 -> drop, count as stat1 - * redir 0x24 -> redir, count as stat1 - * ife mark 0x21 -> pass, count as stat1 - * ife + tx 0x24 -> redir, count as stat1 - */ - emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); - wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); - emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); - - emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); - emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]), - SHF_SC_L_SHF, 16); -} - static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) { /* TC direct-action mode: @@ -2142,17 +2073,15 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) static void nfp_outro(struct nfp_prog *nfp_prog) { - switch (nfp_prog->act) { - case NN_ACT_DIRECT: + switch 
(nfp_prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: nfp_outro_tc_da(nfp_prog); break; - case NN_ACT_TC_DROP: - case NN_ACT_TC_REDIR: - nfp_outro_tc_legacy(nfp_prog); - break; - case NN_ACT_XDP: + case BPF_PROG_TYPE_XDP: nfp_outro_xdp(nfp_prog); break; + default: + WARN_ON(1); } } @@ -2351,7 +2280,6 @@ static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) * nfp_bpf_jit() - translate BPF code into NFP assembly * @filter: kernel BPF filter struct * @prog_mem: memory to store assembler instructions - * @act: action attached to this eBPF program * @prog_start: offset of the first instruction when loaded * @prog_done: where to jump on exit * @prog_sz: size of @prog_mem in instructions @@ -2359,7 +2287,6 @@ static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) */ int nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, - enum nfp_bpf_action_type act, unsigned int prog_start, unsigned int prog_done, unsigned int prog_sz, struct nfp_bpf_result *res) { @@ -2371,7 +2298,7 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, return -ENOMEM; INIT_LIST_HEAD(&nfp_prog->insns); - nfp_prog->act = act; + nfp_prog->type = filter->type; nfp_prog->start_off = prog_start; nfp_prog->tgt_done = prog_done; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 8e3e89cace8d..2ff97f12c160 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -85,34 +85,10 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) return nfp_net_ebpf_capable(nn) ? "BPF" : ""; } -static int -nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) -{ - struct nfp_net_bpf_priv *priv; - int ret; - - priv = kmalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - nn->app_priv = priv; - spin_lock_init(&priv->rx_filter_lock); - priv->nn = nn; - timer_setup(&priv->rx_filter_stats_timer, - nfp_net_filter_stats_timer, 0); - - ret = nfp_app_nic_vnic_alloc(app, nn, id); - if (ret) - kfree(priv); - - return ret; -} - static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) { if (nn->dp.bpf_offload_xdp) nfp_bpf_xdp_offload(app, nn, NULL); - kfree(nn->app_priv); } static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, @@ -133,6 +109,13 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, if (nn->dp.bpf_offload_xdp) return -EBUSY; + /* Only support TC direct action */ + if (!cls_bpf->exts_integrated || + tcf_exts_has_actions(cls_bpf->exts)) { + nn_err(nn, "only direct action with no legacy actions supported\n"); + return -EOPNOTSUPP; + } + return nfp_net_bpf_offload(nn, cls_bpf); default: return -EOPNOTSUPP; @@ -184,7 +167,7 @@ const struct nfp_app_type app_bpf = { .extra_cap = nfp_bpf_extra_cap, - .vnic_alloc = nfp_bpf_vnic_alloc, + .vnic_alloc = nfp_app_nic_vnic_alloc, .vnic_free = nfp_bpf_vnic_free, .setup_tc = nfp_bpf_setup_tc, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index bc604030ff6c..c5280de2ab14 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -65,13 +65,6 @@ enum pkt_vec { PKT_VEC_PKT_PTR = 2, }; -enum nfp_bpf_action_type { - NN_ACT_TC_DROP, - NN_ACT_TC_REDIR, - NN_ACT_DIRECT, - NN_ACT_XDP, -}; - #define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN) #define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR) @@ -147,7 +140,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta 
*meta) * @prog: machine code * @prog_len: number of valid instructions in @prog array * @__prog_alloc_len: alloc size of @prog array - * @act: BPF program/action type (TC DA, TC with action, XDP etc.) + * @type: BPF program type * @num_regs: number of registers used by this program * @regs_per_thread: number of basic registers allocated per thread * @start_off: address of the first instruction in the memory @@ -164,7 +157,7 @@ struct nfp_prog { unsigned int prog_len; unsigned int __prog_alloc_len; - enum nfp_bpf_action_type act; + enum bpf_prog_type type; unsigned int num_regs; unsigned int regs_per_thread; @@ -188,7 +181,7 @@ struct nfp_bpf_result { }; int -nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, +nfp_bpf_jit(struct bpf_prog *filter, void *prog, unsigned int prog_start, unsigned int prog_done, unsigned int prog_sz, struct nfp_bpf_result *res); @@ -197,23 +190,6 @@ int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); struct nfp_net; struct tc_cls_bpf_offload; -/** - * struct nfp_net_bpf_priv - per-vNIC BPF private data - * @rx_filter: Filter offload statistics - dropped packets/bytes - * @rx_filter_prev: Filter offload statistics - values from previous update - * @rx_filter_change: Jiffies when statistics last changed - * @rx_filter_stats_timer: Timer for polling filter offload statistics - * @rx_filter_lock: Lock protecting timer state changes (teardown) - */ -struct nfp_net_bpf_priv { - struct nfp_stat_pair rx_filter, rx_filter_prev; - unsigned long rx_filter_change; - struct timer_list rx_filter_stats_timer; - struct nfp_net *nn; - spinlock_t rx_filter_lock; -}; - int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf); -void nfp_net_filter_stats_timer(struct timer_list *t); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 6d576f631392..b9b5d675c4d3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -51,92 +51,6 @@ #include "../nfp_net_ctrl.h" #include "../nfp_net.h" -void nfp_net_filter_stats_timer(struct timer_list *t) -{ - struct nfp_net_bpf_priv *priv = from_timer(priv, t, - rx_filter_stats_timer); - struct nfp_net *nn = priv->nn; - struct nfp_stat_pair latest; - - spin_lock_bh(&priv->rx_filter_lock); - - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) - mod_timer(&priv->rx_filter_stats_timer, - jiffies + NFP_NET_STAT_POLL_IVL); - - spin_unlock_bh(&priv->rx_filter_lock); - - latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); - latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); - - if (latest.pkts != priv->rx_filter.pkts) - priv->rx_filter_change = jiffies; - - priv->rx_filter = latest; -} - -static void nfp_net_bpf_stats_reset(struct nfp_net *nn) -{ - struct nfp_net_bpf_priv *priv = nn->app_priv; - - priv->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); - priv->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); - priv->rx_filter_prev = priv->rx_filter; - priv->rx_filter_change = jiffies; -} - -static int -nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) -{ - struct nfp_net_bpf_priv *priv = nn->app_priv; - u64 bytes, pkts; - - pkts = priv->rx_filter.pkts - priv->rx_filter_prev.pkts; - bytes = priv->rx_filter.bytes - priv->rx_filter_prev.bytes; - bytes -= pkts * ETH_HLEN; - - priv->rx_filter_prev = priv->rx_filter; - - tcf_exts_stats_update(cls_bpf->exts, - bytes, pkts, priv->rx_filter_change); - - 
return 0; -} - -static int -nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) -{ - const struct tc_action *a; - LIST_HEAD(actions); - - if (!cls_bpf->exts) - return NN_ACT_XDP; - - /* TC direct action */ - if (cls_bpf->exts_integrated) { - if (!tcf_exts_has_actions(cls_bpf->exts)) - return NN_ACT_DIRECT; - - return -EOPNOTSUPP; - } - - /* TC legacy mode */ - if (!tcf_exts_has_one_action(cls_bpf->exts)) - return -EOPNOTSUPP; - - tcf_exts_to_list(cls_bpf->exts, &actions); - list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) - return NN_ACT_TC_DROP; - - if (is_tcf_mirred_egress_redirect(a) && - tcf_mirred_ifindex(a) == nn->dp.netdev->ifindex) - return NN_ACT_TC_REDIR; - } - - return -EOPNOTSUPP; -} - static int nfp_net_bpf_offload_prepare(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf, @@ -144,17 +58,11 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, void **code, dma_addr_t *dma_addr, u16 max_instr) { unsigned int code_sz = max_instr * sizeof(u64); - enum nfp_bpf_action_type act; unsigned int stack_size; u16 start_off, done_off; unsigned int max_mtu; int ret; - ret = nfp_net_bpf_get_act(nn, cls_bpf); - if (ret < 0) - return ret; - act = ret; - max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; if (max_mtu < nn->dp.netdev->mtu) { nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); @@ -175,7 +83,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, if (!*code) return -ENOMEM; - ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off, + ret = nfp_bpf_jit(cls_bpf->prog, *code, start_off, done_off, max_instr, res); if (ret) goto out; @@ -193,7 +101,6 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, unsigned int code_sz, unsigned int n_instr, bool dense_mode) { - struct nfp_net_bpf_priv *priv = nn->app_priv; u64 bpf_addr = dma_addr; int err; @@ -218,25 +125,15 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, nn_err(nn, "FW command error while enabling BPF: %d\n", err); dma_free_coherent(nn->dp.dev, code_sz, code, dma_addr); - - nfp_net_bpf_stats_reset(nn); - mod_timer(&priv->rx_filter_stats_timer, - jiffies + NFP_NET_STAT_POLL_IVL); } static int nfp_net_bpf_stop(struct nfp_net *nn) { - struct nfp_net_bpf_priv *priv = nn->app_priv; - if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)) return 0; - spin_lock_bh(&priv->rx_filter_lock); nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_BPF; - spin_unlock_bh(&priv->rx_filter_lock); nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); - - del_timer_sync(&priv->rx_filter_stats_timer); nn->dp.bpf_offload_skip_sw = 0; return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); @@ -292,9 +189,6 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) case TC_CLSBPF_DESTROY: return nfp_net_bpf_stop(nn); - case TC_CLSBPF_STATS: - return nfp_net_bpf_stats_update(nn, cls_bpf); - default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index a8c7615546a9..4f31bdefd331 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -81,7 +81,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0; u64 imm; - if (nfp_prog->act == NN_ACT_XDP) + if (nfp_prog->type == BPF_PROG_TYPE_XDP) return 0; if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) { @@ -94,13 +94,8 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, } imm = 
reg0->var_off.value; - if (nfp_prog->act != NN_ACT_DIRECT && imm != 0 && (imm & ~0U) != ~0U) { - pr_info("unsupported exit state: %d, imm: %llx\n", - reg0->type, imm); - return -EINVAL; - } - - if (nfp_prog->act == NN_ACT_DIRECT && imm <= TC_ACT_REDIRECT && + if (nfp_prog->type == BPF_PROG_TYPE_SCHED_CLS && + imm <= TC_ACT_REDIRECT && imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && imm != TC_ACT_QUEUED) { pr_info("unsupported exit state: %d, imm: %llx\n", From 94508438e8ea4391696c5171527678e9dbd08789 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:23 -0700 Subject: [PATCH 1849/2177] nfp: bpf: remove the register renumbering leftovers The register renumbering was removed and will not be coming back in its old, naive form, given that it would be fundamentally incompatible with calling functions. Remove the leftovers. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 4 ---- drivers/net/ethernet/netronome/nfp/bpf/main.h | 6 ------ drivers/net/ethernet/netronome/nfp/bpf/offload.c | 13 ++++--------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index e1907a1d269e..ff150c27f411 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2314,9 +2314,6 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, if (ret) goto out; - nfp_prog->num_regs = MAX_BPF_REG; - nfp_prog->regs_per_thread = 32; - nfp_prog->prog = prog_mem; nfp_prog->__prog_alloc_len = prog_sz; @@ -2331,7 +2328,6 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, ret = nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)prog_mem); res->n_instr = nfp_prog->prog_len; - res->dense_mode = false; out: nfp_prog_free(nfp_prog); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index c5280de2ab14..85b7d9398cda 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -141,8 +141,6 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) * @prog_len: number of valid instructions in @prog array * @__prog_alloc_len: alloc size of @prog array * @type: BPF program type - * @num_regs: number of registers used by this program - * @regs_per_thread: number of basic registers allocated per thread * @start_off: address of the first instruction in the memory * @tgt_out: jump target for normal exit * @tgt_abort: jump target for abort (e.g. 
access outside of packet buffer) @@ -159,9 +157,6 @@ struct nfp_prog { enum bpf_prog_type type; - unsigned int num_regs; - unsigned int regs_per_thread; - unsigned int start_off; unsigned int tgt_out; unsigned int tgt_abort; @@ -177,7 +172,6 @@ struct nfp_prog { struct nfp_bpf_result { unsigned int n_instr; - bool dense_mode; }; int diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index b9b5d675c4d3..268ba1ba82db 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -98,19 +98,14 @@ out: static void nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, void *code, dma_addr_t dma_addr, - unsigned int code_sz, unsigned int n_instr, - bool dense_mode) + unsigned int code_sz, unsigned int n_instr) { - u64 bpf_addr = dma_addr; int err; nn->dp.bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); - if (dense_mode) - bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX; - nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); - nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); /* Load up the JITed code */ err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); @@ -169,7 +164,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) nfp_net_bpf_stop(nn); nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, dma_addr, max_instr * sizeof(u64), - res.n_instr, res.dense_mode); + res.n_instr); return 0; case TC_CLSBPF_ADD: @@ -183,7 +178,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, dma_addr, max_instr * sizeof(u64), - res.n_instr, res.dense_mode); + res.n_instr); return 0; case TC_CLSBPF_DESTROY: From 5559eedb78127d6b76c36e3918a75bbc2801653a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:24 -0700 Subject: [PATCH 1850/2177] nfp: bpf: remove unnecessary include of nfp_net.h BPF offload's main header does not need to include nfp_net.h. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 85b7d9398cda..9f0df6a9786d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -41,7 +41,6 @@ #include #include "../nfp_asm.h" -#include "../nfp_net.h" /* For branch fixup logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! From 9ce7a956327ad6c14e1a7eb9f4cb5300c8b61db6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:25 -0700 Subject: [PATCH 1851/2177] nfp: bpf: refactor offload logic We currently create a fake cls_bpf offload object when we want to offload XDP. Simplify and clarify the code by moving the TC/XDP specific logic out of common offload code. This is easy now that we don't support legacy TC actions. We only need the bpf program and state of the skip_sw flag. Temporarily set @code to NULL in nfp_net_bpf_offload(), compilers seem to have trouble recognizing it's always initialized. Next patches will eliminate that variable. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 63 +++++++------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 4 +- .../net/ethernet/netronome/nfp/bpf/offload.c | 83 ++++++++----------- 3 files changed, 70 insertions(+), 80 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 2ff97f12c160..9e1286346d42 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -54,28 +54,25 @@ static int nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog) { - struct tc_cls_bpf_offload cmd = { - .prog = prog, - }; + bool running, xdp_running; int ret; if (!nfp_net_ebpf_capable(nn)) return -EINVAL; - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) { - if (!nn->dp.bpf_offload_xdp) - return prog ? -EBUSY : 0; - cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY; - } else { - if (!prog) - return 0; - cmd.command = TC_CLSBPF_ADD; - } + running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; + xdp_running = running && nn->dp.bpf_offload_xdp; - ret = nfp_net_bpf_offload(nn, &cmd); + if (!prog && !xdp_running) + return 0; + if (prog && running && !xdp_running) + return -EBUSY; + + ret = nfp_net_bpf_offload(nn, prog, running, true); /* Stop offload if replace not possible */ - if (ret && cmd.command == TC_CLSBPF_REPLACE) + if (ret && prog) nfp_bpf_xdp_offload(app, nn, NULL); + nn->dp.bpf_offload_xdp = prog && !ret; return ret; } @@ -96,27 +93,33 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, { struct tc_cls_bpf_offload *cls_bpf = type_data; struct nfp_net *nn = cb_priv; + bool skip_sw; - if (!tc_can_offload(nn->dp.netdev)) + if (type != TC_SETUP_CLSBPF || + !tc_can_offload(nn->dp.netdev) || + !nfp_net_ebpf_capable(nn) || + cls_bpf->common.protocol != htons(ETH_P_ALL) || + cls_bpf->common.chain_index) return -EOPNOTSUPP; + if (nn->dp.bpf_offload_xdp) + return -EBUSY; - switch (type) { - case TC_SETUP_CLSBPF: - if (!nfp_net_ebpf_capable(nn) || - cls_bpf->common.protocol != htons(ETH_P_ALL) || - cls_bpf->common.chain_index) - return -EOPNOTSUPP; - if (nn->dp.bpf_offload_xdp) - return -EBUSY; + /* Only support TC direct action */ + if (!cls_bpf->exts_integrated || + tcf_exts_has_actions(cls_bpf->exts)) { + nn_err(nn, "only direct action with no legacy actions supported\n"); + return -EOPNOTSUPP; + } - /* Only support TC direct action */ - if (!cls_bpf->exts_integrated || - tcf_exts_has_actions(cls_bpf->exts)) { - nn_err(nn, "only direct action with no legacy actions supported\n"); - return -EOPNOTSUPP; - } + skip_sw = !!(cls_bpf->gen_flags & TCA_CLS_FLAGS_SKIP_SW); - return nfp_net_bpf_offload(nn, cls_bpf); + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + return nfp_net_bpf_offload(nn, cls_bpf->prog, true, !skip_sw); + case TC_CLSBPF_ADD: + return nfp_net_bpf_offload(nn, cls_bpf->prog, false, !skip_sw); + case TC_CLSBPF_DESTROY: + return nfp_net_bpf_offload(nn, NULL, true, !skip_sw); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 9f0df6a9786d..6dddab95d57a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -181,8 +181,8 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog, int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); struct nfp_net; -struct tc_cls_bpf_offload; -int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf); +int 
nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog, bool sw_fallback); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 268ba1ba82db..c09efa1a9649 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -52,8 +52,7 @@ #include "../nfp_net.h" static int -nfp_net_bpf_offload_prepare(struct nfp_net *nn, - struct tc_cls_bpf_offload *cls_bpf, +nfp_net_bpf_offload_prepare(struct nfp_net *nn, struct bpf_prog *prog, struct nfp_bpf_result *res, void **code, dma_addr_t *dma_addr, u16 max_instr) { @@ -73,9 +72,9 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; - if (cls_bpf->prog->aux->stack_depth > stack_size) { + if (prog->aux->stack_depth > stack_size) { nn_info(nn, "stack too large: program %dB > FW stack %dB\n", - cls_bpf->prog->aux->stack_depth, stack_size); + prog->aux->stack_depth, stack_size); return -EOPNOTSUPP; } @@ -83,8 +82,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, if (!*code) return -ENOMEM; - ret = nfp_bpf_jit(cls_bpf->prog, *code, start_off, done_off, - max_instr, res); + ret = nfp_bpf_jit(prog, *code, start_off, done_off, max_instr, res); if (ret) goto out; @@ -96,13 +94,13 @@ out: } static void -nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, +nfp_net_bpf_load_and_start(struct nfp_net *nn, bool sw_fallback, void *code, dma_addr_t dma_addr, unsigned int code_sz, unsigned int n_instr) { int err; - nn->dp.bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); + nn->dp.bpf_offload_skip_sw = !sw_fallback; nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); @@ -134,7 +132,8 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); } -int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog, bool sw_fallback) { struct nfp_bpf_result res; dma_addr_t dma_addr; @@ -142,49 +141,37 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) void *code; int err; + /* There is nothing stopping us from implementing seamless + * replace but the simple method of loading I adopted in + * the firmware does not handle atomic replace (i.e. we have to + * stop the BPF offload and re-enable it). Leaking-in a few + * frames which didn't have BPF applied in the hardware should + * be fine if software fallback is available, though. + */ + if (prog && old_prog && nn->dp.bpf_offload_skip_sw) + return -EBUSY; + + /* Something else is loaded, different program type? */ + if (!old_prog && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + code = NULL; - switch (cls_bpf->command) { - case TC_CLSBPF_REPLACE: - /* There is nothing stopping us from implementing seamless - * replace but the simple method of loading I adopted in - * the firmware does not handle atomic replace (i.e. we have to - * stop the BPF offload and re-enable it). Leaking-in a few - * frames which didn't have BPF applied in the hardware should - * be fine if software fallback is available, though. 
- */ - if (nn->dp.bpf_offload_skip_sw) - return -EBUSY; - - err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + if (prog) { + err = nfp_net_bpf_offload_prepare(nn, prog, &res, &code, &dma_addr, max_instr); if (err) return err; - - nfp_net_bpf_stop(nn); - nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, - dma_addr, max_instr * sizeof(u64), - res.n_instr); - return 0; - - case TC_CLSBPF_ADD: - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) - return -EBUSY; - - err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, - &dma_addr, max_instr); - if (err) - return err; - - nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, - dma_addr, max_instr * sizeof(u64), - res.n_instr); - return 0; - - case TC_CLSBPF_DESTROY: - return nfp_net_bpf_stop(nn); - - default: - return -EOPNOTSUPP; } + + if (old_prog) + nfp_net_bpf_stop(nn); + + if (prog) + nfp_net_bpf_load_and_start(nn, sw_fallback, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr); + + return 0; } From e4a91cd565e2c4e299abe9eb906c506ecc01032a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:26 -0700 Subject: [PATCH 1852/2177] nfp: bpf: require seamless reload for program replace Firmware supports live replacement of programs for quite some time now. Remove the software-fallback related logic and depend on the FW for program replace. Seamless reload will become a requirement if maps are present, anyway. Load and start stages have to be split now, since replace only needs a load, start has already been done on add. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 11 ++-- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 +- .../net/ethernet/netronome/nfp/bpf/offload.c | 62 +++++++++---------- drivers/net/ethernet/netronome/nfp/nfp_net.h | 2 - 4 files changed, 35 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 9e1286346d42..7ae7528cd96b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -68,7 +68,7 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, if (prog && running && !xdp_running) return -EBUSY; - ret = nfp_net_bpf_offload(nn, prog, running, true); + ret = nfp_net_bpf_offload(nn, prog, running); /* Stop offload if replace not possible */ if (ret && prog) nfp_bpf_xdp_offload(app, nn, NULL); @@ -93,7 +93,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, { struct tc_cls_bpf_offload *cls_bpf = type_data; struct nfp_net *nn = cb_priv; - bool skip_sw; if (type != TC_SETUP_CLSBPF || !tc_can_offload(nn->dp.netdev) || @@ -111,15 +110,13 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, return -EOPNOTSUPP; } - skip_sw = !!(cls_bpf->gen_flags & TCA_CLS_FLAGS_SKIP_SW); - switch (cls_bpf->command) { case TC_CLSBPF_REPLACE: - return nfp_net_bpf_offload(nn, cls_bpf->prog, true, !skip_sw); + return nfp_net_bpf_offload(nn, cls_bpf->prog, true); case TC_CLSBPF_ADD: - return nfp_net_bpf_offload(nn, cls_bpf->prog, false, !skip_sw); + return nfp_net_bpf_offload(nn, cls_bpf->prog, false); case TC_CLSBPF_DESTROY: - return nfp_net_bpf_offload(nn, NULL, true, !skip_sw); + return nfp_net_bpf_offload(nn, NULL, true); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 6dddab95d57a..df56f40fea7c 100644 --- 
a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -183,6 +183,6 @@ int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); struct nfp_net; int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, - bool old_prog, bool sw_fallback); + bool old_prog); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index c09efa1a9649..f4b9a46c844d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -94,14 +94,11 @@ out: } static void -nfp_net_bpf_load_and_start(struct nfp_net *nn, bool sw_fallback, - void *code, dma_addr_t dma_addr, - unsigned int code_sz, unsigned int n_instr) +nfp_net_bpf_load(struct nfp_net *nn, void *code, dma_addr_t dma_addr, + unsigned int code_sz, unsigned int n_instr) { int err; - nn->dp.bpf_offload_skip_sw = !sw_fallback; - nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); @@ -110,14 +107,19 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, bool sw_fallback, if (err) nn_err(nn, "FW command error while loading BPF: %d\n", err); + dma_free_coherent(nn->dp.dev, code_sz, code, dma_addr); +} + +static void nfp_net_bpf_start(struct nfp_net *nn) +{ + int err; + /* Enable passing packets through BPF function */ nn->dp.ctrl |= NFP_NET_CFG_CTRL_BPF; nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); if (err) nn_err(nn, "FW command error while enabling BPF: %d\n", err); - - dma_free_coherent(nn->dp.dev, code_sz, code, dma_addr); } static int nfp_net_bpf_stop(struct nfp_net *nn) @@ -127,13 +129,12 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_BPF; nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); - nn->dp.bpf_offload_skip_sw = 0; return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); } int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, - bool old_prog, bool sw_fallback) + bool old_prog) { struct nfp_bpf_result res; dma_addr_t dma_addr; @@ -141,37 +142,34 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, void *code; int err; - /* There is nothing stopping us from implementing seamless - * replace but the simple method of loading I adopted in - * the firmware does not handle atomic replace (i.e. we have to - * stop the BPF offload and re-enable it). Leaking-in a few - * frames which didn't have BPF applied in the hardware should - * be fine if software fallback is available, though. - */ - if (prog && old_prog && nn->dp.bpf_offload_skip_sw) - return -EBUSY; + if (prog && old_prog) { + u8 cap; + + cap = nn_readb(nn, NFP_NET_CFG_BPF_CAP); + if (!(cap & NFP_NET_BPF_CAP_RELO)) { + nn_err(nn, "FW does not support live reload\n"); + return -EBUSY; + } + } /* Something else is loaded, different program type? 
*/ if (!old_prog && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) return -EBUSY; + if (old_prog && !prog) + return nfp_net_bpf_stop(nn); + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); - code = NULL; - if (prog) { - err = nfp_net_bpf_offload_prepare(nn, prog, &res, &code, - &dma_addr, max_instr); - if (err) - return err; - } + err = nfp_net_bpf_offload_prepare(nn, prog, &res, &code, &dma_addr, + max_instr); + if (err) + return err; - if (old_prog) - nfp_net_bpf_stop(nn); - - if (prog) - nfp_net_bpf_load_and_start(nn, sw_fallback, code, - dma_addr, max_instr * sizeof(u64), - res.n_instr); + nfp_net_bpf_load(nn, code, dma_addr, max_instr * sizeof(u64), + res.n_instr); + if (!old_prog) + nfp_net_bpf_start(nn); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 3d411f0d15b6..7f9857c276b1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -476,7 +476,6 @@ struct nfp_stat_pair { * @dev: Backpointer to struct device * @netdev: Backpointer to net_device structure * @is_vf: Is the driver attached to a VF? - * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @bpf_offload_xdp: Offloaded BPF program is XDP * @chained_metadata_format: Firemware will use new metadata format * @rx_dma_dir: Mapping direction for RX buffers @@ -502,7 +501,6 @@ struct nfp_net_dp { struct net_device *netdev; u8 is_vf:1; - u8 bpf_offload_skip_sw:1; u8 bpf_offload_xdp:1; u8 chained_metadata_format:1; From c1c88eae8a8155c55dbbc7363f1f127c43e1b5d1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:27 -0700 Subject: [PATCH 1853/2177] nfp: bpf: move program prepare and free into offload.c Most of offload/translation prepare logic will be moved to offload.c. To help git generate more reasonable diffs move nfp_prog_prepare() and nfp_prog_free() functions there as a first step. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 33 ------------------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 5 +++ .../net/ethernet/netronome/nfp/bpf/offload.c | 33 +++++++++++++++++++ 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index ff150c27f411..2eddbb45fd60 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -77,17 +77,6 @@ nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return meta->l.prev != &nfp_prog->insns; } -static void nfp_prog_free(struct nfp_prog *nfp_prog) -{ - struct nfp_insn_meta *meta, *tmp; - - list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { - list_del(&meta->l); - kfree(meta); - } - kfree(nfp_prog); -} - static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) { if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { @@ -2127,28 +2116,6 @@ static int nfp_translate(struct nfp_prog *nfp_prog) return nfp_fixup_branches(nfp_prog); } -static int -nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, - unsigned int cnt) -{ - unsigned int i; - - for (i = 0; i < cnt; i++) { - struct nfp_insn_meta *meta; - - meta = kzalloc(sizeof(*meta), GFP_KERNEL); - if (!meta) - return -ENOMEM; - - meta->insn = prog[i]; - meta->n = i; - - list_add_tail(&meta->l, &nfp_prog->insns); - } - - return 0; -} - /* --- Optimizations --- */ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) { diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index df56f40fea7c..b77231a134b9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -173,6 +173,11 @@ struct nfp_bpf_result { unsigned int n_instr; }; +int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt); +void nfp_prog_free(struct nfp_prog *nfp_prog); + int nfp_bpf_jit(struct bpf_prog *filter, void *prog, unsigned int prog_start, unsigned int prog_done, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index f4b9a46c844d..3eeee200051e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -51,6 +51,39 @@ #include "../nfp_net_ctrl.h" #include "../nfp_net.h" +int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) +{ + unsigned int i; + + for (i = 0; i < cnt; i++) { + struct nfp_insn_meta *meta; + + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->insn = prog[i]; + meta->n = i; + + list_add_tail(&meta->l, &nfp_prog->insns); + } + + return 0; +} + +void nfp_prog_free(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *tmp; + + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); +} + static int nfp_net_bpf_offload_prepare(struct nfp_net *nn, struct bpf_prog *prog, struct nfp_bpf_result *res, From 9314c442d7ddf749d29c09ab48ffa5333d2bf48e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:28 -0700 Subject: [PATCH 1854/2177] nfp: bpf: move translation prepare to offload.c struct nfp_prog is currently only used internally by the translator. This means there is a lot of parameter passing going on, between the translator and different stages of offload. 
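For reference, the streaming-DMA pattern the program image moves to in this patch looks roughly as follows. This is a minimal sketch only, assuming a generic struct device *dev, a caller-supplied load_to_hw() callback standing in for the writes that hand the image address/size to the firmware, and a hypothetical example_load_prog() wrapper; it is not the driver's exact code.

#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

static int example_load_prog(struct device *dev, const u64 *insns,
                             unsigned int n_insns,
                             int (*load_to_hw)(dma_addr_t addr, unsigned int n))
{
        size_t sz = n_insns * sizeof(u64);
        dma_addr_t dma;
        u64 *image;
        int err;

        /* Plain kernel memory for the JITed image, kept for as long as
         * the host needs it, instead of a coherent DMA allocation held
         * for the whole lifetime of the offloaded program.
         */
        image = kmalloc(sz, GFP_KERNEL);
        if (!image)
                return -ENOMEM;
        memcpy(image, insns, sz);

        /* Map for DMA only around the load itself. */
        dma = dma_map_single(dev, image, sz, DMA_TO_DEVICE);
        if (dma_mapping_error(dev, dma)) {
                err = -ENOMEM;
                goto out_free;
        }

        /* Device consumes the image while the mapping is live. */
        err = load_to_hw(dma, n_insns);

        dma_unmap_single(dev, dma, sz, DMA_TO_DEVICE);
out_free:
        kfree(image);
        return err;
}
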
Simplify things by allocating nfp_prog in offload.c already. We will now use kmalloc() to allocate the program area and only DMA map it for the time of loading (instead of allocating DMA coherent memory upfront). Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 43 +----- drivers/net/ethernet/netronome/nfp/bpf/main.h | 14 +- .../net/ethernet/netronome/nfp/bpf/offload.c | 128 ++++++++++++------ 3 files changed, 94 insertions(+), 91 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 2eddbb45fd60..eae7a137a7a8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2245,58 +2245,27 @@ static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) /** * nfp_bpf_jit() - translate BPF code into NFP assembly + * @nfp_prog: nfp_prog prepared based on @filter * @filter: kernel BPF filter struct - * @prog_mem: memory to store assembler instructions - * @prog_start: offset of the first instruction when loaded - * @prog_done: where to jump on exit - * @prog_sz: size of @prog_mem in instructions - * @res: achieved parameters of translation results */ -int -nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, - unsigned int prog_start, unsigned int prog_done, - unsigned int prog_sz, struct nfp_bpf_result *res) +int nfp_bpf_jit(struct nfp_prog *nfp_prog, struct bpf_prog *filter) { - struct nfp_prog *nfp_prog; int ret; - nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); - if (!nfp_prog) - return -ENOMEM; - - INIT_LIST_HEAD(&nfp_prog->insns); - nfp_prog->type = filter->type; - nfp_prog->start_off = prog_start; - nfp_prog->tgt_done = prog_done; - - ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len); - if (ret) - goto out; - ret = nfp_prog_verify(nfp_prog, filter); if (ret) - goto out; + return ret; ret = nfp_bpf_optimize(nfp_prog); if (ret) - goto out; - - nfp_prog->prog = prog_mem; - nfp_prog->__prog_alloc_len = prog_sz; + return ret; ret = nfp_translate(nfp_prog); if (ret) { pr_err("Translation failed with error %d (translated: %u)\n", ret, nfp_prog->n_translated); - ret = -EINVAL; - goto out; + return -EINVAL; } - ret = nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)prog_mem); - - res->n_instr = nfp_prog->prog_len; -out: - nfp_prog_free(nfp_prog); - - return ret; + return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index b77231a134b9..36b4eda2d3f8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -169,19 +169,7 @@ struct nfp_prog { struct list_head insns; }; -struct nfp_bpf_result { - unsigned int n_instr; -}; - -int -nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, - unsigned int cnt); -void nfp_prog_free(struct nfp_prog *nfp_prog); - -int -nfp_bpf_jit(struct bpf_prog *filter, void *prog, - unsigned int prog_start, unsigned int prog_done, - unsigned int prog_sz, struct nfp_bpf_result *res); +int nfp_bpf_jit(struct nfp_prog *nfp_prog, struct bpf_prog *filter); int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 3eeee200051e..c5546c0e87d8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ 
b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -51,7 +51,7 @@ #include "../nfp_net_ctrl.h" #include "../nfp_net.h" -int +static int nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, unsigned int cnt) { @@ -73,7 +73,7 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, return 0; } -void nfp_prog_free(struct nfp_prog *nfp_prog) +static void nfp_prog_free(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta, *tmp; @@ -84,25 +84,36 @@ void nfp_prog_free(struct nfp_prog *nfp_prog) kfree(nfp_prog); } -static int -nfp_net_bpf_offload_prepare(struct nfp_net *nn, struct bpf_prog *prog, - struct nfp_bpf_result *res, - void **code, dma_addr_t *dma_addr, u16 max_instr) +static struct nfp_prog *nfp_bpf_verifier_prep(struct bpf_prog *prog) { - unsigned int code_sz = max_instr * sizeof(u64); - unsigned int stack_size; - u16 start_off, done_off; - unsigned int max_mtu; + struct nfp_prog *nfp_prog; int ret; - max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - if (max_mtu < nn->dp.netdev->mtu) { - nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); - return -EOPNOTSUPP; - } + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return NULL; - start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); - done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->type = prog->type; + + ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len); + if (ret) + goto err_free; + + return nfp_prog; + +err_free: + nfp_prog_free(nfp_prog); + + return NULL; +} + +static int +nfp_bpf_translate(struct nfp_net *nn, struct nfp_prog *nfp_prog, + struct bpf_prog *prog) +{ + unsigned int stack_size; + unsigned int max_instr; stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; if (prog->aux->stack_depth > stack_size) { @@ -111,28 +122,68 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, struct bpf_prog *prog, return -EOPNOTSUPP; } - *code = dma_zalloc_coherent(nn->dp.dev, code_sz, dma_addr, GFP_KERNEL); - if (!*code) + nfp_prog->stack_depth = prog->aux->stack_depth; + nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); + + nfp_prog->prog = kmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); + if (!nfp_prog->prog) return -ENOMEM; - ret = nfp_bpf_jit(prog, *code, start_off, done_off, max_instr, res); - if (ret) - goto out; + return nfp_bpf_jit(nfp_prog, prog); +} + +static void nfp_bpf_destroy(struct nfp_prog *nfp_prog) +{ + kfree(nfp_prog->prog); + nfp_prog_free(nfp_prog); +} + +static struct nfp_prog * +nfp_net_bpf_offload_prepare(struct nfp_net *nn, struct bpf_prog *prog, + dma_addr_t *dma_addr) +{ + struct nfp_prog *nfp_prog; + unsigned int max_mtu; + int err; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (max_mtu < nn->dp.netdev->mtu) { + nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); + return NULL; + } + + nfp_prog = nfp_bpf_verifier_prep(prog); + if (!nfp_prog) + return NULL; + + err = nfp_bpf_translate(nn, nfp_prog, prog); + if (err) + goto err_destroy_prog; + + *dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog, + nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + if (dma_mapping_error(nn->dp.dev, *dma_addr)) + goto err_destroy_prog; return 0; -out: - dma_free_coherent(nn->dp.dev, code_sz, *code, *dma_addr); - return ret; 
+err_destroy_prog: + nfp_bpf_destroy(nfp_prog); + return NULL; } static void -nfp_net_bpf_load(struct nfp_net *nn, void *code, dma_addr_t dma_addr, - unsigned int code_sz, unsigned int n_instr) +nfp_net_bpf_load(struct nfp_net *nn, struct nfp_prog *nfp_prog, + dma_addr_t dma_addr) { int err; - nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); /* Load up the JITed code */ @@ -140,7 +191,9 @@ nfp_net_bpf_load(struct nfp_net *nn, void *code, dma_addr_t dma_addr, if (err) nn_err(nn, "FW command error while loading BPF: %d\n", err); - dma_free_coherent(nn->dp.dev, code_sz, code, dma_addr); + dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + nfp_bpf_destroy(nfp_prog); } static void nfp_net_bpf_start(struct nfp_net *nn) @@ -169,11 +222,8 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, bool old_prog) { - struct nfp_bpf_result res; + struct nfp_prog *nfp_prog; dma_addr_t dma_addr; - u16 max_instr; - void *code; - int err; if (prog && old_prog) { u8 cap; @@ -192,15 +242,11 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, if (old_prog && !prog) return nfp_net_bpf_stop(nn); - max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + nfp_prog = nfp_net_bpf_offload_prepare(nn, prog, &dma_addr); + if (!nfp_prog) + return -EINVAL; - err = nfp_net_bpf_offload_prepare(nn, prog, &res, &code, &dma_addr, - max_instr); - if (err) - return err; - - nfp_net_bpf_load(nn, code, dma_addr, max_instr * sizeof(u64), - res.n_instr); + nfp_net_bpf_load(nn, nfp_prog, dma_addr); if (!old_prog) nfp_net_bpf_start(nn); From c6c580d7bc390f864488c66153a487057e76d9d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:29 -0700 Subject: [PATCH 1855/2177] nfp: bpf: move to new BPF program offload infrastructure Following steps are taken in the driver to offload an XDP program: XDP_SETUP_PROG: * prepare: - allocate program state; - run verifier (bpf_analyzer()); - run translation; * load: - stop old program if needed; - load program; - enable BPF if not enabled; * clean up: - free program image. With new infrastructure the flow will look like this: BPF_OFFLOAD_VERIFIER_PREP: - allocate program state; BPF_OFFLOAD_TRANSLATE: - run translation; XDP_SETUP_PROG: - stop old program if needed; - load program; - enable BPF if not enabled; BPF_OFFLOAD_DESTROY: - free program image. Take advantage of the new infrastructure. Allocation of driver metadata has to be moved from jit.c to offload.c since it's now done at a different stage. Since there is no separate driver private data for verification step, move temporary nfp_meta pointer into nfp_prog. We will now use user space context offsets. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 35 +++----- drivers/net/ethernet/netronome/nfp/bpf/main.c | 4 + drivers/net/ethernet/netronome/nfp/bpf/main.h | 15 +++- .../net/ethernet/netronome/nfp/bpf/offload.c | 85 +++++++++---------- .../net/ethernet/netronome/nfp/bpf/verifier.c | 43 ++-------- drivers/net/ethernet/netronome/nfp/nfp_app.h | 37 ++++++++ .../ethernet/netronome/nfp/nfp_net_common.c | 8 ++ 7 files changed, 121 insertions(+), 106 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index eae7a137a7a8..995e95410b11 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1427,19 +1427,18 @@ static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg dst = reg_both(meta->insn.dst_reg * 2); switch (meta->insn.off) { - case offsetof(struct sk_buff, len): - if (size != FIELD_SIZEOF(struct sk_buff, len)) + case offsetof(struct __sk_buff, len): + if (size != FIELD_SIZEOF(struct __sk_buff, len)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); break; - case offsetof(struct sk_buff, data): - if (size != sizeof(void *)) + case offsetof(struct __sk_buff, data): + if (size != FIELD_SIZEOF(struct __sk_buff, data)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); break; - case offsetof(struct sk_buff, cb) + - offsetof(struct bpf_skb_data_end, data_end): - if (size != sizeof(void *)) + case offsetof(struct __sk_buff, data_end): + if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) return -EOPNOTSUPP; emit_alu(nfp_prog, dst, plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); @@ -1458,14 +1457,15 @@ static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, { swreg dst = reg_both(meta->insn.dst_reg * 2); - if (size != sizeof(void *)) - return -EINVAL; - switch (meta->insn.off) { - case offsetof(struct xdp_buff, data): + case offsetof(struct xdp_md, data): + if (size != FIELD_SIZEOF(struct xdp_md, data)) + return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); break; - case offsetof(struct xdp_buff, data_end): + case offsetof(struct xdp_md, data_end): + if (size != FIELD_SIZEOF(struct xdp_md, data_end)) + return -EOPNOTSUPP; emit_alu(nfp_prog, dst, plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); break; @@ -2243,19 +2243,10 @@ static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) return 0; } -/** - * nfp_bpf_jit() - translate BPF code into NFP assembly - * @nfp_prog: nfp_prog prepared based on @filter - * @filter: kernel BPF filter struct - */ -int nfp_bpf_jit(struct nfp_prog *nfp_prog, struct bpf_prog *filter) +int nfp_bpf_jit(struct nfp_prog *nfp_prog) { int ret; - ret = nfp_prog_verify(nfp_prog, filter); - if (ret) - return ret; - ret = nfp_bpf_optimize(nfp_prog); if (ret) return ret; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 7ae7528cd96b..e379b78e86ef 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -173,4 +173,8 @@ const struct nfp_app_type app_bpf = { .setup_tc = nfp_bpf_setup_tc, .tc_busy = nfp_bpf_tc_busy, .xdp_offload = nfp_bpf_xdp_offload, + + .bpf_verifier_prep = nfp_bpf_verifier_prep, + .bpf_translate = nfp_bpf_translate, + .bpf_destroy = nfp_bpf_destroy, }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 36b4eda2d3f8..082a15f6dfb5 100644 --- 
a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -139,6 +139,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) * @prog: machine code * @prog_len: number of valid instructions in @prog array * @__prog_alloc_len: alloc size of @prog array + * @verifier_meta: temporary storage for verifier's insn meta * @type: BPF program type * @start_off: address of the first instruction in the memory * @tgt_out: jump target for normal exit @@ -154,6 +155,8 @@ struct nfp_prog { unsigned int prog_len; unsigned int __prog_alloc_len; + struct nfp_insn_meta *verifier_meta; + enum bpf_prog_type type; unsigned int start_off; @@ -169,13 +172,21 @@ struct nfp_prog { struct list_head insns; }; -int nfp_bpf_jit(struct nfp_prog *nfp_prog, struct bpf_prog *filter); +int nfp_bpf_jit(struct nfp_prog *prog); -int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); +extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; +struct netdev_bpf; +struct nfp_app; struct nfp_net; int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, bool old_prog); +int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); +int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); +int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index c5546c0e87d8..b6cee71f49d3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -84,14 +84,17 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) kfree(nfp_prog); } -static struct nfp_prog *nfp_bpf_verifier_prep(struct bpf_prog *prog) +int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) { + struct bpf_prog *prog = bpf->verifier.prog; struct nfp_prog *nfp_prog; int ret; nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); if (!nfp_prog) - return NULL; + return -ENOMEM; + prog->aux->offload->dev_priv = nfp_prog; INIT_LIST_HEAD(&nfp_prog->insns); nfp_prog->type = prog->type; @@ -100,18 +103,21 @@ static struct nfp_prog *nfp_bpf_verifier_prep(struct bpf_prog *prog) if (ret) goto err_free; - return nfp_prog; + nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); + bpf->verifier.ops = &nfp_bpf_analyzer_ops; + + return 0; err_free: nfp_prog_free(nfp_prog); - return NULL; + return ret; } -static int -nfp_bpf_translate(struct nfp_net *nn, struct nfp_prog *nfp_prog, - struct bpf_prog *prog) +int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) { + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int stack_size; unsigned int max_instr; @@ -133,55 +139,38 @@ nfp_bpf_translate(struct nfp_net *nn, struct nfp_prog *nfp_prog, if (!nfp_prog->prog) return -ENOMEM; - return nfp_bpf_jit(nfp_prog, prog); + return nfp_bpf_jit(nfp_prog); } -static void nfp_bpf_destroy(struct nfp_prog *nfp_prog) +int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) { + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + kfree(nfp_prog->prog); nfp_prog_free(nfp_prog); + + return 0; } -static struct nfp_prog * -nfp_net_bpf_offload_prepare(struct nfp_net *nn, struct bpf_prog *prog, - dma_addr_t *dma_addr) +static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) { - struct nfp_prog *nfp_prog; + struct nfp_prog 
*nfp_prog = prog->aux->offload->dev_priv; unsigned int max_mtu; + dma_addr_t dma_addr; int err; max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; if (max_mtu < nn->dp.netdev->mtu) { nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); - return NULL; + return -EOPNOTSUPP; } - nfp_prog = nfp_bpf_verifier_prep(prog); - if (!nfp_prog) - return NULL; - - err = nfp_bpf_translate(nn, nfp_prog, prog); - if (err) - goto err_destroy_prog; - - *dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog, - nfp_prog->prog_len * sizeof(u64), - DMA_TO_DEVICE); - if (dma_mapping_error(nn->dp.dev, *dma_addr)) - goto err_destroy_prog; - - return 0; - -err_destroy_prog: - nfp_bpf_destroy(nfp_prog); - return NULL; -} - -static void -nfp_net_bpf_load(struct nfp_net *nn, struct nfp_prog *nfp_prog, - dma_addr_t dma_addr) -{ - int err; + dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog, + nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + if (dma_mapping_error(nn->dp.dev, dma_addr)) + return -ENOMEM; nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); @@ -193,7 +182,8 @@ nfp_net_bpf_load(struct nfp_net *nn, struct nfp_prog *nfp_prog, dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), DMA_TO_DEVICE); - nfp_bpf_destroy(nfp_prog); + + return err; } static void nfp_net_bpf_start(struct nfp_net *nn) @@ -222,8 +212,10 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, bool old_prog) { - struct nfp_prog *nfp_prog; - dma_addr_t dma_addr; + int err; + + if (prog && !prog->aux->offload) + return -EINVAL; if (prog && old_prog) { u8 cap; @@ -242,11 +234,10 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, if (old_prog && !prog) return nfp_net_bpf_stop(nn); - nfp_prog = nfp_net_bpf_offload_prepare(nn, prog, &dma_addr); - if (!nfp_prog) - return -EINVAL; + err = nfp_net_bpf_load(nn, prog); + if (err) + return err; - nfp_net_bpf_load(nn, nfp_prog, dma_addr); if (!old_prog) nfp_net_bpf_start(nn); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 4f31bdefd331..8d43491ddd6b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -40,12 +40,6 @@ #include "main.h" -/* Analyzer/verifier definitions */ -struct nfp_bpf_analyzer_priv { - struct nfp_prog *prog; - struct nfp_insn_meta *meta; -}; - static struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns) @@ -171,11 +165,11 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { - struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv; - struct nfp_insn_meta *meta = priv->meta; + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; - meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); - priv->meta = meta; + meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len); + nfp_prog->verifier_meta = meta; if (meta->insn.src_reg >= MAX_BPF_REG || meta->insn.dst_reg >= MAX_BPF_REG) { @@ -184,39 +178,18 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) } if (meta->insn.code == (BPF_JMP | BPF_EXIT)) - return nfp_bpf_check_exit(priv->prog, 
env); + return nfp_bpf_check_exit(nfp_prog, env); if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) - return nfp_bpf_check_ptr(priv->prog, meta, env, + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.src_reg); if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) - return nfp_bpf_check_ptr(priv->prog, meta, env, + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); return 0; } -static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { +const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { .insn_hook = nfp_verify_insn, }; - -int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) -{ - struct nfp_bpf_analyzer_priv *priv; - int ret; - - nfp_prog->stack_depth = prog->aux->stack_depth; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->prog = nfp_prog; - priv->meta = nfp_prog_first_meta(nfp_prog); - - ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv); - - kfree(priv); - - return ret; -} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 857bb33020ba..54b67c9b8d5b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -42,6 +42,7 @@ struct bpf_prog; struct net_device; +struct netdev_bpf; struct pci_dev; struct sk_buff; struct sk_buff; @@ -83,6 +84,9 @@ extern const struct nfp_app_type app_flower; * @setup_tc: setup TC ndo * @tc_busy: TC HW offload busy (rules loaded) * @xdp_offload: offload an XDP program + * @bpf_verifier_prep: verifier prep for dev-specific BPF programs + * @bpf_translate: translate call for dev-specific BPF programs + * @bpf_destroy: destroy for dev-specific BPF programs * @eswitch_mode_get: get SR-IOV eswitch mode * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up @@ -118,6 +122,12 @@ struct nfp_app_type { bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn); int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog); + int (*bpf_verifier_prep)(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); + int (*bpf_translate)(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); + int (*bpf_destroy)(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); int (*sriov_enable)(struct nfp_app *app, int num_vfs); void (*sriov_disable)(struct nfp_app *app); @@ -271,6 +281,33 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, return app->type->xdp_offload(app, nn, prog); } +static inline int +nfp_app_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) +{ + if (!app || !app->type->bpf_verifier_prep) + return -EOPNOTSUPP; + return app->type->bpf_verifier_prep(app, nn, bpf); +} + +static inline int +nfp_app_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) +{ + if (!app || !app->type->bpf_translate) + return -EOPNOTSUPP; + return app->type->bpf_translate(app, nn, prog); +} + +static inline int +nfp_app_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) +{ + if (!app || !app->type->bpf_destroy) + return -EOPNOTSUPP; + return app->type->bpf_destroy(app, nn, prog); +} + static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) { trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 
f6c6ad4e8a59..232044b1b7aa 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3393,6 +3393,14 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) xdp->prog_attached = XDP_ATTACHED_HW; xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; return 0; + case BPF_OFFLOAD_VERIFIER_PREP: + return nfp_app_bpf_verifier_prep(nn->app, nn, xdp); + case BPF_OFFLOAD_TRANSLATE: + return nfp_app_bpf_translate(nn->app, nn, + xdp->offload.prog); + case BPF_OFFLOAD_DESTROY: + return nfp_app_bpf_destroy(nn->app, nn, + xdp->offload.prog); default: return -EINVAL; } From b37a530613104aa3f592376c67a462823298759c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:30 -0700 Subject: [PATCH 1856/2177] bpf: remove old offload/analyzer Thanks to the ability to load a program for a specific device, running verifier twice is no longer needed. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 5 --- kernel/bpf/verifier.c | 75 ------------------------------------ net/core/filter.c | 42 -------------------- 3 files changed, 122 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e45011dbc02d..07b96aaca256 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -152,9 +152,7 @@ struct bpf_verifier_env { bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ - const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */ - void *analyzer_priv; /* pointer to external analyzer's private data */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -179,7 +177,4 @@ int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) } #endif -int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, - void *priv); - #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 51aabb32ad67..add845fe788a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -949,9 +949,6 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, */ *reg_type = info.reg_type; - if (env->analyzer_ops) - return 0; - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) @@ -3736,9 +3733,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { - if (env->analyzer_ops && env->analyzer_ops->insn_hook) - return env->analyzer_ops->insn_hook(env, insn_idx, - prev_insn_idx); if (env->dev_ops && env->dev_ops->insn_hook) return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); @@ -4601,72 +4595,3 @@ err_free_env: kfree(env); return ret; } - -static const struct bpf_verifier_ops * const bpf_analyzer_ops[] = { -#ifdef CONFIG_NET - [BPF_PROG_TYPE_XDP] = &xdp_analyzer_ops, - [BPF_PROG_TYPE_SCHED_CLS] = &tc_cls_act_analyzer_ops, -#endif -}; - -int bpf_analyzer(struct bpf_prog *prog, const struct 
bpf_ext_analyzer_ops *ops, - void *priv) -{ - struct bpf_verifier_env *env; - int ret; - - if (prog->type >= ARRAY_SIZE(bpf_analyzer_ops) || - !bpf_analyzer_ops[prog->type]) - return -EOPNOTSUPP; - - env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); - if (!env) - return -ENOMEM; - - env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * - prog->len); - ret = -ENOMEM; - if (!env->insn_aux_data) - goto err_free_env; - env->prog = prog; - env->ops = bpf_analyzer_ops[env->prog->type]; - env->analyzer_ops = ops; - env->analyzer_priv = priv; - - /* grab the mutex to protect few globals used by verifier */ - mutex_lock(&bpf_verifier_lock); - - env->strict_alignment = false; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - env->strict_alignment = true; - - env->explored_states = kcalloc(env->prog->len, - sizeof(struct bpf_verifier_state_list *), - GFP_KERNEL); - ret = -ENOMEM; - if (!env->explored_states) - goto skip_full_check; - - ret = check_cfg(env); - if (ret < 0) - goto skip_full_check; - - env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); - - ret = do_check(env); - if (env->cur_state) { - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; - } - -skip_full_check: - while (!pop_stack(env, NULL, NULL)); - free_states(env); - - mutex_unlock(&bpf_verifier_lock); - vfree(env->insn_aux_data); -err_free_env: - kfree(env); - return ret; -} -EXPORT_SYMBOL_GPL(bpf_analyzer); diff --git a/net/core/filter.c b/net/core/filter.c index a0112168d6f9..1afa17935954 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3777,25 +3777,6 @@ static bool tc_cls_act_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, info); } -static bool -tc_cls_act_is_valid_access_analyzer(int off, int size, - enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ - switch (off) { - case offsetof(struct sk_buff, len): - return true; - case offsetof(struct sk_buff, data): - info->reg_type = PTR_TO_PACKET; - return true; - case offsetof(struct sk_buff, cb) + - offsetof(struct bpf_skb_data_end, data_end): - info->reg_type = PTR_TO_PACKET_END; - return true; - } - return false; -} - static bool __is_valid_xdp_access(int off, int size) { if (off < 0 || off >= sizeof(struct xdp_md)) @@ -3830,21 +3811,6 @@ static bool xdp_is_valid_access(int off, int size, return __is_valid_xdp_access(off, size); } -static bool xdp_is_valid_access_analyzer(int off, int size, - enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ - switch (off) { - case offsetof(struct xdp_buff, data): - info->reg_type = PTR_TO_PACKET; - return true; - case offsetof(struct xdp_buff, data_end): - info->reg_type = PTR_TO_PACKET_END; - return true; - } - return false; -} - void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; @@ -4516,10 +4482,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .gen_prologue = tc_cls_act_prologue, }; -const struct bpf_verifier_ops tc_cls_act_analyzer_ops = { - .is_valid_access = tc_cls_act_is_valid_access_analyzer, -}; - const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; @@ -4530,10 +4492,6 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .convert_ctx_access = xdp_convert_ctx_access, }; -const struct bpf_verifier_ops xdp_analyzer_ops = { - .is_valid_access = xdp_is_valid_access_analyzer, -}; - const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; From 0b1c27db12fd338ed912fec18f5cc02d7bd4e54e Mon Sep 17 00:00:00 2001 From: Quentin Monnet 
Date: Fri, 3 Nov 2017 13:59:07 -0700 Subject: [PATCH 1857/2177] tools: bpftool: move p_err() and p_info() from main.h to common.c The two functions were declared as static inline in a header file. There is no particular reason why they should be inlined, they just happened to remain in the same header file when they were turned from macros to functions in a precious commit. Make them non-inlined functions and move them to common.c file instead. Suggested-by: Joe Perches Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/bpf/bpftool/common.c | 31 +++++++++++++++++++++++++++++++ tools/bpf/bpftool/main.h | 34 +++------------------------------- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index f0288269dae8..aa7017098b2a 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -50,6 +50,37 @@ #include "main.h" +void p_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "error"); + jsonw_vprintf_enquote(json_wtr, fmt, ap); + jsonw_end_object(json_wtr); + } else { + fprintf(stderr, "Error: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + } + va_end(ap); +} + +void p_info(const char *fmt, ...) +{ + va_list ap; + + if (json_output) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + static bool is_bpffs(char *path) { struct statfs st_fs; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d315d01be645..ff5ad05b137b 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -71,6 +71,9 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; +void p_err(const char *fmt, ...); +void p_info(const char *fmt, ...); + bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __attribute__((noreturn)); @@ -97,35 +100,4 @@ int prog_parse_fd(int *argc, char ***argv); void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); void print_hex_data_json(uint8_t *data, size_t len); -static inline void p_err(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - if (json_output) { - jsonw_start_object(json_wtr); - jsonw_name(json_wtr, "error"); - jsonw_vprintf_enquote(json_wtr, fmt, ap); - jsonw_end_object(json_wtr); - } else { - fprintf(stderr, "Error: "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - } - va_end(ap); -} - -static inline void p_info(const char *fmt, ...) -{ - va_list ap; - - if (json_output) - return; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - va_end(ap); -} - #endif From 952484610cc2f67303be4feedb0e52a519c31470 Mon Sep 17 00:00:00 2001 From: Intiyaz Basha Date: Fri, 3 Nov 2017 14:32:33 -0700 Subject: [PATCH 1858/2177] liquidio: do not consider packets dropped by network stack as driver Rx dropped netdev->rx_dropped was including packets dropped by napi_gro_receive. If a packet is dropped by network stack, it should not be counted under driver Rx dropped. Made necessary changes to not include network stack drops under netdev->rx_dropped. Signed-off-by: Intiyaz Basha Signed-off-by: Satanand Burla Signed-off-by: Felix Manlunas Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/liquidio/lio_core.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 89b7820d59ce..32ae63b6f20e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -467,7 +467,6 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)), if (netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - int packet_was_received; /* Do not proceed if the interface is not in RUNNING state. */ if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) { @@ -570,18 +569,10 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)), __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag); } - packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP); - - if (packet_was_received) { - droq->stats.rx_bytes_received += len; - droq->stats.rx_pkts_received++; - } else { - droq->stats.rx_dropped++; - netif_info(lio, rx_err, lio->netdev, - "droq:%d error rx_dropped:%llu\n", - droq->q_no, droq->stats.rx_dropped); - } + napi_gro_receive(napi, skb); + droq->stats.rx_bytes_received += len; + droq->stats.rx_pkts_received++; } else { recv_buffer_free(skb); } From 99feaafcdb566e8f032e7acc2a303713ad6bf196 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:20 -0400 Subject: [PATCH 1859/2177] net: dsa: make switch index unsigned Define the DSA switch index as an unsigned int, because it will never be less than 0. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 50e276dc4c01..fa1c21ab8092 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -209,7 +209,7 @@ struct dsa_switch { * Parent switch tree, and switch index. */ struct dsa_switch_tree *dst; - int index; + unsigned int index; /* Listener for switch fabric events */ struct notifier_block nb; From 49463b7f2da1a115404b02c5533bc2c2125833a3 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:21 -0400 Subject: [PATCH 1860/2177] net: dsa: make tree index unsigned Similarly to a DSA switch and port, rename the tree index from "tree" to "index" and make it an unsigned int because it isn't supposed to be less than 0. u32 is an OF specific data used to retrieve the value and has no need to be propagated up to the tree index. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/net/dsa.h | 2 +- net/dsa/dsa2.c | 14 +++++++------- net/dsa/slave.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index fa1c21ab8092..e54332968417 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,7 +116,7 @@ struct dsa_switch_tree { struct raw_notifier_head nh; /* Tree identifier */ - u32 tree; + unsigned int index; /* Number of switches attached to this tree */ struct kref refcount; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 797d1156b4e6..8b68dc2f5707 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -27,12 +27,12 @@ static DEFINE_MUTEX(dsa2_mutex); static const struct devlink_ops dsa_devlink_ops = { }; -static struct dsa_switch_tree *dsa_get_dst(u32 tree) +static struct dsa_switch_tree *dsa_get_dst(unsigned int index) { struct dsa_switch_tree *dst; list_for_each_entry(dst, &dsa_switch_trees, list) - if (dst->tree == tree) { + if (dst->index == index) { kref_get(&dst->refcount); return dst; } @@ -53,14 +53,14 @@ static void dsa_put_dst(struct dsa_switch_tree *dst) kref_put(&dst->refcount, dsa_free_dst); } -static struct dsa_switch_tree *dsa_add_dst(u32 tree) +static struct dsa_switch_tree *dsa_add_dst(unsigned int index) { struct dsa_switch_tree *dst; dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) return NULL; - dst->tree = tree; + dst->index = index; INIT_LIST_HEAD(&dst->list); list_add_tail(&dsa_switch_trees, &dst->list); kref_init(&dst->refcount); @@ -454,7 +454,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dst->cpu_dp = NULL; - pr_info("DSA: tree %d unapplied\n", dst->tree); + pr_info("DSA: tree %d unapplied\n", dst->index); dst->applied = false; } @@ -504,7 +504,7 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) } - pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index); + pr_info("DSA: switch %d %d parsed\n", dst->index, ds->index); return 0; } @@ -549,7 +549,7 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) } } - pr_info("DSA: tree %d parsed\n", dst->tree); + pr_info("DSA: tree %d parsed\n", dst->index); return 0; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9b75d0ac4092..814ced75a0cc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -55,7 +55,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->read = dsa_slave_phy_read; ds->slave_mii_bus->write = dsa_slave_phy_write; snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", - ds->dst->tree, ds->index); + ds->dst->index, ds->index); ds->slave_mii_bus->parent = ds->dev; ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; } From 8e5bf9759a06be2251fa96cfd8b412f1808c62f9 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:22 -0400 Subject: [PATCH 1861/2177] net: dsa: simplify tree reference counting DSA trees have a refcount used to automatically free the dsa_switch_tree structure once there is no switch devices inside of it. The refcount is incremented when a switch is added to the tree, and decremented when it is removed from it. But because of kref_init, the refcount is also incremented at initialization, and when looking up the tree from the list for symmetry. Thus the current code stores the number of switches plus one, and makes the switch registration more complex. To simplify the switch registration function, we reset the refcount to zero after initialization and don't increment it when looking up a tree. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 8b68dc2f5707..d3f1a7607463 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -32,10 +32,9 @@ static struct dsa_switch_tree *dsa_get_dst(unsigned int index) struct dsa_switch_tree *dst; list_for_each_entry(dst, &dsa_switch_trees, list) - if (dst->index == index) { - kref_get(&dst->refcount); + if (dst->index == index) return dst; - } + return NULL; } @@ -48,11 +47,6 @@ static void dsa_free_dst(struct kref *ref) kfree(dst); } -static void dsa_put_dst(struct dsa_switch_tree *dst) -{ - kref_put(&dst->refcount, dsa_free_dst); -} - static struct dsa_switch_tree *dsa_add_dst(unsigned int index) { struct dsa_switch_tree *dst; @@ -63,7 +57,10 @@ static struct dsa_switch_tree *dsa_add_dst(unsigned int index) dst->index = index; INIT_LIST_HEAD(&dst->list); list_add_tail(&dsa_switch_trees, &dst->list); + + /* Initialize the reference counter to the number of switches, not 1 */ kref_init(&dst->refcount); + refcount_set(&dst->refcount.refcount, 0); return dst; } @@ -739,10 +736,8 @@ static int _dsa_register_switch(struct dsa_switch *ds) return -ENOMEM; } - if (dst->ds[index]) { - err = -EBUSY; - goto out; - } + if (dst->ds[index]) + return -EBUSY; ds->dst = dst; ds->index = index; @@ -758,11 +753,9 @@ static int _dsa_register_switch(struct dsa_switch *ds) if (err < 0) goto out_del_dst; - if (err == 1) { - /* Not all switches registered yet */ - err = 0; - goto out; - } + /* Not all switches registered yet */ + if (err == 1) + return 0; if (dst->applied) { pr_info("DSA: Disjoint trees?\n"); @@ -779,13 +772,10 @@ static int _dsa_register_switch(struct dsa_switch *ds) goto out_del_dst; } - dsa_put_dst(dst); return 0; out_del_dst: dsa_dst_del_ds(dst, ds, ds->index); -out: - dsa_put_dst(dst); return err; } From 65254108b4655dc55e8d8f62ee895960085e73f4 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:23 -0400 Subject: [PATCH 1862/2177] net: dsa: get and put tree reference counting Provide convenient dsa_tree_get and dsa_tree_put functions scoping a DSA tree used to increment and decrement its reference counter, instead of poking directly its kref structure. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index d3f1a7607463..609d92684505 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -38,15 +38,6 @@ static struct dsa_switch_tree *dsa_get_dst(unsigned int index) return NULL; } -static void dsa_free_dst(struct kref *ref) -{ - struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree, - refcount); - - list_del(&dst->list); - kfree(dst); -} - static struct dsa_switch_tree *dsa_add_dst(unsigned int index) { struct dsa_switch_tree *dst; @@ -65,10 +56,35 @@ static struct dsa_switch_tree *dsa_add_dst(unsigned int index) return dst; } +static void dsa_tree_free(struct dsa_switch_tree *dst) +{ + list_del(&dst->list); + kfree(dst); +} + +static void dsa_tree_get(struct dsa_switch_tree *dst) +{ + kref_get(&dst->refcount); +} + +static void dsa_tree_release(struct kref *ref) +{ + struct dsa_switch_tree *dst; + + dst = container_of(ref, struct dsa_switch_tree, refcount); + + dsa_tree_free(dst); +} + +static void dsa_tree_put(struct dsa_switch_tree *dst) +{ + kref_put(&dst->refcount, dsa_tree_release); +} + static void dsa_dst_add_ds(struct dsa_switch_tree *dst, struct dsa_switch *ds, u32 index) { - kref_get(&dst->refcount); + dsa_tree_get(dst); dst->ds[index] = ds; } @@ -76,7 +92,7 @@ static void dsa_dst_del_ds(struct dsa_switch_tree *dst, struct dsa_switch *ds, u32 index) { dst->ds[index] = NULL; - kref_put(&dst->refcount, dsa_free_dst); + dsa_tree_put(dst); } /* For platform data configurations, we need to have a valid name argument to From 1ca28ec9abff927178b1ea9d6431e4c320145b10 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:24 -0400 Subject: [PATCH 1863/2177] net: dsa: provide a find or new tree helper Rename dsa_get_dst to dsa_tree_find since it doesn't increment the reference counter, rename dsa_add_dst to dsa_tree_alloc for symmetry with dsa_tree_free, and provide a convenient dsa_tree_touch function to find or allocate a new tree. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 609d92684505..bda222cfc02c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,33 +21,35 @@ #include "dsa_priv.h" -static LIST_HEAD(dsa_switch_trees); +static LIST_HEAD(dsa_tree_list); static DEFINE_MUTEX(dsa2_mutex); static const struct devlink_ops dsa_devlink_ops = { }; -static struct dsa_switch_tree *dsa_get_dst(unsigned int index) +static struct dsa_switch_tree *dsa_tree_find(int index) { struct dsa_switch_tree *dst; - list_for_each_entry(dst, &dsa_switch_trees, list) + list_for_each_entry(dst, &dsa_tree_list, list) if (dst->index == index) return dst; return NULL; } -static struct dsa_switch_tree *dsa_add_dst(unsigned int index) +static struct dsa_switch_tree *dsa_tree_alloc(int index) { struct dsa_switch_tree *dst; dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) return NULL; + dst->index = index; + INIT_LIST_HEAD(&dst->list); - list_add_tail(&dsa_switch_trees, &dst->list); + list_add_tail(&dsa_tree_list, &dst->list); /* Initialize the reference counter to the number of switches, not 1 */ kref_init(&dst->refcount); @@ -62,6 +64,17 @@ static void dsa_tree_free(struct dsa_switch_tree *dst) kfree(dst); } +static struct dsa_switch_tree *dsa_tree_touch(int index) +{ + struct dsa_switch_tree *dst; + + dst = dsa_tree_find(index); + if (!dst) + dst = dsa_tree_alloc(index); + + return dst; +} + static void dsa_tree_get(struct dsa_switch_tree *dst) { kref_get(&dst->refcount); @@ -745,12 +758,9 @@ static int _dsa_register_switch(struct dsa_switch *ds) return err; } - dst = dsa_get_dst(tree); - if (!dst) { - dst = dsa_add_dst(tree); - if (!dst) - return -ENOMEM; - } + dst = dsa_tree_touch(tree); + if (!dst) + return -ENOMEM; if (dst->ds[index]) return -EBUSY; From 6da2a940ac6a0680e50b3aaf945e409cea03c346 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:25 -0400 Subject: [PATCH 1864/2177] net: dsa: rework switch addition and removal This patch removes the unnecessary index argument from the dsa_dst_add_ds and dsa_dst_del_ds functions and renames them to dsa_tree_add_switch and dsa_tree_remove_switch respectively. In addition to a more explicit scope, we now check the presence of an existing switch with the same index directly within dsa_tree_add_switch. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index bda222cfc02c..5b6a3dad8015 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -94,20 +94,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst) kref_put(&dst->refcount, dsa_tree_release); } -static void dsa_dst_add_ds(struct dsa_switch_tree *dst, - struct dsa_switch *ds, u32 index) -{ - dsa_tree_get(dst); - dst->ds[index] = ds; -} - -static void dsa_dst_del_ds(struct dsa_switch_tree *dst, - struct dsa_switch *ds, u32 index) -{ - dst->ds[index] = NULL; - dsa_tree_put(dst); -} - /* For platform data configurations, we need to have a valid name argument to * differentiate a disabled port from an enabled one */ @@ -484,6 +470,27 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dst->applied = false; } +static void dsa_tree_remove_switch(struct dsa_switch_tree *dst, + unsigned int index) +{ + dst->ds[index] = NULL; + dsa_tree_put(dst); +} + +static int dsa_tree_add_switch(struct dsa_switch_tree *dst, + struct dsa_switch *ds) +{ + unsigned int index = ds->index; + + if (dst->ds[index]) + return -EBUSY; + + dsa_tree_get(dst); + dst->ds[index] = ds; + + return 0; +} + static int dsa_cpu_parse(struct dsa_port *port, u32 index, struct dsa_switch_tree *dst, struct dsa_switch *ds) @@ -762,9 +769,6 @@ static int _dsa_register_switch(struct dsa_switch *ds) if (!dst) return -ENOMEM; - if (dst->ds[index]) - return -EBUSY; - ds->dst = dst; ds->index = index; ds->cd = pdata; @@ -773,7 +777,9 @@ static int _dsa_register_switch(struct dsa_switch *ds) for (i = 0; i < DSA_MAX_SWITCHES; ++i) ds->rtable[i] = DSA_RTABLE_NONE; - dsa_dst_add_ds(dst, ds, index); + err = dsa_tree_add_switch(dst, ds); + if (err) + return err; err = dsa_dst_complete(dst); if (err < 0) @@ -801,7 +807,7 @@ static int _dsa_register_switch(struct dsa_switch *ds) return 0; out_del_dst: - dsa_dst_del_ds(dst, ds, ds->index); + dsa_tree_remove_switch(dst, index); return err; } @@ -843,10 +849,11 @@ EXPORT_SYMBOL_GPL(dsa_register_switch); static void _dsa_unregister_switch(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; + unsigned int index = ds->index; dsa_dst_unapply(dst); - dsa_dst_del_ds(dst, ds, ds->index); + dsa_tree_remove_switch(dst, index); } void dsa_unregister_switch(struct dsa_switch *ds) From 0eefe2c1730020c6207bc9695fd466e558301dbb Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:26 -0400 Subject: [PATCH 1865/2177] net: dsa: get tree before parsing ports We will need a reference to the dsa_switch_tree when parsing a CPU port, so fetch it right after parsing the member and before parsing ports. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 5b6a3dad8015..5918fbddb0ab 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -751,18 +751,10 @@ static int _dsa_register_switch(struct dsa_switch *ds) err = dsa_parse_member_dn(np, &tree, &index); if (err) return err; - - err = dsa_parse_ports_of(np, ds); - if (err) - return err; } else { err = dsa_parse_member(pdata, &tree, &index); if (err) return err; - - err = dsa_parse_ports(pdata, ds); - if (err) - return err; } dst = dsa_tree_touch(tree); @@ -773,6 +765,16 @@ static int _dsa_register_switch(struct dsa_switch *ds) ds->index = index; ds->cd = pdata; + if (np) { + err = dsa_parse_ports_of(np, ds); + if (err) + return err; + } else { + err = dsa_parse_ports(pdata, ds); + if (err) + return err; + } + /* Initialize the routing table */ for (i = 0; i < DSA_MAX_SWITCHES; ++i) ds->rtable[i] = DSA_RTABLE_NONE; From 975e6e32215e6cbc09b65d762865b1a46e8e9103 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:27 -0400 Subject: [PATCH 1866/2177] net: dsa: rework switch parsing When parsing a switch, we have to identify to which tree it belongs and parse its ports. Provide two functions to separate the OF and platform data specific paths. Also use the of_property_read_variable_u32_array function to parse the OF member array instead of calling of_property_read_u32_index twice. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 117 ++++++++++++++++++++++++------------------------- 1 file changed, 58 insertions(+), 59 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 5918fbddb0ab..dfcb6247f2f2 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -617,7 +617,8 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) return 0; } -static int dsa_parse_ports_of(struct device_node *dn, struct dsa_switch *ds) +static int dsa_switch_parse_ports_of(struct dsa_switch *ds, + struct device_node *dn) { struct device_node *ports, *port; struct dsa_port *dp; @@ -648,6 +649,39 @@ static int dsa_parse_ports_of(struct device_node *dn, struct dsa_switch *ds) return 0; } +static int dsa_switch_parse_member_of(struct dsa_switch *ds, + struct device_node *dn) +{ + u32 m[2] = { 0, 0 }; + int sz; + + /* Don't error out if this optional property isn't found */ + sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2); + if (sz < 0 && sz != -EINVAL) + return sz; + + ds->index = m[1]; + if (ds->index >= DSA_MAX_SWITCHES) + return -EINVAL; + + ds->dst = dsa_tree_touch(m[0]); + if (!ds->dst) + return -ENOMEM; + + return 0; +} + +static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn) +{ + int err; + + err = dsa_switch_parse_member_of(ds, dn); + if (err) + return err; + + return dsa_switch_parse_ports_of(ds, dn); +} + static int dsa_port_parse(struct dsa_port *dp, const char *name, struct device *dev) { @@ -673,7 +707,8 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name, return 0; } -static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) +static int dsa_switch_parse_ports(struct dsa_switch *ds, + struct dsa_chip_data *cd) { bool valid_name_found = false; struct dsa_port *dp; @@ -703,40 +738,19 @@ static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) return 0; } -static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) +static int dsa_switch_parse(struct dsa_switch *ds, struct 
dsa_chip_data *cd) { - int err; + ds->cd = cd; - *tree = *index = 0; + /* We don't support interconnected switches nor multiple trees via + * platform data, so this is the unique switch of the tree. + */ + ds->index = 0; + ds->dst = dsa_tree_touch(0); + if (!ds->dst) + return -ENOMEM; - err = of_property_read_u32_index(np, "dsa,member", 0, tree); - if (err) { - /* Does not exist, but it is optional */ - if (err == -EINVAL) - return 0; - return err; - } - - err = of_property_read_u32_index(np, "dsa,member", 1, index); - if (err) - return err; - - if (*index >= DSA_MAX_SWITCHES) - return -EINVAL; - - return 0; -} - -static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) -{ - if (!pd) - return -ENODEV; - - /* We do not support complex trees with dsa_chip_data */ - *tree = 0; - *index = 0; - - return 0; + return dsa_switch_parse_ports(ds, cd); } static int _dsa_register_switch(struct dsa_switch *ds) @@ -744,36 +758,21 @@ static int _dsa_register_switch(struct dsa_switch *ds) struct dsa_chip_data *pdata = ds->dev->platform_data; struct device_node *np = ds->dev->of_node; struct dsa_switch_tree *dst; - u32 tree, index; + unsigned int index; int i, err; - if (np) { - err = dsa_parse_member_dn(np, &tree, &index); - if (err) - return err; - } else { - err = dsa_parse_member(pdata, &tree, &index); - if (err) - return err; - } + if (np) + err = dsa_switch_parse_of(ds, np); + else if (pdata) + err = dsa_switch_parse(ds, pdata); + else + err = -ENODEV; - dst = dsa_tree_touch(tree); - if (!dst) - return -ENOMEM; + if (err) + return err; - ds->dst = dst; - ds->index = index; - ds->cd = pdata; - - if (np) { - err = dsa_parse_ports_of(np, ds); - if (err) - return err; - } else { - err = dsa_parse_ports(pdata, ds); - if (err) - return err; - } + index = ds->index; + dst = ds->dst; /* Initialize the routing table */ for (i = 0; i < DSA_MAX_SWITCHES; ++i) From 54df6fa9541752b3b28106eb1a9764dcda815fd2 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:28 -0400 Subject: [PATCH 1867/2177] net: dsa: only check presence of link property When parsing a port, simply use of_property_read_bool which checks the presence of a given property, instead of parsing the link phandle. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index dfcb6247f2f2..06bcdb6bc796 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -590,8 +590,8 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) { struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); - struct device_node *link = of_parse_phandle(dn, "link", 0); const char *name = of_get_property(dn, "label", NULL); + bool link = of_property_read_bool(dn, "link"); if (ethernet) { struct net_device *master; From 06e24d0868a361774fc46d0819450915e63a2815 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:29 -0400 Subject: [PATCH 1868/2177] net: dsa: add one port parsing function per type Add dsa_port_parse_user, dsa_port_parse_dsa and dsa_port_parse_cpu functions to factorize the code shared by both OF and pdata parsing. They don't do much for the moment but will be extended later to support tagging protocol resolution for example. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 56 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 06bcdb6bc796..271a97ef5bf6 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -491,6 +491,32 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst, return 0; } +static int dsa_port_parse_user(struct dsa_port *dp, const char *name) +{ + if (!name) + name = "eth%d"; + + dp->type = DSA_PORT_TYPE_USER; + dp->name = name; + + return 0; +} + +static int dsa_port_parse_dsa(struct dsa_port *dp) +{ + dp->type = DSA_PORT_TYPE_DSA; + + return 0; +} + +static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) +{ + dp->type = DSA_PORT_TYPE_CPU; + dp->master = master; + + return 0; +} + static int dsa_cpu_parse(struct dsa_port *port, u32 index, struct dsa_switch_tree *dst, struct dsa_switch *ds) @@ -593,6 +619,8 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) const char *name = of_get_property(dn, "label", NULL); bool link = of_property_read_bool(dn, "link"); + dp->dn = dn; + if (ethernet) { struct net_device *master; @@ -600,21 +628,13 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) if (!master) return -EPROBE_DEFER; - dp->type = DSA_PORT_TYPE_CPU; - dp->master = master; - } else if (link) { - dp->type = DSA_PORT_TYPE_DSA; - } else { - if (!name) - name = "eth%d"; - - dp->type = DSA_PORT_TYPE_USER; - dp->name = name; + return dsa_port_parse_cpu(dp, master); } - dp->dn = dn; + if (link) + return dsa_port_parse_dsa(dp); - return 0; + return dsa_port_parse_user(dp, name); } static int dsa_switch_parse_ports_of(struct dsa_switch *ds, @@ -694,17 +714,13 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name, dev_put(master); - dp->type = DSA_PORT_TYPE_CPU; - dp->master = master; - } else if (!strcmp(name, "dsa")) { - dp->type = DSA_PORT_TYPE_DSA; - } else { - dp->type = DSA_PORT_TYPE_USER; + return dsa_port_parse_cpu(dp, master); } - dp->name = name; + if (!strcmp(name, "dsa")) + return dsa_port_parse_dsa(dp); - return 0; + return dsa_port_parse_user(dp, name); } static int dsa_switch_parse_ports(struct dsa_switch *ds, From 7354fcb0a3a3a5518107f8de117a6ce5ce08cc7c Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Nov 2017 19:05:30 -0400 Subject: [PATCH 1869/2177] net: dsa: resolve tagging protocol at parse time Extend the dsa_port_parse_cpu() function to resolve the tagging protocol at port parsing time, instead of waiting for the whole tree to be complete. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 271a97ef5bf6..283104e5ca6a 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -511,22 +511,11 @@ static int dsa_port_parse_dsa(struct dsa_port *dp) static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) { - dp->type = DSA_PORT_TYPE_CPU; - dp->master = master; - - return 0; -} - -static int dsa_cpu_parse(struct dsa_port *port, u32 index, - struct dsa_switch_tree *dst, - struct dsa_switch *ds) -{ + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; - if (!dst->cpu_dp) - dst->cpu_dp = port; - tag_protocol = ds->ops->get_tag_protocol(ds); tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(tag_ops)) { @@ -534,11 +523,21 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, return PTR_ERR(tag_ops); } - dst->cpu_dp->tag_ops = tag_ops; + dp->type = DSA_PORT_TYPE_CPU; + dp->rcv = tag_ops->rcv; + dp->tag_ops = tag_ops; + dp->master = master; + dp->dst = dst; - /* Make a few copies for faster access in master receive hot path */ - dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; - dst->cpu_dp->dst = dst; + return 0; +} + +static int dsa_cpu_parse(struct dsa_port *port, u32 index, + struct dsa_switch_tree *dst, + struct dsa_switch *ds) +{ + if (!dst->cpu_dp) + dst->cpu_dp = port; return 0; } From 1f2556916d974cfb62b6af51660186b5f58bd869 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Fri, 3 Nov 2017 16:38:48 -0700 Subject: [PATCH 1870/2177] tcp: higher throughput under reordering with adaptive RACK reordering wnd Currently TCP RACK loss detection does not work well if packets are being reordered beyond its static reordering window (min_rtt/4).Under such reordering it may falsely trigger loss recoveries and reduce TCP throughput significantly. This patch improves that by increasing and reducing the reordering window based on DSACK, which is now supported in major TCP implementations. It makes RACK's reo_wnd adaptive based on DSACK and no. of recoveries. - If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded by srtt), since there is possibility that spurious retransmission was due to reordering delay longer than reo_wnd. - Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) no. of successful recoveries (accounts for full DSACK-based loss recovery undo). After that, reset it to default (min_rtt/4). - At max, reo_wnd is incremented only once per rtt. So that the new DSACK on which we are reacting, is due to the spurious retx (approx) after the reo_wnd has been updated last time. - reo_wnd is tracked in terms of steps (of min_rtt/4), rather than absolute value to account for change in rtt. In our internal testing, we observed significant increase in throughput, in scenarios where reordering exceeds min_rtt/4 (previous static value). Signed-off-by: Priyaranjan Jha Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 1 + include/linux/tcp.h | 9 +++-- include/net/tcp.h | 2 ++ net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 7 ++++ net/ipv4/tcp_minisocks.c | 4 +++ net/ipv4/tcp_recovery.c | 48 ++++++++++++++++++++++++-- 7 files changed, 68 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e6661b205f72..54410a1d4065 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -454,6 +454,7 @@ tcp_recovery - INTEGER RACK: 0x1 enables the RACK loss detection for fast detection of lost retransmissions and tail drops. + RACK: 0x2 makes RACK's reordering window static (min_rtt/4). Default: 0x1 diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8c431385b272..22f40c96a15b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -210,8 +210,13 @@ struct tcp_sock { u64 mstamp; /* (Re)sent time of the skb */ u32 rtt_us; /* Associated RTT */ u32 end_seq; /* Ending TCP sequence of the skb */ - u8 advanced; /* mstamp advanced since last lost marking */ - u8 reord; /* reordering detected */ + u32 last_delivered; /* tp->delivered at last reo_wnd adj */ + u8 reo_wnd_steps; /* Allowed reordering window */ +#define TCP_RACK_RECOVERY_THRESH 16 + u8 reo_wnd_persist:5, /* No. of recovery since last adj */ + dsack_seen:1, /* Whether DSACK seen after last adj */ + advanced:1, /* mstamp advanced since last lost marking */ + reord:1; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ u32 chrono_start; /* Start time in jiffies of a TCP chrono */ diff --git a/include/net/tcp.h b/include/net/tcp.h index c2bf2a822b10..babfd4da1515 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -246,6 +246,7 @@ extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ +#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -1901,6 +1902,7 @@ extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); +extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); /* At how many usecs into the future should the RTO fire? 
*/ static inline s64 tcp_rto_delta_us(const struct sock *sk) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a7a0f316eb86..c4cb19ed4628 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -447,6 +447,7 @@ void tcp_init_sock(struct sock *sk) tcp_assign_congestion_control(sk); tp->tsoffset = 0; + tp->rack.reo_wnd_steps = 1; sk->sk_state = TCP_CLOSE; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8393b405ea98..0ada8bfc2ebd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -856,6 +856,7 @@ void tcp_disable_fack(struct tcp_sock *tp) static void tcp_dsack_seen(struct tcp_sock *tp) { tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; + tp->rack.dsack_seen = 1; } static void tcp_update_reordering(struct sock *sk, const int metric, @@ -2408,6 +2409,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS(sock_net(sk), mib_idx); + } else if (tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_persist--; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2427,6 +2430,8 @@ static bool tcp_try_undo_dsack(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && !tp->undo_retrans) { + tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH, + tp->rack.reo_wnd_persist + 1); DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); @@ -3644,6 +3649,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state); + tcp_rack_update_reo_wnd(sk, &rs); + if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); /* If needed, reset TLP/RTO timer; RACK may later override this. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3c65c1a3f944..4bb86580decd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -551,6 +551,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->syn_data_acked = 0; newtp->rack.mstamp = 0; newtp->rack.advanced = 0; + newtp->rack.reo_wnd_steps = 1; + newtp->rack.last_delivered = 0; + newtp->rack.reo_wnd_persist = 0; + newtp->rack.dsack_seen = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index ac3e9c6d3a3d..d3ea89020c69 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -44,6 +44,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) { struct tcp_sock *tp = tcp_sk(sk); + u32 min_rtt = tcp_min_rtt(tp); struct sk_buff *skb, *n; u32 reo_wnd; @@ -54,8 +55,10 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) * to queuing or delayed ACKs. */ reo_wnd = 1000; - if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U) - reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); + if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { + reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); + reo_wnd = min(reo_wnd, tp->srtt_us >> 3); + } list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, tcp_tsorted_anchor) { @@ -160,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk) if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) tcp_rearm_rto(sk); } + +/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. 
+ * + * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded + * by srtt), since there is possibility that spurious retransmission was + * due to reordering delay longer than reo_wnd. + * + * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) + * no. of successful recoveries (accounts for full DSACK-based loss + * recovery undo). After that, reset it to default (min_rtt/4). + * + * At max, reo_wnd is incremented only once per rtt. So that the new + * DSACK on which we are reacting, is due to the spurious retx (approx) + * after the reo_wnd has been updated last time. + * + * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than + * absolute value to account for change in rtt. + */ +void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + !rs->prior_delivered) + return; + + /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ + if (before(rs->prior_delivered, tp->rack.last_delivered)) + tp->rack.dsack_seen = 0; + + /* Adjust the reo_wnd if update is pending */ + if (tp->rack.dsack_seen) { + tp->rack.reo_wnd_steps = min_t(u32, 0xFF, + tp->rack.reo_wnd_steps + 1); + tp->rack.dsack_seen = 0; + tp->rack.last_delivered = tp->delivered; + tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; + } else if (!tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_steps = 1; + } +} From 4e59532541c865c85c92d42be4edf2ba6aa4af64 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 4 Nov 2017 16:48:54 +0100 Subject: [PATCH 1871/2177] nfp: don't depend on compiler constant propagation Matthias reports: nfp_eth_set_bit_config() is marked as __always_inline to allow gcc to identify the 'mask' parameter as known to be constant at compile time, which is required to use the FIELD_GET() macro. The forced inlining does the trick for gcc, but for kernel builds with clang it results in undefined symbols: drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.o: In function `__nfp_eth_set_aneg': drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c:(.text+0x787): undefined reference to `__compiletime_assert_492' drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c:(.text+0x7b1): undefined reference to `__compiletime_assert_496' These __compiletime_assert_xyx() calls would have been optimized away if the compiler had seen 'mask' as a constant. Add a macro to extract the mask and shift and pass those to nfp_eth_set_bit_config() separately. Reported-by: Matthias Kaehlcke Signed-off-by: Jakub Kicinski Tested-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- .../netronome/nfp/nfpcore/nfp_nsp_eth.c | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index f6f7c085f8e0..47251396fcae 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -469,10 +469,10 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed) return nfp_eth_config_commit_end(nsp); } -/* Force inline, FIELD_* macroes require masks to be compilation-time known */ -static __always_inline int +static int nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, - const u64 mask, unsigned int val, const u64 ctrl_bit) + const u64 mask, const unsigned int shift, + unsigned int val, const u64 ctrl_bit) { union eth_table_entry *entries = nfp_nsp_config_entries(nsp); unsigned int idx = nfp_nsp_config_idx(nsp); @@ -489,11 +489,11 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, /* Check if we are already in requested state */ reg = le64_to_cpu(entries[idx].raw[raw_idx]); - if (val == FIELD_GET(mask, reg)) + if (val == (reg & mask) >> shift) return 0; reg &= ~mask; - reg |= FIELD_PREP(mask, val); + reg |= (val << shift) & mask; entries[idx].raw[raw_idx] = cpu_to_le64(reg); entries[idx].control |= cpu_to_le64(ctrl_bit); @@ -503,6 +503,13 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, return 0; } +#define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit) \ + ({ \ + __BF_FIELD_CHECK(mask, 0ULL, val, "NFP_ETH_SET_BIT_CONFIG: "); \ + nfp_eth_set_bit_config(nsp, raw_idx, mask, __bf_shf(mask), \ + val, ctrl_bit); \ + }) + /** * __nfp_eth_set_aneg() - set PHY autonegotiation control bit * @nsp: NFP NSP handle returned from nfp_eth_config_start() @@ -515,7 +522,7 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, */ int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode) { - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_STATE, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, NSP_ETH_STATE_ANEG, mode, NSP_ETH_CTRL_SET_ANEG); } @@ -544,7 +551,7 @@ int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed) return -EINVAL; } - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_STATE, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, NSP_ETH_STATE_RATE, rate, NSP_ETH_CTRL_SET_RATE); } @@ -561,6 +568,6 @@ int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed) */ int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes) { - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, lanes, NSP_ETH_CTRL_SET_LANES); } From 7717c319d8c025aba426f10f41a9d7f9ea8af192 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 4 Nov 2017 16:48:55 +0100 Subject: [PATCH 1872/2177] nfp: make use of MAC reinit Recent management FW images can perform full reinit of MAC cores without requiring a reboot. When loading the driver check if there are changes pending and if so call NSP MAC reinit. Full application FW reload is still required, and all MACs need to be reinited at the same time (not only the ones which have been reconfigured, and thus potentially causing disruption to unrelated netdevs) therefore for now changing MAC config without reloading the driver still remains future work. 
Signed-off-by: Jakub Kicinski Tested-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_main.c | 28 ++++++++++++++++++- .../ethernet/netronome/nfp/nfp_net_ethtool.c | 2 +- .../net/ethernet/netronome/nfp/nfp_net_main.c | 2 +- .../ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 5 ++++ .../ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 6 ++++ 5 files changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index f8fa63b66739..35eaccbece36 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -346,6 +346,32 @@ exit_release_fw: return err < 0 ? err : 1; } +static void +nfp_nsp_init_ports(struct pci_dev *pdev, struct nfp_pf *pf, + struct nfp_nsp *nsp) +{ + bool needs_reinit = false; + int i; + + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + if (!pf->eth_tbl) + return; + + if (!nfp_nsp_has_mac_reinit(nsp)) + return; + + for (i = 0; i < pf->eth_tbl->count; i++) + needs_reinit |= pf->eth_tbl->ports[i].override_changed; + if (!needs_reinit) + return; + + kfree(pf->eth_tbl); + if (nfp_nsp_mac_reinit(nsp)) + dev_warn(&pdev->dev, "MAC reinit failed\n"); + + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); +} + static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf) { struct nfp_nsp *nsp; @@ -366,7 +392,7 @@ static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf) if (err < 0) goto exit_close_nsp; - pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + nfp_nsp_init_ports(pdev, pf, nsp); pf->nspi = __nfp_nsp_identify(nsp); if (pf->nspi) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index c67b90c8d8b7..0061097c271e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -328,7 +328,7 @@ nfp_net_set_link_ksettings(struct net_device *netdev, return -EOPNOTSUPP; if (netif_running(netdev)) { - netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until reboot.\n"); + netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until driver reload.\n"); return -EBUSY; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index ff373acd28f3..0beb9b21557b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -597,7 +597,7 @@ nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port, return -EIO; } if (eth_port->override_changed) { - nfp_warn(cpp, "Port #%d config changed, unregistering. Reboot required before port will be operational again.\n", port->eth_id); + nfp_warn(cpp, "Port #%d config changed, unregistering. 
Driver reload required before port will be operational again.\n", port->eth_id); port->type = NFP_PORT_INVALID; } diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 37364555c42b..14a6d1ba51a9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -477,6 +477,11 @@ int nfp_nsp_device_soft_reset(struct nfp_nsp *state) return nfp_nsp_command(state, SPCODE_SOFT_RESET, 0, 0, 0); } +int nfp_nsp_mac_reinit(struct nfp_nsp *state) +{ + return nfp_nsp_command(state, SPCODE_MAC_INIT, 0, 0, 0); +} + int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw) { return nfp_nsp_command_buf(state, SPCODE_FW_LOAD, fw->size, fw->data, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index e2f028027c6f..47486d42f2d7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -48,6 +48,12 @@ u16 nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state); int nfp_nsp_wait(struct nfp_nsp *state); int nfp_nsp_device_soft_reset(struct nfp_nsp *state); int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw); +int nfp_nsp_mac_reinit(struct nfp_nsp *state); + +static inline bool nfp_nsp_has_mac_reinit(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 20; +} enum nfp_eth_interface { NFP_INTERFACE_NONE = 0, From 51ccc37d9d3392884024b272089fd3e864d3cf3c Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Sat, 4 Nov 2017 16:48:56 +0100 Subject: [PATCH 1873/2177] nfp: refactor nfp_app_reprs_set The criteria that reprs cannot be replaced with another new set of reprs has been removed. This check is not needed since the only use case that could exercise this at the moment, would be to modify the number of SRIOV VFs without first disabling them. This case is explicitly disallowed in any case and subsequent patches in this series need to be able to replace the running set of reprs. All cases where the return code used to be checked for the nfp_app_reprs_set function have been removed. As stated above, it is not possible for the current code to encounter a case where reprs exist and need to be replaced. Signed-off-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/flower/main.c | 16 ++++------------ drivers/net/ethernet/netronome/nfp/nfp_app.c | 6 ------ 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index e46e7c60d491..e0283bb24f06 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -142,8 +142,8 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, { u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp); struct nfp_flower_priv *priv = app->priv; - struct nfp_reprs *reprs, *old_reprs; enum nfp_port_type port_type; + struct nfp_reprs *reprs; const u8 queue = 0; int i, err; @@ -194,11 +194,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, reprs->reprs[i]->name); } - old_reprs = nfp_app_reprs_set(app, repr_type, reprs); - if (IS_ERR(old_reprs)) { - err = PTR_ERR(old_reprs); - goto err_reprs_clean; - } + nfp_app_reprs_set(app, repr_type, reprs); return 0; err_reprs_clean: @@ -222,8 +218,8 @@ static int nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) { struct nfp_eth_table *eth_tbl = app->pf->eth_tbl; - struct nfp_reprs *reprs, *old_reprs; struct sk_buff *ctrl_skb; + struct nfp_reprs *reprs; unsigned int i; int err; @@ -280,11 +276,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) phys_port, reprs->reprs[phys_port]->name); } - old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); - if (IS_ERR(old_reprs)) { - err = PTR_ERR(old_reprs); - goto err_reprs_clean; - } + nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); /* The MAC_REPR control message should be sent after the MAC * representors are registered using nfp_app_reprs_set(). This is diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 3644d74fe304..955a9f44d244 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -106,14 +106,8 @@ nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type, old = rcu_dereference_protected(app->reprs[type], lockdep_is_held(&app->pf->lock)); - if (reprs && old) { - old = ERR_PTR(-EBUSY); - goto exit_unlock; - } - rcu_assign_pointer(app->reprs[type], reprs); -exit_unlock: return old; } From 5fa27d59af2a36c32156e56b6370387f60b67052 Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Sat, 4 Nov 2017 16:48:57 +0100 Subject: [PATCH 1874/2177] nfp: resync repr state when port table sync If the NSP port table has been refreshed, resync the representor state with the new port information. At the moment, this only entails looking for invalid ports and killing off representors associated with them. The repr instance becomes NULL which is safe since the app accessor function for reprs returns NULL when it cannot access a repr. Signed-off-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- .../net/ethernet/netronome/nfp/nfp_net_main.c | 6 +++ .../net/ethernet/netronome/nfp/nfp_net_repr.c | 47 +++++++++++++++++++ .../net/ethernet/netronome/nfp/nfp_net_repr.h | 1 + 3 files changed, 54 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 0beb9b21557b..c505014121c4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -611,6 +611,7 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) struct nfp_eth_table *eth_table; struct nfp_net *nn, *next; struct nfp_port *port; + int err; lockdep_assert_held(&pf->lock); @@ -640,6 +641,11 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) kfree(eth_table); + /* Resync repr state. This may cause reprs to be removed. */ + err = nfp_reprs_resync_phys_ports(pf->app); + if (err) + return err; + /* Shoot off the ports which became invalid */ list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) { if (!nn->port || nn->port->type != NFP_PORT_INVALID) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index d540a9dc77b3..1bce8c131bb9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -390,3 +390,50 @@ struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs) return reprs; } + +int nfp_reprs_resync_phys_ports(struct nfp_app *app) +{ + struct nfp_reprs *reprs, *old_reprs; + struct nfp_repr *repr; + int i; + + old_reprs = + rcu_dereference_protected(app->reprs[NFP_REPR_TYPE_PHYS_PORT], + lockdep_is_held(&app->pf->lock)); + if (!old_reprs) + return 0; + + reprs = nfp_reprs_alloc(old_reprs->num_reprs); + if (!reprs) + return -ENOMEM; + + for (i = 0; i < old_reprs->num_reprs; i++) { + if (!old_reprs->reprs[i]) + continue; + + repr = netdev_priv(old_reprs->reprs[i]); + if (repr->port->type == NFP_PORT_INVALID) + continue; + + reprs->reprs[i] = old_reprs->reprs[i]; + } + + old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); + synchronize_rcu(); + + /* Now we free up removed representors */ + for (i = 0; i < old_reprs->num_reprs; i++) { + if (!old_reprs->reprs[i]) + continue; + + repr = netdev_priv(old_reprs->reprs[i]); + if (repr->port->type != NFP_PORT_INVALID) + continue; + + nfp_app_repr_stop(app, repr); + nfp_repr_clean(repr); + } + + kfree(old_reprs); + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index 32179cad062a..5d4d897bc9c6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -124,5 +124,6 @@ void nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type); struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs); +int nfp_reprs_resync_phys_ports(struct nfp_app *app); #endif /* NFP_NET_REPR_H */ From a564d30ec2b859205d5fdd521df3fb6d342dc461 Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Sat, 4 Nov 2017 16:48:58 +0100 Subject: [PATCH 1875/2177] nfp: add get/set link settings ndos to representors Since it is now safe to modify link settings for representors, we can attach the get/set link settings ndos to it. The get/set link settings are nfp_port based operations. If a port becomes invalid, the representor will be removed in the same way a vnic would be. Signed-off-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 0061097c271e..d0028894667c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1155,6 +1155,8 @@ const struct ethtool_ops nfp_port_ethtool_ops = { .set_dump = nfp_app_set_dump, .get_dump_flag = nfp_app_get_dump_flag, .get_dump_data = nfp_app_get_dump_data, + .get_link_ksettings = nfp_net_get_link_ksettings, + .set_link_ksettings = nfp_net_set_link_ksettings, }; void nfp_net_set_ethtool_ops(struct net_device *netdev) From b471232e2caa054e006fa4b5fd4bf15544b00b0f Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Sat, 4 Nov 2017 16:48:59 +0100 Subject: [PATCH 1876/2177] nfp: add helpers for FEC support Implement helpers to determine and modify FEC modes via the NSP. The NSP advertises FEC capabilities on a per port basis and provides support for: * Auto mode selection * Reed Solomon * BaseR * None/Off Signed-off-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 30 +++++++++ .../netronome/nfp/nfpcore/nfp_nsp_eth.c | 64 +++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index 47486d42f2d7..650ca1a5bd21 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -79,6 +79,18 @@ enum nfp_eth_aneg { NFP_ANEG_DISABLED, }; +enum nfp_eth_fec { + NFP_FEC_AUTO_BIT = 0, + NFP_FEC_BASER_BIT, + NFP_FEC_REED_SOLOMON_BIT, + NFP_FEC_DISABLED_BIT, +}; + +#define NFP_FEC_AUTO BIT(NFP_FEC_AUTO_BIT) +#define NFP_FEC_BASER BIT(NFP_FEC_BASER_BIT) +#define NFP_FEC_REED_SOLOMON BIT(NFP_FEC_REED_SOLOMON_BIT) +#define NFP_FEC_DISABLED BIT(NFP_FEC_DISABLED_BIT) + /** * struct nfp_eth_table - ETH table information * @count: number of table entries @@ -93,6 +105,7 @@ enum nfp_eth_aneg { * @speed: interface speed (in Mbps) * @interface: interface (module) plugged in * @media: media type of the @interface + * @fec: forward error correction mode * @aneg: auto negotiation mode * @mac_addr: interface MAC address * @label_port: port id @@ -105,6 +118,7 @@ enum nfp_eth_aneg { * @port_type: one of %PORT_* defines for ethtool * @port_lanes: total number of lanes on the port (sum of lanes of all subports) * @is_split: is interface part of a split port + * @fec_modes_supported: bitmap of FEC modes supported */ struct nfp_eth_table { unsigned int count; @@ -120,6 +134,7 @@ struct nfp_eth_table { unsigned int interface; enum nfp_eth_media media; + enum nfp_eth_fec fec; enum nfp_eth_aneg aneg; u8 mac_addr[ETH_ALEN]; @@ -139,6 +154,8 @@ struct nfp_eth_table { unsigned int port_lanes; bool is_split; + + unsigned int fec_modes_supported; } ports[0]; }; @@ -149,6 +166,19 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp); int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable); int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed); +int +nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode); + +static inline bool nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port) +{ + return !!eth_port->fec_modes_supported; +} + +static inline unsigned int 
+nfp_eth_supported_fec_modes(struct nfp_eth_table_port *eth_port) +{ + return eth_port->fec_modes_supported; +} struct nfp_nsp *nfp_eth_config_start(struct nfp_cpp *cpp, unsigned int idx); int nfp_eth_config_commit_end(struct nfp_nsp *nsp); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index 47251396fcae..7ca589660e4d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -55,6 +55,8 @@ #define NSP_ETH_PORT_INDEX GENMASK_ULL(15, 8) #define NSP_ETH_PORT_LABEL GENMASK_ULL(53, 48) #define NSP_ETH_PORT_PHYLABEL GENMASK_ULL(59, 54) +#define NSP_ETH_PORT_FEC_SUPP_BASER BIT_ULL(60) +#define NSP_ETH_PORT_FEC_SUPP_RS BIT_ULL(61) #define NSP_ETH_PORT_LANES_MASK cpu_to_le64(NSP_ETH_PORT_LANES) @@ -67,6 +69,7 @@ #define NSP_ETH_STATE_MEDIA GENMASK_ULL(21, 20) #define NSP_ETH_STATE_OVRD_CHNG BIT_ULL(22) #define NSP_ETH_STATE_ANEG GENMASK_ULL(25, 23) +#define NSP_ETH_STATE_FEC GENMASK_ULL(27, 26) #define NSP_ETH_CTRL_CONFIGURED BIT_ULL(0) #define NSP_ETH_CTRL_ENABLED BIT_ULL(1) @@ -75,6 +78,7 @@ #define NSP_ETH_CTRL_SET_RATE BIT_ULL(4) #define NSP_ETH_CTRL_SET_LANES BIT_ULL(5) #define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6) +#define NSP_ETH_CTRL_SET_FEC BIT_ULL(7) enum nfp_eth_raw { NSP_ETH_RAW_PORT = 0, @@ -152,6 +156,7 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, unsigned int index, struct nfp_eth_table_port *dst) { unsigned int rate; + unsigned int fec; u64 port, state; port = le64_to_cpu(src->port); @@ -183,6 +188,18 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, dst->override_changed = FIELD_GET(NSP_ETH_STATE_OVRD_CHNG, state); dst->aneg = FIELD_GET(NSP_ETH_STATE_ANEG, state); + + if (nfp_nsp_get_abi_ver_minor(nsp) < 22) + return; + + fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_BASER, port); + dst->fec_modes_supported |= fec << NFP_FEC_BASER_BIT; + fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_RS, port); + dst->fec_modes_supported |= fec << NFP_FEC_REED_SOLOMON_BIT; + if (dst->fec_modes_supported) + dst->fec_modes_supported |= NFP_FEC_AUTO | NFP_FEC_DISABLED; + + dst->fec = 1 << FIELD_GET(NSP_ETH_STATE_FEC, state); } static void @@ -527,6 +544,53 @@ int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode) NSP_ETH_CTRL_SET_ANEG); } +/** + * __nfp_eth_set_fec() - set PHY forward error correction control bit + * @nsp: NFP NSP handle returned from nfp_eth_config_start() + * @mode: Desired fec mode + * + * Set the PHY module forward error correction mode. + * Will write to hwinfo overrides in the flash (persistent config). + * + * Return: 0 or -ERRNO. + */ +static int __nfp_eth_set_fec(struct nfp_nsp *nsp, enum nfp_eth_fec mode) +{ + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, + NSP_ETH_STATE_FEC, mode, + NSP_ETH_CTRL_SET_FEC); +} + +/** + * nfp_eth_set_fec() - set PHY forward error correction control mode + * @cpp: NFP CPP handle + * @idx: NFP chip-wide port index + * @mode: Desired fec mode + * + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. 
+ */ +int +nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode) +{ + struct nfp_nsp *nsp; + int err; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + err = __nfp_eth_set_fec(nsp, mode); + if (err) { + nfp_eth_config_cleanup_end(nsp); + return err; + } + + return nfp_eth_config_commit_end(nsp); +} + /** * __nfp_eth_set_speed() - set interface speed/rate * @nsp: NFP NSP handle returned from nfp_eth_config_start() From 0d08709383377087bc50825db4b47c058c7ab70a Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Sat, 4 Nov 2017 16:49:00 +0100 Subject: [PATCH 1877/2177] nfp: implement ethtool FEC mode settings Add support in the driver ethtool ops to modify the NFP FEC modes. The FEC modes can be set for vNIC associated with physical ports or for MAC representor netdevs. Signed-off-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/nfp_net_ethtool.c | 117 +++++++++++++++++- 1 file changed, 116 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index d0028894667c..60c8d733a37d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -244,6 +244,30 @@ nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) nfp_get_drvinfo(app, app->pdev, "*", drvinfo); } +static void +nfp_net_set_fec_link_mode(struct nfp_eth_table_port *eth_port, + struct ethtool_link_ksettings *c) +{ + unsigned int modes; + + ethtool_link_ksettings_add_link_mode(c, supported, FEC_NONE); + if (!nfp_eth_can_support_fec(eth_port)) { + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_NONE); + return; + } + + modes = nfp_eth_supported_fec_modes(eth_port); + if (modes & NFP_FEC_BASER) { + ethtool_link_ksettings_add_link_mode(c, supported, FEC_BASER); + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_BASER); + } + + if (modes & NFP_FEC_REED_SOLOMON) { + ethtool_link_ksettings_add_link_mode(c, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_RS); + } +} + /** * nfp_net_get_link_ksettings - Get Link Speed settings * @netdev: network interface device structure @@ -278,9 +302,11 @@ nfp_net_get_link_ksettings(struct net_device *netdev, port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); - if (eth_port) + if (eth_port) { cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ? 
AUTONEG_ENABLE : AUTONEG_DISABLE; + nfp_net_set_fec_link_mode(eth_port, cmd); + } if (!netif_carrier_ok(netdev)) return 0; @@ -686,6 +712,91 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset) } } +static int nfp_port_fec_ethtool_to_nsp(u32 fec) +{ + switch (fec) { + case ETHTOOL_FEC_AUTO: + return NFP_FEC_AUTO_BIT; + case ETHTOOL_FEC_OFF: + return NFP_FEC_DISABLED_BIT; + case ETHTOOL_FEC_RS: + return NFP_FEC_REED_SOLOMON_BIT; + case ETHTOOL_FEC_BASER: + return NFP_FEC_BASER_BIT; + default: + /* NSP only supports a single mode at a time */ + return -EOPNOTSUPP; + } +} + +static u32 nfp_port_fec_nsp_to_ethtool(u32 fec) +{ + u32 result = 0; + + if (fec & NFP_FEC_AUTO) + result |= ETHTOOL_FEC_AUTO; + if (fec & NFP_FEC_BASER) + result |= ETHTOOL_FEC_BASER; + if (fec & NFP_FEC_REED_SOLOMON) + result |= ETHTOOL_FEC_RS; + if (fec & NFP_FEC_DISABLED) + result |= ETHTOOL_FEC_OFF; + + return result ?: ETHTOOL_FEC_NONE; +} + +static int +nfp_port_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *param) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + param->active_fec = ETHTOOL_FEC_NONE_BIT; + param->fec = ETHTOOL_FEC_NONE_BIT; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!nfp_eth_can_support_fec(eth_port)) + return 0; + + param->fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec_modes_supported); + param->active_fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec); + + return 0; +} + +static int +nfp_port_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *param) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err, fec; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!nfp_eth_can_support_fec(eth_port)) + return -EOPNOTSUPP; + + fec = nfp_port_fec_ethtool_to_nsp(param->fec); + if (fec < 0) + return fec; + + err = nfp_eth_set_fec(port->app->cpp, eth_port->index, fec); + if (!err) + /* Only refresh if we did something */ + nfp_net_refresh_port_table(port); + + return err < 0 ? err : 0; +} + /* RX network flow classification (RSS, filters, etc) */ static u32 ethtool_flow_to_nfp_flag(u32 flow_type) @@ -1144,6 +1255,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = { .set_channels = nfp_net_set_channels, .get_link_ksettings = nfp_net_get_link_ksettings, .set_link_ksettings = nfp_net_set_link_ksettings, + .get_fecparam = nfp_port_get_fecparam, + .set_fecparam = nfp_port_set_fecparam, }; const struct ethtool_ops nfp_port_ethtool_ops = { @@ -1157,6 +1270,8 @@ const struct ethtool_ops nfp_port_ethtool_ops = { .get_dump_data = nfp_app_get_dump_data, .get_link_ksettings = nfp_net_get_link_ksettings, .set_link_ksettings = nfp_net_set_link_ksettings, + .get_fecparam = nfp_port_get_fecparam, + .set_fecparam = nfp_port_set_fecparam, }; void nfp_net_set_ethtool_ops(struct net_device *netdev) From 67e306fdbed71ab0a6e0d5985e088a49061c523f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:30 -0500 Subject: [PATCH 1878/2177] device_cgroup: add DEVCG_ prefix to ACC_* and DEV_* constants Rename device type and access type constants defined in security/device_cgroup.c by adding the DEVCG_ prefix. The reason behind this renaming is to make them global namespace friendly, as they will be moved to the corresponding header file by following patches. Signed-off-by: Roman Gushchin Cc: David S. 
Miller Cc: Tejun Heo Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: David S. Miller --- security/device_cgroup.c | 72 ++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 5ef7e5240563..968c21557ba7 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -15,14 +15,14 @@ #include #include -#define ACC_MKNOD 1 -#define ACC_READ 2 -#define ACC_WRITE 4 -#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) +#define DEVCG_ACC_MKNOD 1 +#define DEVCG_ACC_READ 2 +#define DEVCG_ACC_WRITE 4 +#define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE) -#define DEV_BLOCK 1 -#define DEV_CHAR 2 -#define DEV_ALL 4 /* this represents all devices */ +#define DEVCG_DEV_BLOCK 1 +#define DEVCG_DEV_CHAR 2 +#define DEVCG_DEV_ALL 4 /* this represents all devices */ static DEFINE_MUTEX(devcgroup_mutex); @@ -246,21 +246,21 @@ static void set_access(char *acc, short access) { int idx = 0; memset(acc, 0, ACCLEN); - if (access & ACC_READ) + if (access & DEVCG_ACC_READ) acc[idx++] = 'r'; - if (access & ACC_WRITE) + if (access & DEVCG_ACC_WRITE) acc[idx++] = 'w'; - if (access & ACC_MKNOD) + if (access & DEVCG_ACC_MKNOD) acc[idx++] = 'm'; } static char type_to_char(short type) { - if (type == DEV_ALL) + if (type == DEVCG_DEV_ALL) return 'a'; - if (type == DEV_CHAR) + if (type == DEVCG_DEV_CHAR) return 'c'; - if (type == DEV_BLOCK) + if (type == DEVCG_DEV_BLOCK) return 'b'; return 'X'; } @@ -287,10 +287,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v) * This way, the file remains as a "whitelist of devices" */ if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { - set_access(acc, ACC_MASK); + set_access(acc, DEVCG_ACC_MASK); set_majmin(maj, ~0); set_majmin(min, ~0); - seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL), + seq_printf(m, "%c %s:%s %s\n", type_to_char(DEVCG_DEV_ALL), maj, min, acc); } else { list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) { @@ -309,10 +309,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v) /** * match_exception - iterates the exception list trying to find a complete match * @exceptions: list of exceptions - * @type: device type (DEV_BLOCK or DEV_CHAR) + * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR) * @major: device file major number, ~0 to match all * @minor: device file minor number, ~0 to match all - * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) + * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD) * * It is considered a complete match if an exception is found that will * contain the entire range of provided parameters. 
@@ -325,9 +325,9 @@ static bool match_exception(struct list_head *exceptions, short type, struct dev_exception_item *ex; list_for_each_entry_rcu(ex, exceptions, list) { - if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) + if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK)) continue; - if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) + if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR)) continue; if (ex->major != ~0 && ex->major != major) continue; @@ -344,10 +344,10 @@ static bool match_exception(struct list_head *exceptions, short type, /** * match_exception_partial - iterates the exception list trying to find a partial match * @exceptions: list of exceptions - * @type: device type (DEV_BLOCK or DEV_CHAR) + * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR) * @major: device file major number, ~0 to match all * @minor: device file minor number, ~0 to match all - * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) + * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD) * * It is considered a partial match if an exception's range is found to * contain *any* of the devices specified by provided parameters. This is @@ -362,9 +362,9 @@ static bool match_exception_partial(struct list_head *exceptions, short type, struct dev_exception_item *ex; list_for_each_entry_rcu(ex, exceptions, list) { - if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) + if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK)) continue; - if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) + if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR)) continue; /* * We must be sure that both the exception and the provided @@ -647,10 +647,10 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, } return 0; case 'b': - ex.type = DEV_BLOCK; + ex.type = DEVCG_DEV_BLOCK; break; case 'c': - ex.type = DEV_CHAR; + ex.type = DEVCG_DEV_CHAR; break; default: return -EINVAL; @@ -703,13 +703,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, for (b++, count = 0; count < 3; count++, b++) { switch (*b) { case 'r': - ex.access |= ACC_READ; + ex.access |= DEVCG_ACC_READ; break; case 'w': - ex.access |= ACC_WRITE; + ex.access |= DEVCG_ACC_WRITE; break; case 'm': - ex.access |= ACC_MKNOD; + ex.access |= DEVCG_ACC_MKNOD; break; case '\n': case '\0': @@ -806,7 +806,7 @@ struct cgroup_subsys devices_cgrp_subsys = { * @type: device type * @major: device major number * @minor: device minor number - * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD + * @access: combination of DEVCG_ACC_WRITE, DEVCG_ACC_READ and DEVCG_ACC_MKNOD * * returns 0 on success, -EPERM case the operation is not permitted */ @@ -839,13 +839,13 @@ int __devcgroup_inode_permission(struct inode *inode, int mask) short type, access = 0; if (S_ISBLK(inode->i_mode)) - type = DEV_BLOCK; + type = DEVCG_DEV_BLOCK; if (S_ISCHR(inode->i_mode)) - type = DEV_CHAR; + type = DEVCG_DEV_CHAR; if (mask & MAY_WRITE) - access |= ACC_WRITE; + access |= DEVCG_ACC_WRITE; if (mask & MAY_READ) - access |= ACC_READ; + access |= DEVCG_ACC_READ; return __devcgroup_check_permission(type, imajor(inode), iminor(inode), access); @@ -859,11 +859,11 @@ int devcgroup_inode_mknod(int mode, dev_t dev) return 0; if (S_ISBLK(mode)) - type = DEV_BLOCK; + type = DEVCG_DEV_BLOCK; else - type = DEV_CHAR; + type = DEVCG_DEV_CHAR; return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev), - ACC_MKNOD); + DEVCG_ACC_MKNOD); } From ecf8fecb7828648cba0e42de7464a7e600c93459 Mon Sep 17 00:00:00 2001 From: Roman 
Gushchin Date: Sun, 5 Nov 2017 08:15:31 -0500 Subject: [PATCH 1879/2177] device_cgroup: prepare code for bpf-based device controller This is non-functional change to prepare the device cgroup code for adding eBPF-based controller for cgroups v2. The patch performs the following changes: 1) __devcgroup_inode_permission() and devcgroup_inode_mknod() are moving to the device-cgroup.h and converting into static inline. 2) __devcgroup_check_permission() is exported. 3) devcgroup_check_permission() wrapper is introduced to be used by both existing and new bpf-based implementations. Signed-off-by: Roman Gushchin Acked-by: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/device_cgroup.h | 61 ++++++++++++++++++++++++++++++++--- security/device_cgroup.c | 47 ++------------------------- 2 files changed, 59 insertions(+), 49 deletions(-) diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index cdbc344a92e4..2d93d7ecd479 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,17 +1,70 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#define DEVCG_ACC_MKNOD 1 +#define DEVCG_ACC_READ 2 +#define DEVCG_ACC_WRITE 4 +#define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE) + +#define DEVCG_DEV_BLOCK 1 +#define DEVCG_DEV_CHAR 2 +#define DEVCG_DEV_ALL 4 /* this represents all devices */ + #ifdef CONFIG_CGROUP_DEVICE -extern int __devcgroup_inode_permission(struct inode *inode, int mask); -extern int devcgroup_inode_mknod(int mode, dev_t dev); +extern int __devcgroup_check_permission(short type, u32 major, u32 minor, + short access); +#else +static inline int __devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ return 0; } +#endif + +#ifdef CONFIG_CGROUP_DEVICE +static inline int devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ + return __devcgroup_check_permission(type, major, minor, access); +} + static inline int devcgroup_inode_permission(struct inode *inode, int mask) { + short type, access = 0; + if (likely(!inode->i_rdev)) return 0; - if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + + if (S_ISBLK(inode->i_mode)) + type = DEVCG_DEV_BLOCK; + else if (S_ISCHR(inode->i_mode)) + type = DEVCG_DEV_CHAR; + else return 0; - return __devcgroup_inode_permission(inode, mask); + + if (mask & MAY_WRITE) + access |= DEVCG_ACC_WRITE; + if (mask & MAY_READ) + access |= DEVCG_ACC_READ; + + return devcgroup_check_permission(type, imajor(inode), iminor(inode), + access); } + +static inline int devcgroup_inode_mknod(int mode, dev_t dev) +{ + short type; + + if (!S_ISBLK(mode) && !S_ISCHR(mode)) + return 0; + + if (S_ISBLK(mode)) + type = DEVCG_DEV_BLOCK; + else + type = DEVCG_DEV_CHAR; + + return devcgroup_check_permission(type, MAJOR(dev), MINOR(dev), + DEVCG_ACC_MKNOD); +} + #else static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 968c21557ba7..c65b39bafdfe 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -15,15 +15,6 @@ #include #include -#define DEVCG_ACC_MKNOD 1 -#define DEVCG_ACC_READ 2 -#define DEVCG_ACC_WRITE 4 -#define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE) - -#define DEVCG_DEV_BLOCK 1 -#define DEVCG_DEV_CHAR 2 -#define DEVCG_DEV_ALL 4 /* this represents all devices */ - static DEFINE_MUTEX(devcgroup_mutex); enum devcg_behavior { @@ -810,8 +801,8 @@ struct cgroup_subsys 
devices_cgrp_subsys = { * * returns 0 on success, -EPERM case the operation is not permitted */ -static int __devcgroup_check_permission(short type, u32 major, u32 minor, - short access) +int __devcgroup_check_permission(short type, u32 major, u32 minor, + short access) { struct dev_cgroup *dev_cgroup; bool rc; @@ -833,37 +824,3 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor, return 0; } - -int __devcgroup_inode_permission(struct inode *inode, int mask) -{ - short type, access = 0; - - if (S_ISBLK(inode->i_mode)) - type = DEVCG_DEV_BLOCK; - if (S_ISCHR(inode->i_mode)) - type = DEVCG_DEV_CHAR; - if (mask & MAY_WRITE) - access |= DEVCG_ACC_WRITE; - if (mask & MAY_READ) - access |= DEVCG_ACC_READ; - - return __devcgroup_check_permission(type, imajor(inode), iminor(inode), - access); -} - -int devcgroup_inode_mknod(int mode, dev_t dev) -{ - short type; - - if (!S_ISBLK(mode) && !S_ISCHR(mode)) - return 0; - - if (S_ISBLK(mode)) - type = DEVCG_DEV_BLOCK; - else - type = DEVCG_DEV_CHAR; - - return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev), - DEVCG_ACC_MKNOD); - -} From ebc614f687369f9df99828572b1d85a7c2de3d92 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:32 -0500 Subject: [PATCH 1880/2177] bpf, cgroup: implement eBPF-based device controller for cgroup v2 Cgroup v2 lacks the device controller, provided by cgroup v1. This patch adds a new eBPF program type, which in combination of previously added ability to attach multiple eBPF programs to a cgroup, will provide a similar functionality, but with some additional flexibility. This patch introduces a BPF_PROG_TYPE_CGROUP_DEVICE program type. A program takes major and minor device numbers, device type (block/character) and access type (mknod/read/write) as parameters and returns an integer which defines if the operation should be allowed or terminated with -EPERM. Signed-off-by: Roman Gushchin Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Cc: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 15 ++++++++ include/linux/bpf_types.h | 3 ++ include/linux/device_cgroup.h | 8 +++- include/uapi/linux/bpf.h | 15 ++++++++ kernel/bpf/cgroup.c | 67 ++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 7 ++++ kernel/bpf/verifier.c | 1 + tools/include/uapi/linux/bpf.h | 15 ++++++++ 8 files changed, 130 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 87a7db9feb38..a7f16e0f8d68 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, enum bpf_attach_type type); +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. 
*/ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } \ __ret; \ }) + +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \ + access, \ + BPF_CGROUP_DEVICE); \ + \ + __ret; \ +}) #else struct cgroup_bpf {}; @@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 53c5b9ad7220..978c1d9c9383 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif +#ifdef CONFIG_CGROUP_BPF +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 2d93d7ecd479..8557efe096dc 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include #define DEVCG_ACC_MKNOD 1 #define DEVCG_ACC_READ 2 @@ -19,10 +20,15 @@ static inline int __devcgroup_check_permission(short type, u32 major, u32 minor, { return 0; } #endif -#ifdef CONFIG_CGROUP_DEVICE +#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) static inline int devcgroup_check_permission(short type, u32 major, u32 minor, short access) { + int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); + + if (rc) + return -EPERM; + return __devcgroup_check_permission(type, major, minor, access); } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4455dd195201..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -132,6 +132,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -141,6 +142,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -991,4 +993,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 3db5a17fcfe8..b789ab78d28f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, return ret == 1 ? 
0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); + +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type) +{ + struct cgroup *cgrp; + struct bpf_cgroup_dev_ctx ctx = { + .access_type = (access << 16) | dev_type, + .major = major, + .minor = minor, + }; + int allow = 1; + + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, + BPF_PROG_RUN); + rcu_read_unlock(); + + return !allow; +} +EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); + +static const struct bpf_func_proto * +cgroup_dev_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_trace_printk: + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); + default: + return NULL; + } +} + +static bool cgroup_dev_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (type == BPF_WRITE) + return false; + + if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) + return false; + /* The verifier guarantees that size > 0. */ + if (off % size != 0) + return false; + if (size != sizeof(__u32)) + return false; + + return true; +} + +const struct bpf_prog_ops cg_dev_prog_ops = { +}; + +const struct bpf_verifier_ops cg_dev_verifier_ops = { + .get_func_proto = cgroup_dev_func_proto, + .is_valid_access = cgroup_dev_is_valid_access, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 416d70cdfc76..09badc37e864 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1326,6 +1326,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; + case BPF_CGROUP_DEVICE: + ptype = BPF_PROG_TYPE_CGROUP_DEVICE; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, true); @@ -1378,6 +1381,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; + case BPF_CGROUP_DEVICE: + ptype = BPF_PROG_TYPE_CGROUP_DEVICE; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, false); @@ -1420,6 +1426,7 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_SOCK_OPS: + case BPF_CGROUP_DEVICE: break; default: return -EINVAL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index add845fe788a..4a942e2e753d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3124,6 +3124,7 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_CGROUP_DEVICE: break; default: return 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e92f62cf933a..b280f37cd057 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -131,6 +131,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -140,6 +141,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, 
BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -990,4 +992,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ From 9d1f15941967cd80fc3baa3322751fab532f98a4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:33 -0500 Subject: [PATCH 1881/2177] bpf: move cgroup_helpers from samples/bpf/ to tools/testing/selftesting/bpf/ The purpose of this move is to use these files in bpf tests. Signed-off-by: Roman Gushchin Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Cc: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/Makefile | 5 +++-- tools/testing/selftests/bpf/Makefile | 2 +- {samples => tools/testing/selftests}/bpf/cgroup_helpers.c | 0 {samples => tools/testing/selftests}/bpf/cgroup_helpers.h | 0 4 files changed, 4 insertions(+), 3 deletions(-) rename {samples => tools/testing/selftests}/bpf/cgroup_helpers.c (100%) rename {samples => tools/testing/selftests}/bpf/cgroup_helpers.h (100%) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 6a9321ec348a..5994075b080d 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -46,6 +46,7 @@ hostprogs-y += syscall_tp # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o +CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o test_lru_dist-objs := test_lru_dist.o $(LIBBPF) sock_example-objs := sock_example.o $(LIBBPF) @@ -69,13 +70,13 @@ map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o -test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o +test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS) test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o -test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \ +test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4f0734aa6e93..9fbb02638198 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,7 +23,7 @@ TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh include ../lib.mk -BPFOBJ := $(OUTPUT)/libbpf.a +BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c $(TEST_GEN_PROGS): $(BPFOBJ) diff --git a/samples/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c similarity index 100% rename from samples/bpf/cgroup_helpers.c rename to tools/testing/selftests/bpf/cgroup_helpers.c diff --git a/samples/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h similarity index 100% rename from samples/bpf/cgroup_helpers.h rename to 
tools/testing/selftests/bpf/cgroup_helpers.h From 37f1ba0909dfa12c75f8e8ea7a2f01355ebd60f1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:34 -0500 Subject: [PATCH 1882/2177] selftests/bpf: add a test for device cgroup controller Add a test for device cgroup controller. The test loads a simple bpf program which logs all device access attempts using trace_printk() and forbids all operations except operations with /dev/zero and /dev/urandom. Then the test creates and joins a test cgroup, and attaches the bpf program to it. Then it tries to perform some simple device operations and checks the result: create /dev/null (should fail) create /dev/zero (should pass) copy data from /dev/urandom to /dev/zero (should pass) copy data from /dev/urandom to /dev/full (should fail) copy data from /dev/random to /dev/zero (should fail) Signed-off-by: Roman Gushchin Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Cc: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/dev_cgroup.c | 60 ++++++++++++ tools/testing/selftests/bpf/test_dev_cgroup.c | 93 +++++++++++++++++++ 3 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/dev_cgroup.c create mode 100644 tools/testing/selftests/bpf/test_dev_cgroup.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9fbb02638198..333a48655ee0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -13,11 +13,11 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i LDLIBS += -lcap -lelf TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log + test_align test_verifier_log test_dev_cgroup TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o + sockmap_verdict_prog.o dev_cgroup.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/dev_cgroup.c new file mode 100644 index 000000000000..ce41a3475f27 --- /dev/null +++ b/tools/testing/selftests/bpf/dev_cgroup.c @@ -0,0 +1,60 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include +#include +#include "bpf_helpers.h" + +SEC("cgroup/dev") +int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) +{ + short type = ctx->access_type & 0xFFFF; +#ifdef DEBUG + short access = ctx->access_type >> 16; + char fmt[] = " %d:%d \n"; + + switch (type) { + case BPF_DEVCG_DEV_BLOCK: + fmt[0] = 'b'; + break; + case BPF_DEVCG_DEV_CHAR: + fmt[0] = 'c'; + break; + default: + fmt[0] = '?'; + break; + } + + if (access & BPF_DEVCG_ACC_READ) + fmt[8] = 'r'; + + if (access & BPF_DEVCG_ACC_WRITE) + fmt[9] = 'w'; + + if (access & BPF_DEVCG_ACC_MKNOD) + fmt[10] = 'm'; + + bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor); +#endif + + /* Allow access to /dev/zero and /dev/random. + * Forbid everything else. 
+ */ + if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR) + return 0; + + switch (ctx->minor) { + case 5: /* 1:5 /dev/zero */ + case 9: /* 1:9 /dev/urandom */ + return 1; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c new file mode 100644 index 000000000000..02c85d6c89b0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -0,0 +1,93 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "cgroup_helpers.h" + +#define DEV_CGROUP_PROG "./dev_cgroup.o" + +#define TEST_CGROUP "test-bpf-based-device-cgroup/" + +int main(int argc, char **argv) +{ + struct bpf_object *obj; + int error = EXIT_FAILURE; + int prog_fd, cgroup_fd; + __u32 prog_cnt; + + if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, + &obj, &prog_fd)) { + printf("Failed to load DEV_CGROUP program\n"); + goto err; + } + + if (setup_cgroup_environment()) { + printf("Failed to load DEV_CGROUP program\n"); + goto err; + } + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + if (!cgroup_fd) { + printf("Failed to create test cgroup\n"); + goto err; + } + + if (join_cgroup(TEST_CGROUP)) { + printf("Failed to join cgroup\n"); + goto err; + } + + /* Attach bpf program */ + if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_DEVICE, 0)) { + printf("Failed to attach DEV_CGROUP program"); + goto err; + } + + if (bpf_prog_query(cgroup_fd, BPF_CGROUP_DEVICE, 0, NULL, NULL, + &prog_cnt)) { + printf("Failed to query attached programs"); + goto err; + } + + /* All operations with /dev/zero and and /dev/urandom are allowed, + * everything else is forbidden. + */ + assert(system("rm -f /tmp/test_dev_cgroup_null") == 0); + assert(system("mknod /tmp/test_dev_cgroup_null c 1 3")); + assert(system("rm -f /tmp/test_dev_cgroup_null") == 0); + + /* /dev/zero is whitelisted */ + assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0); + assert(system("mknod /tmp/test_dev_cgroup_zero c 1 5") == 0); + assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0); + + assert(system("dd if=/dev/urandom of=/dev/zero count=64") == 0); + + /* src is allowed, target is forbidden */ + assert(system("dd if=/dev/urandom of=/dev/full count=64")); + + /* src is forbidden, target is allowed */ + assert(system("dd if=/dev/random of=/dev/zero count=64")); + + error = 0; + printf("test_dev_cgroup:PASS\n"); + +err: + cleanup_cgroup_environment(); + + return error; +} From c45e3e4c5b134b081e8af362109905427967eb19 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 9 Jul 2017 13:08:58 +0200 Subject: [PATCH 1883/2177] NFC: fix device-allocation error return A recent change fixing NFC device allocation itself introduced an error-handling bug by returning an error pointer in case device-id allocation failed. This is clearly broken as the callers still expected NULL to be returned on errors as detected by Dan's static checker. Fix this up by returning NULL in the event that we've run out of memory when allocating a new device id. Note that the offending commit is marked for stable (3.8) so this fix needs to be backported along with it. 
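To illustrate the failure mode with a hypothetical caller (variable names invented, not code from this patch): drivers test the return value against NULL, so an ERR_PTR() value slips past the check and is dereferenced later:

	struct nfc_dev *dev;

	dev = nfc_allocate_device(&chip_ops, protocols, tx_headroom, tx_tailroom);
	if (!dev)			/* ERR_PTR(-ENOMEM) is non-NULL, so this check passes */
		return -ENOMEM;
	nfc_set_drvdata(dev, chip);	/* dereferences an error pointer on allocation failure */

Returning NULL preserves the contract that callers only need a NULL check.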
Fixes: 20777bc57c34 ("NFC: fix broken device allocation") Cc: stable # 3.8 Reported-by: Dan Carpenter Signed-off-by: Johan Hovold Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/core.c b/net/nfc/core.c index e5e23c2cbe74..2c7c9b357e70 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1105,7 +1105,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, err_free_dev: kfree(dev); - return ERR_PTR(rc); + return NULL; } EXPORT_SYMBOL(nfc_allocate_device); From f98bc10e0eb90075665516c6ddc81761d42b3525 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 21 Aug 2017 22:33:53 +0530 Subject: [PATCH 1884/2177] nfc: microread: constify i2c_device_id i2c_device_id are not supposed to change at runtime. All functions working with i2c_device_id provided by work with const i2c_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Samuel Ortiz --- drivers/nfc/microread/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c index b668b7b9a61e..1806d20a5e29 100644 --- a/drivers/nfc/microread/i2c.c +++ b/drivers/nfc/microread/i2c.c @@ -294,7 +294,7 @@ static int microread_i2c_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id microread_i2c_id[] = { +static const struct i2c_device_id microread_i2c_id[] = { { MICROREAD_I2C_DRIVER_NAME, 0}, { } }; From ab1df981574bdb06c2d7f8b6817d5007c2f4a10d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 21 Aug 2017 22:33:54 +0530 Subject: [PATCH 1885/2177] nfc: nfcmrvl: constify i2c_device_id i2c_device_id are not supposed to change at runtime. All functions working with i2c_device_id provided by work with const i2c_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Samuel Ortiz --- drivers/nfc/nfcmrvl/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index ffec103702f1..0f22379887ca 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -266,7 +266,7 @@ static const struct of_device_id of_nfcmrvl_i2c_match[] = { }; MODULE_DEVICE_TABLE(of, of_nfcmrvl_i2c_match); -static struct i2c_device_id nfcmrvl_i2c_id_table[] = { +static const struct i2c_device_id nfcmrvl_i2c_id_table[] = { { "nfcmrvl_i2c", 0 }, {} }; From 01e682ad085d0e7d253c3bad557a2a4b08bbf5fe Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 21 Aug 2017 22:33:55 +0530 Subject: [PATCH 1886/2177] nfc: nxp-nci: constify i2c_device_id i2c_device_id are not supposed to change at runtime. All functions working with i2c_device_id provided by work with const i2c_device_id. So mark the non-const structs as const. 
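The pattern applied by this series, shown as a generic sketch (driver and device names are illustrative, not taken from these patches):

	static const struct i2c_device_id foo_i2c_id[] = {
		{ "foo-nfc", 0 },
		{ }
	};
	MODULE_DEVICE_TABLE(i2c, foo_i2c_id);

The I2C core only reads such a table, so the const qualifier lets it be placed in read-only data.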
Signed-off-by: Arvind Yadav Signed-off-by: Samuel Ortiz --- drivers/nfc/nxp-nci/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 198585bbc771..ba695e392c3b 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -393,7 +393,7 @@ static int nxp_nci_i2c_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id nxp_nci_i2c_id_table[] = { +static const struct i2c_device_id nxp_nci_i2c_id_table[] = { {"nxp-nci_i2c", 0}, {} }; From f98786da9d18ee1ecfac4a68e532b1859cf7d1c0 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 21 Aug 2017 22:33:56 +0530 Subject: [PATCH 1887/2177] nfc: pn533: constify i2c_device_id i2c_device_id are not supposed to change at runtime. All functions working with i2c_device_id provided by work with const i2c_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Samuel Ortiz --- drivers/nfc/pn533/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/pn533/i2c.c b/drivers/nfc/pn533/i2c.c index 8f60ce039b0d..4389eb4c8d0b 100644 --- a/drivers/nfc/pn533/i2c.c +++ b/drivers/nfc/pn533/i2c.c @@ -264,7 +264,7 @@ static const struct of_device_id of_pn533_i2c_match[] = { }; MODULE_DEVICE_TABLE(of, of_pn533_i2c_match); -static struct i2c_device_id pn533_i2c_id_table[] = { +static const struct i2c_device_id pn533_i2c_id_table[] = { { PN533_I2C_DRIVER_NAME, 0 }, {} }; From 3737ff15b0e88fd9d4942118c2c9ece917ddc68f Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 21 Aug 2017 22:33:57 +0530 Subject: [PATCH 1888/2177] nfc: pn544: constify i2c_device_id i2c_device_id are not supposed to change at runtime. All functions working with i2c_device_id provided by work with const i2c_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Samuel Ortiz --- drivers/nfc/pn544/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 4b14740edb67..d0207f8e68b7 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -54,7 +54,7 @@ #define PN544_HCI_I2C_LLC_MAX_SIZE (PN544_HCI_I2C_LLC_LEN_CRC + 1 + \ PN544_HCI_I2C_LLC_MAX_PAYLOAD) -static struct i2c_device_id pn544_hci_i2c_id_table[] = { +static const struct i2c_device_id pn544_hci_i2c_id_table[] = { {"pn544", 0}, {} }; From 81251cc599acfac63d68caa69c6284b546c66fe0 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 21 Aug 2017 22:33:58 +0530 Subject: [PATCH 1889/2177] nfc: s3fwrn5: constify i2c_device_id i2c_device_id are not supposed to change at runtime. All functions working with i2c_device_id provided by work with const i2c_device_id. So mark the non-const structs as const. 
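For context, a sketch of how the table is consumed (names illustrative, the probe/remove callbacks are assumed to exist elsewhere): struct i2c_driver already refers to it through a pointer to const i2c_device_id, so constifying the table matches the existing API:

	static struct i2c_driver foo_i2c_driver = {
		.driver = {
			.name = "foo-nfc",
		},
		.probe = foo_i2c_probe,
		.remove = foo_i2c_remove,
		.id_table = foo_i2c_id,		/* const struct i2c_device_id * */
	};
	module_i2c_driver(foo_i2c_driver);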
Signed-off-by: Arvind Yadav Signed-off-by: Samuel Ortiz --- drivers/nfc/s3fwrn5/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c index 3f09d7fd2285..4da409e77a72 100644 --- a/drivers/nfc/s3fwrn5/i2c.c +++ b/drivers/nfc/s3fwrn5/i2c.c @@ -276,7 +276,7 @@ static int s3fwrn5_i2c_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id s3fwrn5_i2c_id_table[] = { +static const struct i2c_device_id s3fwrn5_i2c_id_table[] = { {S3FWRN5_I2C_DRIVER_NAME, 0}, {} }; From 984553078497082008b8a2bd681facdc789b5acf Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 21 Aug 2017 22:33:59 +0530 Subject: [PATCH 1890/2177] nfc: st-nci: constify i2c_device_id i2c_device_id are not supposed to change at runtime. All functions working with i2c_device_id provided by work with const i2c_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Samuel Ortiz --- drivers/nfc/st-nci/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c index 515f08d037fb..f9525ef87d57 100644 --- a/drivers/nfc/st-nci/i2c.c +++ b/drivers/nfc/st-nci/i2c.c @@ -279,7 +279,7 @@ static int st_nci_i2c_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id st_nci_i2c_id_table[] = { +static const struct i2c_device_id st_nci_i2c_id_table[] = { {ST_NCI_DRIVER_NAME, 0}, {} }; From a122ffd0911d563f4b5e95120259ec5d2e8e3c4f Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 21 Aug 2017 22:34:00 +0530 Subject: [PATCH 1891/2177] nfc: st21nfca: constify i2c_device_id i2c_device_id are not supposed to change at runtime. All functions working with i2c_device_id provided by work with const i2c_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Samuel Ortiz --- drivers/nfc/st21nfca/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index cd1f7bfa75eb..1b347096422f 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -589,7 +589,7 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id st21nfca_hci_i2c_id_table[] = { +static const struct i2c_device_id st21nfca_hci_i2c_id_table[] = { {ST21NFCA_HCI_DRIVER_NAME, 0}, {} }; From 5057f6647b3286eee2d9a7eff7f3c072ea7573d9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 15:25:15 +0100 Subject: [PATCH 1892/2177] nfc: s3fwrn5: make array match static const Don't populate the read-only array match on the stack, instead make it static const. 
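A minimal sketch of the difference (generic function and made-up table entries, not the driver's code): an automatic initialized aggregate is rebuilt on the stack on every call, while a static const one is emitted once into read-only data:

	static u32 lookup_base(u8 version)
	{
		static const struct {
			u8 version;
			u32 base_addr;
		} match[] = {
			{ 0x05, 0x00005000 },	/* illustrative entries */
			{ 0x07, 0x00003000 },
		};
		unsigned int i;

		for (i = 0; i < ARRAY_SIZE(match); i++)
			if (match[i].version == version)
				return match[i].base_addr;
		return 0;
	}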
Makes the object code smaller by over 310 bytes: Before: text data bss dec hex filename 8304 1084 128 9516 252c drivers/nfc/s3fwrn5/firmware.o After: text data bss dec hex filename 7894 1180 128 9202 23f2 drivers/nfc/s3fwrn5/firmware.o Signed-off-by: Colin Ian King Signed-off-by: Samuel Ortiz --- drivers/nfc/s3fwrn5/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index 38548bd970cd..b7828fb252f2 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -336,7 +336,7 @@ static int s3fwrn5_fw_get_base_addr( struct s3fwrn5_fw_cmd_get_bootinfo_rsp *bootinfo, u32 *base_addr) { int i; - struct { + static const struct { u8 version[4]; u32 base_addr; } match[] = { From 81ade1cd6761e791904801ca9bfc960de4b94916 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 Oct 2017 10:47:12 +0100 Subject: [PATCH 1893/2177] NFC: fdp: make struct nci_ops static The structure nci_ops is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol 'nci_ops' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: Samuel Ortiz --- drivers/nfc/fdp/fdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index ec50027b0d8b..d5784a47fc13 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -726,7 +726,7 @@ static struct nci_driver_ops fdp_prop_ops[] = { }, }; -struct nci_ops nci_ops = { +static struct nci_ops nci_ops = { .open = fdp_nci_open, .close = fdp_nci_close, .send = fdp_nci_send, From 4b519bb493e0866de7659b88dd22dc2cd89dd628 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Wed, 11 Oct 2017 16:03:44 +0530 Subject: [PATCH 1894/2177] NFC: Convert timers to use timer_setup() Switch to using the new timer_setup() and from_timer() for net/nfc/* Signed-off-by: Allen Pais Reviewed-by: Kees Cook Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 8 +++----- net/nfc/hci/core.c | 7 +++---- net/nfc/hci/llc_shdlc.c | 23 +++++++++-------------- net/nfc/llcp_core.c | 14 ++++++-------- 4 files changed, 21 insertions(+), 31 deletions(-) diff --git a/net/nfc/core.c b/net/nfc/core.c index 2c7c9b357e70..947a470f929d 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1015,9 +1015,9 @@ exit: device_unlock(&dev->dev); } -static void nfc_check_pres_timeout(unsigned long data) +static void nfc_check_pres_timeout(struct timer_list *t) { - struct nfc_dev *dev = (struct nfc_dev *)data; + struct nfc_dev *dev = from_timer(dev, t, check_pres_timer); schedule_work(&dev->check_pres_work); } @@ -1094,9 +1094,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->targets_generation = 1; if (ops->check_presence) { - setup_timer(&dev->check_pres_timer, nfc_check_pres_timeout, - (unsigned long)dev); - + timer_setup(&dev->check_pres_timer, nfc_check_pres_timeout, 0); INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); } diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index a8a6e7814e09..ac8030c4bcf8 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -428,9 +428,9 @@ exit_noskb: nfc_hci_driver_failure(hdev, r); } -static void nfc_hci_cmd_timeout(unsigned long data) +static void nfc_hci_cmd_timeout(struct timer_list *t) { - struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data; + struct nfc_hci_dev *hdev = from_timer(hdev, t, cmd_timer); schedule_work(&hdev->msg_tx_work); } @@ -1004,8 +1004,7 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev) 
INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work); - setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout, - (unsigned long)hdev); + timer_setup(&hdev->cmd_timer, nfc_hci_cmd_timeout, 0); skb_queue_head_init(&hdev->rx_hcp_frags); diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 58df37eae1e8..fe988936ad92 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -580,27 +580,27 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) } } -static void llc_shdlc_connect_timeout(unsigned long data) +static void llc_shdlc_connect_timeout(struct timer_list *t) { - struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer); pr_debug("\n"); schedule_work(&shdlc->sm_work); } -static void llc_shdlc_t1_timeout(unsigned long data) +static void llc_shdlc_t1_timeout(struct timer_list *t) { - struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + struct llc_shdlc *shdlc = from_timer(shdlc, t, t1_timer); pr_debug("SoftIRQ: need to send ack\n"); schedule_work(&shdlc->sm_work); } -static void llc_shdlc_t2_timeout(unsigned long data) +static void llc_shdlc_t2_timeout(struct timer_list *t) { - struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + struct llc_shdlc *shdlc = from_timer(shdlc, t, t2_timer); pr_debug("SoftIRQ: need to retransmit\n"); @@ -763,14 +763,9 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, mutex_init(&shdlc->state_mutex); shdlc->state = SHDLC_DISCONNECTED; - setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout, - (unsigned long)shdlc); - - setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout, - (unsigned long)shdlc); - - setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout, - (unsigned long)shdlc); + timer_setup(&shdlc->connect_timer, llc_shdlc_connect_timeout, 0); + timer_setup(&shdlc->t1_timer, llc_shdlc_t1_timeout, 0); + timer_setup(&shdlc->t2_timer, llc_shdlc_t2_timeout, 0); shdlc->w = SHDLC_MAX_WINDOW; shdlc->srej_support = SHDLC_SREJ_SUPPORT; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 7988185072e5..ef4026a23e80 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -242,9 +242,9 @@ static void nfc_llcp_timeout_work(struct work_struct *work) nfc_dep_link_down(local->dev); } -static void nfc_llcp_symm_timer(unsigned long data) +static void nfc_llcp_symm_timer(struct timer_list *t) { - struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + struct nfc_llcp_local *local = from_timer(local, t, link_timer); pr_err("SYMM timeout\n"); @@ -285,9 +285,9 @@ static void nfc_llcp_sdreq_timeout_work(struct work_struct *work) nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); } -static void nfc_llcp_sdreq_timer(unsigned long data) +static void nfc_llcp_sdreq_timer(struct timer_list *t) { - struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + struct nfc_llcp_local *local = from_timer(local, t, sdreq_timer); schedule_work(&local->sdreq_timeout_work); } @@ -1573,8 +1573,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) INIT_LIST_HEAD(&local->list); kref_init(&local->ref); mutex_init(&local->sdp_lock); - setup_timer(&local->link_timer, nfc_llcp_symm_timer, - (unsigned long)local); + timer_setup(&local->link_timer, nfc_llcp_symm_timer, 0); skb_queue_head_init(&local->tx_queue); INIT_WORK(&local->tx_work, nfc_llcp_tx_work); @@ -1600,8 +1599,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) mutex_init(&local->sdreq_lock); INIT_HLIST_HEAD(&local->pending_sdreqs); - setup_timer(&local->sdreq_timer, 
nfc_llcp_sdreq_timer, - (unsigned long)local); + timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0); INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); list_add(&local->list, &llcp_devices); From c5cc0c697149345ded2f792a919097cd51464274 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 6 Sep 2017 22:28:00 +0200 Subject: [PATCH 1895/2177] netfilter: ipvs: Use %pS printk format for direct addresses The debug and error printk functions in ipvs uses wrongly the %pF instead of the %pS printk format specifier for printing symbols for the address returned by _builtin_return_address(0). Fix it for the ia64, ppc64 and parisc64 architectures. Signed-off-by: Helge Deller Cc: Wensong Zhang Cc: netdev@vger.kernel.org Cc: lvs-devel@vger.kernel.org Cc: netfilter-devel@vger.kernel.org Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 3d2ac71a83ec..f73561ca982d 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { - pr_err("%s(): request for already hashed, called from %pF\n", + pr_err("%s(): request for already hashed, called from %pS\n", __func__, __builtin_return_address(0)); ret = 0; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4f940d7eb2f7..b825835752e6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) unsigned int hash; if (svc->flags & IP_VS_SVC_F_HASHED) { - pr_err("%s(): request for already hashed, called from %pF\n", + pr_err("%s(): request for already hashed, called from %pS\n", __func__, __builtin_return_address(0)); return 0; } @@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) static int ip_vs_svc_unhash(struct ip_vs_service *svc) { if (!(svc->flags & IP_VS_SVC_F_HASHED)) { - pr_err("%s(): request for unhash flagged, called from %pF\n", + pr_err("%s(): request for unhash flagged, called from %pS\n", __func__, __builtin_return_address(0)); return 0; } From c5504f724c86ee925e7ffb80aa342cfd57959b13 Mon Sep 17 00:00:00 2001 From: KUWAZAWA Takuya Date: Sun, 15 Oct 2017 20:54:10 +0900 Subject: [PATCH 1896/2177] netfilter: ipvs: Fix inappropriate output of procfs Information about ipvs in different network namespace can be seen via procfs. How to reproduce: # ip netns add ns01 # ip netns add ns02 # ip netns exec ns01 ip a add dev lo 127.0.0.1/8 # ip netns exec ns02 ip a add dev lo 127.0.0.1/8 # ip netns exec ns01 ipvsadm -A -t 10.1.1.1:80 # ip netns exec ns02 ipvsadm -A -t 10.1.1.2:80 The ipvsadm displays information about its own network namespace only. # ip netns exec ns01 ipvsadm -Ln IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 10.1.1.1:80 wlc # ip netns exec ns02 ipvsadm -Ln IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 10.1.1.2:80 wlc But I can see information about other network namespace via procfs. 
# ip netns exec ns01 cat /proc/net/ip_vs IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 0A010101:0050 wlc TCP 0A010102:0050 wlc # ip netns exec ns02 cat /proc/net/ip_vs IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 0A010102:0050 wlc Signed-off-by: KUWAZAWA Takuya Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b825835752e6..fac8c802b4ea 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2034,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) seq_puts(seq, " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); } else { + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); char *sched_name = sched ? sched->name : "none"; + if (svc->ipvs != ipvs) + return 0; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) From 9912156c2e42a5b0100da37275622262ece02c05 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 16 Oct 2017 11:24:02 +0100 Subject: [PATCH 1897/2177] netfilter: ebtables: clean up initialization of buf buf is initialized to buf_start and then set on the next statement to buf_start + offsets[i]. Clean this up to just initialize buf to buf_start + offsets[i] to clean up the clang build warning: "Value stored to 'buf' during its initialization is never read" Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 83951f978445..54c274dbf4f1 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2111,9 +2111,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, for (i = 0, j = 1 ; j < 4 ; j++, i++) { struct compat_ebt_entry_mwt *match32; unsigned int size; - char *buf = buf_start; + char *buf = buf_start + offsets[i]; - buf = buf_start + offsets[i]; if (offsets[i] > offsets[j]) return -EINVAL; From b1fc1372c48027a5b99943905cf7cfa6d622d6a9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Nov 2017 12:50:38 +0100 Subject: [PATCH 1898/2177] netfilter: xt_connlimit: remove mask argument Instead of passing mask to all the helpers, just fixup the search key early. After rbtree conversion, each rbtree node stores connections of same 'addr & mask', so no need to pass the mask too. 
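The idea in simplified form (an illustrative helper, not the patch itself): mask the address once while building the lookup key, so per-node comparisons no longer need the mask:

	static void connlimit_make_key(union nf_inet_addr *key,
				       const union nf_inet_addr *addr,
				       const union nf_inet_addr *mask,
				       u8 family)
	{
		unsigned int i;

		if (family == NFPROTO_IPV4) {
			key->ip = addr->ip & mask->ip;
			return;
		}
		for (i = 0; i < ARRAY_SIZE(key->ip6); i++)
			key->ip6[i] = addr->ip6[i] & mask->ip6[i];
	}

Every rbtree node then stores an already-masked address, and same_source() can compare keys directly.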
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connlimit.c | 52 ++++++++++++++---------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index ce2870428631..a6214f235333 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -71,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr) } static inline unsigned int -connlimit_iphash6(const union nf_inet_addr *addr, - const union nf_inet_addr *mask) +connlimit_iphash6(const union nf_inet_addr *addr) { - union nf_inet_addr res; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) - res.ip6[i] = addr->ip6[i] & mask->ip6[i]; - - return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), + return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), connlimit_rnd) % CONNLIMIT_SLOTS; } @@ -94,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn) } static int -same_source_net(const union nf_inet_addr *addr, - const union nf_inet_addr *mask, - const union nf_inet_addr *u3, u_int8_t family) +same_source(const union nf_inet_addr *addr, + const union nf_inet_addr *u3, u_int8_t family) { - if (family == NFPROTO_IPV4) { - return ntohl(addr->ip & mask->ip) - - ntohl(u3->ip & mask->ip); - } else { - union nf_inet_addr lh, rh; - unsigned int i; + if (family == NFPROTO_IPV4) + return ntohl(addr->ip) - ntohl(u3->ip); - for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) { - lh.ip6[i] = addr->ip6[i] & mask->ip6[i]; - rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; - } - - return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)); - } + return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6)); } static bool add_hlist(struct hlist_head *head, @@ -194,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root, static unsigned int count_tree(struct net *net, struct rb_root *root, const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, const union nf_inet_addr *mask, + const union nf_inet_addr *addr, u8 family, const struct nf_conntrack_zone *zone) { struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; @@ -215,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root, rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); parent = *rbnode; - diff = same_source_net(addr, mask, &rbconn->addr, family); + diff = same_source(addr, &rbconn->addr, family); if (diff < 0) { rbnode = &((*rbnode)->rb_left); } else if (diff > 0) { @@ -282,7 +264,6 @@ static int count_them(struct net *net, struct xt_connlimit_data *data, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, - const union nf_inet_addr *mask, u_int8_t family, const struct nf_conntrack_zone *zone) { @@ -291,14 +272,14 @@ static int count_them(struct net *net, u32 hash; if (family == NFPROTO_IPV6) - hash = connlimit_iphash6(addr, mask); + hash = connlimit_iphash6(addr); else - hash = connlimit_iphash(addr->ip & mask->ip); + hash = connlimit_iphash(addr->ip); root = &data->climit_root[hash]; spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - count = count_tree(net, root, tuple, addr, mask, family, zone); + count = count_tree(net, root, tuple, addr, family, zone); spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); @@ -329,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int i; + memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? 
&iph->daddr : &iph->saddr, sizeof(addr.ip6)); + + for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i) + addr.ip6[i] &= info->mask.ip6[i]; } else { const struct iphdr *iph = ip_hdr(skb); addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? iph->daddr : iph->saddr; + + addr.ip &= info->mask.ip; } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, xt_family(par), zone); + xt_family(par), zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; From 909dcf9b16738d2dee46b688378d64fc771712b8 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:55 +0200 Subject: [PATCH 1899/2177] ieee802154: atusb: switch from uint8_t to u8 Switch top the preferred kernel type naming. Found by checkpatch. Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index d5584063049f..bfc9d360d880 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -61,7 +61,7 @@ struct atusb { struct usb_ctrlrequest tx_dr; struct urb *tx_urb; struct sk_buff *tx_skb; - uint8_t tx_ack_seq; /* current TX ACK sequence number */ + u8 tx_ack_seq; /* current TX ACK sequence number */ /* Firmware variable */ unsigned char fw_ver_maj; /* Firmware major version number */ @@ -105,7 +105,7 @@ static int atusb_control_msg(struct atusb *atusb, unsigned int pipe, return ret; } -static int atusb_command(struct atusb *atusb, uint8_t cmd, uint8_t arg) +static int atusb_command(struct atusb *atusb, u8 cmd, u8 arg) { struct usb_device *usb_dev = atusb->usb_dev; @@ -114,7 +114,7 @@ static int atusb_command(struct atusb *atusb, uint8_t cmd, uint8_t arg) cmd, ATUSB_REQ_TO_DEV, arg, 0, NULL, 0, 1000); } -static int atusb_write_reg(struct atusb *atusb, uint8_t reg, uint8_t value) +static int atusb_write_reg(struct atusb *atusb, u8 reg, u8 value) { struct usb_device *usb_dev = atusb->usb_dev; @@ -125,12 +125,12 @@ static int atusb_write_reg(struct atusb *atusb, uint8_t reg, uint8_t value) value, reg, NULL, 0, 1000); } -static int atusb_read_reg(struct atusb *atusb, uint8_t reg) +static int atusb_read_reg(struct atusb *atusb, u8 reg) { struct usb_device *usb_dev = atusb->usb_dev; int ret; - uint8_t *buffer; - uint8_t value; + u8 *buffer; + u8 value; buffer = kmalloc(1, GFP_KERNEL); if (!buffer) @@ -151,11 +151,11 @@ static int atusb_read_reg(struct atusb *atusb, uint8_t reg) } } -static int atusb_write_subreg(struct atusb *atusb, uint8_t reg, uint8_t mask, - uint8_t shift, uint8_t value) +static int atusb_write_subreg(struct atusb *atusb, u8 reg, u8 mask, + u8 shift, u8 value) { struct usb_device *usb_dev = atusb->usb_dev; - uint8_t orig, tmp; + u8 orig, tmp; int ret = 0; dev_dbg(&usb_dev->dev, "atusb_write_subreg: 0x%02x <- 0x%02x\n", @@ -261,10 +261,10 @@ static void atusb_work_urbs(struct work_struct *work) /* ----- Asynchronous USB -------------------------------------------------- */ -static void atusb_tx_done(struct atusb *atusb, uint8_t seq) +static void atusb_tx_done(struct atusb *atusb, u8 seq) { struct usb_device *usb_dev = atusb->usb_dev; - uint8_t expect = atusb->tx_ack_seq; + u8 expect = atusb->tx_ack_seq; dev_dbg(&usb_dev->dev, "atusb_tx_done (0x%02x/0x%02x)\n", seq, expect); if (seq == expect) { @@ -287,7 +287,7 @@ static void atusb_in_good(struct urb *urb) struct usb_device *usb_dev = urb->dev; struct sk_buff *skb = urb->context; struct atusb *atusb = SKB_ATUSB(skb); - uint8_t len, lqi; + u8 
len, lqi; if (!urb->actual_length) { dev_dbg(&usb_dev->dev, "atusb_in: zero-sized URB ?\n"); @@ -880,7 +880,7 @@ static int atusb_get_and_show_build(struct atusb *atusb) static int atusb_get_and_conf_chip(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; - uint8_t man_id_0, man_id_1, part_num, version_num; + u8 man_id_0, man_id_1, part_num, version_num; const char *chip; struct ieee802154_hw *hw = atusb->hw; From 5f0cbf4e50c874fd07a30fbc115bce7c5da96a13 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:56 +0200 Subject: [PATCH 1900/2177] ieee802154: atusb: use __func__ macro for debug messages Instead of having the function name hard-coded (it might change and we forgot to update them in the debug output) we can use __func__ instead and also shorter the line so we do not need to break it. Found by checkpatch. Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 41 ++++++++++++++++------------------ 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index bfc9d360d880..dabe759e4de5 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -99,8 +99,8 @@ static int atusb_control_msg(struct atusb *atusb, unsigned int pipe, if (ret < 0) { atusb->err = ret; dev_err(&usb_dev->dev, - "atusb_control_msg: req 0x%02x val 0x%x idx 0x%x, error %d\n", - request, value, index, ret); + "%s: req 0x%02x val 0x%x idx 0x%x, error %d\n", + __func__, request, value, index, ret); } return ret; } @@ -109,7 +109,7 @@ static int atusb_command(struct atusb *atusb, u8 cmd, u8 arg) { struct usb_device *usb_dev = atusb->usb_dev; - dev_dbg(&usb_dev->dev, "atusb_command: cmd = 0x%x\n", cmd); + dev_dbg(&usb_dev->dev, "%s: cmd = 0x%x\n", __func__, cmd); return atusb_control_msg(atusb, usb_sndctrlpipe(usb_dev, 0), cmd, ATUSB_REQ_TO_DEV, arg, 0, NULL, 0, 1000); } @@ -118,8 +118,7 @@ static int atusb_write_reg(struct atusb *atusb, u8 reg, u8 value) { struct usb_device *usb_dev = atusb->usb_dev; - dev_dbg(&usb_dev->dev, "atusb_write_reg: 0x%02x <- 0x%02x\n", - reg, value); + dev_dbg(&usb_dev->dev, "%s: 0x%02x <- 0x%02x\n", __func__, reg, value); return atusb_control_msg(atusb, usb_sndctrlpipe(usb_dev, 0), ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, value, reg, NULL, 0, 1000); @@ -136,7 +135,7 @@ static int atusb_read_reg(struct atusb *atusb, u8 reg) if (!buffer) return -ENOMEM; - dev_dbg(&usb_dev->dev, "atusb: reg = 0x%x\n", reg); + dev_dbg(&usb_dev->dev, "%s: reg = 0x%x\n", __func__, reg); ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, 0, reg, buffer, 1, 1000); @@ -158,8 +157,7 @@ static int atusb_write_subreg(struct atusb *atusb, u8 reg, u8 mask, u8 orig, tmp; int ret = 0; - dev_dbg(&usb_dev->dev, "atusb_write_subreg: 0x%02x <- 0x%02x\n", - reg, value); + dev_dbg(&usb_dev->dev, "%s: 0x%02x <- 0x%02x\n", __func__, reg, value); orig = atusb_read_reg(atusb, reg); @@ -266,7 +264,7 @@ static void atusb_tx_done(struct atusb *atusb, u8 seq) struct usb_device *usb_dev = atusb->usb_dev; u8 expect = atusb->tx_ack_seq; - dev_dbg(&usb_dev->dev, "atusb_tx_done (0x%02x/0x%02x)\n", seq, expect); + dev_dbg(&usb_dev->dev, "%s (0x%02x/0x%02x)\n", __func__, seq, expect); if (seq == expect) { /* TODO check for ifs handling in firmware */ ieee802154_xmit_complete(atusb->hw, atusb->tx_skb, false); @@ -326,7 +324,7 @@ static void atusb_in(struct urb *urb) struct sk_buff *skb = urb->context; struct atusb *atusb = SKB_ATUSB(skb); - 
dev_dbg(&usb_dev->dev, "atusb_in: status %d len %d\n", + dev_dbg(&usb_dev->dev, "%s: status %d len %d\n", __func__, urb->status, urb->actual_length); if (urb->status) { if (urb->status == -ENOENT) { /* being killed */ @@ -334,7 +332,7 @@ static void atusb_in(struct urb *urb) urb->context = NULL; return; } - dev_dbg(&usb_dev->dev, "atusb_in: URB error %d\n", urb->status); + dev_dbg(&usb_dev->dev, "%s: URB error %d\n", __func__, urb->status); } else { atusb_in_good(urb); } @@ -388,7 +386,7 @@ static int atusb_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) struct usb_device *usb_dev = atusb->usb_dev; int ret; - dev_dbg(&usb_dev->dev, "atusb_xmit (%d)\n", skb->len); + dev_dbg(&usb_dev->dev, "%s (%d)\n", __func__, skb->len); atusb->tx_skb = skb; atusb->tx_ack_seq++; atusb->tx_dr.wIndex = cpu_to_le16(atusb->tx_ack_seq); @@ -399,7 +397,7 @@ static int atusb_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) (unsigned char *)&atusb->tx_dr, skb->data, skb->len, atusb_xmit_complete, NULL); ret = usb_submit_urb(atusb->tx_urb, GFP_ATOMIC); - dev_dbg(&usb_dev->dev, "atusb_xmit done (%d)\n", ret); + dev_dbg(&usb_dev->dev, "%s done (%d)\n", __func__, ret); return ret; } @@ -420,7 +418,7 @@ static int atusb_set_hw_addr_filt(struct ieee802154_hw *hw, if (changed & IEEE802154_AFILT_SADDR_CHANGED) { u16 addr = le16_to_cpu(filt->short_addr); - dev_vdbg(dev, "atusb_set_hw_addr_filt called for saddr\n"); + dev_vdbg(dev, "%s called for saddr\n", __func__); atusb_write_reg(atusb, RG_SHORT_ADDR_0, addr); atusb_write_reg(atusb, RG_SHORT_ADDR_1, addr >> 8); } @@ -428,7 +426,7 @@ static int atusb_set_hw_addr_filt(struct ieee802154_hw *hw, if (changed & IEEE802154_AFILT_PANID_CHANGED) { u16 pan = le16_to_cpu(filt->pan_id); - dev_vdbg(dev, "atusb_set_hw_addr_filt called for pan id\n"); + dev_vdbg(dev, "%s called for pan id\n", __func__); atusb_write_reg(atusb, RG_PAN_ID_0, pan); atusb_write_reg(atusb, RG_PAN_ID_1, pan >> 8); } @@ -437,14 +435,13 @@ static int atusb_set_hw_addr_filt(struct ieee802154_hw *hw, u8 i, addr[IEEE802154_EXTENDED_ADDR_LEN]; memcpy(addr, &filt->ieee_addr, IEEE802154_EXTENDED_ADDR_LEN); - dev_vdbg(dev, "atusb_set_hw_addr_filt called for IEEE addr\n"); + dev_vdbg(dev, "%s called for IEEE addr\n", __func__); for (i = 0; i < 8; i++) atusb_write_reg(atusb, RG_IEEE_ADDR_0 + i, addr[i]); } if (changed & IEEE802154_AFILT_PANC_CHANGED) { - dev_vdbg(dev, - "atusb_set_hw_addr_filt called for panc change\n"); + dev_vdbg(dev, "%s called for panc change\n", __func__); if (filt->pan_coord) atusb_write_subreg(atusb, SR_AACK_I_AM_COORD, 1); else @@ -460,7 +457,7 @@ static int atusb_start(struct ieee802154_hw *hw) struct usb_device *usb_dev = atusb->usb_dev; int ret; - dev_dbg(&usb_dev->dev, "atusb_start\n"); + dev_dbg(&usb_dev->dev, "%s\n", __func__); schedule_delayed_work(&atusb->work, 0); atusb_command(atusb, ATUSB_RX_MODE, 1); ret = atusb_get_and_clear_error(atusb); @@ -474,7 +471,7 @@ static void atusb_stop(struct ieee802154_hw *hw) struct atusb *atusb = hw->priv; struct usb_device *usb_dev = atusb->usb_dev; - dev_dbg(&usb_dev->dev, "atusb_stop\n"); + dev_dbg(&usb_dev->dev, "%s\n", __func__); usb_kill_anchored_urbs(&atusb->idle_urbs); atusb_command(atusb, ATUSB_RX_MODE, 0); atusb_get_and_clear_error(atusb); @@ -1129,7 +1126,7 @@ static void atusb_disconnect(struct usb_interface *interface) { struct atusb *atusb = usb_get_intfdata(interface); - dev_dbg(&atusb->usb_dev->dev, "atusb_disconnect\n"); + dev_dbg(&atusb->usb_dev->dev, "%s\n", __func__); atusb->shutdown = 1; 
cancel_delayed_work_sync(&atusb->work); @@ -1146,7 +1143,7 @@ static void atusb_disconnect(struct usb_interface *interface) usb_set_intfdata(interface, NULL); usb_put_dev(atusb->usb_dev); - pr_debug("atusb_disconnect done\n"); + pr_debug("%s done\n", __func__); } /* The devices we work with */ From 2f150344497d8b7c2cdd38e3200e2f11e805cc3a Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:57 +0200 Subject: [PATCH 1901/2177] ieee802154: atusb: fix some kernel coding style errors Fix a long line, wrong comment format and misaligned indent. Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index dabe759e4de5..bef53c8173d8 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -839,7 +839,8 @@ static int atusb_get_and_show_revision(struct atusb *atusb) dev_info(&usb_dev->dev, "Firmware: major: %u, minor: %u, hardware type: %s (%d)\n", - atusb->fw_ver_maj, atusb->fw_ver_min, hw_name, atusb->fw_hw_type); + atusb->fw_ver_maj, atusb->fw_ver_min, hw_name, + atusb->fw_hw_type); } if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 2) { dev_info(&usb_dev->dev, @@ -974,7 +975,8 @@ static int atusb_set_extended_addr(struct atusb *atusb) int ret; /* Firmware versions before 0.3 do not support the EUI64_READ command. - * Just use a random address and be done */ + * Just use a random address and be done. + */ if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 3) { ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr); return 0; @@ -1004,7 +1006,7 @@ static int atusb_set_extended_addr(struct atusb *atusb) atusb->hw->phy->perm_extended_addr = extended_addr; addr = swab64((__force u64)atusb->hw->phy->perm_extended_addr); dev_info(&usb_dev->dev, "Read permanent extended address %8phC from device\n", - &addr); + &addr); } kfree(buffer); From bd910a960f0c4e817d945b061c8ca75aed3d1091 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:58 +0200 Subject: [PATCH 1902/2177] ieee802154: atusb: switch from BUG_ON() to WARN_ON() on problem The check is valid but it does not warrant to crash the kernel. A WARN_ON() is good enough here. Found by checkpatch. Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index bef53c8173d8..9fb9b565a002 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -403,7 +403,7 @@ static int atusb_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) static int atusb_ed(struct ieee802154_hw *hw, u8 *level) { - BUG_ON(!level); + WARN_ON(!level); *level = 0xbe; return 0; } From 16869e470b573a45af7f4f6b0fed77b709469f43 Mon Sep 17 00:00:00 2001 From: Harry Morris Date: Tue, 31 Oct 2017 11:22:06 +0000 Subject: [PATCH 1903/2177] MAINTAINERS: Update ca8210 driver contact emails Remove non-functioning secondary email address from maintainer information. 
Signed-off-by: Harry Morris Signed-off-by: Stefan Schmidt --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9729ebe4eb12..f048f85d741b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3084,7 +3084,6 @@ F: arch/c6x/ CA8210 IEEE-802.15.4 RADIO DRIVER M: Harry Morris -M: linuxdev@cascoda.com L: linux-wpan@vger.kernel.org W: https://github.com/Cascoda/ca8210-linux.git S: Maintained From 3ee0275d8db8757b2895c01868361550d38ae109 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:14:02 +0200 Subject: [PATCH 1904/2177] ieee802154: cc2520: fix some kernel coding style errors Fix some spacing and needed new line. Signed-off-by: Stefan Schmidt Acked-by: Varka Bhadram Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/cc2520.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index d50add705a79..9c1d1768a36f 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -648,7 +648,7 @@ cc2520_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) BUG_ON(channel > CC2520_MAXCHANNEL); ret = cc2520_write_register(priv, CC2520_FREQCTRL, - 11 + 5*(channel - 11)); + 11 + 5 * (channel - 11)); return ret; } @@ -929,6 +929,7 @@ static int cc2520_get_platform_data(struct spi_device *spi, if (!np) { struct cc2520_platform_data *spi_pdata = spi->dev.platform_data; + if (!spi_pdata) return -ENOENT; *pdata = *spi_pdata; From a8ab042c8099a4fe4d81cccec09f67556697dedd Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:14:03 +0200 Subject: [PATCH 1905/2177] ieee802154: cc2520: use __func__ macro for debug messages Instead of having the function name hard-coded (it might change and we forgot to update them in the debug output) we can use __func__ instead and also shorter the line so we do not need to break it. Found by checkpatch. 
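To illustrate the transformation (one of the hunks below, shown here for clarity), the hard-coded name in the format string becomes the C99 __func__ identifier, which always matches the enclosing function:

	before:
		dev_vdbg(&priv->spi->dev,
			 "cc2520_filter called for pan id\n");
	after:
		dev_vdbg(&priv->spi->dev, "%s called for pan id\n", __func__);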
Signed-off-by: Stefan Schmidt Acked-by: Varka Bhadram Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/cc2520.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 9c1d1768a36f..735b9f5f9754 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -663,15 +663,14 @@ cc2520_filter(struct ieee802154_hw *hw, if (changed & IEEE802154_AFILT_PANID_CHANGED) { u16 panid = le16_to_cpu(filt->pan_id); - dev_vdbg(&priv->spi->dev, - "cc2520_filter called for pan id\n"); + dev_vdbg(&priv->spi->dev, "%s called for pan id\n", __func__); ret = cc2520_write_ram(priv, CC2520RAM_PANID, sizeof(panid), (u8 *)&panid); } if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) { dev_vdbg(&priv->spi->dev, - "cc2520_filter called for IEEE addr\n"); + "%s called for IEEE addr\n", __func__); ret = cc2520_write_ram(priv, CC2520RAM_IEEEADDR, sizeof(filt->ieee_addr), (u8 *)&filt->ieee_addr); @@ -680,8 +679,7 @@ cc2520_filter(struct ieee802154_hw *hw, if (changed & IEEE802154_AFILT_SADDR_CHANGED) { u16 addr = le16_to_cpu(filt->short_addr); - dev_vdbg(&priv->spi->dev, - "cc2520_filter called for saddr\n"); + dev_vdbg(&priv->spi->dev, "%s called for saddr\n", __func__); ret = cc2520_write_ram(priv, CC2520RAM_SHORTADDR, sizeof(addr), (u8 *)&addr); } @@ -690,7 +688,7 @@ cc2520_filter(struct ieee802154_hw *hw, u8 frmfilt0; dev_vdbg(&priv->spi->dev, - "cc2520_filter called for panc change\n"); + "%s called for panc change\n", __func__); cc2520_read_register(priv, CC2520_FRMFILT0, &frmfilt0); From cd3a21b5bd27b9b92df7025e72cb4275c5ea6e58 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:14:04 +0200 Subject: [PATCH 1906/2177] ieee802154: cc2520: switch from BUG_ON() to WARN_ON() on problem The check is valid but it does not warrant to crash the kernel. A WARN_ON() is good enough here. Found by checkpatch. Signed-off-by: Stefan Schmidt Acked-by: Varka Bhadram Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/cc2520.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 735b9f5f9754..0c89d3edf901 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -517,7 +517,7 @@ cc2520_tx(struct ieee802154_hw *hw, struct sk_buff *skb) } spin_lock_irqsave(&priv->lock, flags); - BUG_ON(priv->is_tx); + WARN_ON(priv->is_tx); priv->is_tx = 1; spin_unlock_irqrestore(&priv->lock, flags); @@ -643,9 +643,9 @@ cc2520_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) dev_dbg(&priv->spi->dev, "trying to set channel\n"); - BUG_ON(page != 0); - BUG_ON(channel < CC2520_MINCHANNEL); - BUG_ON(channel > CC2520_MAXCHANNEL); + WARN_ON(page != 0); + WARN_ON(channel < CC2520_MINCHANNEL); + WARN_ON(channel > CC2520_MAXCHANNEL); ret = cc2520_write_register(priv, CC2520_FREQCTRL, 11 + 5 * (channel - 11)); From 0be7fc7e08e7c6a5ec3837960d420e377380d1a8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 31 Oct 2017 01:31:27 -0500 Subject: [PATCH 1907/2177] ieee802154: mrf24j40: fix incorrect mask in mrf24j40_stop It seems that this is a copy/paste error and the proper bit masking is: BIT_TXNIE | BIT_RXIE This issue was detected with the help of Coccinelle. Reported-by: Julia Lawall Signed-off-by: Gustavo A. R. 
Silva Fixes: 7d840545e5b9 ("mrf24j40: replace magic numbers") Acked-by: Alan Ott Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mrf24j40.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index ee7084b2d52d..cf4788d840bf 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -635,7 +635,7 @@ static void mrf24j40_stop(struct ieee802154_hw *hw) /* Set TXNIE and RXIE. Disable Interrupts */ regmap_update_bits(devrec->regmap_short, REG_INTCON, - BIT_TXNIE | BIT_TXNIE, BIT_TXNIE | BIT_TXNIE); + BIT_TXNIE | BIT_RXIE, BIT_TXNIE | BIT_RXIE); } static int mrf24j40_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) From 395cef423358dccfefc0a8ea6a73730c0084df7f Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:52 +0200 Subject: [PATCH 1908/2177] ieee802154: adf7242: use unsigned int over only unsigned Bring it in line with the rest of the ieee802154 drivers. Found by checkpatch. Signed-off-by: Stefan Schmidt Acked-by: Michael Hennerich Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 3e4c8b21403c..400fdbd3a120 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -311,8 +311,8 @@ static int adf7242_status(struct adf7242_local *lp, u8 *stat) return status; } -static int adf7242_wait_status(struct adf7242_local *lp, unsigned status, - unsigned mask, int line) +static int adf7242_wait_status(struct adf7242_local *lp, unsigned int status, + unsigned int mask, int line) { int cnt = 0, ret = 0; u8 stat; @@ -477,7 +477,7 @@ static int adf7242_write_reg(struct adf7242_local *lp, u16 addr, u8 data) return status; } -static int adf7242_cmd(struct adf7242_local *lp, unsigned cmd) +static int adf7242_cmd(struct adf7242_local *lp, unsigned int cmd) { int status; @@ -920,7 +920,7 @@ static void adf7242_debug(u8 irq1) static irqreturn_t adf7242_isr(int irq, void *data) { struct adf7242_local *lp = data; - unsigned xmit; + unsigned int xmit; u8 irq1; adf7242_wait_status(lp, RC_STATUS_PHY_RDY, RC_STATUS_MASK, __LINE__); From dc1281e1f8550337e09487348d3a27491fd31f66 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:14:00 +0200 Subject: [PATCH 1909/2177] ieee802154: ca8210: fix some kernel coding style errors Remove unneeded parentheses and fix format for pointer style. 
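For example, both kinds of cleanup appear in the hunks below:

	if ((value < 6) || (value > 41))	/* before */
	if (value < 6 || value > 41)		/* after: redundant parentheses dropped */

	(const void*)&on			/* before */
	(const void *)&on			/* after: pointer-style spacing */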
Signed-off-by: Stefan Schmidt Acked-by: Harry Morris Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index e6b8ce81a6c3..3da40a17e8ce 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -1303,7 +1303,7 @@ static u8 tdme_checkpibattribute( break; /* MAC */ case MAC_BATT_LIFE_EXT_PERIODS: - if ((value < 6) || (value > 41)) + if (value < 6 || value > 41) status = MAC_INVALID_PARAMETER; break; case MAC_BEACON_PAYLOAD: @@ -1319,7 +1319,7 @@ static u8 tdme_checkpibattribute( status = MAC_INVALID_PARAMETER; break; case MAC_MAX_BE: - if ((value < 3) || (value > 8)) + if (value < 3 || value > 8) status = MAC_INVALID_PARAMETER; break; case MAC_MAX_CSMA_BACKOFFS: @@ -1335,7 +1335,7 @@ static u8 tdme_checkpibattribute( status = MAC_INVALID_PARAMETER; break; case MAC_RESPONSE_WAIT_TIME: - if ((value < 2) || (value > 64)) + if (value < 2 || value > 64) status = MAC_INVALID_PARAMETER; break; case MAC_SUPERFRAME_ORDER: @@ -1511,7 +1511,7 @@ static u8 mcps_data_request( psec = (struct secspec *)(command.pdata.data_req.msdu + msdu_length); command.length = sizeof(struct mcps_data_request_pset) - MAX_DATA_SIZE + msdu_length; - if (!security || (security->security_level == 0)) { + if (!security || security->security_level == 0) { psec->security_level = 0; command.length += 1; } else { @@ -1561,7 +1561,7 @@ static u8 mlme_reset_request_sync( status = response.pdata.status; /* reset COORD Bit for Channel Filtering as Coordinator */ - if (CA8210_MAC_WORKAROUNDS && set_default_pib && (!status)) { + if (CA8210_MAC_WORKAROUNDS && set_default_pib && !status) { status = tdme_setsfr_request_sync( 0, CA8210_SFR_MACCON, @@ -2369,7 +2369,7 @@ static int ca8210_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) MAC_PROMISCUOUS_MODE, 0, 1, - (const void*)&on, + (const void *)&on, priv->spi ); if (status) { From 7558bd50201f0b21129a9a5c220c11cf3bfc6efe Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:14:01 +0200 Subject: [PATCH 1910/2177] ieee802154: ca8210: use __func__ macro for debug messages Instead of having the function name hard-coded (it might change and we forgot to update them in the debug output) we can use __func__ instead and also shorter the line so we do not need to break it. Found by checkpatch. 
Signed-off-by: Stefan Schmidt Acked-by: Harry Morris Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 3da40a17e8ce..7900ed066d8a 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -924,7 +924,7 @@ static int ca8210_spi_transfer( priv = spi_get_drvdata(spi); reinit_completion(&priv->spi_transfer_complete); - dev_dbg(&spi->dev, "ca8210_spi_transfer called\n"); + dev_dbg(&spi->dev, "%s called\n", __func__); cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC); if (!cas_ctl) @@ -1898,7 +1898,7 @@ static int ca8210_net_rx(struct ieee802154_hw *hw, u8 *command, size_t len) unsigned long flags; u8 status; - dev_dbg(&priv->spi->dev, "ca8210_net_rx(), CmdID = %d\n", command[0]); + dev_dbg(&priv->spi->dev, "%s: CmdID = %d\n", __func__, command[0]); if (command[0] == SPI_MCPS_DATA_INDICATION) { /* Received data */ @@ -1948,7 +1948,7 @@ static int ca8210_skb_tx( struct secspec secspec; unsigned int mac_len; - dev_dbg(&priv->spi->dev, "ca8210_skb_tx() called\n"); + dev_dbg(&priv->spi->dev, "%s called\n", __func__); /* Get addressing info from skb - ieee802154 layer creates a full * packet @@ -2051,7 +2051,7 @@ static int ca8210_xmit_async(struct ieee802154_hw *hw, struct sk_buff *skb) struct ca8210_priv *priv = hw->priv; int status; - dev_dbg(&priv->spi->dev, "calling ca8210_xmit_async()\n"); + dev_dbg(&priv->spi->dev, "calling %s\n", __func__); priv->tx_skb = skb; priv->async_tx_pending = true; From 7f4dae2d7f03d2aaf3b7d8343d4509c8d9d7ca9b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Nov 2017 19:04:47 +0100 Subject: [PATCH 1911/2177] netfilter: nft_hash: fix nft_hash_deactivate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jindřich Makovička says: The logical OR looks fishy to me. Shouldn't be && there instead? Link: https://bugzilla.netfilter.org/show_bug.cgi?id=1199 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 0fa01d772c5e..650677f1e539 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -494,7 +494,7 @@ static void *nft_hash_deactivate(const struct net *net, hash = reciprocal_scale(hash, priv->buckets); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, - set->klen) || + set->klen) && nft_set_elem_active(&he->ext, genmask)) { nft_set_elem_change_active(net, set, &he->ext); return he; From 5caaed151a68ae36aca2981cc245f5960a0a7603 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Nov 2017 19:41:09 +0100 Subject: [PATCH 1912/2177] netfilter: conntrack: don't cache nlattr_tuple_size result in nla_size We currently call ->nlattr_tuple_size() once at register time and cache result in l4proto->nla_size. nla_size is the only member that is written to, avoiding this would allow to make l4proto trackers const. We can use ->nlattr_tuple_size() at run time, and cache result in the individual trackers instead. This is an intermediate step, next patch removes nlattr_size() callback and computes size at compile time, then removes nla_size. 
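The per-tracker cache is a lazily initialised static, as in the ICMP tracker below (the ICMPv6, TCP and generic port helpers changed in this patch follow the same pattern):

	static unsigned int icmp_nlattr_tuple_size(void)
	{
		static unsigned int size __read_mostly;

		if (!size)
			size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);

		return size;
	}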
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 9 +++++++-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 9 +++++++-- net/netfilter/nf_conntrack_core.c | 9 +++++++-- net/netfilter/nf_conntrack_netlink.c | 10 +++++++--- net/netfilter/nf_conntrack_proto.c | 2 -- net/netfilter/nf_conntrack_proto_tcp.c | 9 +++++++-- 7 files changed, 37 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index e06518874144..46e786ffcf2f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -74,7 +74,7 @@ struct nf_conntrack_l4proto { int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); /* Calculate tuple nlattr size */ - int (*nlattr_tuple_size)(void); + unsigned int (*nlattr_tuple_size)(void); int (*nlattr_to_tuple)(struct nlattr *tb[], struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; @@ -144,7 +144,7 @@ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t); -int nf_ct_port_nlattr_tuple_size(void); +unsigned int nf_ct_port_nlattr_tuple_size(void); extern const struct nla_policy nf_ct_port_nla_policy[]; #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 8969420cecc3..1849fedd9b81 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -258,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[], return 0; } -static int icmp_nlattr_tuple_size(void) +static unsigned int icmp_nlattr_tuple_size(void) { - return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index dca921df28e1..3ac0d826afc4 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -259,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], return 0; } -static int icmpv6_nlattr_tuple_size(void) +static unsigned int icmpv6_nlattr_tuple_size(void) { - return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 28e675150853..0e516947c16f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1563,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); -int nf_ct_port_nlattr_tuple_size(void) +unsigned int nf_ct_port_nlattr_tuple_size(void) { - return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + + return size; } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); #endif diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index de4053d84364..6e0adfefb9ed 100644 --- 
a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -533,11 +533,11 @@ nla_put_failure: return -1; } -static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) +static size_t ctnetlink_proto_size(const struct nf_conn *ct) { const struct nf_conntrack_l3proto *l3proto; const struct nf_conntrack_l4proto *l4proto; - size_t len; + size_t len, len4 = 0; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); len = l3proto->nla_size; @@ -545,8 +545,12 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); len += l4proto->nla_size; + if (l4proto->nlattr_tuple_size) { + len4 = l4proto->nlattr_tuple_size(); + len4 *= 3u; /* ORIG, REPLY, MASTER */ + } - return len; + return len + len4; } static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 83f739e9dc08..3b06ff3f2dee 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -398,8 +398,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) l4proto->nla_size = 0; if (l4proto->nlattr_size) l4proto->nla_size += l4proto->nlattr_size(); - if (l4proto->nlattr_tuple_size) - l4proto->nla_size += 3 * l4proto->nlattr_tuple_size(); rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 8f283294d70f..b12fc07111d0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1277,9 +1277,14 @@ static int tcp_nlattr_size(void) + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1); } -static int tcp_nlattr_tuple_size(void) +static unsigned int tcp_nlattr_tuple_size(void) { - return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif From 7e35ec0e8044f1ede852f55948f71a1963903219 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 3 Nov 2017 16:26:32 +0100 Subject: [PATCH 1913/2177] netfilter: conntrack: move nf_ct_netns_{get,put}() to core So we can call this from other expression that need conntrack in place to work. 
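A minimal sketch of a new caller (nft_foo is a hypothetical expression, not part of this patch) showing why the helpers are wanted in the core; NFPROTO_INET is fanned out to both IPv4 and IPv6 by the helper itself:

	static int nft_foo_init(const struct nft_ctx *ctx,
				const struct nft_expr *expr,
				const struct nlattr * const tb[])
	{
		/* pin conntrack for this family before using it */
		return nf_ct_netns_get(ctx->net, ctx->afi->family);
	}

	static void nft_foo_destroy(const struct nft_ctx *ctx,
				    const struct nft_expr *expr)
	{
		/* drop the reference taken in ->init() */
		nf_ct_netns_put(ctx->net, ctx->afi->family);
	}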
Signed-off-by: Pablo Neira Ayuso Acked-by: Florian Westphal --- net/netfilter/nf_conntrack_proto.c | 37 ++++++++++++++++++++++++++-- net/netfilter/nft_ct.c | 39 +++--------------------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 3b06ff3f2dee..c8e9c9503a08 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -172,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); -int nf_ct_netns_get(struct net *net, u8 nfproto) +static int nf_ct_netns_do_get(struct net *net, u8 nfproto) { const struct nf_conntrack_l3proto *l3proto; int ret; @@ -197,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto) return ret; } + +int nf_ct_netns_get(struct net *net, u8 nfproto) +{ + int err; + + if (nfproto == NFPROTO_INET) { + err = nf_ct_netns_do_get(net, NFPROTO_IPV4); + if (err < 0) + goto err1; + err = nf_ct_netns_do_get(net, NFPROTO_IPV6); + if (err < 0) + goto err2; + } else { + err = nf_ct_netns_do_get(net, nfproto); + if (err < 0) + goto err1; + } + return 0; + +err2: + nf_ct_netns_put(net, NFPROTO_IPV4); +err1: + return err; +} EXPORT_SYMBOL_GPL(nf_ct_netns_get); -void nf_ct_netns_put(struct net *net, u8 nfproto) +static void nf_ct_netns_do_put(struct net *net, u8 nfproto) { const struct nf_conntrack_l3proto *l3proto; @@ -218,6 +242,15 @@ void nf_ct_netns_put(struct net *net, u8 nfproto) nf_ct_l3proto_module_put(nfproto); } + +void nf_ct_netns_put(struct net *net, uint8_t nfproto) +{ + if (nfproto == NFPROTO_INET) { + nf_ct_netns_do_put(net, NFPROTO_IPV4); + nf_ct_netns_do_put(net, NFPROTO_IPV6); + } else + nf_ct_netns_do_put(net, nfproto); +} EXPORT_SYMBOL_GPL(nf_ct_netns_put); const struct nf_conntrack_l4proto * diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bd0975d7dd6f..2647b895f4b0 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_netns_get(struct net *net, uint8_t family) -{ - int err; - - if (family == NFPROTO_INET) { - err = nf_ct_netns_get(net, NFPROTO_IPV4); - if (err < 0) - goto err1; - err = nf_ct_netns_get(net, NFPROTO_IPV6); - if (err < 0) - goto err2; - } else { - err = nf_ct_netns_get(net, family); - if (err < 0) - goto err1; - } - return 0; - -err2: - nf_ct_netns_put(net, NFPROTO_IPV4); -err1: - return err; -} - -static void nft_ct_netns_put(struct net *net, uint8_t family) -{ - if (family == NFPROTO_INET) { - nf_ct_netns_put(net, NFPROTO_IPV4); - nf_ct_netns_put(net, NFPROTO_IPV6); - } else - nf_ct_netns_put(net, family); -} - #ifdef CONFIG_NF_CONNTRACK_ZONES static void nft_ct_tmpl_put_pcpu(void) { @@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) return err; @@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nft_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) goto err1; @@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv = nft_expr_priv(expr); __nft_ct_set_destroy(ctx, priv); - nft_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->afi->family); } 
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) From 0984d427c1d3cb2aa882c9ad9e787ba973cf2915 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 2 Nov 2017 16:16:07 +0100 Subject: [PATCH 1914/2177] netfilter: conntrack: use power efficient workqueue conntrack uses the bounded system_long_wq workqueue for its works that don't have to run on the cpu they have been queued. Using bounded workqueue prevents the scheduler to make smart decision about the best place to schedule the work. This patch replaces system_long_wq with system_power_efficient_wq. the work stays bounded to a cpu by default unless the CONFIG_WQ_POWER_EFFICIENT is enable. In the latter case, the work can be scheduled on the best cpu from a power or a performance point of view. Signed-off-by: Vincent Guittot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0e516947c16f..5749fcaa2770 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1083,7 +1083,7 @@ static void gc_worker(struct work_struct *work) next_run = gc_work->next_gc_run; gc_work->last_bucket = i; gc_work->early_drop = false; - queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); + queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) @@ -2089,7 +2089,7 @@ int nf_conntrack_init_start(void) goto err_proto; conntrack_gc_work_init(&conntrack_gc_work); - queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); + queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); return 0; From 644e334eeec01a25138b62ebd576b3a798183c7c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 6 Nov 2017 05:57:13 +0100 Subject: [PATCH 1915/2177] netfilter: nf_tables: performance set policy skips size description in selection Use the complexity and space notations if policy is performance, this results in placing the bitmap set representation over the hashtable for key <= 16 for better performance as we discussed during the last NFWS in Faro, Portugal. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 929927171426..3b4a0739ee39 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2549,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx, case NFT_SET_POL_PERFORMANCE: if (est.lookup < best.lookup) break; - if (est.lookup == best.lookup) { - if (!desc->size) { - if (est.space < best.space) - break; - } else if (est.size < best.size) { - break; - } - } + if (est.lookup == best.lookup && + est.space < best.space) + break; continue; case NFT_SET_POL_MEMORY: if (!desc->size) { From ba0e4d9917b43dfa746cbbcb4477da59aae73bd6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 9 Oct 2017 19:52:28 +0200 Subject: [PATCH 1916/2177] netfilter: nf_tables: get set elements via netlink This patch adds a new get operation to look up for specific elements in a set via netlink interface. You can also use it to check if an interval already exists. 
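For reference, the new backend hook and its use in the netlink GET path (both added below); the backend returns the element's private data or an ERR_PTR() such as -ENOENT:

	/* new member of struct nft_set_ops */
	void *		(*get)(const struct net *net,
			       const struct nft_set *set,
			       const struct nft_set_elem *elem,
			       unsigned int flags);

	/* nft_get_set_elem() resolves the requested key through it */
	priv = set->ops->get(ctx->net, set, &elem, flags);
	if (IS_ERR(priv))
		return PTR_ERR(priv);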
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 + net/netfilter/nf_tables_api.c | 184 +++++++++++++++++++++--------- net/netfilter/nft_set_bitmap.c | 18 +++ net/netfilter/nft_set_hash.c | 39 +++++++ net/netfilter/nft_set_rbtree.c | 73 ++++++++++++ 5 files changed, 264 insertions(+), 55 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0f5b12a4ad09..d011e56cc7a9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -311,6 +311,7 @@ struct nft_expr; * @flush: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts + * @get: get set elements * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance @@ -350,6 +351,10 @@ struct nft_set_ops { void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); + void * (*get)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem, + unsigned int flags); unsigned int (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3b4a0739ee39..1d66be0d8ef7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3586,45 +3586,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb) return 0; } -static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct netlink_ext_ack *extack) -{ - u8 genmask = nft_genmask_cur(net); - const struct nft_set *set; - struct nft_ctx ctx; - int err; - - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); - if (err < 0) - return err; - - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); - - if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = nf_tables_dump_set, - .done = nf_tables_dump_set_done, - }; - struct nft_set_dump_ctx *dump_ctx; - - dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); - if (!dump_ctx) - return -ENOMEM; - - dump_ctx->set = set; - dump_ctx->ctx = ctx; - - c.data = dump_ctx; - return netlink_dump_start(nlsk, skb, nlh, &c); - } - return -EOPNOTSUPP; -} - static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq, u32 portid, int event, u16 flags, @@ -3670,6 +3631,135 @@ nla_put_failure: return -1; } +static int nft_setelem_parse_flags(const struct nft_set *set, + const struct nlattr *attr, u32 *flags) +{ + if (attr == NULL) + return 0; + + *flags = ntohl(nla_get_be32(attr)); + if (*flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + if (!(set->flags & NFT_SET_INTERVAL) && + *flags & NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + + return 0; +} + +static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + const struct nft_set_ext *ext; + struct nft_data_desc desc; + struct nft_set_elem elem; + struct sk_buff *skb; + uint32_t flags = 0; + void *priv; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); + if (err < 0) + return err; + + if (!nla[NFTA_SET_ELEM_KEY]) + return -EINVAL; + + err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); + 
if (err < 0) + return err; + + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, + nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + return err; + + err = -EINVAL; + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) + return err; + + priv = set->ops->get(ctx->net, set, &elem, flags); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + elem.priv = priv; + ext = nft_set_elem_ext(set, &elem); + + err = -ENOMEM; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err1; + + err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, + NFT_MSG_NEWSETELEM, 0, set, &elem); + if (err < 0) + goto err2; + + err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT); + /* This avoids a loop in nfnetlink. */ + if (err < 0) + goto err1; + + return 0; +err2: + kfree_skb(skb); +err1: + /* this avoids a loop in nfnetlink. */ + return err == -EAGAIN ? -ENOBUFS : err; +} + +static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + u8 genmask = nft_genmask_cur(net); + struct nft_set *set; + struct nlattr *attr; + struct nft_ctx ctx; + int rem, err = 0; + + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); + if (IS_ERR(set)) + return PTR_ERR(set); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_set, + .done = nf_tables_dump_set_done, + }; + struct nft_set_dump_ctx *dump_ctx; + + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); + if (!dump_ctx) + return -ENOMEM; + + dump_ctx->set = set; + dump_ctx->ctx = ctx; + + c.data = dump_ctx; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) + return -EINVAL; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_get_set_elem(&ctx, set, attr); + if (err < 0) + break; + } + + return err; +} + static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_set_elem *elem, @@ -3770,22 +3860,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) kfree(elem); } -static int nft_setelem_parse_flags(const struct nft_set *set, - const struct nlattr *attr, u32 *flags) -{ - if (attr == NULL) - return 0; - - *flags = ntohl(nla_get_be32(attr)); - if (*flags & ~NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - if (!(set->flags & NFT_SET_INTERVAL) && - *flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - - return 0; -} - static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 734989c40579..45fb2752fb63 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, return NULL; } +static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + struct nft_bitmap_elem *be; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) || + !nft_set_elem_active(&be->ext, genmask)) + continue; + + return be; + 
} + return ERR_PTR(-ENOENT); +} + static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) @@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = { .activate = nft_bitmap_activate, .lookup = nft_bitmap_lookup, .walk = nft_bitmap_walk, + .get = nft_bitmap_get, }; static struct nft_set_type nft_bitmap_type __read_mostly = { diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 650677f1e539..c68a7e0fcf1e 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, return !!he; } +static void *nft_rhash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he; + struct nft_rhash_cmp_arg arg = { + .genmask = nft_genmask_cur(net), + .set = set, + .key = elem->key.val.data, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); + if (he != NULL) + return he; + + return ERR_PTR(-ENOENT); +} + static bool nft_rhash_update(struct nft_set *set, const u32 *key, void *(*new)(struct nft_set *, const struct nft_expr *, @@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, return false; } +static void *nft_hash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + struct nft_hash_elem *he; + u32 hash; + + hash = jhash(elem->key.val.data, set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry_rcu(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) && + nft_set_elem_active(&he->ext, genmask)) + return he; + } + return ERR_PTR(-ENOENT); +} + /* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . 
*/ static inline u32 nft_hash_key(const u32 *key, u32 klen) { @@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = { .lookup = nft_rhash_lookup, .update = nft_rhash_update, .walk = nft_rhash_walk, + .get = nft_rhash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, }; @@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .remove = nft_hash_remove, .lookup = nft_hash_lookup, .walk = nft_hash_walk, + .get = nft_hash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT, }; @@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = { .remove = nft_hash_remove, .lookup = nft_hash_lookup_fast, .walk = nft_hash_walk, + .get = nft_hash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT, }; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index d83a4ec5900d..e6f08bc5f359 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -113,6 +113,78 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, return ret; } +static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, + const u32 *key, struct nft_rbtree_elem **elem, + unsigned int seq, unsigned int flags, u8 genmask) +{ + struct nft_rbtree_elem *rbe, *interval = NULL; + struct nft_rbtree *priv = nft_set_priv(set); + const struct rb_node *parent; + const void *this; + int d; + + parent = rcu_dereference_raw(priv->root.rb_node); + while (parent != NULL) { + if (read_seqcount_retry(&priv->count, seq)) + return false; + + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + this = nft_set_ext_key(&rbe->ext); + d = memcmp(this, key, set->klen); + if (d < 0) { + parent = rcu_dereference_raw(parent->rb_left); + interval = rbe; + } else if (d > 0) { + parent = rcu_dereference_raw(parent->rb_right); + } else { + if (!nft_set_elem_active(&rbe->ext, genmask)) + parent = rcu_dereference_raw(parent->rb_left); + + if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || + (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == + (flags & NFT_SET_ELEM_INTERVAL_END)) { + *elem = rbe; + return true; + } + return false; + } + } + + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && + !nft_rbtree_interval_end(interval)) { + *elem = interval; + return true; + } + + return false; +} + +static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_rbtree *priv = nft_set_priv(set); + unsigned int seq = read_seqcount_begin(&priv->count); + struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT); + const u32 *key = (const u32 *)&elem->key.val; + u8 genmask = nft_genmask_cur(net); + bool ret; + + ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); + if (ret || !read_seqcount_retry(&priv->count, seq)) + return rbe; + + read_lock_bh(&priv->lock); + seq = read_seqcount_begin(&priv->count); + ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); + if (!ret) + rbe = ERR_PTR(-ENOENT); + read_unlock_bh(&priv->lock); + + return rbe; +} + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) @@ -336,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, + .get = nft_rbtree_get, .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, }; From 
fffcefe967a02997be7a296a4f0766b29dcd1a67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Nov 2017 14:13:29 -0800 Subject: [PATCH 1917/2177] ipv6: addrconf: fix a lockdep splat Fixes a case where GFP_ATOMIC allocation must be used instead of GFP_KERNEL one. [ 54.891146] lock_acquire+0xb3/0x2f0 [ 54.891153] ? fs_reclaim_acquire.part.60+0x5/0x30 [ 54.891165] fs_reclaim_acquire.part.60+0x29/0x30 [ 54.891170] ? fs_reclaim_acquire.part.60+0x5/0x30 [ 54.891178] kmem_cache_alloc_trace+0x3f/0x500 [ 54.891186] ? cyc2ns_read_end+0x1e/0x30 [ 54.891196] ipv6_add_addr+0x15a/0xc30 [ 54.891217] ? ipv6_create_tempaddr+0x2ea/0x5d0 [ 54.891223] ipv6_create_tempaddr+0x2ea/0x5d0 [ 54.891238] ? manage_tempaddrs+0x195/0x220 [ 54.891249] ? addrconf_prefix_rcv_add_addr+0x1c0/0x4f0 [ 54.891255] addrconf_prefix_rcv_add_addr+0x1c0/0x4f0 [ 54.891268] addrconf_prefix_rcv+0x2e5/0x9b0 [ 54.891279] ? neigh_update+0x446/0xb90 [ 54.891298] ? ndisc_router_discovery+0x5ab/0xf00 [ 54.891303] ndisc_router_discovery+0x5ab/0xf00 [ 54.891311] ? retint_kernel+0x2d/0x2d [ 54.891331] ndisc_rcv+0x1b6/0x270 [ 54.891340] icmpv6_rcv+0x6aa/0x9f0 [ 54.891345] ? ipv6_chk_mcast_addr+0x176/0x530 [ 54.891351] ? do_csum+0x17b/0x260 [ 54.891360] ip6_input_finish+0x194/0xb20 [ 54.891372] ip6_input+0x5b/0x2c0 [ 54.891380] ? ip6_rcv_finish+0x320/0x320 [ 54.891389] ip6_mc_input+0x15a/0x250 [ 54.891396] ipv6_rcv+0x772/0x1050 [ 54.891403] ? consume_skb+0xbe/0x2d0 [ 54.891412] ? ip6_make_skb+0x2a0/0x2a0 [ 54.891418] ? ip6_input+0x2c0/0x2c0 [ 54.891425] __netif_receive_skb_core+0xa0f/0x1600 [ 54.891436] ? process_backlog+0xac/0x400 [ 54.891441] process_backlog+0xfa/0x400 [ 54.891448] ? net_rx_action+0x145/0x1130 [ 54.891456] net_rx_action+0x310/0x1130 [ 54.891524] ? RTUSBBulkReceive+0x11d/0x190 [mt7610u_sta] [ 54.891538] __do_softirq+0x140/0xaba [ 54.891553] irq_exit+0x10b/0x160 [ 54.891561] do_IRQ+0xbb/0x1b0 Fixes: f3d9832e56c4 ("ipv6: addrconf: cleanup locking in ipv6_add_addr") Signed-off-by: Eric Dumazet Reported-by: Valdis Kletnieks Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 66d8c3d912fd..6233e06fa35c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1267,7 +1267,9 @@ out: in6_ifa_put(ifp); } -static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) +static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, + struct inet6_ifaddr *ift, + bool block) { struct inet6_dev *idev = ifp->idev; struct in6_addr addr, *tmpaddr; @@ -1371,7 +1373,7 @@ retry: ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen, ipv6_addr_scope(&addr), addr_flags, - tmp_valid_lft, tmp_prefered_lft, true, NULL); + tmp_valid_lft, tmp_prefered_lft, block, NULL); if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -1956,7 +1958,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (ifpub) { in6_ifa_hold(ifpub); spin_unlock_bh(&ifp->lock); - ipv6_create_tempaddr(ifpub, ifp); + ipv6_create_tempaddr(ifpub, ifp, true); in6_ifa_put(ifpub); } else { spin_unlock_bh(&ifp->lock); @@ -2456,7 +2458,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, * no temporary address currently exists. 
*/ read_unlock_bh(&idev->lock); - ipv6_create_tempaddr(ifp, NULL); + ipv6_create_tempaddr(ifp, NULL, false); } else { read_unlock_bh(&idev->lock); } @@ -4351,7 +4353,7 @@ restart: spin_lock(&ifpub->lock); ifpub->regen_count = 0; spin_unlock(&ifpub->lock); - ipv6_create_tempaddr(ifpub, ifp); + ipv6_create_tempaddr(ifpub, ifp, true); in6_ifa_put(ifpub); in6_ifa_put(ifp); goto restart; From ff198cdb9642851e01e41cdeb76d6cd86e8c427c Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Thu, 2 Nov 2017 00:48:45 +0100 Subject: [PATCH 1918/2177] net: phy: leds: Refactor "no link" handler into a separate function Currently, phy_led_trigger_change_speed() is handling a "no link" condition like it was some kind of an error (using "goto" to a code at the function end). However, having no link at PHY is an ordinary operational state, so let's handle it in an appropriately named separate function so it is more obvious what the code is doing. Signed-off-by: Maciej S. Szmigiero Signed-off-by: David S. Miller --- drivers/net/phy/phy_led_triggers.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index 94ca42e630bb..c736f29b3b2a 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -27,12 +27,20 @@ static struct phy_led_trigger *phy_speed_to_led_trigger(struct phy_device *phy, return NULL; } +static void phy_led_trigger_no_link(struct phy_device *phy) +{ + if (phy->last_triggered) { + led_trigger_event(&phy->last_triggered->trigger, LED_OFF); + phy->last_triggered = NULL; + } +} + void phy_led_trigger_change_speed(struct phy_device *phy) { struct phy_led_trigger *plt; if (!phy->link) - goto out_change_speed; + return phy_led_trigger_no_link(phy); if (phy->speed == 0) return; @@ -42,7 +50,7 @@ void phy_led_trigger_change_speed(struct phy_device *phy) netdev_alert(phy->attached_dev, "No phy led trigger registered for speed(%d)\n", phy->speed); - goto out_change_speed; + return phy_led_trigger_no_link(phy); } if (plt != phy->last_triggered) { @@ -50,14 +58,6 @@ void phy_led_trigger_change_speed(struct phy_device *phy) led_trigger_event(&plt->trigger, LED_FULL); phy->last_triggered = plt; } - return; - -out_change_speed: - if (phy->last_triggered) { - led_trigger_event(&phy->last_triggered->trigger, - LED_OFF); - phy->last_triggered = NULL; - } } EXPORT_SYMBOL_GPL(phy_led_trigger_change_speed); From 3928ee6485a316c8abde7e24c7f82033a1c8d3ae Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Thu, 2 Nov 2017 00:49:18 +0100 Subject: [PATCH 1919/2177] net: phy: leds: Add support for "link" trigger Currently, we create a LED trigger for any link speed known to a PHY. These triggers only fire when their exact link speed had been negotiated (they aren't cumulative, that is, they don't fire for "their or any higher" link speed). What we are missing, however, is a trigger which will fire on any link speed known to the PHY. Such trigger can then be used for implementing a poor man's substitute of the "link" LED on boards that lack it. Let's add it. Signed-off-by: Maciej S. Szmigiero Signed-off-by: David S. 
Miller --- drivers/net/phy/Kconfig | 7 +++-- drivers/net/phy/phy_led_triggers.c | 43 +++++++++++++++++++++++++++--- include/linux/phy.h | 2 ++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 8125412c8814..bdfbabb86ee0 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -191,11 +191,14 @@ config LED_TRIGGER_PHY Adds support for a set of LED trigger events per-PHY. Link state change will trigger the events, for consumption by an LED class driver. There are triggers for each link speed currently - supported by the phy, and are of the form: + supported by the PHY and also a one common "link" trigger as a + logical-or of all the link speed ones. + All these triggers are named according to the following pattern: :: Where speed is in the form: - Mbps or Gbps + Mbps OR Gbps OR link + for any speed known to the PHY. comment "MII PHY device drivers" diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index c736f29b3b2a..39ecad25b201 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -31,6 +31,7 @@ static void phy_led_trigger_no_link(struct phy_device *phy) { if (phy->last_triggered) { led_trigger_event(&phy->last_triggered->trigger, LED_OFF); + led_trigger_event(&phy->led_link_trigger->trigger, LED_OFF); phy->last_triggered = NULL; } } @@ -54,6 +55,10 @@ void phy_led_trigger_change_speed(struct phy_device *phy) } if (plt != phy->last_triggered) { + if (!phy->last_triggered) + led_trigger_event(&phy->led_link_trigger->trigger, + LED_FULL); + led_trigger_event(&phy->last_triggered->trigger, LED_OFF); led_trigger_event(&plt->trigger, LED_FULL); phy->last_triggered = plt; @@ -61,6 +66,13 @@ void phy_led_trigger_change_speed(struct phy_device *phy) } EXPORT_SYMBOL_GPL(phy_led_trigger_change_speed); +static void phy_led_trigger_format_name(struct phy_device *phy, char *buf, + size_t size, char *suffix) +{ + snprintf(buf, size, PHY_ID_FMT ":%s", + phy->mdio.bus->id, phy->mdio.addr, suffix); +} + static int phy_led_trigger_register(struct phy_device *phy, struct phy_led_trigger *plt, unsigned int speed) @@ -77,8 +89,8 @@ static int phy_led_trigger_register(struct phy_device *phy, snprintf(name_suffix, sizeof(name_suffix), "%dGbps", DIV_ROUND_CLOSEST(speed, 1000)); - snprintf(plt->name, sizeof(plt->name), PHY_ID_FMT ":%s", - phy->mdio.bus->id, phy->mdio.addr, name_suffix); + phy_led_trigger_format_name(phy, plt->name, sizeof(plt->name), + name_suffix); plt->trigger.name = plt->name; return led_trigger_register(&plt->trigger); @@ -99,13 +111,30 @@ int phy_led_triggers_register(struct phy_device *phy) if (!phy->phy_num_led_triggers) return 0; + phy->led_link_trigger = devm_kzalloc(&phy->mdio.dev, + sizeof(*phy->led_link_trigger), + GFP_KERNEL); + if (!phy->led_link_trigger) { + err = -ENOMEM; + goto out_clear; + } + + phy_led_trigger_format_name(phy, phy->led_link_trigger->name, + sizeof(phy->led_link_trigger->name), + "link"); + phy->led_link_trigger->trigger.name = phy->led_link_trigger->name; + + err = led_trigger_register(&phy->led_link_trigger->trigger); + if (err) + goto out_free_link; + phy->phy_led_triggers = devm_kzalloc(&phy->mdio.dev, sizeof(struct phy_led_trigger) * phy->phy_num_led_triggers, GFP_KERNEL); if (!phy->phy_led_triggers) { err = -ENOMEM; - goto out_clear; + goto out_unreg_link; } for (i = 0; i < phy->phy_num_led_triggers; i++) { @@ -123,6 +152,11 @@ out_unreg: while (i--) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); 
devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); +out_unreg_link: + phy_led_trigger_unregister(phy->led_link_trigger); +out_free_link: + devm_kfree(&phy->mdio.dev, phy->led_link_trigger); + phy->led_link_trigger = NULL; out_clear: phy->phy_num_led_triggers = 0; return err; @@ -135,5 +169,8 @@ void phy_led_triggers_unregister(struct phy_device *phy) for (i = 0; i < phy->phy_num_led_triggers; i++) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + + if (phy->led_link_trigger) + phy_led_trigger_unregister(phy->led_link_trigger); } EXPORT_SYMBOL_GPL(phy_led_triggers_unregister); diff --git a/include/linux/phy.h b/include/linux/phy.h index d78cd01ea513..dc82a07cb4fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -451,6 +451,8 @@ struct phy_device { struct phy_led_trigger *phy_led_triggers; unsigned int phy_num_led_triggers; struct phy_led_trigger *last_triggered; + + struct phy_led_trigger *led_link_trigger; #endif /* From 0cf737808ae7cb25e952be619db46b9147a92f46 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 2 Nov 2017 11:35:30 +0100 Subject: [PATCH 1920/2177] hv_netvsc: netvsc_teardown_gpadl() split It was found that in some cases host refuses to teardown GPADL for send/ receive buffers (probably when some work with these buffere is scheduled or ongoing). Change the teardown logic to be: 1) Send NVSP_MSG1_TYPE_REVOKE_* messages 2) Close the channel 3) Teardown GPADLs. This seems to work reliably. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 69 +++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5bb6a20072dd..bfc79698b8f4 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struct netvsc_device *nvdev) call_rcu(&nvdev->rcu, free_netvsc_device); } -static void netvsc_destroy_buf(struct hv_device *device) +static void netvsc_revoke_buf(struct hv_device *device, + struct netvsc_device *net_device) { struct nvsp_message *revoke_packet; struct net_device *ndev = hv_get_drvdata(device); - struct net_device_context *ndc = netdev_priv(ndev); - struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev); int ret; /* @@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv_device *device) net_device->recv_section_cnt = 0; } - /* Teardown the gpadl on the vsp end */ - if (net_device->recv_buf_gpadl_handle) { - ret = vmbus_teardown_gpadl(device->channel, - net_device->recv_buf_gpadl_handle); - - /* If we failed here, we might as well return and have a leak - * rather than continue and a bugchk - */ - if (ret != 0) { - netdev_err(ndev, - "unable to teardown receive buffer's gpadl\n"); - return; - } - net_device->recv_buf_gpadl_handle = 0; - } - - if (net_device->recv_buf) { - /* Free up the receive buffer */ - vfree(net_device->recv_buf); - net_device->recv_buf = NULL; - } - /* Deal with the send buffer we may have setup. 
* If we got a send section size, it means we received a * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent @@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv_device *device) } net_device->send_section_cnt = 0; } - /* Teardown the gpadl on the vsp end */ +} + +static void netvsc_teardown_gpadl(struct hv_device *device, + struct netvsc_device *net_device) +{ + struct net_device *ndev = hv_get_drvdata(device); + int ret; + + if (net_device->recv_buf_gpadl_handle) { + ret = vmbus_teardown_gpadl(device->channel, + net_device->recv_buf_gpadl_handle); + + /* If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + netdev_err(ndev, + "unable to teardown receive buffer's gpadl\n"); + return; + } + net_device->recv_buf_gpadl_handle = 0; + } + + if (net_device->recv_buf) { + /* Free up the receive buffer */ + vfree(net_device->recv_buf); + net_device->recv_buf = NULL; + } + if (net_device->send_buf_gpadl_handle) { ret = vmbus_teardown_gpadl(device->channel, net_device->send_buf_gpadl_handle); @@ -420,7 +425,8 @@ static int netvsc_init_buf(struct hv_device *device, goto exit; cleanup: - netvsc_destroy_buf(device); + netvsc_revoke_buf(device, net_device); + netvsc_teardown_gpadl(device, net_device); exit: return ret; @@ -539,11 +545,6 @@ cleanup: return ret; } -static void netvsc_disconnect_vsp(struct hv_device *device) -{ - netvsc_destroy_buf(device); -} - /* * netvsc_device_remove - Callback when the root bus device is removed */ @@ -557,7 +558,7 @@ void netvsc_device_remove(struct hv_device *device) cancel_work_sync(&net_device->subchan_work); - netvsc_disconnect_vsp(device); + netvsc_revoke_buf(device, net_device); RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); @@ -570,6 +571,8 @@ void netvsc_device_remove(struct hv_device *device) /* Now, we can close the channel safely */ vmbus_close(device->channel); + netvsc_teardown_gpadl(device, net_device); + /* And dissassociate NAPI context from device */ for (i = 0; i < net_device->num_chn; i++) netif_napi_del(&net_device->chan_table[i].napi); From b5eb819dcbcbeca7a047573a5218cb8f703709c9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 2 Nov 2017 11:35:31 +0100 Subject: [PATCH 1921/2177] hv_netvsc: hide warnings about uninitialized/missing rndis device Hyper-V hosts are known to send RNDIS messages even after we halt the device in rndis_filter_halt_device(). Remove user visible messages as they are not really useful. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David S. 
Miller --- drivers/net/hyperv/rndis_filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 0648eebda829..8b1242b8d8ef 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -407,13 +407,13 @@ int rndis_filter_receive(struct net_device *ndev, /* Make sure the rndis device state is initialized */ if (unlikely(!rndis_dev)) { - netif_err(net_device_ctx, rx_err, ndev, + netif_dbg(net_device_ctx, rx_err, ndev, "got rndis message but no rndis device!\n"); return NVSP_STAT_FAIL; } if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) { - netif_err(net_device_ctx, rx_err, ndev, + netif_dbg(net_device_ctx, rx_err, ndev, "got rndis message uninitialized\n"); return NVSP_STAT_FAIL; } From 4ad1ceec05e49175d0f967cc87628101e79176f6 Mon Sep 17 00:00:00 2001 From: Troy Kisky Date: Fri, 3 Nov 2017 10:29:59 -0700 Subject: [PATCH 1922/2177] net: fec: Let fec_ptp have its own interrupt routine This is better for code locality and should slightly speed up normal interrupts. This also allows PPS clock output to start working for i.mx7. This is because i.mx7 was already using the limit of 3 interrupts, and needed another. Signed-off-by: Troy Kisky Acked-by: Fugang Duan Signed-off-by: David S. Miller --- .../devicetree/bindings/net/fsl-fec.txt | 13 +++ drivers/net/ethernet/freescale/fec.h | 3 +- drivers/net/ethernet/freescale/fec_main.c | 31 +++++-- drivers/net/ethernet/freescale/fec_ptp.c | 82 +++++++++++-------- 4 files changed, 84 insertions(+), 45 deletions(-) diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index 6f55bdd52f8a..f0dc94409107 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -34,6 +34,19 @@ Optional properties: - fsl,err006687-workaround-present: If present indicates that the system has the hardware workaround for ERR006687 applied and does not need a software workaround. + -interrupt-names: names of the interrupts listed in interrupts property in + the same order. The defaults if not specified are + __Number of interrupts__ __Default__ + 1 "int0" + 2 "int0", "pps" + 3 "int0", "int1", "int2" + 4 "int0", "int1", "int2", "pps" + The order may be changed as long as they correspond to the interrupts + property. Currently, only i.mx7 uses "int1" and "int2". They correspond to + tx/rx queues 1 and 2. "int0" will be used for queue 0 and ENET_MII interrupts. + For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse + per second interrupt associated with 1588 precision time protocol(PTP). 
+ Optional subnodes: - mdio : specifies the mdio bus in the FEC, used as a container for phy nodes diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 44720f83af27..5385074b3b7d 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -583,12 +583,11 @@ struct fec_enet_private { u64 ethtool_stats[0]; }; -void fec_ptp_init(struct platform_device *pdev); +void fec_ptp_init(struct platform_device *pdev, int irq_idx); void fec_ptp_stop(struct platform_device *pdev); void fec_ptp_start_cyclecounter(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr); int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr); -uint fec_ptp_check_pps_event(struct fec_enet_private *fep); /****************************************************************************/ #endif /* FEC_H */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3dc2d771a222..610573855213 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1602,10 +1602,6 @@ fec_enet_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; complete(&fep->mdio_done); } - - if (fep->ptp_clock) - if (fec_ptp_check_pps_event(fep)) - ret = IRQ_HANDLED; return ret; } @@ -3312,6 +3308,19 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) } +static int fec_enet_get_irq_cnt(struct platform_device *pdev) +{ + int irq_cnt = platform_irq_count(pdev); + + if (irq_cnt > FEC_IRQ_NUM) + irq_cnt = FEC_IRQ_NUM; /* last for pps */ + else if (irq_cnt == 2) + irq_cnt = 1; /* last for pps */ + else if (irq_cnt <= 0) + irq_cnt = 1; /* At least 1 irq is needed */ + return irq_cnt; +} + static int fec_probe(struct platform_device *pdev) { @@ -3325,6 +3334,8 @@ fec_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node, *phy_node; int num_tx_qs; int num_rx_qs; + char irq_name[8]; + int irq_cnt; fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs); @@ -3465,18 +3476,20 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_reset; + irq_cnt = fec_enet_get_irq_cnt(pdev); if (fep->bufdesc_ex) - fec_ptp_init(pdev); + fec_ptp_init(pdev, irq_cnt); ret = fec_enet_init(ndev); if (ret) goto failed_init; - for (i = 0; i < FEC_IRQ_NUM; i++) { - irq = platform_get_irq(pdev, i); + for (i = 0; i < irq_cnt; i++) { + sprintf(irq_name, "int%d", i); + irq = platform_get_irq_byname(pdev, irq_name); + if (irq < 0) + irq = platform_get_irq(pdev, i); if (irq < 0) { - if (i) - break; ret = irq; goto failed_irq; } diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 6ebad3fac81d..f81439796ac7 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -549,6 +549,37 @@ static void fec_time_keep(struct work_struct *work) schedule_delayed_work(&fep->time_keep, HZ); } +/* This function checks the pps event and reloads the timer compare counter. 
*/ +static irqreturn_t fec_pps_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct fec_enet_private *fep = netdev_priv(ndev); + u32 val; + u8 channel = fep->pps_channel; + struct ptp_clock_event event; + + val = readl(fep->hwp + FEC_TCSR(channel)); + if (val & FEC_T_TF_MASK) { + /* Write the next next compare(not the next according the spec) + * value to the register + */ + writel(fep->next_counter, fep->hwp + FEC_TCCR(channel)); + do { + writel(val, fep->hwp + FEC_TCSR(channel)); + } while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK); + + /* Update the counter; */ + fep->next_counter = (fep->next_counter + fep->reload_period) & + fep->cc.mask; + + event.type = PTP_CLOCK_PPS; + ptp_clock_event(fep->ptp_clock, &event); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + /** * fec_ptp_init * @ndev: The FEC network adapter @@ -558,10 +589,12 @@ static void fec_time_keep(struct work_struct *work) * cyclecounter init routine and exits. */ -void fec_ptp_init(struct platform_device *pdev) +void fec_ptp_init(struct platform_device *pdev, int irq_idx) { struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + int irq; + int ret; fep->ptp_caps.owner = THIS_MODULE; snprintf(fep->ptp_caps.name, 16, "fec ptp"); @@ -587,6 +620,20 @@ void fec_ptp_init(struct platform_device *pdev) INIT_DELAYED_WORK(&fep->time_keep, fec_time_keep); + irq = platform_get_irq_byname(pdev, "pps"); + if (irq < 0) + irq = platform_get_irq(pdev, irq_idx); + /* Failure to get an irq is not fatal, + * only the PTP_CLOCK_PPS clock events should stop + */ + if (irq >= 0) { + ret = devm_request_irq(&pdev->dev, irq, fec_pps_interrupt, + 0, pdev->name, ndev); + if (ret < 0) + dev_warn(&pdev->dev, "request for pps irq failed(%d)\n", + ret); + } + fep->ptp_clock = ptp_clock_register(&fep->ptp_caps, &pdev->dev); if (IS_ERR(fep->ptp_clock)) { fep->ptp_clock = NULL; @@ -605,36 +652,3 @@ void fec_ptp_stop(struct platform_device *pdev) if (fep->ptp_clock) ptp_clock_unregister(fep->ptp_clock); } - -/** - * fec_ptp_check_pps_event - * @fep: the fec_enet_private structure handle - * - * This function check the pps event and reload the timer compare counter. - */ -uint fec_ptp_check_pps_event(struct fec_enet_private *fep) -{ - u32 val; - u8 channel = fep->pps_channel; - struct ptp_clock_event event; - - val = readl(fep->hwp + FEC_TCSR(channel)); - if (val & FEC_T_TF_MASK) { - /* Write the next next compare(not the next according the spec) - * value to the register - */ - writel(fep->next_counter, fep->hwp + FEC_TCCR(channel)); - do { - writel(val, fep->hwp + FEC_TCSR(channel)); - } while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK); - - /* Update the counter; */ - fep->next_counter = (fep->next_counter + fep->reload_period) & fep->cc.mask; - - event.type = PTP_CLOCK_PPS; - ptp_clock_event(fep->ptp_clock, &event); - return 1; - } - - return 0; -} From 3e29cd0e6563d5fefd59e7225750ee9922f2dad5 Mon Sep 17 00:00:00 2001 From: Christina Jacob Date: Sun, 5 Nov 2017 08:52:30 +0530 Subject: [PATCH 1923/2177] xdp: Sample xdp program implementing ip forward Implements port to port forwarding with route table and arp table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie map. Signed-off-by: Christina Jacob Signed-off-by: David S. 
Miller --- samples/bpf/Makefile | 4 + samples/bpf/xdp_router_ipv4_kern.c | 186 ++++++++ samples/bpf/xdp_router_ipv4_user.c | 659 +++++++++++++++++++++++++++++ 3 files changed, 849 insertions(+) create mode 100644 samples/bpf/xdp_router_ipv4_kern.c create mode 100644 samples/bpf/xdp_router_ipv4_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5994075b080d..3b4945c1eab0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -29,6 +29,7 @@ hostprogs-y += test_cgrp2_sock hostprogs-y += test_cgrp2_sock2 hostprogs-y += xdp1 hostprogs-y += xdp2 +hostprogs-y += xdp_router_ipv4 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event hostprogs-y += sampleip @@ -76,6 +77,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o +xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o @@ -118,6 +120,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o +always += xdp_router_ipv4_kern.o always += test_current_task_under_cgroup_kern.o always += trace_event_kern.o always += sampleip_kern.o @@ -166,6 +169,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf +HOSTLOADLIBES_xdp_router_ipv4 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c new file mode 100644 index 000000000000..993f56bc7b9a --- /dev/null +++ b/samples/bpf/xdp_router_ipv4_kern.c @@ -0,0 +1,186 @@ +/* Copyright (C) 2017 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include +#include + +struct trie_value { + __u8 prefix[4]; + __be64 value; + int ifindex; + int metric; + __be32 gw; +}; + +/* Key for lpm_trie*/ +union key_4 { + u32 b32[2]; + u8 b8[8]; +}; + +struct arp_entry { + __be64 mac; + __be32 dst; +}; + +struct direct_map { + struct arp_entry arp; + int ifindex; + __be64 mac; +}; + +/* Map for trie implementation*/ +struct bpf_map_def SEC("maps") lpm_map = { + .type = BPF_MAP_TYPE_LPM_TRIE, + .key_size = 8, + .value_size = sizeof(struct trie_value), + .max_entries = 50, + .map_flags = BPF_F_NO_PREALLOC, +}; + +/* Map for counter*/ +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = 256, +}; + +/* Map for ARP table*/ +struct bpf_map_def SEC("maps") arp_table = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__be32), + .value_size = sizeof(__be64), + .max_entries = 50, +}; + +/* Map to keep the exact match entries in the route table*/ +struct bpf_map_def SEC("maps") exact_match = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__be32), + .value_size = sizeof(struct direct_map), + .max_entries = 50, +}; + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_DEVMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 100, +}; + +/* Function to set source and destination mac of the packet */ +static inline void set_src_dst_mac(void *data, void *src, void *dst) +{ + unsigned short *source = src; + unsigned short *dest = dst; + unsigned short *p = data; + + __builtin_memcpy(p, dest, 6); + __builtin_memcpy(p + 3, source, 6); +} + +/* Parse IPV4 packet to get SRC, DST IP and protocol */ +static inline int parse_ipv4(void *data, u64 nh_off, void *data_end, + __be32 *src, __be32 *dest) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + *src = iph->saddr; + *dest = iph->daddr; + return iph->protocol; +} + +SEC("xdp_router_ipv4") +int xdp_router_ipv4_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + __be64 *dest_mac = NULL, *src_mac = NULL; + void *data = (void *)(long)ctx->data; + struct trie_value *prefix_value; + int rc = XDP_DROP, forward_to; + struct ethhdr *eth = data; + union key_4 key4; + long *value; + u16 h_proto; + u32 ipproto; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_ARP)) { + return XDP_PASS; + } else if (h_proto == htons(ETH_P_IP)) { + struct direct_map *direct_entry; + __be32 src_ip = 0, dest_ip = 0; + + ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip); + direct_entry = bpf_map_lookup_elem(&exact_match, &dest_ip); + /* Check for exact match, this would give a faster lookup*/ + if (direct_entry && direct_entry->mac && direct_entry->arp.mac) { + src_mac = &direct_entry->mac; + dest_mac = &direct_entry->arp.mac; + forward_to = direct_entry->ifindex; + } else { + /* Look up in the trie for lpm*/ + key4.b32[0] = 32; + key4.b8[4] = dest_ip & 0xff; + key4.b8[5] = (dest_ip >> 8) & 0xff; + key4.b8[6] = (dest_ip >> 16) & 0xff; + key4.b8[7] = (dest_ip >> 24) & 
0xff; + prefix_value = bpf_map_lookup_elem(&lpm_map, &key4); + if (!prefix_value) + return XDP_DROP; + src_mac = &prefix_value->value; + if (!src_mac) + return XDP_DROP; + dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip); + if (!dest_mac) { + if (!prefix_value->gw) + return XDP_DROP; + dest_ip = prefix_value->gw; + dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip); + } + forward_to = prefix_value->ifindex; + } + } else { + ipproto = 0; + } + if (src_mac && dest_mac) { + set_src_dst_mac(data, src_mac, dest_mac); + value = bpf_map_lookup_elem(&rxcnt, &ipproto); + if (value) + *value += 1; + return bpf_redirect_map(&tx_port, forward_to, 0); + } + return rc; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c new file mode 100644 index 000000000000..2c1fe3f4b1a4 --- /dev/null +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -0,0 +1,659 @@ +/* Copyright (C) 2017 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_load.h" +#include "libbpf.h" +#include +#include +#include +#include +#include +#include +#include +#include "bpf_util.h" + +int sock, sock_arp, flags = 0; +static int total_ifindex; +int *ifindex_list; +char buf[8192]; + +static int get_route_table(int rtm_family); +static void int_exit(int sig) +{ + int i = 0; + + for (i = 0; i < total_ifindex; i++) + set_link_xdp_fd(ifindex_list[i], -1, flags); + exit(0); +} + +static void close_and_exit(int sig) +{ + int i = 0; + + close(sock); + close(sock_arp); + + for (i = 0; i < total_ifindex; i++) + set_link_xdp_fd(ifindex_list[i], -1, flags); + exit(0); +} + +/* Get the mac address of the interface given interface name */ +static __be64 getmac(char *iface) +{ + struct ifreq ifr; + __be64 mac = 0; + int fd, i; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + ifr.ifr_addr.sa_family = AF_INET; + strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1); + if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { + printf("ioctl failed leaving....\n"); + return -1; + } + for (i = 0; i < 6 ; i++) + *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i]; + close(fd); + return mac; +} + +static int recv_msg(struct sockaddr_nl sock_addr, int sock) +{ + struct nlmsghdr *nh; + int len, nll = 0; + char *buf_ptr; + + buf_ptr = buf; + while (1) { + len = recv(sock, buf_ptr, sizeof(buf) - nll, 0); + if (len < 0) + return len; + + nh = (struct nlmsghdr *)buf_ptr; + + if (nh->nlmsg_type == NLMSG_DONE) + break; + buf_ptr += len; + nll += len; + if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH) + break; + + if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE) + break; + } + return nll; +} + +/* Function to parse the route entry returned by netlink + * Updates the route entry related map entries + */ +static void read_route(struct nlmsghdr *nh, int nll) +{ + char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24]; + struct bpf_lpm_trie_key *prefix_key; + struct rtattr *rt_attr; + struct rtmsg *rt_msg; + int rtm_family; + int rtl; + int i; + struct route_table { + int dst_len, iface, metric; + char *iface_name; + __be32 dst, gw; + __be64 mac; + } route; + struct arp_table { + __be64 mac; + __be32 dst; + }; + + struct direct_map { + struct arp_table arp; + int ifindex; + __be64 mac; + } direct_entry; + + if 
(nh->nlmsg_type == RTM_DELROUTE) + printf("DELETING Route entry\n"); + else if (nh->nlmsg_type == RTM_GETROUTE) + printf("READING Route entry\n"); + else if (nh->nlmsg_type == RTM_NEWROUTE) + printf("NEW Route entry\n"); + else + printf("%d\n", nh->nlmsg_type); + + memset(&route, 0, sizeof(route)); + printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n"); + for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { + rt_msg = (struct rtmsg *)NLMSG_DATA(nh); + rtm_family = rt_msg->rtm_family; + if (rtm_family == AF_INET) + if (rt_msg->rtm_table != RT_TABLE_MAIN) + continue; + rt_attr = (struct rtattr *)RTM_RTA(rt_msg); + rtl = RTM_PAYLOAD(nh); + + for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) { + switch (rt_attr->rta_type) { + case NDA_DST: + sprintf(dsts, "%u", + (*((__be32 *)RTA_DATA(rt_attr)))); + break; + case RTA_GATEWAY: + sprintf(gws, "%u", + *((__be32 *)RTA_DATA(rt_attr))); + break; + case RTA_OIF: + sprintf(ifs, "%u", + *((int *)RTA_DATA(rt_attr))); + break; + case RTA_METRICS: + sprintf(metrics, "%u", + *((int *)RTA_DATA(rt_attr))); + default: + break; + } + } + sprintf(dsts_len, "%d", rt_msg->rtm_dst_len); + route.dst = atoi(dsts); + route.dst_len = atoi(dsts_len); + route.gw = atoi(gws); + route.iface = atoi(ifs); + route.metric = atoi(metrics); + route.iface_name = alloca(sizeof(char *) * IFNAMSIZ); + route.iface_name = if_indextoname(route.iface, route.iface_name); + route.mac = getmac(route.iface_name); + if (route.mac == -1) { + int i = 0; + + for (i = 0; i < total_ifindex; i++) + set_link_xdp_fd(ifindex_list[i], -1, flags); + exit(0); + } + assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0); + if (rtm_family == AF_INET) { + struct trie_value { + __u8 prefix[4]; + __be64 value; + int ifindex; + int metric; + __be32 gw; + } *prefix_value; + + prefix_key = alloca(sizeof(*prefix_key) + 3); + prefix_value = alloca(sizeof(*prefix_value)); + + prefix_key->prefixlen = 32; + prefix_key->prefixlen = route.dst_len; + direct_entry.mac = route.mac & 0xffffffffffff; + direct_entry.ifindex = route.iface; + direct_entry.arp.mac = 0; + direct_entry.arp.dst = 0; + if (route.dst_len == 32) { + if (nh->nlmsg_type == RTM_DELROUTE) + assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0); + else + if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0) + direct_entry.arp.dst = route.dst; + assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0); + } + for (i = 0; i < 4; i++) + prefix_key->data[i] = (route.dst >> i * 8) & 0xff; + + printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n", + (int)prefix_key->data[0], + (int)prefix_key->data[1], + (int)prefix_key->data[2], + (int)prefix_key->data[3], + route.gw, route.dst_len, + route.metric, + route.iface_name); + if (bpf_map_lookup_elem(map_fd[0], prefix_key, + prefix_value) < 0) { + for (i = 0; i < 4; i++) + prefix_value->prefix[i] = prefix_key->data[i]; + prefix_value->value = route.mac & 0xffffffffffff; + prefix_value->ifindex = route.iface; + prefix_value->gw = route.gw; + prefix_value->metric = route.metric; + + assert(bpf_map_update_elem(map_fd[0], + prefix_key, + prefix_value, 0 + ) == 0); + } else { + if (nh->nlmsg_type == RTM_DELROUTE) { + printf("deleting entry\n"); + printf("prefix key=%d.%d.%d.%d/%d", + prefix_key->data[0], + prefix_key->data[1], + prefix_key->data[2], + prefix_key->data[3], + prefix_key->prefixlen); + assert(bpf_map_delete_elem(map_fd[0], + prefix_key + ) == 0); + /* Rereading the route table to check if + * there is an entry with the same + 
* prefix but a different metric as the + * deleted enty. + */ + get_route_table(AF_INET); + } else if (prefix_key->data[0] == + prefix_value->prefix[0] && + prefix_key->data[1] == + prefix_value->prefix[1] && + prefix_key->data[2] == + prefix_value->prefix[2] && + prefix_key->data[3] == + prefix_value->prefix[3] && + route.metric >= prefix_value->metric) { + continue; + } else { + for (i = 0; i < 4; i++) + prefix_value->prefix[i] = + prefix_key->data[i]; + prefix_value->value = + route.mac & 0xffffffffffff; + prefix_value->ifindex = route.iface; + prefix_value->gw = route.gw; + prefix_value->metric = route.metric; + assert(bpf_map_update_elem( + map_fd[0], + prefix_key, + prefix_value, + 0) == 0); + } + } + } + memset(&route, 0, sizeof(route)); + memset(dsts, 0, sizeof(dsts)); + memset(dsts_len, 0, sizeof(dsts_len)); + memset(gws, 0, sizeof(gws)); + memset(ifs, 0, sizeof(ifs)); + memset(&route, 0, sizeof(route)); + } +} + +/* Function to read the existing route table when the process is launched*/ +static int get_route_table(int rtm_family) +{ + struct sockaddr_nl sa; + struct nlmsghdr *nh; + int sock, seq = 0; + struct msghdr msg; + struct iovec iov; + int ret = 0; + int nll; + + struct { + struct nlmsghdr nl; + struct rtmsg rt; + char buf[8192]; + } req; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + memset(&req, 0, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETROUTE; + + req.rt.rtm_family = rtm_family; + req.rt.rtm_table = RT_TABLE_MAIN; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++seq; + memset(&msg, 0, sizeof(msg)); + iov.iov_base = (void *)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ret = sendmsg(sock, &msg, 0); + if (ret < 0) { + printf("send to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + memset(buf, 0, sizeof(buf)); + nll = recv_msg(sa, sock); + if (nll < 0) { + printf("recv from netlink: %s\n", strerror(nll)); + ret = -1; + goto cleanup; + } + nh = (struct nlmsghdr *)buf; + read_route(nh, nll); +cleanup: + close(sock); + return ret; +} + +/* Function to parse the arp entry returned by netlink + * Updates the arp entry related map entries + */ +static void read_arp(struct nlmsghdr *nh, int nll) +{ + struct rtattr *rt_attr; + char dsts[24], mac[24]; + struct ndmsg *rt_msg; + int rtl, ndm_family; + + struct arp_table { + __be64 mac; + __be32 dst; + } arp_entry; + struct direct_map { + struct arp_table arp; + int ifindex; + __be64 mac; + } direct_entry; + + if (nh->nlmsg_type == RTM_GETNEIGH) + printf("READING arp entry\n"); + printf("Address\tHwAddress\n"); + for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { + rt_msg = (struct ndmsg *)NLMSG_DATA(nh); + rt_attr = (struct rtattr *)RTM_RTA(rt_msg); + ndm_family = rt_msg->ndm_family; + rtl = RTM_PAYLOAD(nh); + for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) { + switch (rt_attr->rta_type) { + case NDA_DST: + sprintf(dsts, "%u", + *((__be32 *)RTA_DATA(rt_attr))); + break; + case NDA_LLADDR: + sprintf(mac, "%lld", + *((__be64 *)RTA_DATA(rt_attr))); + break; + default: + break; + } + } + arp_entry.dst = atoi(dsts); + arp_entry.mac = atol(mac); + 
printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac); + if (ndm_family == AF_INET) { + if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst, + &direct_entry) == 0) { + if (nh->nlmsg_type == RTM_DELNEIGH) { + direct_entry.arp.dst = 0; + direct_entry.arp.mac = 0; + } else if (nh->nlmsg_type == RTM_NEWNEIGH) { + direct_entry.arp.dst = arp_entry.dst; + direct_entry.arp.mac = arp_entry.mac; + } + assert(bpf_map_update_elem(map_fd[3], + &arp_entry.dst, + &direct_entry, 0 + ) == 0); + memset(&direct_entry, 0, sizeof(direct_entry)); + } + if (nh->nlmsg_type == RTM_DELNEIGH) { + assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0); + } else if (nh->nlmsg_type == RTM_NEWNEIGH) { + assert(bpf_map_update_elem(map_fd[2], + &arp_entry.dst, + &arp_entry.mac, 0 + ) == 0); + } + } + memset(&arp_entry, 0, sizeof(arp_entry)); + memset(dsts, 0, sizeof(dsts)); + } +} + +/* Function to read the existing arp table when the process is launched*/ +static int get_arp_table(int rtm_family) +{ + struct sockaddr_nl sa; + struct nlmsghdr *nh; + int sock, seq = 0; + struct msghdr msg; + struct iovec iov; + int ret = 0; + int nll; + struct { + struct nlmsghdr nl; + struct ndmsg rt; + char buf[8192]; + } req; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + memset(&req, 0, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETNEIGH; + req.rt.ndm_state = NUD_REACHABLE; + req.rt.ndm_family = rtm_family; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++seq; + memset(&msg, 0, sizeof(msg)); + iov.iov_base = (void *)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ret = sendmsg(sock, &msg, 0); + if (ret < 0) { + printf("send to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + memset(buf, 0, sizeof(buf)); + nll = recv_msg(sa, sock); + if (nll < 0) { + printf("recv from netlink: %s\n", strerror(nll)); + ret = -1; + goto cleanup; + } + nh = (struct nlmsghdr *)buf; + read_arp(nh, nll); +cleanup: + close(sock); + return ret; +} + +/* Function to keep track and update changes in route and arp table + * Give regular statistics of packets forwarded + */ +static int monitor_route(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + const unsigned int nr_keys = 256; + struct pollfd fds_route, fds_arp; + __u64 prev[nr_keys][nr_cpus]; + struct sockaddr_nl la, lr; + __u64 values[nr_cpus]; + struct nlmsghdr *nh; + int nll, ret = 0; + int interval = 5; + __u32 key; + int i; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + fcntl(sock, F_SETFL, O_NONBLOCK); + memset(&lr, 0, sizeof(lr)); + lr.nl_family = AF_NETLINK; + lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY; + if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + fds_route.fd = sock; + fds_route.events = POLL_IN; + + sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock_arp < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + fcntl(sock_arp, F_SETFL, O_NONBLOCK); + 
memset(&la, 0, sizeof(la)); + la.nl_family = AF_NETLINK; + la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY; + if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + fds_arp.fd = sock_arp; + fds_arp.events = POLL_IN; + + memset(prev, 0, sizeof(prev)); + do { + signal(SIGINT, close_and_exit); + signal(SIGTERM, close_and_exit); + + sleep(interval); + for (key = 0; key < nr_keys; key++) { + __u64 sum = 0; + + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[key][i]); + if (sum) + printf("proto %u: %10llu pkt/s\n", + key, sum / interval); + memcpy(prev[key], values, sizeof(values)); + } + + memset(buf, 0, sizeof(buf)); + if (poll(&fds_route, 1, 3) == POLL_IN) { + nll = recv_msg(lr, sock); + if (nll < 0) { + printf("recv from netlink: %s\n", strerror(nll)); + ret = -1; + goto cleanup; + } + + nh = (struct nlmsghdr *)buf; + printf("Routing table updated.\n"); + read_route(nh, nll); + } + memset(buf, 0, sizeof(buf)); + if (poll(&fds_arp, 1, 3) == POLL_IN) { + nll = recv_msg(la, sock_arp); + if (nll < 0) { + printf("recv from netlink: %s\n", strerror(nll)); + ret = -1; + goto cleanup; + } + + nh = (struct nlmsghdr *)buf; + read_arp(nh, nll); + } + + } while (1); +cleanup: + close(sock); + return ret; +} + +int main(int ac, char **argv) +{ + char filename[256]; + char **ifname_list; + int i = 1; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (ac < 2) { + printf("usage: %s [-S] Interface name list\n", argv[0]); + return 1; + } + if (!strcmp(argv[1], "-S")) { + flags = XDP_FLAGS_SKB_MODE; + total_ifindex = ac - 2; + ifname_list = (argv + 2); + } else { + flags = 0; + total_ifindex = ac - 1; + ifname_list = (argv + 1); + } + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + printf("\n**************loading bpf file*********************\n\n\n"); + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); + for (i = 0; i < total_ifindex; i++) { + ifindex_list[i] = if_nametoindex(ifname_list[i]); + if (!ifindex_list[i]) { + printf("Couldn't translate interface name: %s", + strerror(errno)); + return 1; + } + } + for (i = 0; i < total_ifindex; i++) { + if (set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) { + printf("link set xdp fd failed\n"); + int recovery_index = i; + + for (i = 0; i < recovery_index; i++) + set_link_xdp_fd(ifindex_list[i], -1, flags); + + return 1; + } + printf("Attached to %d\n", ifindex_list[i]); + } + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + printf("*******************ROUTE TABLE*************************\n\n\n"); + get_route_table(AF_INET); + printf("*******************ARP TABLE***************************\n\n\n"); + get_arp_table(AF_INET); + if (monitor_route() < 0) { + printf("Error in receiving route update"); + return 1; + } + + return 0; +} From 80661e7687f202514ee5eea0e74916d3c50c2606 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 5 Nov 2017 15:58:22 -0800 Subject: [PATCH 1924/2177] ila: cleanup checksum diff Consolidate computing checksum diff into one function. Add get_csum_diff_iaddr that computes the checksum diff between an address argument and locator being written. get_csum_diff calls this using the destination address in the IP header as the argument. Also moved ila_init_saved_csum to be close to the checksum diff functions. 
Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/ila/ila_common.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index aba0998ddbfb..f1d9248d8b86 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -13,15 +13,28 @@ #include #include "ila.h" -static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +void ila_init_saved_csum(struct ila_params *p) { - struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + if (!p->locator_match.v64) + return; + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator, + (__be32 *)&p->locator_match); +} + +static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p) +{ if (p->locator_match.v64) return p->csum_diff; else - return compute_csum_diff8((__be32 *)&iaddr->loc, - (__be32 *)&p->locator); + return compute_csum_diff8((__be32 *)&p->locator, + (__be32 *)&iaddr->loc); +} + +static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +{ + return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p); } static void ila_csum_do_neutral(struct ila_addr *iaddr, @@ -30,13 +43,7 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr, __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; __wsum diff, fval; - /* Check if checksum adjust value has been cached */ - if (p->locator_match.v64) { - diff = p->csum_diff; - } else { - diff = compute_csum_diff8((__be32 *)&p->locator, - (__be32 *)iaddr); - } + diff = get_csum_diff_iaddr(iaddr, p); fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG); @@ -134,16 +141,6 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, iaddr->loc = p->locator; } -void ila_init_saved_csum(struct ila_params *p) -{ - if (!p->locator_match.v64) - return; - - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator, - (__be32 *)&p->locator_match); -} - static int __init ila_init(void) { int ret; From 84287bb3285634b60c55c00a1d5ed843b44fde92 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 5 Nov 2017 15:58:23 -0800 Subject: [PATCH 1925/2177] ila: add checksum neutral map auto Add checksum neutral auto that performs checksum neutral mapping without using the C-bit. This is enabled by configuration of a mapping. The checksum neutral function has been split into ila_csum_do_neutral_fmt and ila_csum_do_neutral_nofmt. The former handles the C-bit and includes it in the adjustment value. The latter just sets the adjustment value on the locator diff only. Added configuration for checksum neutral map aut in ila_lwt and ila_xlat. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/uapi/linux/ila.h | 1 + net/ipv6/ila/ila_common.c | 65 +++++++++++++++++++++++---------------- net/ipv6/ila/ila_lwt.c | 29 ++++++++--------- net/ipv6/ila/ila_xlat.c | 10 +++--- 4 files changed, 61 insertions(+), 44 deletions(-) diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index f54853288f99..0744881dcef3 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -41,6 +41,7 @@ enum { ILA_CSUM_ADJUST_TRANSPORT, ILA_CSUM_NEUTRAL_MAP, ILA_CSUM_NO_ACTION, + ILA_CSUM_NEUTRAL_MAP_AUTO, }; #endif /* _UAPI_LINUX_ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index f1d9248d8b86..8c88ecf29b93 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -37,8 +37,8 @@ static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p); } -static void ila_csum_do_neutral(struct ila_addr *iaddr, - struct ila_params *p) +static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr, + struct ila_params *p) { __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; __wsum diff, fval; @@ -60,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr, iaddr->ident.csum_neutral ^= 1; } +static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr, + struct ila_params *p) +{ + __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; + __wsum diff; + + diff = get_csum_diff_iaddr(iaddr, p); + + *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust))); +} + static void ila_csum_adjust_transport(struct sk_buff *skb, struct ila_params *p) { - __wsum diff; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); size_t nhoff = sizeof(struct ipv6hdr); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + __wsum diff; switch (ip6h->nexthdr) { case NEXTHDR_TCP: @@ -105,36 +115,39 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, } break; } - - /* Now change destination address */ - iaddr->loc = p->locator; } void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, - bool set_csum_neutral) + bool sir2ila) { struct ipv6hdr *ip6h = ipv6_hdr(skb); struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); - /* First deal with the transport checksum */ - if (ila_csum_neutral_set(iaddr->ident)) { - /* C-bit is set in the locator indicating that this - * is a locator being translated to a SIR address. - * Perform (receiver) checksum-neutral translation. - */ - if (!set_csum_neutral) - ila_csum_do_neutral(iaddr, p); - } else { - switch (p->csum_mode) { - case ILA_CSUM_ADJUST_TRANSPORT: - ila_csum_adjust_transport(skb, p); - break; - case ILA_CSUM_NEUTRAL_MAP: - ila_csum_do_neutral(iaddr, p); - break; - case ILA_CSUM_NO_ACTION: + switch (p->csum_mode) { + case ILA_CSUM_ADJUST_TRANSPORT: + ila_csum_adjust_transport(skb, p); + break; + case ILA_CSUM_NEUTRAL_MAP: + if (sir2ila) { + if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) { + /* Checksum flag should never be + * set in a formatted SIR address. + */ + break; + } + } else if (!ila_csum_neutral_set(iaddr->ident)) { + /* ILA to SIR translation and C-bit isn't + * set so we're good. 
+ */ break; } + ila_csum_do_neutral_fmt(iaddr, p); + break; + case ILA_CSUM_NEUTRAL_MAP_AUTO: + ila_csum_do_neutral_nofmt(iaddr, p); + break; + case ILA_CSUM_NO_ACTION: + break; } /* Now change destination address */ diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 696281b4bca2..104af07d83a6 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -127,6 +127,7 @@ static int ila_build_state(struct nlattr *nla, struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; + u8 csum_mode = ILA_CSUM_NO_ACTION; int ret; if (family != AF_INET6) @@ -139,15 +140,6 @@ static int ila_build_state(struct nlattr *nla, return -EINVAL; } - iaddr = (struct ila_addr *)&cfg6->fc_dst; - - if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) { - /* Don't allow translation for a non-ILA address or checksum - * neutral flag to be set. - */ - return -EINVAL; - } - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); if (ret < 0) return ret; @@ -155,6 +147,19 @@ static int ila_build_state(struct nlattr *nla, if (!tb[ILA_ATTR_LOCATOR]) return -EINVAL; + iaddr = (struct ila_addr *)&cfg6->fc_dst; + + if (tb[ILA_ATTR_CSUM_MODE]) + csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); + + if (csum_mode == ILA_CSUM_NEUTRAL_MAP && + ila_csum_neutral_set(iaddr->ident)) { + /* Don't allow translation if checksum neutral bit is + * configured and it's set in the SIR address. + */ + return -EINVAL; + } + newts = lwtunnel_state_alloc(sizeof(*ilwt)); if (!newts) return -ENOMEM; @@ -168,17 +173,13 @@ static int ila_build_state(struct nlattr *nla, p = ila_params_lwtunnel(newts); + p->csum_mode = csum_mode; p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); /* Precompute checksum difference for translation since we * know both the old locator and the new one. 
*/ p->locator_match = iaddr->loc; - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator_match, (__be32 *)&p->locator); - - if (tb[ILA_ATTR_CSUM_MODE]) - p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); ila_init_saved_csum(p); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 3123b9de91b5..213259629e66 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -138,6 +138,8 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[ILA_ATTR_CSUM_MODE]) xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); + else + xp->ip.csum_mode = ILA_CSUM_NO_ACTION; if (info->attrs[ILA_ATTR_IFINDEX]) xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); @@ -198,7 +200,7 @@ static void ila_free_cb(void *ptr, void *arg) } } -static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral); +static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila); static unsigned int ila_nf_input(void *priv, @@ -396,7 +398,7 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) (__force u64)ila->xp.ip.locator_match.v64, ILA_ATTR_PAD) || nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || - nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) + nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) return -1; return 0; @@ -607,7 +609,7 @@ static struct pernet_operations ila_net_ops = { .size = sizeof(struct ila_net), }; -static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) +static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) { struct ila_map *ila; struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -626,7 +628,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); if (ila) - ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral); + ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila); rcu_read_unlock(); From 70d5aef48a421a68bd9d1bf8f8267af406681580 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 5 Nov 2017 15:58:24 -0800 Subject: [PATCH 1926/2177] ila: allow configuration of identifier type Allow identifier to be explicitly configured for a mapping. This can either be one of the identifier types specified in the ILA draft or a value of ILA_ATYPE_USE_FORMAT which means the identifier type is inferred from the identifier type field. If a value other than ILA_ATYPE_USE_FORMAT is set for a mapping then it is assumed that the identifier type field is not present in an identifier. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/uapi/linux/ila.h | 13 ++++++++++ net/ipv6/ila/ila.h | 12 +--------- net/ipv6/ila/ila_lwt.c | 51 ++++++++++++++++++++++++++++++++++------ net/ipv6/ila/ila_xlat.c | 18 ++++++++++---- 4 files changed, 71 insertions(+), 23 deletions(-) diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index 0744881dcef3..8353c78a7781 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -17,6 +17,7 @@ enum { ILA_ATTR_DIR, /* u32 */ ILA_ATTR_PAD, ILA_ATTR_CSUM_MODE, /* u8 */ + ILA_ATTR_IDENT_TYPE, /* u8 */ __ILA_ATTR_MAX, }; @@ -44,4 +45,16 @@ enum { ILA_CSUM_NEUTRAL_MAP_AUTO, }; +enum { + ILA_ATYPE_IID = 0, + ILA_ATYPE_LUID, + ILA_ATYPE_VIRT_V4, + ILA_ATYPE_VIRT_UNI_V6, + ILA_ATYPE_VIRT_MULTI_V6, + ILA_ATYPE_NONLOCAL_ADDR, + ILA_ATYPE_RSVD_1, + ILA_ATYPE_RSVD_2, + + ILA_ATYPE_USE_FORMAT = 32, /* Get type from type field in identifier */ +}; #endif /* _UAPI_LINUX_ILA_H */ diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index e0170f62bc39..3c7a11b62334 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -55,17 +55,6 @@ struct ila_identifier { }; }; -enum { - ILA_ATYPE_IID = 0, - ILA_ATYPE_LUID, - ILA_ATYPE_VIRT_V4, - ILA_ATYPE_VIRT_UNI_V6, - ILA_ATYPE_VIRT_MULTI_V6, - ILA_ATYPE_RSVD_1, - ILA_ATYPE_RSVD_2, - ILA_ATYPE_RSVD_3, -}; - #define CSUM_NEUTRAL_FLAG htonl(0x10000000) struct ila_addr { @@ -93,6 +82,7 @@ struct ila_params { struct ila_locator locator_match; __wsum csum_diff; u8 csum_mode; + u8 ident_type; }; static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 104af07d83a6..4b97d573f223 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -114,6 +114,7 @@ drop: static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, + [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, }; static int ila_build_state(struct nlattr *nla, @@ -127,19 +128,14 @@ static int ila_build_state(struct nlattr *nla, struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; + u8 ident_type = ILA_ATYPE_USE_FORMAT; u8 csum_mode = ILA_CSUM_NO_ACTION; + u8 eff_ident_type; int ret; if (family != AF_INET6) return -EINVAL; - if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { - /* Need to have full locator and at least type field - * included in destination - */ - return -EINVAL; - } - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); if (ret < 0) return ret; @@ -149,6 +145,41 @@ static int ila_build_state(struct nlattr *nla, iaddr = (struct ila_addr *)&cfg6->fc_dst; + if (tb[ILA_ATTR_IDENT_TYPE]) + ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]); + + if (ident_type == ILA_ATYPE_USE_FORMAT) { + /* Infer identifier type from type field in formatted + * identifier. + */ + + if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { + /* Need to have full locator and at least type field + * included in destination + */ + return -EINVAL; + } + + eff_ident_type = iaddr->ident.type; + } else { + eff_ident_type = ident_type; + } + + switch (eff_ident_type) { + case ILA_ATYPE_IID: + /* Don't allow ILA for IID type */ + return -EINVAL; + case ILA_ATYPE_LUID: + break; + case ILA_ATYPE_VIRT_V4: + case ILA_ATYPE_VIRT_UNI_V6: + case ILA_ATYPE_VIRT_MULTI_V6: + case ILA_ATYPE_NONLOCAL_ADDR: + /* These ILA formats are not supported yet. 
*/ + default: + return -EINVAL; + } + if (tb[ILA_ATTR_CSUM_MODE]) csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); @@ -174,6 +205,7 @@ static int ila_build_state(struct nlattr *nla, p = ila_params_lwtunnel(newts); p->csum_mode = csum_mode; + p->ident_type = ident_type; p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); /* Precompute checksum difference for translation since we @@ -208,9 +240,13 @@ static int ila_fill_encap_info(struct sk_buff *skb, if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64, ILA_ATTR_PAD)) goto nla_put_failure; + if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode)) goto nla_put_failure; + if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -221,6 +257,7 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */ nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */ + nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */ 0; } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 213259629e66..6eb5e68f112a 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -121,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, + [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, }; static int parse_nl_config(struct genl_info *info, @@ -141,6 +142,12 @@ static int parse_nl_config(struct genl_info *info, else xp->ip.csum_mode = ILA_CSUM_NO_ACTION; + if (info->attrs[ILA_ATTR_IDENT_TYPE]) + xp->ip.ident_type = nla_get_u8( + info->attrs[ILA_ATTR_IDENT_TYPE]); + else + xp->ip.ident_type = ILA_ATYPE_USE_FORMAT; + if (info->attrs[ILA_ATTR_IFINDEX]) xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); @@ -398,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) (__force u64)ila->xp.ip.locator_match.v64, ILA_ATTR_PAD) || nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || - nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) + nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) || + nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type)) return -1; return 0; @@ -619,10 +627,10 @@ static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) /* Assumes skb contains a valid IPv6 header that is pulled */ - if (!ila_addr_is_ila(iaddr)) { - /* Type indicates this is not an ILA address */ - return 0; - } + /* No check here that ILA type in the mapping matches what is in the + * address. We assume that whatever sender gaves us can be translated. + * The checksum mode however is relevant. + */ rcu_read_lock(); From fddb231ebe647749782a9ebf11106a81f7168ba7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 5 Nov 2017 15:58:25 -0800 Subject: [PATCH 1927/2177] ila: Add a hook type for LWT routes In LWT tunnels both an input and output route method is defined. If both of these are executed in the same path then double translation happens and the effect is not correct. This patch adds a new attribute that indicates the hook type. Two values are defined for route output and route output. ILA translation is only done for the one that is set. The default is to enable ILA on route output. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/uapi/linux/ila.h | 7 +++++++ net/ipv6/ila/ila_lwt.c | 39 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index 8353c78a7781..483b77af4eb8 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -18,6 +18,7 @@ enum { ILA_ATTR_PAD, ILA_ATTR_CSUM_MODE, /* u8 */ ILA_ATTR_IDENT_TYPE, /* u8 */ + ILA_ATTR_HOOK_TYPE, /* u8 */ __ILA_ATTR_MAX, }; @@ -57,4 +58,10 @@ enum { ILA_ATYPE_USE_FORMAT = 32, /* Get type from type field in identifier */ }; + +enum { + ILA_HOOK_ROUTE_OUTPUT, + ILA_HOOK_ROUTE_INPUT, +}; + #endif /* _UAPI_LINUX_ILA_H */ diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 4b97d573f223..3d56a2fb6f86 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -20,6 +20,7 @@ struct ila_lwt { struct ila_params p; struct dst_cache dst_cache; u32 connected : 1; + u32 lwt_output : 1; }; static inline struct ila_lwt *ila_lwt_lwtunnel( @@ -45,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate), - true); + if (ilwt->lwt_output) + ila_update_ipv6_locator(skb, + ila_params_lwtunnel(orig_dst->lwtstate), + true); if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) { /* Already have a next hop address in route, no need for @@ -98,11 +101,15 @@ drop: static int ila_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); + struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate); if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false); + if (!ilwt->lwt_output) + ila_update_ipv6_locator(skb, + ila_params_lwtunnel(dst->lwtstate), + false); return dst->lwtstate->orig_input(skb); @@ -115,6 +122,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, + [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, }, }; static int ila_build_state(struct nlattr *nla, @@ -129,7 +137,9 @@ static int ila_build_state(struct nlattr *nla, const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; u8 ident_type = ILA_ATYPE_USE_FORMAT; + u8 hook_type = ILA_HOOK_ROUTE_OUTPUT; u8 csum_mode = ILA_CSUM_NO_ACTION; + bool lwt_output = true; u8 eff_ident_type; int ret; @@ -180,6 +190,20 @@ static int ila_build_state(struct nlattr *nla, return -EINVAL; } + if (tb[ILA_ATTR_HOOK_TYPE]) + hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]); + + switch (hook_type) { + case ILA_HOOK_ROUTE_OUTPUT: + lwt_output = true; + break; + case ILA_HOOK_ROUTE_INPUT: + lwt_output = false; + break; + default: + return -EINVAL; + } + if (tb[ILA_ATTR_CSUM_MODE]) csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); @@ -202,6 +226,8 @@ static int ila_build_state(struct nlattr *nla, return ret; } + ilwt->lwt_output = !!lwt_output; + p = ila_params_lwtunnel(newts); p->csum_mode = csum_mode; @@ -236,6 +262,7 @@ static int ila_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { struct ila_params *p = ila_params_lwtunnel(lwtstate); + struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate); if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64, ILA_ATTR_PAD)) @@ -247,6 +274,11 @@ static int ila_fill_encap_info(struct sk_buff *skb, if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type)) goto 
nla_put_failure; + if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE, + ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT : + ILA_HOOK_ROUTE_INPUT)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -258,6 +290,7 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */ nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */ nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */ + nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */ 0; } From 7afc19bc217475000e5cd8eb0009665453c8439e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 5 Nov 2017 15:58:26 -0800 Subject: [PATCH 1928/2177] ila: Add ila.txt Add documenation for kernel ILA. This describes ILA, features, configuration gives some examples. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ila.txt | 285 +++++++++++++++++++++++++++++++ 1 file changed, 285 insertions(+) create mode 100644 Documentation/networking/ila.txt diff --git a/Documentation/networking/ila.txt b/Documentation/networking/ila.txt new file mode 100644 index 000000000000..78df879abd26 --- /dev/null +++ b/Documentation/networking/ila.txt @@ -0,0 +1,285 @@ +Identifier Locator Addressing (ILA) + + +Introduction +============ + +Identifier-locator addressing (ILA) is a technique used with IPv6 that +differentiates between location and identity of a network node. Part of an +address expresses the immutable identity of the node, and another part +indicates the location of the node which can be dynamic. Identifier-locator +addressing can be used to efficiently implement overlay networks for +network virtualization as well as solutions for use cases in mobility. + +ILA can be thought of as means to implement an overlay network without +encapsulation. This is accomplished by performing network address +translation on destination addresses as a packet traverses a network. To +the network, an ILA translated packet appears to be no different than any +other IPv6 packet. For instance, if the transport protocol is TCP then an +ILA translated packet looks like just another TCP/IPv6 packet. The +advantage of this is that ILA is transparent to the network so that +optimizations in the network, such as ECMP, RSS, GRO, GSO, etc., just work. + +The ILA protocol is described in Internet-Draft draft-herbert-intarea-ila. + + +ILA terminology +=============== + + - Identifier A number that identifies an addressable node in the network + independent of its location. ILA identifiers are sixty-four + bit values. + + - Locator A network prefix that routes to a physical host. Locators + provide the topological location of an addressed node. ILA + locators are sixty-four bit prefixes. + + - ILA mapping + A mapping of an ILA identifier to a locator (or to a + locator and meta data). An ILA domain maintains a database + that contains mappings for all destinations in the domain. + + - SIR address + An IPv6 address composed of a SIR prefix (upper sixty- + four bits) and an identifier (lower sixty-four bits). + SIR addresses are visible to applications and provide a + means for them to address nodes independent of their + location. + + - ILA address + An IPv6 address composed of a locator (upper sixty-four + bits) and an identifier (low order sixty-four bits). ILA + addresses are never visible to an application. + + - ILA host An end host that is capable of performing ILA translations + on transmit or receive. 
+ + - ILA router A network node that performs ILA translation and forwarding + of translated packets. + + - ILA forwarding cache + A type of ILA router that only maintains a working set + cache of mappings. + + - ILA node A network node capable of performing ILA translations. This + can be an ILA router, ILA forwarding cache, or ILA host. + + +Operation +========= + +There are two fundamental operations with ILA: + + - Translate a SIR address to an ILA address. This is performed on ingress + to an ILA overlay. + + - Translate an ILA address to a SIR address. This is performed on egress + from the ILA overlay. + +ILA can be deployed either on end hosts or intermediate devices in the +network; these are provided by "ILA hosts" and "ILA routers" respectively. +Configuration and datapath for these two points of deployment are somewhat +different. + +The diagram below illustrates the flow of packets through ILA as well +as showing ILA hosts and routers. + + +--------+ +--------+ + | Host A +-+ +--->| Host B | + | | | (2) ILA (') | | + +--------+ | ...addressed.... ( ) +--------+ + V +---+--+ . packet . +---+--+ (_) + (1) SIR | | ILA |----->-------->---->| ILA | | (3) SIR + addressed +->|router| . . |router|->-+ addressed + packet +---+--+ . IPv6 . +---+--+ packet + / . Network . + / . . +--+-++--------+ + +--------+ / . . |ILA || Host | + | Host +--+ . .- -|host|| | + | | . . +--+-++--------+ + +--------+ ................ + + +Transport checksum handling +=========================== + +When an address is translated by ILA, an encapsulated transport checksum +that includes the translated address in a pseudo header may be rendered +incorrect on the wire. This is a problem for intermediate devices, +including checksum offload in NICs, that process the checksum. There are +three options to deal with this: + +- no action Allow the checksum to be incorrect on the wire. Before + a receiver verifies a checksum the ILA to SIR address + translation must be done. + +- adjust transport checksum + When ILA translation is performed the packet is parsed + and if a transport layer checksum is found then it is + adjusted to reflect the correct checksum per the + translated address. + +- checksum neutral mapping + When an address is translated the difference can be offset + elsewhere in a part of the packet that is covered by the + checksum. The low order sixteen bits of the identifier + are used. This method is preferred since it doesn't require + parsing a packet beyond the IP header and in most cases the + adjustment can be precomputed and saved with the mapping. + +Note that the checksum neutral adjustment affects the low order sixteen +bits of the identifier. When ILA to SIR address translation is done on +egress the low order bits are restored to the original value which +restores the identifier as it was originally sent. + + +Identifier types +================ + +ILA defines different types of identifiers for different use cases. + +The defined types are: + + 0: interface identifier + + 1: locally unique identifier + + 2: virtual networking identifier for IPv4 address + + 3: virtual networking identifier for IPv6 unicast address + + 4: virtual networking identifier for IPv6 multicast address + + 5: non-local address identifier + +In the current implementation of kernel ILA only locally unique identifiers +(LUID) are supported. LUID allows for a generic, unformatted 64 bit +identifier.
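The checksum-neutral mapping described under "Transport checksum handling" above boils down to ones'-complement arithmetic on the low-order sixteen bits of the identifier. The following is a minimal, self-contained sketch of that arithmetic only; it is an illustration, not the kernel's implementation (which uses the kernel's checksum helpers). The function names are invented for the example and the sixty-four bit address halves are assumed to be in host byte order.

#include <stdint.h>

/* Fold a 32-bit accumulator into a sixteen bit ones'-complement value. */
static uint16_t csum_fold(uint32_t sum)
{
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)sum;
}

/* Ones'-complement sum of the four sixteen bit words in a sixty-four
 * bit address half (SIR prefix or locator), host byte order assumed.
 */
static uint32_t half_sum(uint64_t half)
{
    return (uint32_t)((half >> 48) & 0xffff) +
           (uint32_t)((half >> 32) & 0xffff) +
           (uint32_t)((half >> 16) & 0xffff) +
           (uint32_t)(half & 0xffff);
}

/* SIR to ILA direction with checksum-neutral mapping: the upper
 * sixty-four bits are overwritten with the locator, and the checksum
 * difference is absorbed into the low order sixteen bits of the
 * identifier, so a transport checksum computed over the SIR address
 * still verifies after translation.
 */
static uint64_t sir_to_ila_identifier(uint64_t identifier,
                                      uint64_t sir_prefix,
                                      uint64_t locator)
{
    /* delta = sum(sir_prefix) - sum(locator), in ones'-complement */
    uint16_t delta = csum_fold(half_sum(sir_prefix) +
                               (uint16_t)~csum_fold(half_sum(locator)));
    uint16_t low = (uint16_t)(identifier & 0xffff);
    uint16_t new_low = csum_fold((uint32_t)low + delta);

    return (identifier & ~(uint64_t)0xffff) | new_low;
}

The ILA address is then the locator followed by the adjusted identifier. On egress (ILA to SIR translation) the same delta is subtracted from the low-order sixteen bits, which restores the identifier to its original value as noted above.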
+ + +Identifier formats +================== + +Kernel ILA supports two optional fields in an identifier for formatting: +"C-bit" and "identifier type". The presence of these fields is determined +by configuration as demonstrated below. + +If the identifier type is present it occupies the three highest order +bits of an identifier. The possible values are given in the above list. + +If the C-bit is present, this is used as an indication that checksum +neutral mapping has been done. The C-bit can only be set in an +ILA address, never a SIR address. + +In the simplest format the identifier types, C-bit, and checksum +adjustment value are not present so an identifier is considered an +unstructured sixty-four bit value. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | + + + + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +The checksum neutral adjustment may be configured to always be +present using neutral-map-auto. In this case there is no C-bit, but the +checksum adjustment is in the low order 16 bits. The identifier is +still sixty-four bits. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | + | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Checksum-neutral adjustment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +The C-bit may be used to explicitly indicate that checksum neutral +mapping has been applied to an ILA address. The format is: + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | |C| Identifier | + | +-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Checksum-neutral adjustment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +The identifier type field may be present to indicate the identifier +type. If it is not present then the type is inferred based on mapping +configuration. The checksum neutral adjustment may automatically be +used with the identifier type as illustrated below. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type| Identifier | + +-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Checksum-neutral adjustment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +The identifier type and the C-bit can be present simultaneously, in which +case the identifier format would be: + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type|C| Identifier | + +-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Checksum-neutral adjustment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + +Configuration +============= + +There are two methods to configure ILA mappings. One is by using LWT routes +and the other is ila_xlat (called from NFHOOK PREROUTING hook). ila_xlat +is intended to be used in the receive path for ILA hosts. + +An ILA router has also been implemented in XDP. Description of that is +outside the scope of this document. + +The usage of ILA LWT routes is: + +ip route add DEST/128 encap ila LOC csum-mode MODE ident-type TYPE via ADDR + +Destination (DEST) can either be a SIR address (for an ILA host or ingress +ILA router) or an ILA address (egress ILA router). LOC is the sixty-four +bit locator (with format W:X:Y:Z) that overwrites the upper sixty-four +bits of the destination address. Checksum MODE is one of "no-action", +"adj-transport", "neutral-map", and "neutral-map-auto". If neutral-map is +set then the C-bit will be present. Identifier TYPE is one of "luid" or +"use-format."
In the case of use-format, the identifier type field is +present and the effective type is taken from that. + +The usage of ila_xlat is: + +ip ila add loc_match MATCH loc LOC csum-mode MODE ident-type TYPE + +MATCH indicates the incoming locator that must be matched to apply +the translation. LOC is the locator that overwrites the upper +sixty-four bits of the destination address. MODE and TYPE have the +same meanings as described above. + + +Some examples +============= + +# Configure an ILA route that uses checksum neutral mapping as well +# as type field. Note that the type field is set in the SIR address +# (the 2000 implies type is 1 which is LUID). +ip route add 3333:0:0:1:2000:0:1:87/128 encap ila 2001:0:87:0 \ + csum-mode neutral-map ident-type use-format + +# Configure an ILA LWT route that uses auto checksum neutral mapping +# (no C-bit) and configure identifier type to be LUID so that the +# identifier type field will not be present. +ip route add 3333:0:0:1:2000:0:2:87/128 encap ila 2001:0:87:1 \ + csum-mode neutral-map-auto ident-type luid + +ila_xlat configuration + +# Configure an ILA to SIR mapping that matches a locator and overwrites +# it with a SIR address (3333:0:0:1 in this example). The C-bit and +# identifier field are used. +ip ila add loc_match 2001:0:119:0 loc 3333:0:0:1 \ + csum-mode neutral-map-auto ident-type use-format + +# Configure an ILA to SIR mapping where checksum neutral is automatically +# set without the C-bit and the identifier type is configured to be LUID +# so that the identifier type field is not present. +ip ila add loc_match 2001:0:119:0 loc 3333:0:0:1 \ + csum-mode neutral-map-auto ident-type use-format From aaf151b9e68101b03ba42d581e8a424bdd0110fe Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Sun, 5 Nov 2017 18:44:10 -0800 Subject: [PATCH 1929/2177] bpf: Rename tcp_bbf.readme to tcp_bpf.readme The original patch had the wrong filename. Fixes: bfdf75693875 ("bpf: create samples/bpf/tcp_bpf.readme") Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/{tcp_bbf.readme => tcp_bpf.readme} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename samples/bpf/{tcp_bbf.readme => tcp_bpf.readme} (100%) diff --git a/samples/bpf/tcp_bbf.readme b/samples/bpf/tcp_bpf.readme similarity index 100% rename from samples/bpf/tcp_bbf.readme rename to samples/bpf/tcp_bpf.readme From 602f3baf22188aad24b9a58be3209ab774b97d74 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:41 +0100 Subject: [PATCH 1930/2177] net_sch: red: Add offload ability to RED qdisc Add the ability to offload RED qdisc by using ndo_setup_tc. There are four commands for RED offloading: * TC_RED_REPLACE: handles set and change. * TC_RED_DESTROY: handles qdisc destroy. * TC_RED_STATS: updates the qdisc's counters (given by reference). * TC_RED_XSTATS: returns RED xstats. Whether RED is offloaded is determined every time the dump action is called, because a parent change of this qdisc could change its offload state without requiring any RED function to be called. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 1 + include/net/pkt_cls.h | 30 +++++++++++++ include/uapi/linux/pkt_sched.h | 1 + net/sched/sch_red.c | 79 ++++++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fda527ccb263..71968a2ca9f3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -777,6 +777,7 @@ enum tc_setup_type { TC_SETUP_CLSBPF, TC_SETUP_BLOCK, TC_SETUP_CBS, + TC_SETUP_QDISC_RED, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 98fef3221227..03c208d3c922 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -703,4 +703,34 @@ struct tc_cookie { u8 *data; u32 len; }; + +enum tc_red_command { + TC_RED_REPLACE, + TC_RED_DESTROY, + TC_RED_STATS, + TC_RED_XSTATS, +}; + +struct tc_red_qopt_offload_params { + u32 min; + u32 max; + u32 probability; + bool is_ecn; +}; +struct tc_red_qopt_offload_stats { + struct gnet_stats_basic_packed *bstats; + struct gnet_stats_queue *qstats; +}; + +struct tc_red_qopt_offload { + enum tc_red_command command; + u32 handle; + u32 parent; + union { + struct tc_red_qopt_offload_params set; + struct tc_red_qopt_offload_stats stats; + struct red_stats *xstats; + }; +}; + #endif diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 5002562868cc..6a2c5ea7e9c4 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -256,6 +256,7 @@ struct tc_red_qopt { #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 +#define TC_RED_OFFLOADED 8 }; struct tc_red_xstats { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index fdfdb56aaae2..007dd8ef8aac 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -148,11 +149,37 @@ static void red_reset(struct Qdisc *sch) red_restart(&q->vars); } +static int red_offload(struct Qdisc *sch, bool enable) +{ + struct red_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_red_qopt_offload opt = { + .handle = sch->handle, + .parent = sch->parent, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + if (enable) { + opt.command = TC_RED_REPLACE; + opt.set.min = q->parms.qth_min >> q->parms.Wlog; + opt.set.max = q->parms.qth_max >> q->parms.Wlog; + opt.set.probability = q->parms.max_P; + opt.set.is_ecn = red_use_ecn(q); + } else { + opt.command = TC_RED_DESTROY; + } + + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); +} + static void red_destroy(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); del_timer_sync(&q->adapt_timer); + red_offload(sch, false); qdisc_destroy(q->qdisc); } @@ -219,6 +246,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) red_start_of_idle_period(&q->vars); sch_tree_unlock(sch); + red_offload(sch, true); return 0; } @@ -244,6 +272,33 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt) return red_change(sch, opt); } +static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_red_qopt_offload hw_stats = { + .handle = sch->handle, + .parent = sch->parent, + .command = TC_RED_STATS, + .stats.bstats = &sch->bstats, + .stats.qstats = &sch->qstats, + }; + int err; + + opt->flags &= ~TC_RED_OFFLOADED; + if (!tc_can_offload(dev) || 
!dev->netdev_ops->ndo_setup_tc) + return 0; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, + &hw_stats); + if (err == -EOPNOTSUPP) + return 0; + + if (!err) + opt->flags |= TC_RED_OFFLOADED; + + return err; +} + static int red_dump(struct Qdisc *sch, struct sk_buff *skb) { struct red_sched_data *q = qdisc_priv(sch); @@ -257,8 +312,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) .Plog = q->parms.Plog, .Scell_log = q->parms.Scell_log, }; + int err; sch->qstats.backlog = q->qdisc->qstats.backlog; + err = red_dump_offload(sch, &opt); + if (err) + goto nla_put_failure; + opts = nla_nest_start(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; @@ -275,6 +335,7 @@ nla_put_failure: static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct red_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); struct tc_red_xstats st = { .early = q->stats.prob_drop + q->stats.forced_drop, .pdrop = q->stats.pdrop, @@ -282,6 +343,24 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .marked = q->stats.prob_mark + q->stats.forced_mark, }; + if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) { + struct red_stats hw_stats = {0}; + struct tc_red_qopt_offload hw_stats_request = { + .handle = sch->handle, + .parent = sch->parent, + .command = TC_RED_XSTATS, + .xstats = &hw_stats, + }; + if (!dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_RED, + &hw_stats_request)) { + st.early += hw_stats.prob_drop + hw_stats.forced_drop; + st.pdrop += hw_stats.pdrop; + st.other += hw_stats.other; + st.marked += hw_stats.prob_mark + hw_stats.forced_mark; + } + } + return gnet_stats_copy_app(d, &st, sizeof(st)); } From 575ed7d39e2fbe602a3894bc766a8cb49af83bd3 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:42 +0100 Subject: [PATCH 1931/2177] net_sch: mqprio: Change TC_SETUP_MQPRIO to TC_SETUP_QDISC_MQPRIO Change TC_SETUP_MQPRIO to TC_SETUP_QDISC_MQPRIO to match the new convention. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/sfc/falcon/tx.c | 2 +- drivers/net/ethernet/sfc/tx.c | 2 +- drivers/net/ethernet/ti/netcp_core.c | 2 +- include/linux/netdevice.h | 2 +- net/sched/sch_mqprio.c | 5 +++-- 15 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 3d53153ce751..a74a8fbad53a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2206,7 +2206,7 @@ static int xgbe_setup_tc(struct net_device *netdev, enum tc_setup_type type, struct tc_mqprio_qopt *mqprio = type_data; u8 tc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1216c1f1e052..4c739d5355d2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4289,7 +4289,7 @@ int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 96416f5d97f3..e5472e5ae7b2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7388,7 +7388,7 @@ static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: return bnxt_setup_tc_block(dev, type_data); - case TC_SETUP_MQPRIO: { + case TC_SETUP_QDISC_MQPRIO: { struct tc_mqprio_qopt *mqprio = type_data; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index ebc55b6a6349..784dbf5a3e12 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -351,7 +351,7 @@ static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type, u8 num_tc; int i; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 2a0af11c9b59..59415090ff0f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1252,7 +1252,7 @@ out: static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; return hns3_setup_tc(dev, type_data); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 81e4425f0529..adc62fb38c49 
100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1389,7 +1389,7 @@ static int __fm10k_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 05b94d87a6c3..17e6f64299cf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7550,7 +7550,7 @@ static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_MQPRIO: + case TC_SETUP_QDISC_MQPRIO: return i40e_setup_tc(netdev, type_data); case TC_SETUP_BLOCK: return i40e_setup_tc_block(netdev, type_data); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e5dcb25be398..6eaca8366ac8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9431,7 +9431,7 @@ static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: return ixgbe_setup_tc_block(dev, type_data); - case TC_SETUP_MQPRIO: + case TC_SETUP_QDISC_MQPRIO: return ixgbe_setup_tc_mqprio(dev, type_data); default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 736a6ccaf05e..99051a294fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -135,7 +135,7 @@ static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f877f2f5f2a5..5d5d2e50e4bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3146,7 +3146,7 @@ int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, case TC_SETUP_BLOCK: return mlx5e_setup_tc_block(dev, type_data); #endif - case TC_SETUP_MQPRIO: + case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(dev, type_data); default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c index 6a75f4140a4b..1b978d69e702 100644 --- a/drivers/net/ethernet/sfc/falcon/tx.c +++ b/drivers/net/ethernet/sfc/falcon/tx.c @@ -435,7 +435,7 @@ int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type, unsigned tc, num_tc; int rc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; num_tc = mqprio->num_tc; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 32bf1fecf864..ea27b8a7f465 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -663,7 +663,7 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, unsigned tc, num_tc; int rc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; num_tc = mqprio->num_tc; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c 
index 437d36289786..15e2e3031d36 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1887,7 +1887,7 @@ static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type, /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 71968a2ca9f3..703885aed856 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -770,7 +770,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); enum tc_setup_type { - TC_SETUP_MQPRIO, + TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_CLSMATCHALL, diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 4d5ed45123f0..b85885a9d8a1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -50,7 +50,8 @@ static void mqprio_destroy(struct Qdisc *sch) switch (priv->mode) { case TC_MQPRIO_MODE_DCB: case TC_MQPRIO_MODE_CHANNEL: - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, + dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_MQPRIO, &mqprio); break; default: @@ -265,7 +266,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } err = dev->netdev_ops->ndo_setup_tc(dev, - TC_SETUP_MQPRIO, + TC_SETUP_QDISC_MQPRIO, &mqprio); if (err) return err; From 8521db4c7e155d12fb280686c0552e47f77e9110 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:43 +0100 Subject: [PATCH 1932/2177] net_sch: cbs: Change TC_SETUP_CBS to TC_SETUP_QDISC_CBS Change TC_SETUP_CBS to TC_SETUP_QDISC_CBS to match the new convention.. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Acked-by: Vinicius Costa Gomes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- include/linux/netdevice.h | 2 +- net/sched/sch_cbs.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e22bce7cdacd..43cf39527660 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2488,7 +2488,7 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, struct igb_adapter *adapter = netdev_priv(dev); switch (type) { - case TC_SETUP_CBS: + case TC_SETUP_QDISC_CBS: return igb_offload_cbs(adapter, type_data); default: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 703885aed856..30f0f2928808 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -776,7 +776,7 @@ enum tc_setup_type { TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, TC_SETUP_BLOCK, - TC_SETUP_CBS, + TC_SETUP_QDISC_CBS, TC_SETUP_QDISC_RED, }; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index bdb533b7fb8c..7a72980c1509 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -212,7 +212,7 @@ static void cbs_disable_offload(struct net_device *dev, cbs.queue = q->queue; cbs.enable = 0; - err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs); + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); if (err < 0) pr_warn("Couldn't disable CBS offload for queue %d\n", cbs.queue); @@ -236,7 +236,7 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, cbs.idleslope = opt->idleslope; cbs.sendslope = opt->sendslope; - err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs); + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); if (err < 0) return err; From ad53fa06c126d2d739563802cc412cdcc9c32e63 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:44 +0100 Subject: [PATCH 1933/2177] mlxsw: reg: Add cwtp & cwtpm registers This patch adds 2 new registers: - Congestion WRED ECN TClass Profile Register [CWTP] - Congestion WRED ECN TClass and Pool Mapping Register [CWTPM] These registers would later be needed to offload RED-related functionality to the HW. Signed-off-by: Yuval Mintz Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 187 ++++++++++++++++++++++ 1 file changed, 187 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5066553dd0b6..db394ec2a4dc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1758,6 +1758,191 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, } } +/* CWTP - Congetion WRED ECN TClass Profile + * ---------------------------------------- + * Configures the profiles for queues of egress port and traffic class + */ +#define MLXSW_REG_CWTP_ID 0x2802 +#define MLXSW_REG_CWTP_BASE_LEN 0x28 +#define MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN 0x08 +#define MLXSW_REG_CWTP_LEN 0x40 + +MLXSW_REG_DEFINE(cwtp, MLXSW_REG_CWTP_ID, MLXSW_REG_CWTP_LEN); + +/* reg_cwtp_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, cwtp, local_port, 0, 16, 8); + +/* reg_cwtp_traffic_class + * Traffic Class to configure + * Access: Index + */ +MLXSW_ITEM32(reg, cwtp, traffic_class, 32, 0, 8); + +/* reg_cwtp_profile_min + * Minimum Average Queue Size of the profile in cells. 
+ * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_min, MLXSW_REG_CWTP_BASE_LEN, + 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 0, false); + +/* reg_cwtp_profile_percent + * Percentage of WRED and ECN marking for maximum Average Queue size + * Range is 0 to 100, units of integer percentage + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_percent, MLXSW_REG_CWTP_BASE_LEN, + 24, 7, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false); + +/* reg_cwtp_profile_max + * Maximum Average Queue size of the profile in cells + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_max, MLXSW_REG_CWTP_BASE_LEN, + 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false); + +#define MLXSW_REG_CWTP_MIN_VALUE 64 +#define MLXSW_REG_CWTP_MAX_PROFILE 2 +#define MLXSW_REG_CWTP_DEFAULT_PROFILE 1 + +static inline void mlxsw_reg_cwtp_pack(char *payload, u8 local_port, + u8 traffic_class) +{ + int i; + + MLXSW_REG_ZERO(cwtp, payload); + mlxsw_reg_cwtp_local_port_set(payload, local_port); + mlxsw_reg_cwtp_traffic_class_set(payload, traffic_class); + + for (i = 0; i <= MLXSW_REG_CWTP_MAX_PROFILE; i++) { + mlxsw_reg_cwtp_profile_min_set(payload, i, + MLXSW_REG_CWTP_MIN_VALUE); + mlxsw_reg_cwtp_profile_max_set(payload, i, + MLXSW_REG_CWTP_MIN_VALUE); + } +} + +#define MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile) (profile - 1) + +static inline void +mlxsw_reg_cwtp_profile_pack(char *payload, u8 profile, u32 min, u32 max, + u32 probability) +{ + u8 index = MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile); + + mlxsw_reg_cwtp_profile_min_set(payload, index, min); + mlxsw_reg_cwtp_profile_max_set(payload, index, max); + mlxsw_reg_cwtp_profile_percent_set(payload, index, probability); +} + +/* CWTPM - Congestion WRED ECN TClass and Pool Mapping + * --------------------------------------------------- + * The CWTPM register maps each egress port and traffic class to profile num. + */ +#define MLXSW_REG_CWTPM_ID 0x2803 +#define MLXSW_REG_CWTPM_LEN 0x44 + +MLXSW_REG_DEFINE(cwtpm, MLXSW_REG_CWTPM_ID, MLXSW_REG_CWTPM_LEN); + +/* reg_cwtpm_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, cwtpm, local_port, 0, 16, 8); + +/* reg_cwtpm_traffic_class + * Traffic Class to configure + * Access: Index + */ +MLXSW_ITEM32(reg, cwtpm, traffic_class, 32, 0, 8); + +/* reg_cwtpm_ew + * Control enablement of WRED for traffic class: + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ew, 36, 1, 1); + +/* reg_cwtpm_ee + * Control enablement of ECN for traffic class: + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ee, 36, 0, 1); + +/* reg_cwtpm_tcp_g + * TCP Green Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_g, 52, 0, 2); + +/* reg_cwtpm_tcp_y + * TCP Yellow Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_y, 56, 16, 2); + +/* reg_cwtpm_tcp_r + * TCP Red Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_r, 56, 0, 2); + +/* reg_cwtpm_ntcp_g + * Non-TCP Green Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_g, 60, 0, 2); + +/* reg_cwtpm_ntcp_y + * Non-TCP Yellow Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_y, 64, 16, 2); + +/* reg_cwtpm_ntcp_r + * Non-TCP Red Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_r, 64, 0, 2); + +#define MLXSW_REG_CWTPM_RESET_PROFILE 0 + +static inline void mlxsw_reg_cwtpm_pack(char *payload, u8 local_port, + u8 traffic_class, u8 profile, + bool wred, bool ecn) +{ + MLXSW_REG_ZERO(cwtpm, payload); + mlxsw_reg_cwtpm_local_port_set(payload, local_port); + mlxsw_reg_cwtpm_traffic_class_set(payload, traffic_class); + mlxsw_reg_cwtpm_ew_set(payload, wred); + mlxsw_reg_cwtpm_ee_set(payload, ecn); + mlxsw_reg_cwtpm_tcp_g_set(payload, profile); + mlxsw_reg_cwtpm_tcp_y_set(payload, profile); + mlxsw_reg_cwtpm_tcp_r_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_g_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_y_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_r_set(payload, profile); +} + /* PPBT - Policy-Engine Port Binding Table * --------------------------------------- * This register is used for configuration of the Port Binding Table. @@ -7405,6 +7590,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(svpe), MLXSW_REG(sfmr), MLXSW_REG(spvmlr), + MLXSW_REG(cwtp), + MLXSW_REG(cwtpm), MLXSW_REG(ppbt), MLXSW_REG(pacl), MLXSW_REG(pagt), From 96f17e0776c285b7373bdccbfc7300dbeac3878c Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:45 +0100 Subject: [PATCH 1934/2177] mlxsw: spectrum: Support RED qdisc offload Add support for ndo_setup_tc with enum tc_setup_type value of TC_SETUP_RED. This call sets RED qdisc on a traffic class. This patch supports RED qdisc only as a root qdisc and set in on the default tclass. It can be set with or without ECN. Signed-off-by: Yuval Mintz Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/Makefile | 3 +- .../net/ethernet/mellanox/mlxsw/spectrum.c | 2 + .../net/ethernet/mellanox/mlxsw/spectrum.h | 15 ++ .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 174 ++++++++++++++++++ 4 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 80f4efd3e82f..9463c3fa254f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -19,7 +19,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ spectrum_ipip.o spectrum_acl_flex_actions.o \ - spectrum_mr.o spectrum_mr_tcam.o + spectrum_mr.o spectrum_mr_tcam.o \ + spectrum_qdisc.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 63e50877796b..e42b3e7bd588 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1797,6 +1797,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_RED: + return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 47dd7e06fd29..76ebd58b6248 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -203,6 +203,16 @@ struct mlxsw_sp_port_vlan { struct list_head bridge_vlan_node; }; +enum mlxsw_sp_qdisc_type { + MLXSW_SP_QDISC_NO_QDISC, + MLXSW_SP_QDISC_RED, +}; + +struct mlxsw_sp_qdisc { + u32 handle; + enum mlxsw_sp_qdisc_type type; +}; + struct mlxsw_sp_port { struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; @@ -236,6 +246,7 @@ struct mlxsw_sp_port { } periodic_hw_stats; struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; + struct mlxsw_sp_qdisc root_qdisc; }; static inline bool @@ -546,6 +557,10 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); +/* spectrum_qdisc.c */ +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p); + /* spectrum_fid.c */ int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type, u8 local_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c new file mode 100644 index 000000000000..c33e51a2b538 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -0,0 +1,174 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Nogah Frankel + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "spectrum.h" +#include "reg.h" + +static int +mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, + int tclass_num, u32 min, u32 max, + u32 probability, bool is_ecn) +{ + char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)]; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err; + + mlxsw_reg_cwtp_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num); + mlxsw_reg_cwtp_profile_pack(cwtp_cmd, MLXSW_REG_CWTP_DEFAULT_PROFILE, + roundup(min, MLXSW_REG_CWTP_MIN_VALUE), + roundup(max, MLXSW_REG_CWTP_MIN_VALUE), + probability); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtp), cwtp_cmd); + if (err) + return err; + + mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num, + MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd); +} + +static int +mlxsw_sp_tclass_congestion_disable(struct mlxsw_sp_port *mlxsw_sp_port, + int tclass_num) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; + + mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, + MLXSW_REG_CWTPM_RESET_PROFILE, false, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); +} + +static int +mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num) +{ + int err; + + if (mlxsw_sp_qdisc->handle != handle) + return 0; + + err = mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, tclass_num); + mlxsw_sp_qdisc->handle = TC_H_UNSPEC; + mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_NO_QDISC; + + return err; +} + +static int +mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, + struct tc_red_qopt_offload_params *p) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 min, max; + u64 prob; + int err = 0; + + if 
(p->min > p->max) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: min %u is bigger then max %u\n", p->min, + p->max); + goto err_bad_param; + } + if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: max value %u is too big\n", p->max); + goto err_bad_param; + } + if (p->min == 0 || p->max == 0) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: 0 value is illegal for min and max\n"); + goto err_bad_param; + } + + /* calculate probability in percentage */ + prob = p->probability; + prob *= 100; + prob = DIV_ROUND_UP(prob, 1 << 16); + prob = DIV_ROUND_UP(prob, 1 << 16); + min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min); + max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max); + err = mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min, + max, prob, p->is_ecn); + if (err) + goto err_config; + + mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_RED; + mlxsw_sp_qdisc->handle = handle; + return 0; + +err_bad_param: + err = -EINVAL; +err_config: + mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, mlxsw_sp_qdisc->handle, + mlxsw_sp_qdisc, tclass_num); + return err; +} + +#define MLXSW_SP_PORT_DEFAULT_TCLASS 0 + +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + int tclass_num; + + if (p->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + mlxsw_sp_qdisc = &mlxsw_sp_port->root_qdisc; + tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + + switch (p->command) { + case TC_RED_REPLACE: + return mlxsw_sp_qdisc_red_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + &p->set); + case TC_RED_DESTROY: + return mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num); + default: + return -EOPNOTSUPP; + } +} From 0afc1221ffecbbe4a9fdd6b46697cc7c31ecf8aa Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 6 Nov 2017 07:23:46 +0100 Subject: [PATCH 1935/2177] mlxsw: reg: Add ext and tc-cong counter groups This adds the counter group definitions for 2 new counter groups which are necessary for gaining ECN & wred counters. Signed-off-by: Yuval Mintz Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index db394ec2a4dc..6c4e08b8058a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3341,8 +3341,10 @@ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); enum mlxsw_reg_ppcnt_grp { MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, + MLXSW_REG_PPCNT_EXT_CNT = 0x5, MLXSW_REG_PPCNT_PRIO_CNT = 0x10, MLXSW_REG_PPCNT_TC_CNT = 0x11, + MLXSW_REG_PPCNT_TC_CONG_TC = 0x13, }; /* reg_ppcnt_grp @@ -3358,6 +3360,7 @@ enum mlxsw_reg_ppcnt_grp { * 0x10: Per Priority Counters * 0x11: Per Traffic Class Counters * 0x12: Physical Layer Counters + * 0x13: Per Traffic Class Congestion Counters * Access: Index */ MLXSW_ITEM32(reg, ppcnt, grp, 0x00, 0, 6); @@ -3496,6 +3499,14 @@ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64); +/* Ethernet Extended Counter Group Counters */ + +/* reg_ppcnt_ecn_marked + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ecn_marked, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + /* Ethernet Per Priority Group Counters */ /* reg_ppcnt_rx_octets @@ -3571,6 +3582,14 @@ MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); +/* Ethernet Per Traffic Class Congestion Group Counters */ + +/* reg_ppcnt_wred_discard + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, wred_discard, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); + static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, enum mlxsw_reg_ppcnt_grp grp, u8 prio_tc) From 075ab8adaf4e7443159bee6412cb85434c63ed15 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:47 +0100 Subject: [PATCH 1936/2177] mlxsw: spectrum: Collect tclass related stats periodically Add more statistics to be collected from the HW periodically. These stats are tclass based (beside ECN marked packet, that exist only port based). They are needed to expose RED qdisc stats and xstats correctly. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 34 +++++++++++++++++++ .../net/ethernet/mellanox/mlxsw/spectrum.h | 9 +++++ 2 files changed, 43 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e42b3e7bd588..1497b436be78 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1324,6 +1324,38 @@ out: return err; } +static void +mlxsw_sp_port_get_hw_xstats(struct net_device *dev, + struct mlxsw_sp_port_xstats *xstats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err, i; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_EXT_CNT, 0, + ppcnt_pl); + if (!err) + xstats->ecn = mlxsw_reg_ppcnt_ecn_marked_get(ppcnt_pl); + + for (i = 0; i < TC_MAX_QUEUE; i++) { + err = mlxsw_sp_port_get_stats_raw(dev, + MLXSW_REG_PPCNT_TC_CONG_TC, + i, ppcnt_pl); + if (!err) + xstats->wred_drop[i] = + mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl); + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CNT, + i, ppcnt_pl); + if (err) + continue; + + xstats->backlog[i] = + mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); + xstats->tail_drop[i] = + mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl); + } +} + static void update_stats_cache(struct work_struct *work) { struct mlxsw_sp_port *mlxsw_sp_port = @@ -1335,6 +1367,8 @@ static void update_stats_cache(struct work_struct *work) mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, &mlxsw_sp_port->periodic_hw_stats.stats); + mlxsw_sp_port_get_hw_xstats(mlxsw_sp_port->dev, + &mlxsw_sp_port->periodic_hw_stats.xstats); out: mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 76ebd58b6248..e68299e6a963 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -213,6 +213,14 @@ struct mlxsw_sp_qdisc { enum mlxsw_sp_qdisc_type type; }; +/* No need an internal lock; At worse - miss a single periodic iteration */ +struct mlxsw_sp_port_xstats { + u64 ecn; + u64 wred_drop[TC_MAX_QUEUE]; + u64 tail_drop[TC_MAX_QUEUE]; + u64 backlog[TC_MAX_QUEUE]; +}; + struct mlxsw_sp_port { struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; @@ -242,6 +250,7 @@ struct mlxsw_sp_port { struct { #define MLXSW_HW_STATS_UPDATE_TIME HZ struct rtnl_link_stats64 stats; + struct mlxsw_sp_port_xstats xstats; struct delayed_work update_dw; } periodic_hw_stats; struct mlxsw_sp_port_sample *sample; From 861fb8294d83ad950dfaa62b0bf8384c66e2cd5e Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:48 +0100 Subject: [PATCH 1937/2177] mlxsw: spectrum: Support RED xstats Add support for ndo_setup_tc with enum tc_setup_type value of TC_SETUP_RED_XSTATS. This call returns the RED qdisc xstats from the cache if the handle ID that is asked for matching the root qdisc ID and fails otherwise. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 9 ++++ .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 51 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e68299e6a963..a86a493788dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -48,6 +48,7 @@ #include #include #include +#include #include "port.h" #include "core.h" @@ -211,6 +212,14 @@ enum mlxsw_sp_qdisc_type { struct mlxsw_sp_qdisc { u32 handle; enum mlxsw_sp_qdisc_type type; + struct red_stats xstats_base; + union { + struct { + u64 tail_drop_base; + u64 ecn_base; + u64 wred_drop_base; + } red; + } xstats; }; /* No need an internal lock; At worse - miss a single periodic iteration */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index c33e51a2b538..b97b30e08d3a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "spectrum.h" #include "reg.h" @@ -77,6 +78,27 @@ mlxsw_sp_tclass_congestion_disable(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); } +static void +mlxsw_sp_setup_tc_qdisc_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num) +{ + struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; + struct mlxsw_sp_port_xstats *xstats; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + + switch (mlxsw_sp_qdisc->type) { + case MLXSW_SP_QDISC_RED: + xstats_base->prob_mark = xstats->ecn; + xstats_base->prob_drop = xstats->wred_drop[tclass_num]; + xstats_base->pdrop = xstats->tail_drop[tclass_num]; + break; + default: + break; + } +} + static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, @@ -135,6 +157,11 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, goto err_config; mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_RED; + if (mlxsw_sp_qdisc->handle != handle) + mlxsw_sp_setup_tc_qdisc_clean_stats(mlxsw_sp_port, + mlxsw_sp_qdisc, + tclass_num); + mlxsw_sp_qdisc->handle = handle; return 0; @@ -146,6 +173,26 @@ err_config: return err; } +static int +mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, struct red_stats *res) +{ + struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; + struct mlxsw_sp_port_xstats *xstats; + + if (mlxsw_sp_qdisc->handle != handle || + mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED) + return -EOPNOTSUPP; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + + res->prob_drop = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; + res->prob_mark = xstats->ecn - xstats_base->prob_mark; + res->pdrop = xstats->tail_drop[tclass_num] - xstats_base->pdrop; + return 0; +} + #define MLXSW_SP_PORT_DEFAULT_TCLASS 0 int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, @@ -168,6 +215,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, case TC_RED_DESTROY: return mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, p->handle, mlxsw_sp_qdisc, tclass_num); + case TC_RED_XSTATS: + return mlxsw_sp_qdisc_get_red_xstats(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + p->xstats); default: return 
-EOPNOTSUPP; } From 3670756fe6f370c0748b0c9227f3807fddf0e1ac Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:49 +0100 Subject: [PATCH 1938/2177] mlxsw: spectrum: Support general qdisc stats Add support for ndo_setup_tc with enum tc_setup_type value of TC_SETUP_QDISC_STATS. This call updates the generic qdisc stats from the cache if the handle ID that is asked for matching the root qdisc ID and fails otherwise. Currently doesn't support qlen and rqueues. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 5 ++ .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 51 +++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a86a493788dd..58cf222fb985 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -220,6 +220,11 @@ struct mlxsw_sp_qdisc { u64 wred_drop_base; } red; } xstats; + + u64 tx_bytes; + u64 tx_packets; + u64 drops; + u64 overlimits; }; /* No need an internal lock; At worse - miss a single periodic iteration */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index b97b30e08d3a..c33beac5def0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -85,14 +85,24 @@ mlxsw_sp_setup_tc_qdisc_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, { struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + + mlxsw_sp_qdisc->tx_packets = stats->tx_packets; + mlxsw_sp_qdisc->tx_bytes = stats->tx_bytes; switch (mlxsw_sp_qdisc->type) { case MLXSW_SP_QDISC_RED: xstats_base->prob_mark = xstats->ecn; xstats_base->prob_drop = xstats->wred_drop[tclass_num]; xstats_base->pdrop = xstats->tail_drop[tclass_num]; + + mlxsw_sp_qdisc->overlimits = xstats_base->prob_drop + + xstats_base->prob_mark; + mlxsw_sp_qdisc->drops = xstats_base->prob_drop + + xstats_base->pdrop; break; default: break; @@ -193,6 +203,43 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, return 0; } +static int +mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, + struct tc_red_qopt_offload_stats *res) +{ + u64 tx_bytes, tx_packets, overlimits, drops; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + + if (mlxsw_sp_qdisc->handle != handle || + mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED) + return -EOPNOTSUPP; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + + tx_bytes = stats->tx_bytes - mlxsw_sp_qdisc->tx_bytes; + tx_packets = stats->tx_packets - mlxsw_sp_qdisc->tx_packets; + overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - + mlxsw_sp_qdisc->overlimits; + drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] - + mlxsw_sp_qdisc->drops; + + _bstats_update(res->bstats, tx_bytes, tx_packets); + res->qstats->overlimits += overlimits; + res->qstats->drops += drops; + res->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + xstats->backlog[tclass_num]); + + mlxsw_sp_qdisc->drops += drops; + mlxsw_sp_qdisc->overlimits 
+= overlimits; + mlxsw_sp_qdisc->tx_bytes += tx_bytes; + mlxsw_sp_qdisc->tx_packets += tx_packets; + return 0; +} + #define MLXSW_SP_PORT_DEFAULT_TCLASS 0 int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, @@ -219,6 +266,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_qdisc_get_red_xstats(mlxsw_sp_port, p->handle, mlxsw_sp_qdisc, tclass_num, p->xstats); + case TC_RED_STATS: + return mlxsw_sp_qdisc_get_red_stats(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + &p->stats); default: return -EOPNOTSUPP; } From 29130853fe6dee04ad88d0586ff39182fa408a75 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 6 Nov 2017 11:12:08 +0000 Subject: [PATCH 1939/2177] dpaa_eth: fix error return code in dpaa_eth_probe() Fix to return a negative error code from the dpaa_bp_alloc() error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 784dbf5a3e12..7caa8da48421 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2728,11 +2728,11 @@ static int dpaa_eth_probe(struct platform_device *pdev) /* bp init */ for (i = 0; i < DPAA_BPS_NUM; i++) { - int err; - dpaa_bps[i] = dpaa_bp_alloc(dev); - if (IS_ERR(dpaa_bps[i])) + if (IS_ERR(dpaa_bps[i])) { + err = PTR_ERR(dpaa_bps[i]); goto free_dpaa_bps; + } /* the raw size of the buffers used for reception */ dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM); /* avoid runtime computations by keeping the usable size here */ From d86fd113ebbb37726ef7c7cc6fd6d5ce377455d6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 6 Nov 2017 11:11:28 +0000 Subject: [PATCH 1940/2177] mlxsw: spectrum: Fix error return code in mlxsw_sp_port_create() Fix to return a negative error code from the VID create error handling case instead of 0, as done elsewhere in this function. Fixes: c57529e1d5d8 ("mlxsw: spectrum: Replace vPorts with Port-VLAN") Signed-off-by: Wei Yongjun Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1497b436be78..b2cd1ebf4e36 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3043,6 +3043,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, if (IS_ERR(mlxsw_sp_port_vlan)) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", mlxsw_sp_port->local_port); + err = PTR_ERR(mlxsw_sp_port_vlan); goto err_port_vlan_get; } From 620a5c860b774a81ce3f193eefb52bf4d128cca5 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Mon, 6 Nov 2017 12:42:01 +0100 Subject: [PATCH 1941/2177] net: dsa: lan9303: Correct register names in comments Two comments refer to registers, but lack the LAN9303_ prefix. Fix that. Signed-off-by: Egil Hjelmeland Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/linux/dsa/lan9303.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index 05d8d136baab..f48a85c377de 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -13,8 +13,8 @@ struct lan9303_phy_ops { #define LAN9303_NUM_ALR_RECORDS 512 struct lan9303_alr_cache_entry { u8 mac_addr[ETH_ALEN]; - u8 port_map; /* Bitmap of ports. Zero if unused entry */ - u8 stp_override; /* non zero if set ALR_DAT1_AGE_OVERRID */ + u8 port_map; /* Bitmap of ports. Zero if unused entry */ + u8 stp_override; /* non zero if set LAN9303_ALR_DAT1_AGE_OVERRID */ }; struct lan9303 { @@ -28,7 +28,9 @@ struct lan9303 { struct mutex indirect_mutex; /* protect indexed register access */ const struct lan9303_phy_ops *ops; bool is_bridged; /* true if port 1 and 2 are bridged */ - u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */ + + /* remember LAN9303_SWE_PORT_STATE while not bridged */ + u32 swe_port_state; /* LAN9303 do not offer reading specific ALR entry. Cache all * static entries in a flat table **/ From 68d50fa494f6265288b60151a6460b33290593c2 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Mon, 6 Nov 2017 12:42:02 +0100 Subject: [PATCH 1942/2177] net: dsa: lan9303: Fix syntax errors in device tree examples Signed-off-by: Egil Hjelmeland Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/lan9303.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/lan9303.txt b/Documentation/devicetree/bindings/net/dsa/lan9303.txt index 4448d063ddf6..464d6bf87605 100644 --- a/Documentation/devicetree/bindings/net/dsa/lan9303.txt +++ b/Documentation/devicetree/bindings/net/dsa/lan9303.txt @@ -52,7 +52,7 @@ I2C managed mode: port@1 { /* external port 1 */ reg = <1>; - label = "lan1; + label = "lan1"; }; port@2 { /* external port 2 */ @@ -89,7 +89,7 @@ MDIO managed mode: port@1 { /* external port 1 */ reg = <1>; - label = "lan1; + label = "lan1"; }; port@2 { /* external port 2 */ From ec5c91c6ca8b2d5ca6edfc968dbfeeaae4ed5572 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Mon, 6 Nov 2017 12:42:03 +0100 Subject: [PATCH 1943/2177] net: dsa: lan9303: Replace msleep(1) with usleep_range() Remove scripts/checkpatch.pl WARNING by replacing msleep(1) with usleep_range() Signed-off-by: Egil Hjelmeland Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index c4afc8f1a66d..70ecd18a5e7d 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -284,7 +284,7 @@ static int lan9303_indirect_phy_wait_for_completion(struct lan9303 *chip) } if (!(reg & LAN9303_PMI_ACCESS_MII_BUSY)) return 0; - msleep(1); + usleep_range(1000, 2000); } return -EIO; @@ -376,7 +376,7 @@ static int lan9303_switch_wait_for_completion(struct lan9303 *chip) } if (!(reg & LAN9303_SWITCH_CSR_CMD_BUSY)) return 0; - msleep(1); + usleep_range(1000, 2000); } return -EIO; From 92f25cafe821f6bd7f7815128bc7584a69512f68 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Mon, 6 Nov 2017 12:42:04 +0100 Subject: [PATCH 1944/2177] net: dsa: lan9303: Adjust indenting Remove scripts/checkpatch.pl CHECKs by adjusting indenting. Signed-off-by: Egil Hjelmeland Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303_i2c.c | 2 +- drivers/net/dsa/lan9303_mdio.c | 2 +- net/dsa/tag_lan9303.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c index 24ec20f7f444..909a7e864246 100644 --- a/drivers/net/dsa/lan9303_i2c.c +++ b/drivers/net/dsa/lan9303_i2c.c @@ -50,7 +50,7 @@ static int lan9303_i2c_probe(struct i2c_client *client, return -ENOMEM; sw_dev->chip.regmap = devm_regmap_init_i2c(client, - &lan9303_i2c_regmap_config); + &lan9303_i2c_regmap_config); if (IS_ERR(sw_dev->chip.regmap)) { ret = PTR_ERR(sw_dev->chip.regmap); dev_err(&client->dev, "Failed to allocate register map: %d\n", diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index 0bc56b9900f9..cc9c2ea1c4fe 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -116,7 +116,7 @@ static int lan9303_mdio_probe(struct mdio_device *mdiodev) return -ENOMEM; sw_dev->chip.regmap = devm_regmap_init(&mdiodev->dev, NULL, sw_dev, - &lan9303_mdio_regmap_config); + &lan9303_mdio_regmap_config); if (IS_ERR(sw_dev->chip.regmap)) { ret = PTR_ERR(sw_dev->chip.regmap); dev_err(&mdiodev->dev, "regmap init failed: %d\n", ret); diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index e526c8967b98..5ba01fc3c6ba 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -88,7 +88,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) } static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt) { u16 *lan9303_tag; unsigned int source_port; From 42ca728b829b8fee8ac85adb79eaffd36f0b4e06 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 Nov 2017 14:43:01 +0300 Subject: [PATCH 1945/2177] bnxt: delete some unreachable code We return on the previous line so this "return 0;" statement should just be deleted. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index b6aa7db99705..69186d188c43 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -148,7 +148,6 @@ static int bnxt_vf_rep_setup_tc_block(struct net_device *dev, return tcf_block_cb_register(f->block, bnxt_vf_rep_setup_tc_block_cb, vf_rep, vf_rep); - return 0; case TC_BLOCK_UNBIND: tcf_block_cb_unregister(f->block, bnxt_vf_rep_setup_tc_block_cb, vf_rep); From 96c623e51f1c40bf524decc48c6fac7ce5dd41f7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Nov 2017 14:26:10 +0100 Subject: [PATCH 1946/2177] of: add of_property_read_variable_* dummy helpers Commit a67e9472da42 ("of: Add array read functions with min/max size limits") added a new interface for reading variable-length arrays from DT properties. One user was added in dsa recently and this causes a build error because that code can be built with CONFIG_OF disabled: net/dsa/dsa2.c: In function 'dsa_switch_parse_member_of': net/dsa/dsa2.c:678:7: error: implicit declaration of function 'of_property_read_variable_u32_array'; did you mean 'of_property_read_u32_array'? [-Werror=implicit-function-declaration] This adds dummy functions for of_property_read_variable_u32_array() and a few others that had been missing here.
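All of these !CONFIG_OF stubs follow the same shape: a static inline with the same prototype as the real helper that simply returns -ENOSYS, so callers such as net/dsa/dsa2.c keep compiling when OF support is configured out and only see the error at run time. A minimal sketch of the pattern (the helper name below is made up for illustration; it is not one of the functions touched by this patch):

#ifdef CONFIG_OF
int of_property_read_example_u32(const struct device_node *np,
				 const char *propname, u32 *out_value);
#else
static inline int of_property_read_example_u32(const struct device_node *np,
					       const char *propname,
					       u32 *out_value)
{
	/* OF support compiled out: same prototype, always reports -ENOSYS */
	return -ENOSYS;
}
#endif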
I decided to move of_property_read_string() and of_property_read_string_helper() in the process to make it easier to compare the two sets of function prototypes to make sure they match. Fixes: 975e6e32215e ("net: dsa: rework switch parsing") Signed-off-by: Arnd Bergmann Acked-by: Rob Herring Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/of.h | 62 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/include/linux/of.h b/include/linux/of.h index b240ed69dc96..b32d418d011a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -675,12 +675,6 @@ static inline int of_property_count_elems_of_size(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_u32_index(const struct device_node *np, - const char *propname, u32 index, u32 *out_value) -{ - return -ENOSYS; -} - static inline int of_property_read_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz) { @@ -707,16 +701,14 @@ static inline int of_property_read_u64_array(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_string(const struct device_node *np, - const char *propname, - const char **out_string) +static inline int of_property_read_u32_index(const struct device_node *np, + const char *propname, u32 index, u32 *out_value) { return -ENOSYS; } -static inline int of_property_read_string_helper(const struct device_node *np, - const char *propname, - const char **out_strs, size_t sz, int index) +static inline int of_property_read_u64_index(const struct device_node *np, + const char *propname, u32 index, u64 *out_value) { return -ENOSYS; } @@ -744,12 +736,51 @@ static inline int of_n_size_cells(struct device_node *np) return 0; } +static inline int of_property_read_variable_u8_array(const struct device_node *np, + const char *propname, u8 *out_values, + size_t sz_min, size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_variable_u16_array(const struct device_node *np, + const char *propname, u16 *out_values, + size_t sz_min, size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_variable_u32_array(const struct device_node *np, + const char *propname, + u32 *out_values, + size_t sz_min, + size_t sz_max) +{ + return -ENOSYS; +} + static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { return -ENOSYS; } +static inline int of_property_read_variable_u64_array(const struct device_node *np, + const char *propname, + u64 *out_values, + size_t sz_min, + size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_string(const struct device_node *np, + const char *propname, + const char **out_string) +{ + return -ENOSYS; +} + static inline int of_property_match_string(const struct device_node *np, const char *propname, const char *string) @@ -757,6 +788,13 @@ static inline int of_property_match_string(const struct device_node *np, return -ENOSYS; } +static inline int of_property_read_string_helper(const struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index) +{ + return -ENOSYS; +} + static inline struct device_node *of_parse_phandle(const struct device_node *np, const char *phandle_name, int index) From 7dfaa7bc99498da1c6c4a48bee8d2d5265161a8c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Nov 2017 15:04:39 +0100 Subject: [PATCH 1947/2177] bnxt: fix bnxt_hwrm_fw_set_time for y2038 On 32-bit 
architectures, rtc_time_to_tm() returns incorrect results in 2038 or later, and do_gettimeofday() is broken for the same reason. This changes the code to use ktime_get_real_seconds() and time64_to_tm() instead, both of them are 2038-safe, and we can also get rid of the CONFIG_RTC_LIB dependency that way. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e5472e5ae7b2..33c49ad697e4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4915,16 +4915,14 @@ hwrm_ver_get_exit: int bnxt_hwrm_fw_set_time(struct bnxt *bp) { -#if IS_ENABLED(CONFIG_RTC_LIB) struct hwrm_fw_set_time_input req = {0}; - struct rtc_time tm; - struct timeval tv; + struct tm tm; + time64_t now = ktime_get_real_seconds(); if (bp->hwrm_spec_code < 0x10400) return -EOPNOTSUPP; - do_gettimeofday(&tv); - rtc_time_to_tm(tv.tv_sec, &tm); + time64_to_tm(now, 0, &tm); bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1); req.year = cpu_to_le16(1900 + tm.tm_year); req.month = 1 + tm.tm_mon; @@ -4933,9 +4931,6 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp) req.minute = tm.tm_min; req.second = tm.tm_sec; return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); -#else - return -EOPNOTSUPP; -#endif } static int bnxt_hwrm_port_qstats(struct bnxt *bp) From ac71a1f94418b9b83c9bf04f76954570d88c9590 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Mon, 6 Nov 2017 15:19:49 +0100 Subject: [PATCH 1948/2177] net: dsa: lan9303: Drop port range check Now that ds->num_ports is 3, there is no need to check range of "port" parameter. Signed-off-by: Egil Hjelmeland Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303-core.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 70ecd18a5e7d..320651a57c6f 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1057,17 +1057,7 @@ static int lan9303_port_enable(struct dsa_switch *ds, int port, { struct lan9303 *chip = ds->priv; - /* enable internal packet processing */ - switch (port) { - case 1: - case 2: - return lan9303_enable_processing_port(chip, port); - default: - dev_dbg(chip->dev, - "Error: request to power up invalid port %d\n", port); - } - - return -ENODEV; + return lan9303_enable_processing_port(chip, port); } static void lan9303_port_disable(struct dsa_switch *ds, int port, @@ -1075,18 +1065,9 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port, { struct lan9303 *chip = ds->priv; - /* disable internal packet processing */ - switch (port) { - case 1: - case 2: - lan9303_disable_processing_port(chip, port); - lan9303_phy_write(ds, chip->phy_addr_sel_strap + port, - MII_BMCR, BMCR_PDOWN); - break; - default: - dev_dbg(chip->dev, - "Error: request to power down invalid port %d\n", port); - } + lan9303_disable_processing_port(chip, port); + lan9303_phy_write(ds, chip->phy_addr_sel_strap + port, + MII_BMCR, BMCR_PDOWN); } static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, From 03ac738d5cf2f97ffa1209f6fd5d4bc211a933de Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Nov 2017 15:04:54 +0000 Subject: [PATCH 1949/2177] rtnetlink: fix missing size for IFLA_IF_NETNSID The size for IFLA_IF_NETNSID is missing from the size calculation because the preceding semicolon was not removed. Fix this by removing the semicolon. Detected by CoverityScan, CID#1461135 ("Structurally dead code") Fixes: 79e1ad148c84 ("rtnetlink: use netnsid to query interface") Signed-off-by: Colin Ian King Acked-by: Jiri Benc Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dc5ad84ac096..dabba2a91fc8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -920,7 +920,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ - + nla_total_size(1); /* IFLA_PROTO_DOWN */ + + nla_total_size(1) /* IFLA_PROTO_DOWN */ + nla_total_size(4) /* IFLA_IF_NETNSID */ + 0; } From 5adb55c92918225005873aaac5e6af36789bf0ad Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 6 Nov 2017 22:53:29 +0100 Subject: [PATCH 1950/2177] fsl/fman: Remove a useless call to 'dev_set_drvdata()' Commit c6e26ea8c893 ("dpaa_eth: change device used") has removed usage of 'dev_set_drvdata()' in the 'mac_probe()' function. This call should also be axed. Signed-off-by: Christophe JAILLET Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/fman/mac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 1d6da1ea7bfb..c27667a005f7 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -713,7 +713,6 @@ static int mac_probe(struct platform_device *_of_dev) __devm_release_region(dev, fman_get_mem_region(priv->fman), res.start, res.end + 1 - res.start); devm_kfree(dev, mac_dev); - dev_set_drvdata(dev, NULL); return -ENODEV; } From 336eac4347e74589f868e5da9ca0106953942aa8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 6 Nov 2017 22:53:30 +0100 Subject: [PATCH 1951/2177] fsl/fman: Remove some useless code There is no need to release explicitly some devm_ allocated resources. If the 'mac_probe()' probe function fails, they will be released automatically, as already done in the other error handling paths of this function. Also goto '_return_of_get_parent' as in the other error handling paths. This is useless (priv->fixed_link is NULL at this point), but at least it is consistent. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/mac.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index c27667a005f7..ca12e28129ed 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -709,11 +709,8 @@ static int mac_probe(struct platform_device *_of_dev) } if (!of_device_is_available(mac_node)) { - devm_iounmap(dev, priv->vaddr); - __devm_release_region(dev, fman_get_mem_region(priv->fman), - res.start, res.end + 1 - res.start); - devm_kfree(dev, mac_dev); - return -ENODEV; + err = -ENODEV; + goto _return_of_get_parent; } /* Get the cell-index */ From 25850c31c8b5c2ce7fb922f5e80de7227ecf6be4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 6 Nov 2017 22:53:31 +0100 Subject: [PATCH 1952/2177] fsl/fman: Add a missing 'of_node_put()' call in an error handling path If 'of_phy_find_device()' fails, we must undo the previous 'of_node_get()' call, as done in the following error handling code. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/mac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index ca12e28129ed..86c1e69f44d6 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -821,6 +821,7 @@ static int mac_probe(struct platform_device *_of_dev) phy = of_phy_find_device(mac_dev->phy_node); if (!phy) { err = -EINVAL; + of_node_put(mac_dev->phy_node); goto _return_of_get_parent; } From e51f37bd3ae8e09d131bce5485ec7c5b03726b02 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 6 Nov 2017 22:53:32 +0100 Subject: [PATCH 1953/2177] fsl/fman: Remove a useless 'dev_err()' call Memory allocation functions already display some information in case of memory allocation failure. There is no need to add an extra 'dev_err' here. Signed-off-by: Christophe JAILLET Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/fman/mac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 86c1e69f44d6..88c0a0636b44 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -615,7 +615,6 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev = devm_kzalloc(dev, sizeof(*mac_dev), GFP_KERNEL); if (!mac_dev) { err = -ENOMEM; - dev_err(dev, "devm_kzalloc() = %d\n", err); goto _return; } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); From 118d6298f6f0556e54331a6e86de2313d134fdbb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 6 Nov 2017 22:56:53 +0100 Subject: [PATCH 1954/2177] net: mvpp2: add ethtool GOP statistics Add ethtool statistics support by reading the GOP statistics from the hardware counters. Also implement a workqueue to gather the statistics every second or some 32-bit counters could overflow. Suggested-by: Stefan Chulski Signed-off-by: Miquel Raynal Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 228 ++++++++++++++++++++++++++- 1 file changed, 223 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 965b6a829a5d..aa38bca597f2 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -799,6 +799,42 @@ enum mvpp2_bm_type { MVPP2_BM_SWF_SHORT }; +/* GMAC MIB Counters register definitions */ +#define MVPP21_MIB_COUNTERS_OFFSET 0x1000 +#define MVPP21_MIB_COUNTERS_PORT_SZ 0x400 +#define MVPP22_MIB_COUNTERS_OFFSET 0x0 +#define MVPP22_MIB_COUNTERS_PORT_SZ 0x100 + +#define MVPP2_MIB_GOOD_OCTETS_RCVD 0x0 +#define MVPP2_MIB_BAD_OCTETS_RCVD 0x8 +#define MVPP2_MIB_CRC_ERRORS_SENT 0xc +#define MVPP2_MIB_UNICAST_FRAMES_RCVD 0x10 +#define MVPP2_MIB_BROADCAST_FRAMES_RCVD 0x18 +#define MVPP2_MIB_MULTICAST_FRAMES_RCVD 0x1c +#define MVPP2_MIB_FRAMES_64_OCTETS 0x20 +#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS 0x24 +#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS 0x28 +#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS 0x2c +#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS 0x30 +#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS 0x34 +#define MVPP2_MIB_GOOD_OCTETS_SENT 0x38 +#define MVPP2_MIB_UNICAST_FRAMES_SENT 0x40 +#define MVPP2_MIB_MULTICAST_FRAMES_SENT 0x48 +#define MVPP2_MIB_BROADCAST_FRAMES_SENT 0x4c +#define MVPP2_MIB_FC_SENT 0x54 +#define MVPP2_MIB_FC_RCVD 0x58 +#define MVPP2_MIB_RX_FIFO_OVERRUN 0x5c +#define MVPP2_MIB_UNDERSIZE_RCVD 0x60 +#define MVPP2_MIB_FRAGMENTS_RCVD 0x64 +#define MVPP2_MIB_OVERSIZE_RCVD 0x68 +#define MVPP2_MIB_JABBER_RCVD 0x6c +#define MVPP2_MIB_MAC_RCV_ERROR 0x70 +#define MVPP2_MIB_BAD_CRC_EVENT 0x74 +#define MVPP2_MIB_COLLISION 0x78 +#define MVPP2_MIB_LATE_COLLISION 0x7c + +#define MVPP2_MIB_COUNTERS_STATS_DELAY (1 * HZ) + /* Definitions */ /* Shared Packet Processor resources */ @@ -826,6 +862,7 @@ struct mvpp2 { struct clk *axi_clk; /* List of pointers to port structures */ + int port_count; struct mvpp2_port **port_list; /* Aggregated TXQs */ @@ -847,6 +884,12 @@ struct mvpp2 { /* Maximum number of RXQs per port */ unsigned int max_port_rxqs; + + /* Workqueue to gather hardware statistics with its lock */ + struct mutex gather_stats_lock; + struct delayed_work stats_work; + char queue_name[30]; + struct workqueue_struct *stats_queue; }; struct mvpp2_pcpu_stats { @@ -891,6 +934,7 @@ struct mvpp2_port { /* Per-port registers' base address */ void __iomem *base; 
+ void __iomem *stats_base; struct mvpp2_rx_queue **rxqs; unsigned int nrxqs; @@ -909,6 +953,7 @@ struct mvpp2_port { u16 tx_ring_size; u16 rx_ring_size; struct mvpp2_pcpu_stats __percpu *stats; + u64 *ethtool_stats; phy_interface_t phy_interface; struct device_node *phy_node; @@ -4778,9 +4823,136 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port) writel(val, port->base + MVPP2_GMAC_CTRL_1_REG); } +struct mvpp2_ethtool_counter { + unsigned int offset; + const char string[ETH_GSTRING_LEN]; + bool reg_is_64b; +}; + +static u64 mvpp2_read_count(struct mvpp2_port *port, + const struct mvpp2_ethtool_counter *counter) +{ + u64 val; + + val = readl(port->stats_base + counter->offset); + if (counter->reg_is_64b) + val += (u64)readl(port->stats_base + counter->offset + 4) << 32; + + return val; +} + +/* Due to the fact that software statistics and hardware statistics are, by + * design, incremented at different moments in the chain of packet processing, + * it is very likely that incoming packets could have been dropped after being + * counted by hardware but before reaching software statistics (most probably + * multicast packets), and in the oppposite way, during transmission, FCS bytes + * are added in between as well as TSO skb will be split and header bytes added. + * Hence, statistics gathered from userspace with ifconfig (software) and + * ethtool (hardware) cannot be compared. + */ +static const struct mvpp2_ethtool_counter mvpp2_ethtool_regs[] = { + { MVPP2_MIB_GOOD_OCTETS_RCVD, "good_octets_received", true }, + { MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" }, + { MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" }, + { MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" }, + { MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" }, + { MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" }, + { MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" }, + { MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" }, + { MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" }, + { MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" }, + { MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" }, + { MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" }, + { MVPP2_MIB_GOOD_OCTETS_SENT, "good_octets_sent", true }, + { MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" }, + { MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" }, + { MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" }, + { MVPP2_MIB_FC_SENT, "fc_sent" }, + { MVPP2_MIB_FC_RCVD, "fc_received" }, + { MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" }, + { MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" }, + { MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" }, + { MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" }, + { MVPP2_MIB_JABBER_RCVD, "jabber_received" }, + { MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" }, + { MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" }, + { MVPP2_MIB_COLLISION, "collision" }, + { MVPP2_MIB_LATE_COLLISION, "late_collision" }, +}; + +static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset, + u8 *data) +{ + if (sset == ETH_SS_STATS) { + int i; + + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + memcpy(data + i * ETH_GSTRING_LEN, + &mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN); + } +} + +static void mvpp2_gather_hw_statistics(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct mvpp2 *priv = container_of(del_work, struct mvpp2, stats_work); + struct mvpp2_port *port; + u64 
*pstats; + int i, j; + + mutex_lock(&priv->gather_stats_lock); + + for (i = 0; i < priv->port_count; i++) { + if (!priv->port_list[i]) + continue; + + port = priv->port_list[i]; + pstats = port->ethtool_stats; + for (j = 0; j < ARRAY_SIZE(mvpp2_ethtool_regs); j++) + *pstats++ += mvpp2_read_count(port, + &mvpp2_ethtool_regs[j]); + } + + /* No need to read again the counters right after this function if it + * was called asynchronously by the user (ie. use of ethtool). + */ + cancel_delayed_work(&priv->stats_work); + queue_delayed_work(priv->stats_queue, &priv->stats_work, + MVPP2_MIB_COUNTERS_STATS_DELAY); + + mutex_unlock(&priv->gather_stats_lock); +} + +static void mvpp2_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mvpp2_port *port = netdev_priv(dev); + + /* Update statistics for all ports, copy only those actually needed */ + mvpp2_gather_hw_statistics(&port->priv->stats_work.work); + + mutex_lock(&port->priv->gather_stats_lock); + memcpy(data, port->ethtool_stats, + sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs)); + mutex_unlock(&port->priv->gather_stats_lock); +} + +static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset) +{ + if (sset == ETH_SS_STATS) + return ARRAY_SIZE(mvpp2_ethtool_regs); + + return -EOPNOTSUPP; +} + static void mvpp2_port_reset(struct mvpp2_port *port) { u32 val; + unsigned int i; + + /* Read the GOP statistics to reset the hardware counters */ + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + mvpp2_read_count(port, &mvpp2_ethtool_regs[i]); val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) & ~MVPP2_GMAC_PORT_RESET_MASK; @@ -6912,6 +7084,10 @@ static int mvpp2_open(struct net_device *dev) if (priv->hw_version == MVPP22) mvpp22_init_rss(port); + /* Start hardware statistics gathering */ + queue_delayed_work(priv->stats_queue, &priv->stats_work, + MVPP2_MIB_COUNTERS_STATS_DELAY); + return 0; err_free_link_irq: @@ -6956,6 +7132,9 @@ static int mvpp2_stop(struct net_device *dev) mvpp2_cleanup_rxqs(port); mvpp2_cleanup_txqs(port); + cancel_delayed_work_sync(&priv->stats_work); + flush_workqueue(priv->stats_queue); + return 0; } @@ -7267,6 +7446,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = { .get_drvinfo = mvpp2_ethtool_get_drvinfo, .get_ringparam = mvpp2_ethtool_get_ringparam, .set_ringparam = mvpp2_ethtool_set_ringparam, + .get_strings = mvpp2_ethtool_get_strings, + .get_ethtool_stats = mvpp2_ethtool_get_stats, + .get_sset_count = mvpp2_ethtool_get_sset_count, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, }; @@ -7670,6 +7852,10 @@ static int mvpp2_port_probe(struct platform_device *pdev, err = PTR_ERR(port->base); goto err_free_irq; } + + port->stats_base = port->priv->lms_base + + MVPP21_MIB_COUNTERS_OFFSET + + port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ; } else { if (of_property_read_u32(port_node, "gop-port-id", &port->gop_id)) { @@ -7679,15 +7865,26 @@ static int mvpp2_port_probe(struct platform_device *pdev, } port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id); + port->stats_base = port->priv->iface_base + + MVPP22_MIB_COUNTERS_OFFSET + + port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ; } - /* Alloc per-cpu stats */ + /* Alloc per-cpu and ethtool stats */ port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats); if (!port->stats) { err = -ENOMEM; goto err_free_irq; } + port->ethtool_stats = devm_kcalloc(&pdev->dev, + ARRAY_SIZE(mvpp2_ethtool_regs), + sizeof(u64), GFP_KERNEL); + if (!port->ethtool_stats) { + 
err = -ENOMEM; + goto err_free_stats; + } + mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from); port->tx_ring_size = MVPP2_MAX_TXD; @@ -8010,7 +8207,7 @@ static int mvpp2_probe(struct platform_device *pdev) struct mvpp2 *priv; struct resource *res; void __iomem *base; - int port_count, i; + int i; int err; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); @@ -8125,14 +8322,14 @@ static int mvpp2_probe(struct platform_device *pdev) goto err_mg_clk; } - port_count = of_get_available_child_count(dn); - if (port_count == 0) { + priv->port_count = of_get_available_child_count(dn); + if (priv->port_count == 0) { dev_err(&pdev->dev, "no ports enabled\n"); err = -ENODEV; goto err_mg_clk; } - priv->port_list = devm_kcalloc(&pdev->dev, port_count, + priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count, sizeof(*priv->port_list), GFP_KERNEL); if (!priv->port_list) { @@ -8149,6 +8346,24 @@ static int mvpp2_probe(struct platform_device *pdev) i++; } + /* Statistics must be gathered regularly because some of them (like + * packets counters) are 32-bit registers and could overflow quite + * quickly. For instance, a 10Gb link used at full bandwidth with the + * smallest packets (64B) will overflow a 32-bit counter in less than + * 30 seconds. Then, use a workqueue to fill 64-bit counters. + */ + mutex_init(&priv->gather_stats_lock); + snprintf(priv->queue_name, sizeof(priv->queue_name), + "stats-wq-%s%s", netdev_name(priv->port_list[0]->dev), + priv->port_count > 1 ? "+" : ""); + priv->stats_queue = create_singlethread_workqueue(priv->queue_name); + if (!priv->stats_queue) { + err = -ENOMEM; + goto err_mg_clk; + } + + INIT_DELAYED_WORK(&priv->stats_work, mvpp2_gather_hw_statistics); + platform_set_drvdata(pdev, priv); return 0; @@ -8170,6 +8385,9 @@ static int mvpp2_remove(struct platform_device *pdev) struct device_node *port_node; int i = 0; + destroy_workqueue(priv->stats_queue); + mutex_destroy(&priv->gather_stats_lock); + for_each_available_child_of_node(dn, port_node) { if (priv->port_list[i]) mvpp2_port_remove(priv->port_list[i]); From 7f5d3f2721b07ab5896526c5992edd2ab1665561 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Nov 2017 11:38:32 +0100 Subject: [PATCH 1955/2177] pktgen: document 32-bit timestamp overflow Timestamps in pktgen are currently retrieved using the deprecated do_gettimeofday() function that wraps its signed 32-bit seconds in 2038 (on 32-bit architectures) and requires a division operation to calculate microseconds. The pktgen header is also defined with the same limitations, hardcoding to a 32-bit seconds field that can be interpreted as unsigned to produce times that only wrap in 2106. Whatever code reads the timestamps should be aware of that problem in general, but probably doesn't care too much as we are mostly interested in the time passing between packets, and that is correctly represented. Using 64-bit nanoseconds would be cheaper and good for 584 years. Using monotonic times would also make this unambiguous by avoiding the overflow, but would make it harder to correlate to the times with those on remote machines. Either approach would require adding a new runtime flag and implementing the same thing on the remote side, which we probably don't want to do unless someone sees it as a real problem. Also, this should be coordinated with other pktgen implementations and might need a new magic number. 
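As a back-of-the-envelope check on those dates and on the 584-year figure (arithmetic only, not part of the patch):

  2^31 s  ≈  68 years -> signed 32-bit seconds counted from 1970 wrap in January 2038
  2^32 s  ≈ 136 years -> the same field read as unsigned wraps in 2106
  2^64 ns ≈ 584 years -> unsigned 64-bit nanoseconds counted from 1970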
For the moment, I'm documenting the overflow in the source code, and changing the implementation over to an open-coded ktime_get_real_ts64() plus division, so we don't have to look at it again while scanning for deprecated time interfaces. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/core/pktgen.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e3fa53a07d34..40db0b7e37ac 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, int datalen) { - struct timeval timestamp; + struct timespec64 timestamp; struct pktgen_hdr *pgh; pgh = skb_put(skb, sizeof(*pgh)); @@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->tv_sec = 0; pgh->tv_usec = 0; } else { - do_gettimeofday(×tamp); + /* + * pgh->tv_sec wraps in y2106 when interpreted as unsigned + * as done by wireshark, or y2038 when interpreted as signed. + * This is probably harmless, but if anyone wants to improve + * it, we could introduce a variant that puts 64-bit nanoseconds + * into the respective header bytes. + * This would also be slightly faster to read. + */ + ktime_get_real_ts64(×tamp); pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); + pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC); } } From b2d0f5d5dc53532e6f07bc546a476a55ebdfe0f3 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Tue, 7 Nov 2017 21:07:02 +0800 Subject: [PATCH 1956/2177] openvswitch: enable NSH support v16->17 - Fixed disputed check code: keep them in nsh_push and nsh_pop but also add them in __ovs_nla_copy_actions v15->v16 - Add csum recalculation for nsh_push, nsh_pop and set_nsh pointed out by Pravin - Move nsh key into the union with ipv4 and ipv6 and add check for nsh key in match_validate pointed out by Pravin - Add nsh check in validate_set and __ovs_nla_copy_actions v14->v15 - Check size in nsh_hdr_from_nlattr - Fixed four small issues pointed out By Jiri and Eric v13->v14 - Rename skb_push_nsh to nsh_push per Dave's comment - Rename skb_pop_nsh to nsh_pop per Dave's comment v12->v13 - Fix NSH header length check in set_nsh v11->v12 - Fix missing changes old comments pointed out - Fix new comments for v11 v10->v11 - Fix the left three disputable comments for v9 but not fixed in v10. v9->v10 - Change struct ovs_key_nsh to struct ovs_nsh_key_base base; __be32 context[NSH_MD1_CONTEXT_SIZE]; - Fix new comments for v9 v8->v9 - Fix build error reported by daily intel build because nsh module isn't selected by openvswitch v7->v8 - Rework nested value and mask for OVS_KEY_ATTR_NSH - Change pop_nsh to adapt to nsh kernel module - Fix many issues per comments from Jiri Benc v6->v7 - Remove NSH GSO patches in v6 because Jiri Benc reworked it as another patch series and they have been merged. - Change it to adapt to nsh kernel module added by NSH GSO patch series v5->v6 - Fix the rest comments for v4. - Add NSH GSO support for VxLAN-gpe + NSH and Eth + NSH. v4->v5 - Fix many comments by Jiri Benc and Eric Garver for v4. v3->v4 - Add new NSH match field ttl - Update NSH header to the latest format which will be final format and won't change per its author's confirmation. - Fix comments for v3. v2->v3 - Change OVS_KEY_ATTR_NSH to nested key to handle length-fixed attributes and length-variable attriubte more flexibly. 
- Remove struct ovs_action_push_nsh completely - Add code to handle nested attribute for SET_MASKED - Change PUSH_NSH to use the nested OVS_KEY_ATTR_NSH to transfer NSH header data. - Fix comments and coding style issues by Jiri and Eric v1->v2 - Change encap_nsh and decap_nsh to push_nsh and pop_nsh - Dynamically allocate struct ovs_action_push_nsh for length-variable metadata. OVS master and 2.8 branch has merged NSH userspace patch series, this patch is to enable NSH support in kernel data path in order that OVS can support NSH in compat mode by porting this. Signed-off-by: Yi Yang Acked-by: Jiri Benc Acked-by: Eric Garver Acked-by: Pravin Shelar Signed-off-by: David S. Miller --- include/net/nsh.h | 3 + include/uapi/linux/openvswitch.h | 29 +++ net/nsh/nsh.c | 60 ++++++ net/openvswitch/Kconfig | 1 + net/openvswitch/actions.c | 116 +++++++++++ net/openvswitch/flow.c | 51 +++++ net/openvswitch/flow.h | 7 + net/openvswitch/flow_netlink.c | 343 ++++++++++++++++++++++++++++++- net/openvswitch/flow_netlink.h | 5 + 9 files changed, 613 insertions(+), 2 deletions(-) diff --git a/include/net/nsh.h b/include/net/nsh.h index a1eaea20be96..350b1ad11c7f 100644 --- a/include/net/nsh.h +++ b/include/net/nsh.h @@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags, NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK); } +int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh); +int nsh_pop(struct sk_buff *skb); + #endif /* __NET_NSH_H */ diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 501e4c4e2a03..ec75a685f1dd 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -336,6 +336,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ + OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -495,6 +496,30 @@ struct ovs_key_ct_tuple_ipv6 { __u8 ipv6_proto; }; +enum ovs_nsh_key_attr { + OVS_NSH_KEY_ATTR_UNSPEC, + OVS_NSH_KEY_ATTR_BASE, /* struct ovs_nsh_key_base. */ + OVS_NSH_KEY_ATTR_MD1, /* struct ovs_nsh_key_md1. */ + OVS_NSH_KEY_ATTR_MD2, /* variable-length octets for MD type 2. */ + __OVS_NSH_KEY_ATTR_MAX +}; + +#define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1) + +struct ovs_nsh_key_base { + __u8 flags; + __u8 ttl; + __u8 mdtype; + __u8 np; + __be32 path_hdr; +}; + +#define NSH_MD1_CONTEXT_SIZE 4 + +struct ovs_nsh_key_md1 { + __be32 context[NSH_MD1_CONTEXT_SIZE]; +}; + /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow @@ -811,6 +836,8 @@ struct ovs_action_push_eth { * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the * packet. * @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet. + * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet. + * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -841,6 +868,8 @@ enum ovs_action_attr { OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ OVS_ACTION_ATTR_POP_ETH, /* No argument. */ OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */ + OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. 
*/ + OVS_ACTION_ATTR_POP_NSH, /* No argument. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 58fb827439a8..d7da99a0b0b8 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -14,6 +14,66 @@ #include #include +int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh) +{ + struct nshhdr *nh; + size_t length = nsh_hdr_len(pushed_nh); + u8 next_proto; + + if (skb->mac_len) { + next_proto = TUN_P_ETHERNET; + } else { + next_proto = tun_p_from_eth_p(skb->protocol); + if (!next_proto) + return -EAFNOSUPPORT; + } + + /* Add the NSH header */ + if (skb_cow_head(skb, length) < 0) + return -ENOMEM; + + skb_push(skb, length); + nh = (struct nshhdr *)(skb->data); + memcpy(nh, pushed_nh, length); + nh->np = next_proto; + skb_postpush_rcsum(skb, nh, length); + + skb->protocol = htons(ETH_P_NSH); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(nsh_push); + +int nsh_pop(struct sk_buff *skb) +{ + struct nshhdr *nh; + size_t length; + __be16 inner_proto; + + if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN)) + return -ENOMEM; + nh = (struct nshhdr *)(skb->data); + length = nsh_hdr_len(nh); + inner_proto = tun_p_to_eth_p(nh->np); + if (!pskb_may_pull(skb, length)) + return -ENOMEM; + + if (!inner_proto) + return -EAFNOSUPPORT; + + skb_pull_rcsum(skb, length); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + skb->protocol = inner_proto; + + return 0; +} +EXPORT_SYMBOL_GPL(nsh_pop); + static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index ce947292ae77..2650205cdaf9 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -14,6 +14,7 @@ config OPENVSWITCH select MPLS select NET_MPLS_GSO select DST_CACHE + select NET_NSH ---help--- Open vSwitch is a multilayer Ethernet switch targeted at virtualized environments. 
In addition to supporting a variety of features diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index a551232daf61..9a6a6d51e421 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -43,6 +43,7 @@ #include "flow.h" #include "conntrack.h" #include "vport.h" +#include "flow_netlink.h" struct deferred_action { struct sk_buff *skb; @@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, return 0; } +static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key, + const struct nshhdr *nh) +{ + int err; + + err = nsh_push(skb, nh); + if (err) + return err; + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + +static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key) +{ + int err; + + err = nsh_pop(skb); + if (err) + return err; + + /* safe right before invalidate_flow_key */ + if (skb->protocol == htons(ETH_P_TEB)) + key->mac_proto = MAC_PROTO_ETHERNET; + else + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, __be32 addr, __be32 new_addr) { @@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } +static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + struct nshhdr *nh; + size_t length; + int err; + u8 flags; + u8 ttl; + int i; + + struct ovs_key_nsh key; + struct ovs_key_nsh mask; + + err = nsh_key_from_nlattr(a, &key, &mask); + if (err) + return err; + + /* Make sure the NSH base header is there */ + if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN)) + return -ENOMEM; + + nh = nsh_hdr(skb); + length = nsh_hdr_len(nh); + + /* Make sure the whole NSH header is there */ + err = skb_ensure_writable(skb, skb_network_offset(skb) + + length); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + skb_postpull_rcsum(skb, nh, length); + flags = nsh_get_flags(nh); + flags = OVS_MASKED(flags, key.base.flags, mask.base.flags); + flow_key->nsh.base.flags = flags; + ttl = nsh_get_ttl(nh); + ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl); + flow_key->nsh.base.ttl = ttl; + nsh_set_flags_and_ttl(nh, flags, ttl); + nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr, + mask.base.path_hdr); + flow_key->nsh.base.path_hdr = nh->path_hdr; + switch (nh->mdtype) { + case NSH_M_TYPE1: + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) { + nh->md1.context[i] = + OVS_MASKED(nh->md1.context[i], key.context[i], + mask.context[i]); + } + memcpy(flow_key->nsh.context, nh->md1.context, + sizeof(nh->md1.context)); + break; + case NSH_M_TYPE2: + memset(flow_key->nsh.context, 0, + sizeof(flow_key->nsh.context)); + break; + default: + return -EINVAL; + } + skb_postpush_rcsum(skb, nh, length); + return 0; +} + /* Must follow skb_ensure_writable() since that can move the skb data. 
*/ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) @@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb, get_mask(a, struct ovs_key_ethernet *)); break; + case OVS_KEY_ATTR_NSH: + err = set_nsh(skb, flow_key, a); + break; + case OVS_KEY_ATTR_IPV4: err = set_ipv4(skb, flow_key, nla_data(a), get_mask(a, struct ovs_key_ipv4 *)); @@ -1214,6 +1314,22 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_POP_ETH: err = pop_eth(skb, key); break; + + case OVS_ACTION_ATTR_PUSH_NSH: { + u8 buffer[NSH_HDR_MAX_LEN]; + struct nshhdr *nh = (struct nshhdr *)buffer; + + err = nsh_hdr_from_nlattr(nla_data(a), nh, + NSH_HDR_MAX_LEN); + if (unlikely(err)) + break; + err = push_nsh(skb, key, nh); + break; + } + + case OVS_ACTION_ATTR_POP_NSH: + err = pop_nsh(skb, key); + break; } if (unlikely(err)) { diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 8c94cef25a72..864ddb1e3642 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "conntrack.h" #include "datapath.h" @@ -490,6 +491,52 @@ invalid: return 0; } +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) +{ + struct nshhdr *nh; + unsigned int nh_ofs = skb_network_offset(skb); + u8 version, length; + int err; + + err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + version = nsh_get_ver(nh); + length = nsh_hdr_len(nh); + + if (version != 0) + return -EINVAL; + + err = check_header(skb, nh_ofs + length); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + key->nsh.base.flags = nsh_get_flags(nh); + key->nsh.base.ttl = nsh_get_ttl(nh); + key->nsh.base.mdtype = nh->mdtype; + key->nsh.base.np = nh->np; + key->nsh.base.path_hdr = nh->path_hdr; + switch (key->nsh.base.mdtype) { + case NSH_M_TYPE1: + if (length != NSH_M_TYPE1_LEN) + return -EINVAL; + memcpy(key->nsh.context, nh->md1.context, + sizeof(nh->md1)); + break; + case NSH_M_TYPE2: + memset(key->nsh.context, 0, + sizeof(nh->md1)); + break; + default: + return -EINVAL; + } + + return 0; +} + /** * key_extract - extracts a flow key from an Ethernet frame. * @skb: sk_buff that contains the frame, with skb->data pointing to the @@ -735,6 +782,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->tp, 0, sizeof(key->tp)); } } + } else if (key->eth.type == htons(ETH_P_NSH)) { + error = parse_nsh(skb, key); + if (error) + return error; } return 0; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 1875bba4f865..c670dd24b8b7 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -35,6 +35,7 @@ #include #include #include +#include struct sk_buff; @@ -66,6 +67,11 @@ struct vlan_head { (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) +struct ovs_key_nsh { + struct ovs_nsh_key_base base; + __be32 context[NSH_MD1_CONTEXT_SIZE]; +}; + struct sw_flow_key { u8 tun_opts[IP_TUNNEL_OPTS_MAX]; u8 tun_opts_len; @@ -143,6 +149,7 @@ struct sw_flow_key { } nd; }; } ipv6; + struct ovs_key_nsh nsh; /* network service header */ }; struct { /* Connection tracking fields not packed above. 
*/ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dc0d79092e74..4201f9293af3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "flow_netlink.h" @@ -80,9 +81,11 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_HASH: case OVS_ACTION_ATTR_POP_ETH: case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_NSH: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_PUSH_ETH: case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_PUSH_NSH: case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_SET: @@ -175,7 +178,8 @@ static bool match_validate(const struct sw_flow_match *match, | (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ARP) | (1 << OVS_KEY_ATTR_ND) - | (1 << OVS_KEY_ATTR_MPLS)); + | (1 << OVS_KEY_ATTR_MPLS) + | (1 << OVS_KEY_ATTR_NSH)); /* Always allowed mask fields. */ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) @@ -284,6 +288,14 @@ static bool match_validate(const struct sw_flow_match *match, } } + if (match->key->eth.type == htons(ETH_P_NSH)) { + key_expected |= 1 << OVS_KEY_ATTR_NSH; + if (match->mask && + match->mask->key.eth.type == htons(0xffff)) { + mask_allowed |= 1 << OVS_KEY_ATTR_NSH; + } + } + if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", @@ -325,12 +337,25 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ } +size_t ovs_nsh_key_attr_size(void) +{ + /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider + * updating this function. + */ + return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */ + /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are + * mutually exclusive, so the bigger one can cover + * the small one. + */ + + nla_total_size(NSH_CTX_HDRS_MAX_LEN); +} + size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -344,6 +369,8 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ + + nla_total_size(0) /* OVS_KEY_ATTR_NSH */ + + ovs_nsh_key_attr_size() + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -377,6 +404,13 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) }, }; +static const struct ovs_len_tbl +ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = { + [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) }, + [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) }, + [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, @@ -409,6 +443,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, + [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED, + .next = ovs_nsh_key_attr_lens, }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -1227,6 +1263,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, return 0; } +int nsh_hdr_from_nlattr(const struct nlattr *attr, + struct nshhdr *nh, size_t size) +{ + struct nlattr *a; + int rem; + u8 flags = 0; + u8 ttl = 0; + int mdlen = 0; + + /* validate_nsh has check this, so we needn't do duplicate check here + */ + if (size < NSH_BASE_HDR_LEN) + return -ENOBUFS; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + + flags = base->flags; + ttl = base->ttl; + nh->np = base->np; + nh->mdtype = base->mdtype; + nh->path_hdr = base->path_hdr; + break; + } + case OVS_NSH_KEY_ATTR_MD1: + mdlen = nla_len(a); + if (mdlen > size - NSH_BASE_HDR_LEN) + return -ENOBUFS; + memcpy(&nh->md1, nla_data(a), mdlen); + break; + + case OVS_NSH_KEY_ATTR_MD2: + mdlen = nla_len(a); + if (mdlen > size - NSH_BASE_HDR_LEN) + return -ENOBUFS; + memcpy(&nh->md2, nla_data(a), mdlen); + break; + + default: + return -EINVAL; + } + } + + /* nsh header length = NSH_BASE_HDR_LEN + mdlen */ + nh->ver_flags_ttl_len = 0; + nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen); + + return 0; +} + +int nsh_key_from_nlattr(const struct nlattr *attr, + struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask) +{ + struct nlattr *a; + int rem; + + /* validate_nsh has check this, so we needn't do duplicate check here + */ + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + const struct ovs_nsh_key_base *base_mask = base + 1; + + nsh->base = *base; + nsh_mask->base = *base_mask; + break; + } + case OVS_NSH_KEY_ATTR_MD1: { + const struct ovs_nsh_key_md1 *md1 = nla_data(a); + const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; + + memcpy(nsh->context, md1->context, sizeof(*md1)); + memcpy(nsh_mask->context, md1_mask->context, + sizeof(*md1_mask)); + break; + } + case OVS_NSH_KEY_ATTR_MD2: + /* Not supported yet */ + return -ENOTSUPP; + default: + return -EINVAL; + } + } + + return 0; +} + +static int nsh_key_put_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask, + bool is_push_nsh, bool log) +{ + struct nlattr *a; + int rem; + bool has_base = false; + bool has_md1 = false; + bool has_md2 = false; + u8 mdtype = 0; + int mdlen = 0; + + if (WARN_ON(is_push_nsh && is_mask)) + return -EINVAL; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + int i; + + if (type > OVS_NSH_KEY_ATTR_MAX) { + OVS_NLERR(log, "nsh attr %d is out of range max %d", + type, OVS_NSH_KEY_ATTR_MAX); + return -EINVAL; + } + + if (!check_attr_len(nla_len(a), + ovs_nsh_key_attr_lens[type].len)) { + OVS_NLERR( + log, + "nsh attr %d has unexpected len %d expected %d", + type, + nla_len(a), + ovs_nsh_key_attr_lens[type].len + ); + return -EINVAL; + } + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base 
= nla_data(a); + + has_base = true; + mdtype = base->mdtype; + SW_FLOW_KEY_PUT(match, nsh.base.flags, + base->flags, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.ttl, + base->ttl, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.mdtype, + base->mdtype, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.np, + base->np, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.path_hdr, + base->path_hdr, is_mask); + break; + } + case OVS_NSH_KEY_ATTR_MD1: { + const struct ovs_nsh_key_md1 *md1 = nla_data(a); + + has_md1 = true; + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) + SW_FLOW_KEY_PUT(match, nsh.context[i], + md1->context[i], is_mask); + break; + } + case OVS_NSH_KEY_ATTR_MD2: + if (!is_push_nsh) /* Not supported MD type 2 yet */ + return -ENOTSUPP; + + has_md2 = true; + mdlen = nla_len(a); + if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) { + OVS_NLERR( + log, + "Invalid MD length %d for MD type %d", + mdlen, + mdtype + ); + return -EINVAL; + } + break; + default: + OVS_NLERR(log, "Unknown nsh attribute %d", + type); + return -EINVAL; + } + } + + if (rem > 0) { + OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem); + return -EINVAL; + } + + if (has_md1 && has_md2) { + OVS_NLERR( + 1, + "invalid nsh attribute: md1 and md2 are exclusive." + ); + return -EINVAL; + } + + if (!is_mask) { + if ((has_md1 && mdtype != NSH_M_TYPE1) || + (has_md2 && mdtype != NSH_M_TYPE2)) { + OVS_NLERR(1, "nsh attribute has unmatched MD type %d.", + mdtype); + return -EINVAL; + } + + if (is_push_nsh && + (!has_base || (!has_md1 && !has_md2))) { + OVS_NLERR( + 1, + "push_nsh: missing base or metadata attributes" + ); + return -EINVAL; + } + } + + return 0; +} + static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 attrs, const struct nlattr **a, bool is_mask, bool log) @@ -1354,6 +1605,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, attrs &= ~(1 << OVS_KEY_ATTR_ARP); } + if (attrs & (1 << OVS_KEY_ATTR_NSH)) { + if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match, + is_mask, false, log) < 0) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_NSH); + } + if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { const struct ovs_key_mpls *mpls_key; @@ -1670,6 +1928,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, return 0; } +static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask, + struct sk_buff *skb) +{ + struct nlattr *start; + + start = nla_nest_start(skb, OVS_KEY_ATTR_NSH); + if (!start) + return -EMSGSIZE; + + if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base)) + goto nla_put_failure; + + if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) { + if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1, + sizeof(nsh->context), nsh->context)) + goto nla_put_failure; + } + + /* Don't support MD type 2 yet */ + + nla_nest_end(skb, start); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int __ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, bool is_mask, struct sk_buff *skb) @@ -1798,6 +2084,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ipv6_key->ipv6_tclass = output->ip.tos; ipv6_key->ipv6_hlimit = output->ip.ttl; ipv6_key->ipv6_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_NSH)) { + if (nsh_key_to_nlattr(&output->nsh, is_mask, skb)) + goto nla_put_failure; } else if (swkey->eth.type == htons(ETH_P_ARP) || swkey->eth.type == htons(ETH_P_RARP)) { struct ovs_key_arp *arp_key; @@ -2292,6 +2581,19 @@ static int validate_and_copy_set_tun(const struct 
nlattr *attr, return err; } +static bool validate_nsh(const struct nlattr *attr, bool is_mask, + bool is_push_nsh, bool log) +{ + struct sw_flow_match match; + struct sw_flow_key key; + int ret = 0; + + ovs_match_init(&match, &key, true, NULL); + ret = nsh_key_put_from_nlattr(attr, &match, is_mask, + is_push_nsh, log); + return !ret; +} + /* Return false if there are any non-masked bits set. * Mask follows data immediately, before any netlink padding. */ @@ -2434,6 +2736,13 @@ static int validate_set(const struct nlattr *a, break; + case OVS_KEY_ATTR_NSH: + if (eth_type != htons(ETH_P_NSH)) + return -EINVAL; + if (!validate_nsh(nla_data(a), masked, false, log)) + return -EINVAL; + break; + default: return -EINVAL; } @@ -2533,6 +2842,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), [OVS_ACTION_ATTR_POP_ETH] = 0, + [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1, + [OVS_ACTION_ATTR_POP_NSH] = 0, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2690,6 +3001,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, mac_proto = MAC_PROTO_ETHERNET; break; + case OVS_ACTION_ATTR_PUSH_NSH: + if (mac_proto != MAC_PROTO_ETHERNET) { + u8 next_proto; + + next_proto = tun_p_from_eth_p(eth_type); + if (!next_proto) + return -EINVAL; + } + mac_proto = MAC_PROTO_NONE; + if (!validate_nsh(nla_data(a), false, true, true)) + return -EINVAL; + break; + + case OVS_ACTION_ATTR_POP_NSH: { + __be16 inner_proto; + + if (eth_type != htons(ETH_P_NSH)) + return -EINVAL; + inner_proto = tun_p_to_eth_p(key->nsh.base.np); + if (!inner_proto) + return -EINVAL; + if (key->nsh.base.np == TUN_P_ETHERNET) + mac_proto = MAC_PROTO_ETHERNET; + else + mac_proto = MAC_PROTO_NONE; + break; + } + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 929c665ac3aa..6657606b2b47 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr, void ovs_nla_free_flow_actions(struct sw_flow_actions *); void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); +int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh, + struct ovs_key_nsh *nsh_mask); +int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, + size_t size); + #endif /* flow_netlink.h */ From 6bc05d5d8e9d65f67b6e6f8cf3b240668146338f Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 1 Nov 2017 10:29:16 -0500 Subject: [PATCH 1957/2177] rtlwifi: rtl_pci: Fix formatting errors in pci.h Checkpatch.pl reports a number of formatting problems in this header file. None of the changes cause any functional changes in the driver. 
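For readers unfamiliar with the conventions checkpatch.pl enforces here, a small illustrative fragment (generic C, not taken from this driver): networking-style block comments put text on the opening line and align continuations on " * ", and a cast is written with no space before its operand.

/* 1: first item,
 * 2: second item
 */
static unsigned char read_byte(const void *base, unsigned long addr)
{
	return *((const unsigned char *)base + addr);	/* no space after the cast */
}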
Signed-off-by: Larry Finger Cc: Ping-Ke Shih Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.h | 26 +++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h index 1af92b34979d..e331f5a3b35f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.h +++ b/drivers/net/wireless/realtek/rtlwifi/pci.h @@ -27,10 +27,9 @@ #define __RTL_PCI_H__ #include -/* -1: MSDU packet queue, -2: Rx Command Queue -*/ +/* 1: MSDU packet queue, + * 2: Rx Command Queue + */ #define RTL_PCI_RX_MPDU_QUEUE 0 #define RTL_PCI_RX_CMD_QUEUE 1 #define RTL_PCI_MAX_RX_QUEUE 2 @@ -223,8 +222,9 @@ struct rtl_pci { u8 const_hostpci_aspm_setting; /*pci-e device */ u8 const_devicepci_aspm_setting; - /*If it supports ASPM, Offset[560h] = 0x40, - otherwise Offset[560h] = 0x00. */ + /* If it supports ASPM, Offset[560h] = 0x40, + * otherwise Offset[560h] = 0x00. + */ bool support_aspm; bool support_backdoor; @@ -279,7 +279,7 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw); extern const struct rtl_intf_ops rtl_pci_ops; int rtl_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *id); + const struct pci_device_id *id); void rtl_pci_disconnect(struct pci_dev *pdev); #ifdef CONFIG_PM_SLEEP int rtl_pci_suspend(struct device *dev); @@ -287,34 +287,34 @@ int rtl_pci_resume(struct device *dev); #endif /* CONFIG_PM_SLEEP */ static inline u8 pci_read8_sync(struct rtl_priv *rtlpriv, u32 addr) { - return readb((u8 __iomem *) rtlpriv->io.pci_mem_start + addr); + return readb((u8 __iomem *)rtlpriv->io.pci_mem_start + addr); } static inline u16 pci_read16_sync(struct rtl_priv *rtlpriv, u32 addr) { - return readw((u8 __iomem *) rtlpriv->io.pci_mem_start + addr); + return readw((u8 __iomem *)rtlpriv->io.pci_mem_start + addr); } static inline u32 pci_read32_sync(struct rtl_priv *rtlpriv, u32 addr) { - return readl((u8 __iomem *) rtlpriv->io.pci_mem_start + addr); + return readl((u8 __iomem *)rtlpriv->io.pci_mem_start + addr); } static inline void pci_write8_async(struct rtl_priv *rtlpriv, u32 addr, u8 val) { - writeb(val, (u8 __iomem *) rtlpriv->io.pci_mem_start + addr); + writeb(val, (u8 __iomem *)rtlpriv->io.pci_mem_start + addr); } static inline void pci_write16_async(struct rtl_priv *rtlpriv, u32 addr, u16 val) { - writew(val, (u8 __iomem *) rtlpriv->io.pci_mem_start + addr); + writew(val, (u8 __iomem *)rtlpriv->io.pci_mem_start + addr); } static inline void pci_write32_async(struct rtl_priv *rtlpriv, u32 addr, u32 val) { - writel(val, (u8 __iomem *) rtlpriv->io.pci_mem_start + addr); + writel(val, (u8 __iomem *)rtlpriv->io.pci_mem_start + addr); } static inline u16 calc_fifo_space(u16 rp, u16 wp) From ae0122b6793d51f04b3841f900c8aa5aeee40ee3 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 1 Nov 2017 10:29:17 -0500 Subject: [PATCH 1958/2177] rtlwifi: rtl_pci: Fix formatting problems in pci.c Checkpatch.pl reports a number of formatting problems in this source file. None of the changes cause any functional changes in the driver. 
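As a further hedged illustration (again not code from pci.c), two more patterns checkpatch flags in this file: redundant parentheses around simple comparisons, and a missing blank line after local declarations.

static int queue_is_busy(int own, int hw_queue, int beacon_queue)
{
	int busy;	/* a blank line belongs after the declarations */

	/* was: if ((own == 1) && (hw_queue != beacon_queue)) */
	busy = (own == 1 && hw_queue != beacon_queue);
	return busy;
}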
Signed-off-by: Larry Finger Cc: Ping-Ke Shih Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.c | 205 +++++++++------------ 1 file changed, 92 insertions(+), 113 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index eb12818b46b3..7325806e40a2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -54,8 +54,7 @@ static const u8 ac_to_hwq[] = { BK_QUEUE }; -static u8 _rtl_mac_to_hwqueue(struct ieee80211_hw *hw, - struct sk_buff *skb) +static u8 _rtl_mac_to_hwqueue(struct ieee80211_hw *hw, struct sk_buff *skb) { struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); __le16 fc = rtl_get_fc(skb); @@ -104,20 +103,18 @@ static void _rtl_pci_update_default_setting(struct ieee80211_hw *hw) break; case 3: - /* - * Always enable ASPM and Clock Req + /* Always enable ASPM and Clock Req * from initialization to halt. - * */ + */ ppsc->reg_rfps_level &= ~(RT_RF_LPS_LEVEL_ASPM); ppsc->reg_rfps_level |= (RT_RF_PS_LEVEL_ALWAYS_ASPM | RT_RF_OFF_LEVL_CLK_REQ); break; case 4: - /* - * Always enable ASPM without Clock Req + /* Always enable ASPM without Clock Req * from initialization to halt. - * */ + */ ppsc->reg_rfps_level &= ~(RT_RF_LPS_LEVEL_ASPM | RT_RF_OFF_LEVL_CLK_REQ); ppsc->reg_rfps_level |= RT_RF_PS_LEVEL_ALWAYS_ASPM; @@ -180,10 +177,11 @@ static void _rtl_pci_update_default_setting(struct ieee80211_hw *hw) } /* toshiba aspm issue, toshiba will set aspm selfly - * so we should not set aspm in driver */ + * so we should not set aspm in driver + */ pci_read_config_byte(rtlpci->pdev, 0x80, &init_aspm); if (rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8192SE && - init_aspm == 0x43) + init_aspm == 0x43) ppsc->support_aspm = false; } @@ -263,8 +261,7 @@ static void rtl_pci_disable_aspm(struct ieee80211_hw *hw) udelay(50); } -/* - *Enable RTL8192SE ASPM & Enable Pci Bridge ASPM for +/*Enable RTL8192SE ASPM & Enable Pci Bridge ASPM for *power saving We should follow the sequence to enable *RTL8192SE first then enable Pci Bridge ASPM *or the system will show bluescreen. 
@@ -334,7 +331,7 @@ static bool rtl_pci_get_amd_l1_patch(struct ieee80211_hw *hw) bool status = false; u8 offset_e0; - unsigned offset_e4; + unsigned int offset_e4; pci_write_config_byte(rtlpci->pdev, 0xe0, 0xa0); @@ -369,12 +366,12 @@ static bool rtl_pci_check_buddy_priv(struct ieee80211_hw *hw, "tpcipriv->ndis_adapter.funcnumber %x\n", tpcipriv->ndis_adapter.funcnumber); - if ((pcipriv->ndis_adapter.busnumber == - tpcipriv->ndis_adapter.busnumber) && - (pcipriv->ndis_adapter.devnumber == - tpcipriv->ndis_adapter.devnumber) && - (pcipriv->ndis_adapter.funcnumber != - tpcipriv->ndis_adapter.funcnumber)) { + if (pcipriv->ndis_adapter.busnumber == + tpcipriv->ndis_adapter.busnumber && + pcipriv->ndis_adapter.devnumber == + tpcipriv->ndis_adapter.devnumber && + pcipriv->ndis_adapter.funcnumber != + tpcipriv->ndis_adapter.funcnumber) { find_buddy_priv = true; break; } @@ -407,7 +404,7 @@ static void rtl_pci_get_linkcontrol_field(struct ieee80211_hw *hw) } static void rtl_pci_parse_configuration(struct pci_dev *pdev, - struct ieee80211_hw *hw) + struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci_priv *pcipriv = rtl_pcipriv(hw); @@ -441,7 +438,6 @@ static void rtl_pci_init_aspm(struct ieee80211_hw *hw) rtl_pci_enable_aspm(hw); RT_SET_PS_LEVEL(ppsc, RT_RF_PS_LEVEL_ALWAYS_ASPM); } - } static void _rtl_pci_io_handler_init(struct device *dev, @@ -458,11 +454,11 @@ static void _rtl_pci_io_handler_init(struct device *dev, rtlpriv->io.read8_sync = pci_read8_sync; rtlpriv->io.read16_sync = pci_read16_sync; rtlpriv->io.read32_sync = pci_read32_sync; - } static bool _rtl_update_earlymode_info(struct ieee80211_hw *hw, - struct sk_buff *skb, struct rtl_tcb_desc *tcb_desc, u8 tid) + struct sk_buff *skb, + struct rtl_tcb_desc *tcb_desc, u8 tid) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -520,13 +516,15 @@ static void _rtl_pci_tx_chk_waitq(struct ieee80211_hw *hw) (rtlpriv->buddy_priv && rtlpriv->buddy_priv->easy_concurrent_ctl.switch_in_process))) return; - /* we juse use em for BE/BK/VI/VO */ + /* we just use em for BE/BK/VI/VO */ for (tid = 7; tid >= 0; tid--) { u8 hw_queue = ac_to_hwq[rtl_tid_to_ac(tid)]; struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; + while (!mac->act_scanning && rtlpriv->psc.rfpwr_state == ERFON) { struct rtl_tcb_desc tcb_desc; + memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc)); spin_lock_bh(&rtlpriv->locks.waitq_lock); @@ -541,7 +539,8 @@ static void _rtl_pci_tx_chk_waitq(struct ieee80211_hw *hw) spin_unlock_bh(&rtlpriv->locks.waitq_lock); /* Some macaddr can't do early mode. 
like - * multicast/broadcast/no_qos data */ + * multicast/broadcast/no_qos data + */ info = IEEE80211_SKB_CB(skb); if (info->flags & IEEE80211_TX_CTL_AMPDU) _rtl_update_earlymode_info(hw, skb, @@ -552,7 +551,6 @@ static void _rtl_pci_tx_chk_waitq(struct ieee80211_hw *hw) } } - static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) { struct rtl_priv *rtlpriv = rtl_priv(hw); @@ -603,7 +601,6 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) if (prio == TXCMD_QUEUE) { dev_kfree_skb(skb); goto tx_status_ok; - } /* for sw LPS, just after NULL skb send out, we can @@ -643,15 +640,12 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) ieee80211_tx_status_irqsafe(hw, skb); if ((ring->entries - skb_queue_len(&ring->queue)) <= 4) { - RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, "more desc left, wake skb_queue@%d, ring->idx = %d, skb_queue_len = 0x%x\n", prio, ring->idx, skb_queue_len(&ring->queue)); - ieee80211_wake_queue(hw, - skb_get_queue_mapping - (skb)); + ieee80211_wake_queue(hw, skb_get_queue_mapping(skb)); } tx_status_ok: skb = NULL; @@ -659,7 +653,7 @@ tx_status_ok: if (((rtlpriv->link_info.num_rx_inperiod + rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) + rtlpriv->link_info.num_rx_inperiod > 2) rtl_lps_leave(hw); } @@ -817,7 +811,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) new_skb = dev_alloc_skb(rtlpci->rxbuffersize); if (unlikely(!new_skb)) goto no_new; - memset(&rx_status , 0 , sizeof(rx_status)); + memset(&rx_status, 0, sizeof(rx_status)); rtlpriv->cfg->ops->query_rx_desc(hw, &stats, &rx_status, (u8 *)pdesc, skb); @@ -847,12 +841,11 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) /* handle command packet here */ if (rtlpriv->cfg->ops->rx_command_packet && rtlpriv->cfg->ops->rx_command_packet(hw, &stats, skb)) { - dev_kfree_skb_any(skb); - goto new_trx_end; + dev_kfree_skb_any(skb); + goto new_trx_end; } - /* - * NOTICE This can not be use for mac80211, + /* NOTICE This can not be use for mac80211, * this is done in mac80211 code, * if done here sec DHCP will fail * skb_trim(skb, skb->len - 4); @@ -889,9 +882,8 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) /* for sw lps */ rtl_swlps_beacon(hw, (void *)skb->data, skb->len); rtl_recognize_peer(hw, (void *)skb->data, skb->len); - if ((rtlpriv->mac80211.opmode == NL80211_IFTYPE_AP) && - (rtlpriv->rtlhal.current_bandtype == - BAND_ON_2_4G) && + if (rtlpriv->mac80211.opmode == NL80211_IFTYPE_AP && + rtlpriv->rtlhal.current_bandtype == BAND_ON_2_4G && (ieee80211_is_beacon(fc) || ieee80211_is_probe_resp(fc))) { dev_kfree_skb_any(skb); @@ -913,7 +905,7 @@ new_trx_end: } if (((rtlpriv->link_info.num_rx_inperiod + rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) + rtlpriv->link_info.num_rx_inperiod > 2) rtl_lps_leave(hw); skb = new_skb; no_new: @@ -952,30 +944,27 @@ static irqreturn_t _rtl_pci_interrupt(int irq, void *dev_id) if (rtlpci->irq_enabled == 0) return ret; - spin_lock_irqsave(&rtlpriv->locks.irq_th_lock , flags); + spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags); rtlpriv->cfg->ops->disable_interrupt(hw); /*read ISR: 4/8bytes */ rtlpriv->cfg->ops->interrupt_recognized(hw, &inta, &intb); - /*Shared IRQ or HW disappared */ + /*Shared IRQ or HW disappeared */ if (!inta || inta == 0xffff) goto done; /*<1> beacon related */ - if (inta & rtlpriv->cfg->maps[RTL_IMR_TBDOK]) { + if (inta & rtlpriv->cfg->maps[RTL_IMR_TBDOK]) RT_TRACE(rtlpriv, COMP_INTR, DBG_TRACE, "beacon ok 
interrupt!\n"); - } - if (unlikely(inta & rtlpriv->cfg->maps[RTL_IMR_TBDER])) { + if (unlikely(inta & rtlpriv->cfg->maps[RTL_IMR_TBDER])) RT_TRACE(rtlpriv, COMP_INTR, DBG_TRACE, "beacon err interrupt!\n"); - } - if (inta & rtlpriv->cfg->maps[RTL_IMR_BDOK]) { + if (inta & rtlpriv->cfg->maps[RTL_IMR_BDOK]) RT_TRACE(rtlpriv, COMP_INTR, DBG_TRACE, "beacon interrupt!\n"); - } if (inta & rtlpriv->cfg->maps[RTL_IMR_BCNINT]) { RT_TRACE(rtlpriv, COMP_INTR, DBG_TRACE, @@ -1130,7 +1119,7 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) /*NB: the beacon data buffer must be 32-bit aligned. */ pskb = ieee80211_beacon_get(hw, mac->vif); - if (pskb == NULL) + if (!pskb) return; hdr = rtl_get_hdr(pskb); info = IEEE80211_SKB_CB(pskb); @@ -1152,7 +1141,6 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) rtlpriv->cfg->ops->set_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN, &temp_one); } - return; } static void _rtl_pci_init_trx_var(struct ieee80211_hw *hw) @@ -1171,8 +1159,7 @@ static void _rtl_pci_init_trx_var(struct ieee80211_hw *hw) for (i = 0; i < RTL_PCI_MAX_TX_QUEUE_COUNT; i++) rtlpci->txringcount[i] = desc_num; - /* - *we just alloc 2 desc for beacon queue, + /*we just alloc 2 desc for beacon queue, *because we just need first desc in hw beacon. */ rtlpci->txringcount[BEACON_QUEUE] = 2; @@ -1189,7 +1176,7 @@ static void _rtl_pci_init_trx_var(struct ieee80211_hw *hw) } static void _rtl_pci_init_struct(struct ieee80211_hw *hw, - struct pci_dev *pdev) + struct pci_dev *pdev) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -1361,7 +1348,7 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx) } static void _rtl_pci_free_tx_ring(struct ieee80211_hw *hw, - unsigned int prio) + unsigned int prio) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); @@ -1378,8 +1365,7 @@ static void _rtl_pci_free_tx_ring(struct ieee80211_hw *hw, entry = (u8 *)(&ring->desc[ring->idx]); pci_unmap_single(rtlpci->pdev, - rtlpriv->cfg-> - ops->get_desc(hw, (u8 *)entry, + rtlpriv->cfg->ops->get_desc(hw, (u8 *)entry, true, HW_DESC_TXBUFF_ADDR), skb->len, PCI_DMA_TODEVICE); @@ -1451,8 +1437,7 @@ static int _rtl_pci_init_trx_ring(struct ieee80211_hw *hw) } for (i = 0; i < RTL_PCI_MAX_TX_QUEUE_COUNT; i++) { - ret = _rtl_pci_init_tx_ring(hw, i, - rtlpci->txringcount[i]); + ret = _rtl_pci_init_tx_ring(hw, i, rtlpci->txringcount[i]); if (ret) goto err_free_rings; } @@ -1500,7 +1485,7 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw) /* force the rx_ring[RX_MPDU_QUEUE/ * RX_CMD_QUEUE].idx to the first one *new trx flow, do nothing - */ + */ if (!rtlpriv->use_new_trx_flow && rtlpci->rx_ring[rxring_idx].desc) { struct rtl_rx_desc *entry = NULL; @@ -1510,8 +1495,8 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw) entry = &rtlpci->rx_ring[rxring_idx].desc[i]; bufferaddress = rtlpriv->cfg->ops->get_desc(hw, (u8 *)entry, - false , HW_DESC_RXBUFF_ADDR); - memset((u8 *)entry , 0 , + false, HW_DESC_RXBUFF_ADDR); + memset((u8 *)entry, 0, sizeof(*rtlpci->rx_ring [rxring_idx].desc));/*clear one entry*/ if (rtlpriv->use_new_trx_flow) { @@ -1540,8 +1525,7 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw) rtlpci->rx_ring[rxring_idx].idx = 0; } - /* - *after reset, release previous pending packet, + /*after reset, release previous pending packet, *and force the tx idx to the first one */ spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags); @@ -1642,7 +1626,7 @@ static int rtl_pci_tx(struct 
ieee80211_hw *hw, if (rtlpriv->psc.sw_ps_enabled) { if (ieee80211_is_data(fc) && !ieee80211_is_nullfunc(fc) && - !ieee80211_has_pm(fc)) + !ieee80211_has_pm(fc)) hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); } @@ -1674,7 +1658,7 @@ static int rtl_pci_tx(struct ieee80211_hw *hw, own = (u8)rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN); - if ((own == 1) && (hw_queue != BEACON_QUEUE)) { + if (own == 1 && hw_queue != BEACON_QUEUE) { RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "No more TX desc@%d, ring->idx = %d, idx = %d, skb_queue_len = 0x%x\n", hw_queue, ring->idx, idx, @@ -1688,11 +1672,10 @@ static int rtl_pci_tx(struct ieee80211_hw *hw, if (rtlpriv->cfg->ops->get_available_desc && rtlpriv->cfg->ops->get_available_desc(hw, hw_queue) == 0) { - RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, - "get_available_desc fail\n"); - spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, - flags); - return skb->len; + RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, + "get_available_desc fail\n"); + spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, flags); + return skb->len; } if (ieee80211_is_data(fc)) @@ -1751,7 +1734,7 @@ static void rtl_pci_flush(struct ieee80211_hw *hw, u32 queues, bool drop) ring = &pcipriv->dev.tx_ring[queue_id]; queue_len = skb_queue_len(&ring->queue); if (queue_len == 0 || queue_id == BEACON_QUEUE || - queue_id == TXCMD_QUEUE) { + queue_id == TXCMD_QUEUE) { queue_id--; continue; } else { @@ -1761,7 +1744,7 @@ static void rtl_pci_flush(struct ieee80211_hw *hw, u32 queues, bool drop) /* we just wait 1s for all queues */ if (rtlpriv->psc.rfpwr_state == ERFOFF || - is_hal_stop(rtlhal) || i >= 200) + is_hal_stop(rtlhal) || i >= 200) return; } } @@ -1779,7 +1762,6 @@ static void rtl_pci_deinit(struct ieee80211_hw *hw) flush_workqueue(rtlpriv->works.rtl_wq); destroy_workqueue(rtlpriv->works.rtl_wq); - } static int rtl_pci_init(struct ieee80211_hw *hw, struct pci_dev *pdev) @@ -1837,7 +1819,7 @@ static int rtl_pci_start(struct ieee80211_hw *hw) rtlpci->up_first_time = false; - RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, "rtl_pci_start OK\n"); + RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, "%s OK\n", __func__); return 0; } @@ -1848,13 +1830,12 @@ static void rtl_pci_stop(struct ieee80211_hw *hw) struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); unsigned long flags; - u8 RFInProgressTimeOut = 0; + u8 rf_timeout = 0; if (rtlpriv->cfg->ops->get_btc_status()) rtlpriv->btcoexist.btc_ops->btc_halt_notify(); - /* - *should be before disable interrupt&adapter + /*should be before disable interrupt&adapter *and will do it immediately. 
*/ set_hal_stop(rtlhal); @@ -1866,12 +1847,12 @@ static void rtl_pci_stop(struct ieee80211_hw *hw) spin_lock_irqsave(&rtlpriv->locks.rf_ps_lock, flags); while (ppsc->rfchange_inprogress) { spin_unlock_irqrestore(&rtlpriv->locks.rf_ps_lock, flags); - if (RFInProgressTimeOut > 100) { + if (rf_timeout > 100) { spin_lock_irqsave(&rtlpriv->locks.rf_ps_lock, flags); break; } mdelay(1); - RFInProgressTimeOut++; + rf_timeout++; spin_lock_irqsave(&rtlpriv->locks.rf_ps_lock, flags); } ppsc->rfchange_inprogress = true; @@ -1891,7 +1872,7 @@ static void rtl_pci_stop(struct ieee80211_hw *hw) } static bool _rtl_pci_find_adapter(struct pci_dev *pdev, - struct ieee80211_hw *hw) + struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci_priv *pcipriv = rtl_pcipriv(hw); @@ -1946,13 +1927,12 @@ static bool _rtl_pci_find_adapter(struct pci_dev *pdev, venderid, deviceid); rtlhal->hw_type = HARDWARE_TYPE_RTL8192SE; break; - } } else if (deviceid == RTL_PCI_8723AE_DID) { rtlhal->hw_type = HARDWARE_TYPE_RTL8723AE; RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, - "8723AE PCI-E is found - " - "vid/did=%x/%x\n", venderid, deviceid); + "8723AE PCI-E is found - vid/did=%x/%x\n", + venderid, deviceid); } else if (deviceid == RTL_PCI_8192CET_DID || deviceid == RTL_PCI_8192CE_DID || deviceid == RTL_PCI_8191CE_DID || @@ -1972,21 +1952,21 @@ static bool _rtl_pci_find_adapter(struct pci_dev *pdev, RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, "Find adapter, Hardware type is 8188EE\n"); } else if (deviceid == RTL_PCI_8723BE_DID) { - rtlhal->hw_type = HARDWARE_TYPE_RTL8723BE; - RT_TRACE(rtlpriv, COMP_INIT , DBG_LOUD, - "Find adapter, Hardware type is 8723BE\n"); + rtlhal->hw_type = HARDWARE_TYPE_RTL8723BE; + RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, + "Find adapter, Hardware type is 8723BE\n"); } else if (deviceid == RTL_PCI_8192EE_DID) { - rtlhal->hw_type = HARDWARE_TYPE_RTL8192EE; - RT_TRACE(rtlpriv, COMP_INIT , DBG_LOUD, - "Find adapter, Hardware type is 8192EE\n"); + rtlhal->hw_type = HARDWARE_TYPE_RTL8192EE; + RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, + "Find adapter, Hardware type is 8192EE\n"); } else if (deviceid == RTL_PCI_8821AE_DID) { - rtlhal->hw_type = HARDWARE_TYPE_RTL8821AE; - RT_TRACE(rtlpriv, COMP_INIT , DBG_LOUD, - "Find adapter, Hardware type is 8821AE\n"); + rtlhal->hw_type = HARDWARE_TYPE_RTL8821AE; + RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, + "Find adapter, Hardware type is 8821AE\n"); } else if (deviceid == RTL_PCI_8812AE_DID) { - rtlhal->hw_type = HARDWARE_TYPE_RTL8812AE; - RT_TRACE(rtlpriv, COMP_INIT , DBG_LOUD, - "Find adapter, Hardware type is 8812AE\n"); + rtlhal->hw_type = HARDWARE_TYPE_RTL8812AE; + RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, + "Find adapter, Hardware type is 8812AE\n"); } else { RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "Err: Unknown device - vid/did=%x/%x\n", @@ -2109,7 +2089,7 @@ static int rtl_pci_intr_mode_msi(struct ieee80211_hw *hw) rtlpci->using_msi = true; - RT_TRACE(rtlpriv, COMP_INIT|COMP_INTR, DBG_DMESG, + RT_TRACE(rtlpriv, COMP_INIT | COMP_INTR, DBG_DMESG, "MSI Interrupt Mode!\n"); return 0; } @@ -2127,7 +2107,7 @@ static int rtl_pci_intr_mode_legacy(struct ieee80211_hw *hw) return ret; rtlpci->using_msi = false; - RT_TRACE(rtlpriv, COMP_INIT|COMP_INTR, DBG_DMESG, + RT_TRACE(rtlpriv, COMP_INIT | COMP_INTR, DBG_DMESG, "Pin-based Interrupt Mode!\n"); return 0; } @@ -2164,7 +2144,7 @@ static void platform_enable_dma64(struct pci_dev *pdev, bool dma64) } int rtl_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *id) + const struct pci_device_id *id) { struct 
ieee80211_hw *hw = NULL; @@ -2343,7 +2323,6 @@ fail1: pci_disable_device(pdev); return err; - } EXPORT_SYMBOL(rtl_pci_probe); @@ -2402,20 +2381,20 @@ EXPORT_SYMBOL(rtl_pci_disconnect); #ifdef CONFIG_PM_SLEEP /*************************************** -kernel pci power state define: -PCI_D0 ((pci_power_t __force) 0) -PCI_D1 ((pci_power_t __force) 1) -PCI_D2 ((pci_power_t __force) 2) -PCI_D3hot ((pci_power_t __force) 3) -PCI_D3cold ((pci_power_t __force) 4) -PCI_UNKNOWN ((pci_power_t __force) 5) + * kernel pci power state define: + * PCI_D0 ((pci_power_t __force) 0) + * PCI_D1 ((pci_power_t __force) 1) + * PCI_D2 ((pci_power_t __force) 2) + * PCI_D3hot ((pci_power_t __force) 3) + * PCI_D3cold ((pci_power_t __force) 4) + * PCI_UNKNOWN ((pci_power_t __force) 5) -This function is called when system -goes into suspend state mac80211 will -call rtl_mac_stop() from the mac80211 -suspend function first, So there is -no need to call hw_disable here. -****************************************/ + * This function is called when system + * goes into suspend state mac80211 will + * call rtl_mac_stop() from the mac80211 + * suspend function first, So there is + * no need to call hw_disable here. + ****************************************/ int rtl_pci_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); From 5f647f4dfe419106274b6a509e33ee4a5db1133f Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 1 Nov 2017 10:29:18 -0500 Subject: [PATCH 1959/2177] rtlwifi: rtl_pci: Simplify some code by eliminating extraneous variables In several places, the code assigns a variable inside an "if" or "case" block, but uses it only once. The code is simplified by eliminating the extraneous variable. With this change, one level of indenting is saved. This patch does not cause any functional changes in the binary code. Signed-off-by: Larry Finger Cc: Ping-Ke Shih Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.c | 35 +++++++--------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 7325806e40a2..01c65c807126 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -143,32 +143,19 @@ static void _rtl_pci_update_default_setting(struct ieee80211_hw *hw) /*Set HW definition to determine if it supports ASPM. */ switch (rtlpci->const_support_pciaspm) { - case 0:{ - /*Not support ASPM. */ - bool support_aspm = false; - ppsc->support_aspm = support_aspm; - break; - } - case 1:{ - /*Support ASPM. */ - bool support_aspm = true; - bool support_backdoor = true; - ppsc->support_aspm = support_aspm; - - /*if (priv->oem_id == RT_CID_TOSHIBA && - !priv->ndis_adapter.amd_l1_patch) - support_backdoor = false; */ - - ppsc->support_backdoor = support_backdoor; - - break; - } + case 0: + /*Not support ASPM. */ + ppsc->support_aspm = false; + break; + case 1: + /*Support ASPM. */ + ppsc->support_aspm = true; + ppsc->support_backdoor = true; + break; case 2: /*ASPM value set by chipset. 
*/ - if (pcibridge_vendor == PCI_BRIDGE_VENDOR_INTEL) { - bool support_aspm = true; - ppsc->support_aspm = support_aspm; - } + if (pcibridge_vendor == PCI_BRIDGE_VENDOR_INTEL) + ppsc->support_aspm = true; break; default: pr_err("switch case %#x not processed\n", From 57869e4ba77a75714ffcec628c9c440baaafa836 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 1 Nov 2017 10:29:19 -0500 Subject: [PATCH 1960/2177] rtlwifi: rtl_pci: Add support for 8822be TX/RX BD The number of TX/RX BD desc for 8822BE is 512. The TX/RX BD architecture of 8822BE is the same as 8192EE. Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.c | 14 +++++++++++--- drivers/net/wireless/realtek/rtlwifi/pci.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 01c65c807126..e8c8ce601229 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1140,6 +1140,8 @@ static void _rtl_pci_init_trx_var(struct ieee80211_hw *hw) if (rtlhal->hw_type == HARDWARE_TYPE_RTL8192EE) desc_num = TX_DESC_NUM_92E; + else if (rtlhal->hw_type == HARDWARE_TYPE_RTL8822BE) + desc_num = TX_DESC_NUM_8822B; else desc_num = RT_TXDESC_NUM; @@ -1981,11 +1983,17 @@ static bool _rtl_pci_find_adapter(struct pci_dev *pdev, } } - /* 92ee use new trx flow */ - if (rtlhal->hw_type == HARDWARE_TYPE_RTL8192EE) + switch (rtlhal->hw_type) { + case HARDWARE_TYPE_RTL8192EE: + case HARDWARE_TYPE_RTL8822BE: + /* use new trx flow */ rtlpriv->use_new_trx_flow = true; - else + break; + + default: rtlpriv->use_new_trx_flow = false; + break; + } /*find bus info */ pcipriv->ndis_adapter.busnumber = pdev->bus->number; diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h index e331f5a3b35f..0f1a0f8585b6 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.h +++ b/drivers/net/wireless/realtek/rtlwifi/pci.h @@ -39,6 +39,7 @@ #define RT_TXDESC_NUM 128 #define TX_DESC_NUM_92E 512 +#define TX_DESC_NUM_8822B 512 #define RT_TXDESC_NUM_BE_QUEUE 256 #define BK_QUEUE 0 From 89d3e8abcf24462fec0c3c82c5917196e9345e3e Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 1 Nov 2017 10:29:20 -0500 Subject: [PATCH 1961/2177] rtlwifi: rtl_pci: Add fill_tx_special_desc to issue H2C data, and process TXOK in interrupt. With the RTL8822BE, an H2C tx queue is added to download FW and special data. This change implements the support code in rtl_pci. 
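As a rough, self-contained sketch of the idea (bit positions, queue index, and names below are illustrative only, not the 8822BE definitions): a set H2C "DMA OK" bit in the extra interrupt status word is treated like any other TX-done event and routed to the same per-queue completion handler.

#include <stdio.h>

#define IMR_H2CDOK	(1u << 16)	/* illustrative bit position */
#define H2C_QUEUE	8		/* illustrative queue index */

/* Simplified stand-in for the per-queue TX completion handler. */
static void tx_isr(int queue)
{
	printf("TX done on queue %d\n", queue);
}

/* If the chip reports H2C DMA OK in the extra status word (intd),
 * reclaim the H2C queue exactly like a normal TX queue.
 */
static void handle_irq(unsigned int intd, int is_8822be)
{
	if (is_8822be && (intd & IMR_H2CDOK))
		tx_isr(H2C_QUEUE);
}

int main(void)
{
	handle_irq(IMR_H2CDOK, 1);
	return 0;
}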
Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.c | 11 +++++++++++ drivers/net/wireless/realtek/rtlwifi/pci.h | 1 + drivers/net/wireless/realtek/rtlwifi/wifi.h | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index e8c8ce601229..0c30f4bf657b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -926,6 +926,7 @@ static irqreturn_t _rtl_pci_interrupt(int irq, void *dev_id) unsigned long flags; u32 inta = 0; u32 intb = 0; + u32 intd = 0; irqreturn_t ret = IRQ_HANDLED; if (rtlpci->irq_enabled == 0) @@ -1007,6 +1008,16 @@ static irqreturn_t _rtl_pci_interrupt(int irq, void *dev_id) _rtl_pci_tx_isr(hw, VO_QUEUE); } + if (rtlhal->hw_type == HARDWARE_TYPE_RTL8822BE) { + if (intd & rtlpriv->cfg->maps[RTL_IMR_H2CDOK]) { + rtlpriv->link_info.num_tx_inperiod++; + + RT_TRACE(rtlpriv, COMP_INTR, DBG_TRACE, + "H2C TX OK interrupt!\n"); + _rtl_pci_tx_isr(hw, H2C_QUEUE); + } + } + if (rtlhal->hw_type == HARDWARE_TYPE_RTL8192SE) { if (inta & rtlpriv->cfg->maps[RTL_IMR_COMDOK]) { rtlpriv->link_info.num_tx_inperiod++; diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h index 0f1a0f8585b6..6bf346ec4616 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.h +++ b/drivers/net/wireless/realtek/rtlwifi/pci.h @@ -51,6 +51,7 @@ #define MGNT_QUEUE 6 #define HIGH_QUEUE 7 #define HCCA_QUEUE 8 +#define H2C_QUEUE TXCMD_QUEUE /* In 8822B */ #define RTL_PCI_DEVICE(vend, dev, cfg) \ .vendor = (vend), \ diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 22afc14c3da6..f6b00505a69f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -709,6 +709,7 @@ enum rtl_var_map { RTL_IMR_RXFOVW, /*Receive FIFO Overflow */ RTL_IMR_RDU, /*Receive Descriptor Unavailable */ RTL_IMR_ATIMEND, /*For 92C,ATIM Window End Interrupt */ + RTL_IMR_H2CDOK, /*H2C Queue DMA OK Interrupt */ RTL_IMR_BDOK, /*Beacon Queue DMA OK Interrup */ RTL_IMR_HIGHDOK, /*High Queue DMA OK Interrupt */ RTL_IMR_COMDOK, /*Command Queue DMA OK Interrupt*/ @@ -2144,6 +2145,9 @@ struct rtl_hal_ops { void (*fill_tx_cmddesc) (struct ieee80211_hw *hw, u8 *pdesc, bool firstseg, bool lastseg, struct sk_buff *skb); + void (*fill_tx_special_desc)(struct ieee80211_hw *hw, + u8 *pdesc, u8 *pbd_desc, + struct sk_buff *skb, u8 hw_queue); bool (*query_rx_desc) (struct ieee80211_hw *hw, struct rtl_stats *stats, struct ieee80211_rx_status *rx_status, From 68929a83800079164143d44b0d017678f7a05d86 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 1 Nov 2017 10:29:21 -0500 Subject: [PATCH 1962/2177] rtlwifi: rtl_pci: Add ID for 8822BE When the driver for the RTL8822BE is added, it will need an ID for further use. 
Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.c | 5 +++++ drivers/net/wireless/realtek/rtlwifi/pci.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 0c30f4bf657b..874e1e593da9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1967,6 +1967,11 @@ static bool _rtl_pci_find_adapter(struct pci_dev *pdev, rtlhal->hw_type = HARDWARE_TYPE_RTL8812AE; RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, "Find adapter, Hardware type is 8812AE\n"); + } else if (deviceid == RTL_PCI_8822BE_DID) { + rtlhal->hw_type = HARDWARE_TYPE_RTL8822BE; + rtlhal->bandset = BAND_ON_BOTH; + RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, + "Find adapter, Hardware type is 8822BE\n"); } else { RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "Err: Unknown device - vid/did=%x/%x\n", diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h index 6bf346ec4616..ce33fe7bc7c4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.h +++ b/drivers/net/wireless/realtek/rtlwifi/pci.h @@ -109,6 +109,7 @@ #define RTL_PCI_8192EE_DID 0x818B /*8192ee*/ #define RTL_PCI_8821AE_DID 0x8821 /*8821ae*/ #define RTL_PCI_8812AE_DID 0x8812 /*8812ae*/ +#define RTL_PCI_8822BE_DID 0xB822 /*8822be*/ /*8192 support 16 pages of IO registers*/ #define RTL_MEM_MAPPED_IO_RANGE_8190PCI 0x1000 From c1b586402c159427fd5d4f553d1d4ef75bab84c7 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 1 Nov 2017 10:29:22 -0500 Subject: [PATCH 1963/2177] rtlwifi: rtl_pci: Extend recognized interrupt parameters from two to four The 8822BE ISR reports H2C queue (H2CQ) status through the int_d register, so the recognized interrupt parameters are extended from two to four ISR words. The irq_mask array is extended to four entries as well. 
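To make the interface change concrete, here is a hedged stand-alone sketch (simplified types, not the real rtl_hal_ops layout): the recognition callback now fills four status words, and callers that need only two can ignore the extra out-parameters.

#include <stdio.h>

typedef unsigned int u32;

struct hw;	/* opaque in this sketch */

/* Old shape: recognize(hw, &inta, &intb);
 * New shape: recognize(hw, &inta, &intb, &intc, &intd);
 */
static void recognize(struct hw *hw, u32 *inta, u32 *intb, u32 *intc, u32 *intd)
{
	(void)hw;
	/* A real driver reads the ISR registers here; fixed values for the demo. */
	*inta = 0x1;
	*intb = 0x0;
	*intc = 0x0;		/* unused on older chips */
	*intd = 0x10000;	/* e.g. an H2C queue DMA OK bit on the newer chip */
}

int main(void)
{
	u32 inta = 0, intb = 0, intc = 0, intd = 0;

	recognize(NULL, &inta, &intb, &intc, &intd);
	printf("inta=%#x intb=%#x intc=%#x intd=%#x\n", inta, intb, intc, intd);
	return 0;
}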
Signed-off-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/pci.h | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h | 3 ++- drivers/net/wireless/realtek/rtlwifi/wifi.h | 3 ++- 19 files changed, 36 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 874e1e593da9..c2575b0b9440 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -926,6 +926,7 @@ static irqreturn_t _rtl_pci_interrupt(int irq, void *dev_id) unsigned long flags; u32 inta = 0; u32 intb = 0; + u32 intc = 0; u32 intd = 0; irqreturn_t ret = IRQ_HANDLED; @@ -936,7 +937,7 @@ static irqreturn_t _rtl_pci_interrupt(int irq, void *dev_id) rtlpriv->cfg->ops->disable_interrupt(hw); /*read ISR: 4/8bytes */ - rtlpriv->cfg->ops->interrupt_recognized(hw, &inta, &intb); + rtlpriv->cfg->ops->interrupt_recognized(hw, &inta, &intb, &intc, &intd); /*Shared IRQ or HW disappeared */ if (!inta || inta == 0xffff) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h index ce33fe7bc7c4..e7d070e8da2d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.h +++ b/drivers/net/wireless/realtek/rtlwifi/pci.h @@ -211,7 +211,7 @@ struct rtl_pci { /*irq */ u8 irq_alloc; - u32 irq_mask[2]; + u32 irq_mask[4]; /* 0-1: normal, 2: unused, 3: h2c */ u32 sys_irq_mask; /*Bcn control register setting */ diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 2c671364c521..e30a18e64ff5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -1472,7 +1472,8 @@ void rtl88ee_card_disable(struct ieee80211_hw *hw) } void rtl88ee_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb) + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h index ab8488da9409..cdf49de1e6ed 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h @@ -29,7 +29,8 @@ void rtl88ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl88ee_read_eeprom_info(struct ieee80211_hw *hw); void 
rtl88ee_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int rtl88ee_hw_init(struct ieee80211_hw *hw); void rtl88ee_card_disable(struct ieee80211_hw *hw); void rtl88ee_enable_interrupt(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c index 9310fad69cd9..0f4c86a28716 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c @@ -1375,7 +1375,8 @@ void rtl92ce_card_disable(struct ieee80211_hw *hw) } void rtl92ce_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb) + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h index 7683c5dfe851..b5c8e2fc1ba2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h @@ -42,7 +42,8 @@ static inline u8 rtl92c_get_chnl_group(u8 chnl) void rtl92ce_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl92ce_read_eeprom_info(struct ieee80211_hw *hw); void rtl92ce_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int rtl92ce_hw_init(struct ieee80211_hw *hw); void rtl92ce_card_disable(struct ieee80211_hw *hw); void rtl92ce_enable_interrupt(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index 5a67f85fa165..0da6c0136857 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -1356,7 +1356,8 @@ void rtl92de_card_disable(struct ieee80211_hw *hw) } void rtl92de_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb) + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h index 85c565b86ae3..9236aa91273d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h @@ -29,7 +29,8 @@ void rtl92de_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl92de_read_eeprom_info(struct ieee80211_hw *hw); void rtl92de_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int rtl92de_hw_init(struct ieee80211_hw *hw); void rtl92de_card_disable(struct ieee80211_hw *hw); void rtl92de_enable_interrupt(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index 6fc3090c4b72..fe5da637e77a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -1694,7 +1694,8 @@ void rtl92ee_card_disable(struct ieee80211_hw *hw) } void rtl92ee_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb) + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h index cd6aeb44b996..cd6d3322f033 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h @@ -29,7 +29,8 @@ void rtl92ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl92ee_read_eeprom_info(struct ieee80211_hw *hw); void rtl92ee_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int rtl92ee_hw_init(struct ieee80211_hw *hw); void rtl92ee_card_disable(struct ieee80211_hw *hw); void rtl92ee_enable_interrupt(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c index 66be79ca4247..76bf089cced4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c @@ -1559,7 +1559,7 @@ void rtl92se_card_disable(struct ieee80211_hw *hw) } void rtl92se_interrupt_recognized(struct ieee80211_hw *hw, u32 *p_inta, - u32 *p_intb) + u32 *p_intb, u32 *p_intc, u32 *p_intd) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h index 3c93d30fcae7..607056010974 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h @@ -42,7 +42,8 @@ void rtl92se_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl92se_read_eeprom_info(struct ieee80211_hw *hw); void rtl92se_interrupt_recognized(struct ieee80211_hw *hw, - u32 *inta, u32 *intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int rtl92se_hw_init(struct ieee80211_hw *hw); void rtl92se_card_disable(struct ieee80211_hw *hw); void rtl92se_enable_interrupt(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index 8cfd4993c90a..c3f98d58124c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -1340,7 +1340,8 @@ void rtl8723e_card_disable(struct ieee80211_hw *hw) } void rtl8723e_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb) + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h index 1e7063105c96..19e467a37c72 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h @@ -34,7 +34,8 @@ void rtl8723e_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl8723e_read_eeprom_info(struct ieee80211_hw *hw); void rtl8723e_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int rtl8723e_hw_init(struct ieee80211_hw *hw); void rtl8723e_card_disable(struct ieee80211_hw *hw); void rtl8723e_enable_interrupt(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index 239518bd31f1..7cd1ffa7d4a7 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ 
b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -1682,7 +1682,8 @@ void rtl8723be_card_disable(struct ieee80211_hw *hw) } void rtl8723be_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb) + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h index 54d7afa7297e..2215a792f6bf 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h @@ -30,7 +30,8 @@ void rtl8723be_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl8723be_read_eeprom_info(struct ieee80211_hw *hw); void rtl8723be_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int rtl8723be_hw_init(struct ieee80211_hw *hw); void rtl8723be_card_disable(struct ieee80211_hw *hw); void rtl8723be_enable_interrupt(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 60c82a5b51cd..aa69d10348c2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -2488,7 +2488,8 @@ void rtl8821ae_card_disable(struct ieee80211_hw *hw) } void rtl8821ae_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb) + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h index 50fa9c718189..284d259fe557 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h @@ -30,7 +30,8 @@ void rtl8821ae_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val); void rtl8821ae_read_eeprom_info(struct ieee80211_hw *hw); void rtl8821ae_interrupt_recognized(struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int rtl8821ae_hw_init(struct ieee80211_hw *hw); void rtl8821ae_card_disable(struct ieee80211_hw *hw); void rtl8821ae_enable_interrupt(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index f6b00505a69f..49f6918ae18b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2101,7 +2101,8 @@ struct rtl_hal_ops { void (*read_chip_version)(struct ieee80211_hw *hw); void (*read_eeprom_info) (struct ieee80211_hw *hw); void (*interrupt_recognized) (struct ieee80211_hw *hw, - u32 *p_inta, u32 *p_intb); + u32 *p_inta, u32 *p_intb, + u32 *p_intc, u32 *p_intd); int (*hw_init) (struct ieee80211_hw *hw); void (*hw_disable) (struct ieee80211_hw *hw); void (*hw_suspend) (struct ieee80211_hw *hw); From 3f2a162fab15aee243178b5308bb5d1206fc4043 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Nov 2017 14:55:35 +0100 Subject: [PATCH 1964/2177] rtlwifi: fix uninitialized rtlhal->last_suspend_sec time We set rtlhal->last_suspend_sec to an uninitialized stack variable, but unfortunately gcc never warned about this, I only found it while working on another patch. I opened a gcc bug for this. 
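The underlying pattern is easy to reproduce outside the kernel; a minimal hedged example in plain userspace C (not the driver code) of reading a stack struct timeval before it is ever filled, and the one-line fix of filling it first:

#include <stdio.h>
#include <sys/time.h>

int main(void)
{
	struct timeval ts;
	long last_suspend_sec;

	/* Buggy pattern: ts is never populated, so tv_sec is stack garbage:
	 *	last_suspend_sec = ts.tv_sec;
	 */

	/* Fix: fill ts first (the patch adds the missing do_gettimeofday()). */
	gettimeofday(&ts, NULL);
	last_suspend_sec = (long)ts.tv_sec;

	printf("last_suspend_sec = %ld\n", last_suspend_sec);
	return 0;
}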
Presumably the value of rtlhal->last_suspend_sec is not all that important, but it does get used, so we probably want the patch backported to stable kernels. Cc: stable@vger.kernel.org Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82839 Signed-off-by: Arnd Bergmann Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index aa69d10348c2..52f17bdbaaaf 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1373,6 +1373,7 @@ static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw) ppsc->wakeup_reason = 0; + do_gettimeofday(&ts); rtlhal->last_suspend_sec = ts.tv_sec; switch (fw_reason) { From 3c92d5517af8eeab81da3aee4fc14faa198bbb8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Nov 2017 14:55:36 +0100 Subject: [PATCH 1965/2177] rtlwifi: use ktime_get_real_seconds() for suspend time do_gettimeofday() is deprecated and slower than necessary for the purpose of reading the seconds. This changes rtl_op_suspend/resume to use ktime_get_real_seconds() instead, which is simpler and avoids confusion about whether it is y2038-safe or not. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 10 ++++------ drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 3 +-- drivers/net/wireless/realtek/rtlwifi/wifi.h | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 1147327e6f52..533e9cc4c84b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -550,15 +550,13 @@ static int rtl_op_suspend(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_hal *rtlhal = rtl_hal(rtlpriv); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); - struct timeval ts; RT_TRACE(rtlpriv, COMP_POWER, DBG_DMESG, "\n"); if (WARN_ON(!wow)) return -EINVAL; /* to resolve s4 can not wake up*/ - do_gettimeofday(&ts); - rtlhal->last_suspend_sec = ts.tv_sec; + rtlhal->last_suspend_sec = ktime_get_real_seconds(); if ((ppsc->wo_wlan_mode & WAKE_ON_PATTERN_MATCH) && wow->n_patterns) _rtl_add_wowlan_patterns(hw, wow); @@ -577,7 +575,7 @@ static int rtl_op_resume(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_hal *rtlhal = rtl_hal(rtlpriv); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); - struct timeval ts; + time64_t now; RT_TRACE(rtlpriv, COMP_POWER, DBG_DMESG, "\n"); rtlhal->driver_is_goingto_unload = false; @@ -585,8 +583,8 @@ static int rtl_op_resume(struct ieee80211_hw *hw) rtlhal->wake_from_pnp_sleep = true; /* to resovle s4 can not wake up*/ - do_gettimeofday(&ts); - if (ts.tv_sec - rtlhal->last_suspend_sec < 5) + now = ktime_get_real_seconds(); + if (now - rtlhal->last_suspend_sec < 5) return -1; rtl_op_start(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 52f17bdbaaaf..5a5ce98acb0b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1373,8 +1373,7 @@ static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw) ppsc->wakeup_reason = 0; - do_gettimeofday(&ts); - rtlhal->last_suspend_sec = ts.tv_sec; + 
rtlhal->last_suspend_sec = ktime_get_real_seconds(); switch (fw_reason) { case FW_WOW_V2_PTK_UPDATE_EVENT: diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 49f6918ae18b..c96abc926cfd 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -1600,7 +1600,7 @@ struct rtl_hal { bool enter_pnp_sleep; bool wake_from_pnp_sleep; bool wow_enabled; - __kernel_time_t last_suspend_sec; + time64_t last_suspend_sec; u32 wowlan_fwsize; u8 *wowlan_firmware; From ac978dc79a91939a97a271f646ba891d62fce5d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Nov 2017 14:55:37 +0100 Subject: [PATCH 1966/2177] rtlwifi: drop unused ppsc->last_wakeup_time The calculation of ppsc->last_wakeup_time is not y2038-safe, but the variable is not used at all, so we can simply drop it. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 5 ----- drivers/net/wireless/realtek/rtlwifi/wifi.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 5a5ce98acb0b..43e18c4c1e68 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1364,7 +1364,6 @@ static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw) struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtlpriv); u8 fw_reason = 0; - struct timeval ts; fw_reason = rtl_read_byte(rtlpriv, REG_MCUTST_WOWLAN); @@ -1378,15 +1377,11 @@ static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw) switch (fw_reason) { case FW_WOW_V2_PTK_UPDATE_EVENT: ppsc->wakeup_reason = WOL_REASON_PTK_UPDATE; - do_gettimeofday(&ts); - ppsc->last_wakeup_time = ts.tv_sec*1000 + ts.tv_usec/1000; RT_TRACE(rtlpriv, COMP_POWER, DBG_DMESG, "It's a WOL PTK Key update event!\n"); break; case FW_WOW_V2_GTK_UPDATE_EVENT: ppsc->wakeup_reason = WOL_REASON_GTK_UPDATE; - do_gettimeofday(&ts); - ppsc->last_wakeup_time = ts.tv_sec*1000 + ts.tv_usec/1000; RT_TRACE(rtlpriv, COMP_POWER, DBG_DMESG, "It's a WOL GTK Key update event!\n"); break; diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index c96abc926cfd..92d4859ec906 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -1954,8 +1954,6 @@ struct rtl_ps_ctl { u8 gtk_offload_enable; /* Used for WOL, indicates the reason for waking event.*/ u32 wakeup_reason; - /* Record the last waking time for comparison with setting key. */ - u64 last_wakeup_time; }; struct rtl_stats { From 82e730e521ce6d4c633c7ff7edfd8fb4242c0c6e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 3 Nov 2017 14:09:22 +0000 Subject: [PATCH 1967/2177] rtlwifi: remove redundant pointer tid_data tid_data is assigned but never read, hence it is redundant and can be removed. 
Cleans up clang warning: drivers/net/wireless/realtek/rtlwifi/base.c:1581:2: warning: Value stored to 'tid_data' is never read Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 7e3107f9e37f..cad2272ae21b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1630,7 +1630,6 @@ int rtl_tx_agg_stop(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u16 tid) { struct rtl_priv *rtlpriv = rtl_priv(hw); - struct rtl_tid_data *tid_data; struct rtl_sta_info *sta_entry = NULL; if (sta == NULL) @@ -1643,7 +1642,6 @@ int rtl_tx_agg_stop(struct ieee80211_hw *hw, struct ieee80211_vif *vif, return -EINVAL; sta_entry = (struct rtl_sta_info *)sta->drv_priv; - tid_data = &sta_entry->tids[tid]; sta_entry->tids[tid].agg.agg_state = RTL_AGG_STOP; ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); From f80ead1cd5fa9da78a3c2aad1de18f01e2ca18f2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 4 Nov 2017 19:37:59 +0000 Subject: [PATCH 1968/2177] rtlwifi: remove redundant initialization to cfg_cmd cfg_cmd is initialized to zero and this value is never read, instead it is over-written in the start of a do-while loop. Remove the redundant initialization. Cleans up clang warning: drivers/net/wireless/realtek/rtlwifi/core.c:1750:22: warning: Value stored to 'cfg_cmd' during its initialization is never read Signed-off-by: Colin Ian King Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 533e9cc4c84b..3cb88825473e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1746,7 +1746,7 @@ bool rtl_hal_pwrseqcmdparsing(struct rtl_priv *rtlpriv, u8 cut_version, u8 faversion, u8 interface_type, struct wlan_pwr_cfg pwrcfgcmd[]) { - struct wlan_pwr_cfg cfg_cmd = {0}; + struct wlan_pwr_cfg cfg_cmd; bool polling_bit = false; u32 ary_idx = 0; u8 value = 0; From 6c6e253113123189c06ff7433c67d917cc9f68df Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Nov 2017 09:26:22 +0000 Subject: [PATCH 1969/2177] iwlegacy: remove redundant pointer sta_priv Pointer sta_priv is assigned but never read, hence it is redundant and can be removed. 
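For readers unfamiliar with this analyzer warning, a tiny hedged example (names are made up, not the iwlegacy code) of a dead store and its removal:

#include <stdio.h>

struct sta { int drv_priv; };

/* Before: the local is written but never read again -- the "dead store"
 * clang reports as "Value stored to '...' is never read".
 */
static int rate_before(struct sta *s)
{
	int sta_priv;

	sta_priv = s->drv_priv;	/* never used afterwards; the patch deletes it */
	return 0;
}

/* After: the redundant local is gone and behaviour is unchanged. */
static int rate_after(struct sta *s)
{
	(void)s;
	return 0;
}

int main(void)
{
	struct sta s = { 7 };

	return rate_before(&s) + rate_after(&s);
}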
Cleans up clang warning: drivers/net/wireless/intel/iwlegacy/4965-rs.c:2163:2: warning: Value stored to 'sta_priv' is never read Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlegacy/4965-rs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c index c055f6da11c6..365a4187fc37 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c @@ -2154,13 +2154,11 @@ il4965_rs_initialize_lq(struct il_priv *il, struct ieee80211_conf *conf, u8 use_green; u8 active_tbl = 0; u8 valid_tx_ant; - struct il_station_priv *sta_priv; if (!sta || !lq_sta) return; use_green = il4965_rs_use_green(il, sta); - sta_priv = (void *)sta->drv_priv; i = lq_sta->last_txrate_idx; From 9b741b2a3148250e652f9dfc1a5bbc4410ec1a57 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 3 Nov 2017 13:45:28 +0000 Subject: [PATCH 1970/2177] orinoco_usb: remove redundant pointer dev The pointer dev is assigned but never read, hence it is redundant and can be removed. Cleans up clang warning: drivers/net/wireless/intersil/orinoco/orinoco_usb.c:1468:2: warning: Value stored to 'dev' is never read Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 56f6e3b71f48..501180584b4b 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1457,7 +1457,6 @@ static void ezusb_bulk_in_callback(struct urb *urb) static inline void ezusb_delete(struct ezusb_priv *upriv) { - struct net_device *dev; struct list_head *item; struct list_head *tmp_item; unsigned long flags; @@ -1465,7 +1464,6 @@ static inline void ezusb_delete(struct ezusb_priv *upriv) BUG_ON(in_interrupt()); BUG_ON(!upriv); - dev = upriv->dev; mutex_lock(&upriv->mtx); upriv->udev = NULL; /* No timer will be rearmed from here */ From 03e40f1e76808886f452b61761a292ddad25f43f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 7 Nov 2017 18:56:30 +0000 Subject: [PATCH 1971/2177] zd1201: remove unused variable framelen Variable framelen is assigned but never read, hence it is redundant and can be removed. Cleans up clang warning: drivers/net/wireless/zydas/zd1201.c:234:3: warning: Value stored to 'framelen' is never read Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/zydas/zd1201.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/zydas/zd1201.c b/drivers/net/wireless/zydas/zd1201.c index 581e8577a221..253403899fe9 100644 --- a/drivers/net/wireless/zydas/zd1201.c +++ b/drivers/net/wireless/zydas/zd1201.c @@ -230,8 +230,7 @@ static void zd1201_usbrx(struct urb *urb) /* Info frame */ if (type == ZD1201_PACKET_INQUIRE) { int i = 0; - unsigned short infotype, framelen, copylen; - framelen = le16_to_cpu(*(__le16*)&data[4]); + unsigned short infotype, copylen; infotype = le16_to_cpu(*(__le16*)&data[6]); if (infotype == ZD1201_INF_LINKSTATUS) { From 24a9332a58b7f41a0d36c35a2c6897242bffdbc0 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:43 -0500 Subject: [PATCH 1972/2177] net: dsa: constify cpu_dp member of dsa_port A DSA port has a dedicated CPU port assigned to it, stored in the cpu_dp member. 
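Since that reference is only meant to be read (as the message goes on to say), turning the member into a pointer-to-const lets the compiler reject any write through it. A reduced sketch with made-up structure names, not the real include/net/dsa.h definitions:

/* Simplified stand-ins for dsa_port; the real layout differs. */
struct cpu_port {
	int upstream_traffic;
};

struct user_port {
	/* Pointer-to-const: a user port may look at the CPU port it is
	 * wired to, but cannot modify it through this member.
	 */
	const struct cpu_port *cpu_dp;
};

static int user_port_upstream(const struct user_port *p)
{
	return p->cpu_dp->upstream_traffic;	/* reading is fine */
}

static void user_port_bad(struct user_port *p)
{
	/* Writing through the const pointer fails at build time, e.g.
	 * "assignment of member ... in read-only object" (exact wording
	 * varies by compiler):
	 *
	 * p->cpu_dp->upstream_traffic = 0;
	 */
}
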
It is not meant to be modified by a port, thus make it const. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- net/dsa/slave.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index e54332968417..2a8613b5a23d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -190,7 +190,7 @@ struct dsa_port { struct dsa_switch *ds; unsigned int index; const char *name; - struct dsa_port *cpu_dp; + const struct dsa_port *cpu_dp; struct device_node *dn; unsigned int ageing_time; u8 stp_state; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 814ced75a0cc..cc7fe47dd4bf 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1147,7 +1147,7 @@ static void dsa_slave_notify(struct net_device *dev, unsigned long val) int dsa_slave_create(struct dsa_port *port) { - struct dsa_port *cpu_dp = port->cpu_dp; + const struct dsa_port *cpu_dp = port->cpu_dp; struct net_device *master = cpu_dp->master; struct dsa_switch *ds = port->ds; const char *name = port->name; From f070464cf000131928b2c3fd592efd1946610eea Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:44 -0500 Subject: [PATCH 1973/2177] net: dsa: setup and teardown default CPU port The dsa_dst_parse function called just before dsa_dst_apply does not parse the tree but does only one thing: it assigns the default CPU port to dst->cpu_dp and to each user ports. This patch simplifies this by calling a dsa_tree_setup_default_cpu function at the beginning of dsa_dst_apply directly. A dsa_port_is_user helper is added for convenience. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 153 ++++++++++++++++++++++--------------------------- 1 file changed, 68 insertions(+), 85 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 283104e5ca6a..0a63a2119cd0 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -112,6 +112,11 @@ static bool dsa_port_is_cpu(struct dsa_port *port) return port->type == DSA_PORT_TYPE_CPU; } +static bool dsa_port_is_user(struct dsa_port *dp) +{ + return dp->type == DSA_PORT_TYPE_USER; +} + static bool dsa_ds_find_port_dn(struct dsa_switch *ds, struct device_node *port) { @@ -218,6 +223,64 @@ static int dsa_dst_complete(struct dsa_switch_tree *dst) return 0; } +static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) +{ + struct dsa_switch *ds; + struct dsa_port *dp; + int device, port; + + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) + continue; + + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + if (dsa_port_is_cpu(dp)) + return dp; + } + } + + return NULL; +} + +static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) +{ + struct dsa_switch *ds; + struct dsa_port *dp; + int device, port; + + /* DSA currently only supports a single CPU port */ + dst->cpu_dp = dsa_tree_find_first_cpu(dst); + if (!dst->cpu_dp) { + pr_warn("Tree has no master device\n"); + return -EINVAL; + } + + /* Assign the default CPU port to all ports of the fabric */ + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) + continue; + + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + if (dsa_port_is_user(dp)) + dp->cpu_dp = dst->cpu_dp; + } + } + + return 0; +} + +static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) +{ + /* DSA currently only supports a single CPU port */ + dst->cpu_dp = NULL; 
+} + static int dsa_dsa_port_apply(struct dsa_port *port) { struct dsa_switch *ds = port->ds; @@ -412,6 +475,10 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) u32 index; int err; + err = dsa_tree_setup_default_cpu(dst); + if (err) + return err; + for (index = 0; index < DSA_MAX_SWITCHES; index++) { ds = dst->ds[index]; if (!ds) @@ -464,7 +531,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - dst->cpu_dp = NULL; + dsa_tree_teardown_default_cpu(dst); pr_info("DSA: tree %d unapplied\n", dst->index); dst->applied = false; @@ -532,86 +599,6 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) return 0; } -static int dsa_cpu_parse(struct dsa_port *port, u32 index, - struct dsa_switch_tree *dst, - struct dsa_switch *ds) -{ - if (!dst->cpu_dp) - dst->cpu_dp = port; - - return 0; -} - -static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) -{ - struct dsa_port *port; - u32 index; - int err; - - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port) || - dsa_port_is_dsa(port)) - continue; - - if (dsa_port_is_cpu(port)) { - err = dsa_cpu_parse(port, index, dst, ds); - if (err) - return err; - } - - } - - pr_info("DSA: switch %d %d parsed\n", dst->index, ds->index); - - return 0; -} - -static int dsa_dst_parse(struct dsa_switch_tree *dst) -{ - struct dsa_switch *ds; - struct dsa_port *dp; - u32 index; - int port; - int err; - - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; - - err = dsa_ds_parse(dst, ds); - if (err) - return err; - } - - if (!dst->cpu_dp) { - pr_warn("Tree has no master device\n"); - return -EINVAL; - } - - /* Assign the default CPU port to all ports of the fabric */ - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; - - for (port = 0; port < ds->num_ports; port++) { - dp = &ds->ports[port]; - if (!dsa_port_is_valid(dp) || - dsa_port_is_dsa(dp) || - dsa_port_is_cpu(dp)) - continue; - - dp->cpu_dp = dst->cpu_dp; - } - } - - pr_info("DSA: tree %d parsed\n", dst->index); - - return 0; -} - static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) { struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); @@ -810,10 +797,6 @@ static int _dsa_register_switch(struct dsa_switch *ds) return -EINVAL; } - err = dsa_dst_parse(dst); - if (err) - goto out_del_dst; - err = dsa_dst_apply(dst); if (err) { dsa_dst_unapply(dst); From 17a22fcfc84a422d98a0f54e67d4ee8ee3875849 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:45 -0500 Subject: [PATCH 1974/2177] net: dsa: setup and teardown master device Add DSA helpers to setup and teardown a master net device wired to its CPU port. This centralizes the dsa_ptr assignment. This also makes the master ethtool helpers static at the same time. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 36 +++++++++++++++++++----------------- net/dsa/dsa_priv.h | 4 ++-- net/dsa/legacy.c | 20 ++------------------ net/dsa/master.c | 30 ++++++++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 39 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 0a63a2119cd0..c9b50339fcac 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -469,6 +469,23 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) } +static int dsa_tree_setup_master(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp = dst->cpu_dp; + struct net_device *master = cpu_dp->master; + + /* DSA currently supports a single pair of CPU port and master device */ + return dsa_master_setup(master, cpu_dp); +} + +static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp = dst->cpu_dp; + struct net_device *master = cpu_dp->master; + + return dsa_master_teardown(master); +} + static int dsa_dst_apply(struct dsa_switch_tree *dst) { struct dsa_switch *ds; @@ -489,14 +506,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - /* If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. - */ - wmb(); - dst->cpu_dp->master->dsa_ptr = dst->cpu_dp; - - err = dsa_master_ethtool_setup(dst->cpu_dp->master); + err = dsa_tree_setup_master(dst); if (err) return err; @@ -513,15 +523,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) if (!dst->applied) return; - dsa_master_ethtool_restore(dst->cpu_dp->master); - - dst->cpu_dp->master->dsa_ptr = NULL; - - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); + dsa_tree_teardown_master(dst); for (index = 0; index < DSA_MAX_SWITCHES; index++) { ds = dst->ds[index]; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 253a613c40cd..bb0218c1b570 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -108,8 +108,8 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], const unsigned char *addr, u16 vid); /* master.c */ -int dsa_master_ethtool_setup(struct net_device *dev); -void dsa_master_ethtool_restore(struct net_device *dev); +int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); +void dsa_master_teardown(struct net_device *dev); static inline struct net_device *dsa_master_find_slave(struct net_device *dev, int device, int port) diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 0511fe2feff7..4863e3e398b6 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -593,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, if (!configured) return -EPROBE_DEFER; - /* - * If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. 
- */ - wmb(); - dev->dsa_ptr = dst->cpu_dp; - - return dsa_master_ethtool_setup(dst->cpu_dp->master); + return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp); } static int dsa_probe(struct platform_device *pdev) @@ -666,15 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - dsa_master_ethtool_restore(dst->cpu_dp->master); - - dst->cpu_dp->master->dsa_ptr = NULL; - - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); + dsa_master_teardown(dst->cpu_dp->master); for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; diff --git a/net/dsa/master.c b/net/dsa/master.c index 5f3f57e372e0..00589147f042 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -85,7 +85,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, } } -int dsa_master_ethtool_setup(struct net_device *dev) +static int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch *ds = cpu_dp->ds; @@ -108,10 +108,36 @@ int dsa_master_ethtool_setup(struct net_device *dev) return 0; } -void dsa_master_ethtool_restore(struct net_device *dev) +static void dsa_master_ethtool_teardown(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; dev->ethtool_ops = cpu_dp->orig_ethtool_ops; cpu_dp->orig_ethtool_ops = NULL; } + +int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) +{ + /* If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. + */ + wmb(); + + dev->dsa_ptr = cpu_dp; + + return dsa_master_ethtool_setup(dev); +} + +void dsa_master_teardown(struct net_device *dev) +{ + dsa_master_ethtool_teardown(dev); + + dev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); +} From ec15dd4269d0cbf947c9a2dfdcf08a917098fab1 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:46 -0500 Subject: [PATCH 1975/2177] net: dsa: setup and teardown tree This commit provides better scope for the DSA tree setup and teardown functions. It renames the "applied" bool to "setup" and print a message when the tree is setup, as it is done during teardown. At the same time, check dst->setup in dsa_tree_setup, where it is set to true. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- net/dsa/dsa2.c | 32 ++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 2a8613b5a23d..6c239257309b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -122,7 +122,7 @@ struct dsa_switch_tree { struct kref refcount; /* Has this tree been applied to the hardware? 
*/ - bool applied; + bool setup; /* * Configuration data for the platform device that owns diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c9b50339fcac..1a8df0a177b5 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -486,12 +486,18 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) return dsa_master_teardown(master); } -static int dsa_dst_apply(struct dsa_switch_tree *dst) +static int dsa_tree_setup(struct dsa_switch_tree *dst) { struct dsa_switch *ds; u32 index; int err; + if (dst->setup) { + pr_err("DSA: tree %d already setup! Disjoint trees?\n", + dst->index); + return -EEXIST; + } + err = dsa_tree_setup_default_cpu(dst); if (err) return err; @@ -510,17 +516,19 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) if (err) return err; - dst->applied = true; + dst->setup = true; + + pr_info("DSA: tree %d setup\n", dst->index); return 0; } -static void dsa_dst_unapply(struct dsa_switch_tree *dst) +static void dsa_tree_teardown(struct dsa_switch_tree *dst) { struct dsa_switch *ds; u32 index; - if (!dst->applied) + if (!dst->setup) return; dsa_tree_teardown_master(dst); @@ -535,8 +543,9 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_tree_teardown_default_cpu(dst); - pr_info("DSA: tree %d unapplied\n", dst->index); - dst->applied = false; + pr_info("DSA: tree %d torn down\n", dst->index); + + dst->setup = false; } static void dsa_tree_remove_switch(struct dsa_switch_tree *dst, @@ -794,14 +803,9 @@ static int _dsa_register_switch(struct dsa_switch *ds) if (err == 1) return 0; - if (dst->applied) { - pr_info("DSA: Disjoint trees?\n"); - return -EINVAL; - } - - err = dsa_dst_apply(dst); + err = dsa_tree_setup(dst); if (err) { - dsa_dst_unapply(dst); + dsa_tree_teardown(dst); goto out_del_dst; } @@ -852,7 +856,7 @@ static void _dsa_unregister_switch(struct dsa_switch *ds) struct dsa_switch_tree *dst = ds->dst; unsigned int index = ds->index; - dsa_dst_unapply(dst); + dsa_tree_teardown(dst); dsa_tree_remove_switch(dst, index); } From 1f08f9e9cbc63d92c246f40ae4221cba86ef8ec6 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:47 -0500 Subject: [PATCH 1976/2177] net: dsa: setup and teardown switches This patches brings no functional changes. It removes the unused dst argument from the dsa_ds_apply and dsa_ds_unapply functions, rename them to dsa_switch_setup and dsa_switch_teardown for a more explicit scope. This clarifies the steps of the setup or teardown of a switch fabric. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 62 +++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 1a8df0a177b5..2b3b2a86791d 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -362,7 +362,7 @@ static void dsa_user_port_unapply(struct dsa_port *port) } } -static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static int dsa_switch_setup(struct dsa_switch *ds) { struct dsa_port *port; u32 index; @@ -433,7 +433,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) return 0; } -static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static void dsa_switch_teardown(struct dsa_switch *ds) { struct dsa_port *port; u32 index; @@ -469,6 +469,39 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) } +static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) +{ + struct dsa_switch *ds; + int device; + int err; + + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) + continue; + + err = dsa_switch_setup(ds); + if (err) + return err; + } + + return 0; +} + +static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) +{ + struct dsa_switch *ds; + int device; + + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) + continue; + + dsa_switch_teardown(ds); + } +} + static int dsa_tree_setup_master(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp = dst->cpu_dp; @@ -488,8 +521,6 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) static int dsa_tree_setup(struct dsa_switch_tree *dst) { - struct dsa_switch *ds; - u32 index; int err; if (dst->setup) { @@ -502,15 +533,9 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) return err; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; - - err = dsa_ds_apply(dst, ds); - if (err) - return err; - } + err = dsa_tree_setup_switches(dst); + if (err) + return err; err = dsa_tree_setup_master(dst); if (err) @@ -525,21 +550,12 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) static void dsa_tree_teardown(struct dsa_switch_tree *dst) { - struct dsa_switch *ds; - u32 index; - if (!dst->setup) return; dsa_tree_teardown_master(dst); - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; - - dsa_ds_unapply(dst, ds); - } + dsa_tree_teardown_switches(dst); dsa_tree_teardown_default_cpu(dst); From 1d27732f411d57f0168af30be2adb504b8b7749d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:48 -0500 Subject: [PATCH 1977/2177] net: dsa: setup and teardown ports The dsa_dsa_port_apply and dsa_cpu_port_apply functions do exactly the same. The dsa_user_port_apply function does not try to register a fixed link but try to create a slave. This commit factorizes and scopes all that in two convenient dsa_port_setup and dsa_port_teardown functions. It won't hurt to register a devlink_port for unused port as well. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 175 +++++++++++++++++-------------------------------- 1 file changed, 60 insertions(+), 115 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 2b3b2a86791d..676c0bc943dd 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -281,91 +281,65 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) dst->cpu_dp = NULL; } -static int dsa_dsa_port_apply(struct dsa_port *port) +static int dsa_port_setup(struct dsa_port *dp) { - struct dsa_switch *ds = port->ds; + struct dsa_switch *ds = dp->ds; int err; - err = dsa_port_fixed_link_register_of(port); - if (err) { - dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", - port->index, err); - return err; - } + memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - - return devlink_port_register(ds->devlink, &port->devlink_port, - port->index); -} - -static void dsa_dsa_port_unapply(struct dsa_port *port) -{ - devlink_port_unregister(&port->devlink_port); - dsa_port_fixed_link_unregister_of(port); -} - -static int dsa_cpu_port_apply(struct dsa_port *port) -{ - struct dsa_switch *ds = port->ds; - int err; - - err = dsa_port_fixed_link_register_of(port); - if (err) { - dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", - port->index, err); - return err; - } - - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - err = devlink_port_register(ds->devlink, &port->devlink_port, - port->index); - return err; -} - -static void dsa_cpu_port_unapply(struct dsa_port *port) -{ - devlink_port_unregister(&port->devlink_port); - dsa_port_fixed_link_unregister_of(port); -} - -static int dsa_user_port_apply(struct dsa_port *port) -{ - struct dsa_switch *ds = port->ds; - int err; - - err = dsa_slave_create(port); - if (err) { - dev_warn(ds->dev, "Failed to create slave %d: %d\n", - port->index, err); - port->slave = NULL; - return err; - } - - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - err = devlink_port_register(ds->devlink, &port->devlink_port, - port->index); + err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index); if (err) return err; - devlink_port_type_eth_set(&port->devlink_port, port->slave); + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + break; + case DSA_PORT_TYPE_CPU: + case DSA_PORT_TYPE_DSA: + err = dsa_port_fixed_link_register_of(dp); + if (err) { + dev_err(ds->dev, "failed to register fixed link for port %d.%d\n", + ds->index, dp->index); + return err; + } + + break; + case DSA_PORT_TYPE_USER: + err = dsa_slave_create(dp); + if (err) + dev_err(ds->dev, "failed to create slave for port %d.%d\n", + ds->index, dp->index); + else + devlink_port_type_eth_set(&dp->devlink_port, dp->slave); + break; + } return 0; } -static void dsa_user_port_unapply(struct dsa_port *port) +static void dsa_port_teardown(struct dsa_port *dp) { - devlink_port_unregister(&port->devlink_port); - if (port->slave) { - dsa_slave_destroy(port->slave); - port->slave = NULL; + devlink_port_unregister(&dp->devlink_port); + + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + break; + case DSA_PORT_TYPE_CPU: + case DSA_PORT_TYPE_DSA: + dsa_port_fixed_link_unregister_of(dp); + break; + case DSA_PORT_TYPE_USER: + if (dp->slave) { + dsa_slave_destroy(dp->slave); + dp->slave = NULL; + } + break; } } static int dsa_switch_setup(struct dsa_switch *ds) { - struct dsa_port *port; - u32 index; int err; /* Initialize ds->phys_mii_mask before registering the slave MDIO bus @@ -406,56 +380,11 @@ static int 
dsa_switch_setup(struct dsa_switch *ds) return err; } - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) - continue; - - if (dsa_port_is_dsa(port)) { - err = dsa_dsa_port_apply(port); - if (err) - return err; - continue; - } - - if (dsa_port_is_cpu(port)) { - err = dsa_cpu_port_apply(port); - if (err) - return err; - continue; - } - - err = dsa_user_port_apply(port); - if (err) - continue; - } - return 0; } static void dsa_switch_teardown(struct dsa_switch *ds) { - struct dsa_port *port; - u32 index; - - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) - continue; - - if (dsa_port_is_dsa(port)) { - dsa_dsa_port_unapply(port); - continue; - } - - if (dsa_port_is_cpu(port)) { - dsa_cpu_port_unapply(port); - continue; - } - - dsa_user_port_unapply(port); - } - if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); @@ -472,7 +401,8 @@ static void dsa_switch_teardown(struct dsa_switch *ds) static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) { struct dsa_switch *ds; - int device; + struct dsa_port *dp; + int device, port; int err; for (device = 0; device < DSA_MAX_SWITCHES; device++) { @@ -483,6 +413,14 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) err = dsa_switch_setup(ds); if (err) return err; + + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + err = dsa_port_setup(dp); + if (err) + return err; + } } return 0; @@ -491,13 +429,20 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) { struct dsa_switch *ds; - int device; + struct dsa_port *dp; + int device, port; for (device = 0; device < DSA_MAX_SWITCHES; device++) { ds = dst->ds[device]; if (!ds) continue; + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + dsa_port_teardown(dp); + } + dsa_switch_teardown(ds); } } From f163da8853aa9d8060157a96ed314299b87ba070 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:49 -0500 Subject: [PATCH 1978/2177] net: dsa: add find port by node helper Instead of having two dsa_ds_find_port_dn (which returns a bool) and dsa_dst_find_port_dn (which returns a switch) functions, provide a more explicit dsa_tree_find_port_by_node function which returns a matching port. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 676c0bc943dd..0f6f8c1701f9 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -117,30 +117,24 @@ static bool dsa_port_is_user(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_USER; } -static bool dsa_ds_find_port_dn(struct dsa_switch *ds, - struct device_node *port) -{ - u32 index; - - for (index = 0; index < ds->num_ports; index++) - if (ds->ports[index].dn == port) - return true; - return false; -} - -static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst, - struct device_node *port) +static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, + struct device_node *dn) { struct dsa_switch *ds; - u32 index; + struct dsa_port *dp; + int device, port; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; if (!ds) continue; - if (dsa_ds_find_port_dn(ds, port)) - return ds; + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + if (dp->dn == dn) + return dp; + } } return NULL; @@ -154,18 +148,21 @@ static int dsa_port_complete(struct dsa_switch_tree *dst, struct device_node *link; int index; struct dsa_switch *dst_ds; + struct dsa_port *link_dp; for (index = 0;; index++) { link = of_parse_phandle(port->dn, "link", index); if (!link) break; - dst_ds = dsa_dst_find_port_dn(dst, link); + link_dp = dsa_tree_find_port_by_node(dst, link); of_node_put(link); - if (!dst_ds) + if (!link_dp) return 1; + dst_ds = link_dp->ds; + src_ds->rtable[dst_ds->index] = src_port; } From c52866655558e5fc87ceae8aac528a7e410c8a77 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:50 -0500 Subject: [PATCH 1979/2177] net: dsa: use of_for_each_phandle The OF code provides a of_for_each_phandle() helper to iterate over phandles. Use it instead of arbitrary iterating ourselves over the list of phandles hanging to the "link" property of the port's device node. The of_phandle_iterator_next() helper calls of_node_put() itself on it.node. Thus We must only do it ourselves if we break the loop. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 0f6f8c1701f9..25ed41262ead 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -145,21 +145,18 @@ static int dsa_port_complete(struct dsa_switch_tree *dst, struct dsa_port *port, u32 src_port) { - struct device_node *link; - int index; + struct device_node *dn = port->dn; + struct of_phandle_iterator it; struct dsa_switch *dst_ds; struct dsa_port *link_dp; + int err; - for (index = 0;; index++) { - link = of_parse_phandle(port->dn, "link", index); - if (!link) - break; - - link_dp = dsa_tree_find_port_by_node(dst, link); - of_node_put(link); - - if (!link_dp) + of_for_each_phandle(&it, err, dn, "link", NULL, 0) { + link_dp = dsa_tree_find_port_by_node(dst, it.node); + if (!link_dp) { + of_node_put(it.node); return 1; + } dst_ds = link_dp->ds; From 34c09a8916fb52aac948dfc861b33c0b3b37ac29 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:51 -0500 Subject: [PATCH 1980/2177] net: dsa: setup routing table The *_complete() functions take too much arguments to do only one thing: they try to fetch the dsa_port structures corresponding to device nodes under the "link" list property of DSA ports, and use them to setup the routing table of switches. This patch simplifies them by providing instead simpler dsa_{port,switch,tree}_setup_routing_table functions which return a boolean value, true if the tree is complete. dsa_tree_setup_routing_table is called inside dsa_tree_setup which simplifies the switch registering function as well. A switch's routing table is now initialized before its setup. This also makes dsa_port_is_valid obsolete, remove it. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 108 +++++++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 71 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 25ed41262ead..44d26b5977cd 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -94,14 +94,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst) kref_put(&dst->refcount, dsa_tree_release); } -/* For platform data configurations, we need to have a valid name argument to - * differentiate a disabled port from an enabled one - */ -static bool dsa_port_is_valid(struct dsa_port *port) -{ - return port->type != DSA_PORT_TYPE_UNUSED; -} - static bool dsa_port_is_dsa(struct dsa_port *port) { return port->type == DSA_PORT_TYPE_DSA; @@ -140,14 +132,12 @@ static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, return NULL; } -static int dsa_port_complete(struct dsa_switch_tree *dst, - struct dsa_switch *src_ds, - struct dsa_port *port, - u32 src_port) +static bool dsa_port_setup_routing_table(struct dsa_port *dp) { - struct device_node *dn = port->dn; + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; + struct device_node *dn = dp->dn; struct of_phandle_iterator it; - struct dsa_switch *dst_ds; struct dsa_port *link_dp; int err; @@ -155,66 +145,54 @@ static int dsa_port_complete(struct dsa_switch_tree *dst, link_dp = dsa_tree_find_port_by_node(dst, it.node); if (!link_dp) { of_node_put(it.node); - return 1; + return false; } - dst_ds = link_dp->ds; - - src_ds->rtable[dst_ds->index] = src_port; + ds->rtable[link_dp->ds->index] = dp->index; } - return 0; + return true; } -/* A switch is complete if all the DSA ports phandles point to ports - * known in the tree. 
A return value of 1 means the tree is not - * complete. This is not an error condition. A value of 0 is - * success. - */ -static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static bool dsa_switch_setup_routing_table(struct dsa_switch *ds) { - struct dsa_port *port; - u32 index; - int err; + bool complete = true; + struct dsa_port *dp; + int i; - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) - continue; + for (i = 0; i < DSA_MAX_SWITCHES; i++) + ds->rtable[i] = DSA_RTABLE_NONE; - if (!dsa_port_is_dsa(port)) - continue; + for (i = 0; i < ds->num_ports; i++) { + dp = &ds->ports[i]; - err = dsa_port_complete(dst, ds, port, index); - if (err != 0) - return err; + if (dsa_port_is_dsa(dp)) { + complete = dsa_port_setup_routing_table(dp); + if (!complete) + break; + } } - return 0; + return complete; } -/* A tree is complete if all the DSA ports phandles point to ports - * known in the tree. A return value of 1 means the tree is not - * complete. This is not an error condition. A value of 0 is - * success. - */ -static int dsa_dst_complete(struct dsa_switch_tree *dst) +static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst) { struct dsa_switch *ds; - u32 index; - int err; + bool complete = true; + int device; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; if (!ds) continue; - err = dsa_ds_complete(dst, ds); - if (err != 0) - return err; + complete = dsa_switch_setup_routing_table(ds); + if (!complete) + break; } - return 0; + return complete; } static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) @@ -460,6 +438,7 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) static int dsa_tree_setup(struct dsa_switch_tree *dst) { + bool complete; int err; if (dst->setup) { @@ -468,6 +447,10 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) return -EEXIST; } + complete = dsa_tree_setup_routing_table(dst); + if (!complete) + return 0; + err = dsa_tree_setup_default_cpu(dst); if (err) return err; @@ -727,7 +710,7 @@ static int _dsa_register_switch(struct dsa_switch *ds) struct device_node *np = ds->dev->of_node; struct dsa_switch_tree *dst; unsigned int index; - int i, err; + int err; if (np) err = dsa_switch_parse_of(ds, np); @@ -742,33 +725,16 @@ static int _dsa_register_switch(struct dsa_switch *ds) index = ds->index; dst = ds->dst; - /* Initialize the routing table */ - for (i = 0; i < DSA_MAX_SWITCHES; ++i) - ds->rtable[i] = DSA_RTABLE_NONE; - err = dsa_tree_add_switch(dst, ds); if (err) return err; - err = dsa_dst_complete(dst); - if (err < 0) - goto out_del_dst; - - /* Not all switches registered yet */ - if (err == 1) - return 0; - err = dsa_tree_setup(dst); if (err) { dsa_tree_teardown(dst); - goto out_del_dst; + dsa_tree_remove_switch(dst, index); } - return 0; - -out_del_dst: - dsa_tree_remove_switch(dst, index); - return err; } From 308173546ac4342103541e8d4e4ce83d1a5e7eba Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:52 -0500 Subject: [PATCH 1981/2177] net: dsa: setup a tree when adding a switch to it Now that the tree setup is centralized, we can simplify the code a bit more by setting up or tearing down the tree directly when adding or removing a switch to/from it. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 44d26b5977cd..3db50e68640e 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -489,6 +489,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) static void dsa_tree_remove_switch(struct dsa_switch_tree *dst, unsigned int index) { + dsa_tree_teardown(dst); + dst->ds[index] = NULL; dsa_tree_put(dst); } @@ -497,6 +499,7 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst, struct dsa_switch *ds) { unsigned int index = ds->index; + int err; if (dst->ds[index]) return -EBUSY; @@ -504,7 +507,11 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst, dsa_tree_get(dst); dst->ds[index] = ds; - return 0; + err = dsa_tree_setup(dst); + if (err) + dsa_tree_remove_switch(dst, index); + + return err; } static int dsa_port_parse_user(struct dsa_port *dp, const char *name) @@ -704,12 +711,17 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) return dsa_switch_parse_ports(ds, cd); } +static int dsa_switch_add(struct dsa_switch *ds) +{ + struct dsa_switch_tree *dst = ds->dst; + + return dsa_tree_add_switch(dst, ds); +} + static int _dsa_register_switch(struct dsa_switch *ds) { struct dsa_chip_data *pdata = ds->dev->platform_data; struct device_node *np = ds->dev->of_node; - struct dsa_switch_tree *dst; - unsigned int index; int err; if (np) @@ -722,20 +734,7 @@ static int _dsa_register_switch(struct dsa_switch *ds) if (err) return err; - index = ds->index; - dst = ds->dst; - - err = dsa_tree_add_switch(dst, ds); - if (err) - return err; - - err = dsa_tree_setup(dst); - if (err) { - dsa_tree_teardown(dst); - dsa_tree_remove_switch(dst, index); - } - - return err; + return dsa_switch_add(ds); } struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) @@ -777,8 +776,6 @@ static void _dsa_unregister_switch(struct dsa_switch *ds) struct dsa_switch_tree *dst = ds->dst; unsigned int index = ds->index; - dsa_tree_teardown(dst); - dsa_tree_remove_switch(dst, index); } From b4fbb347fe4cd7988d0f9453a7e3ab0cd1b4a75a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 6 Nov 2017 16:11:53 -0500 Subject: [PATCH 1982/2177] net: dsa: rename probe and remove switch functions This commit brings no functional changes. It gets rid of the underscore prefixed _dsa_register_switch and _dsa_unregister_switch functions in favor of dsa_switch_probe() which parses and adds a switch to a tree and dsa_switch_remove() which removes a switch from a tree. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 3db50e68640e..fd54a8e17986 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -718,7 +718,7 @@ static int dsa_switch_add(struct dsa_switch *ds) return dsa_tree_add_switch(dst, ds); } -static int _dsa_register_switch(struct dsa_switch *ds) +static int dsa_switch_probe(struct dsa_switch *ds) { struct dsa_chip_data *pdata = ds->dev->platform_data; struct device_node *np = ds->dev->of_node; @@ -764,14 +764,14 @@ int dsa_register_switch(struct dsa_switch *ds) int err; mutex_lock(&dsa2_mutex); - err = _dsa_register_switch(ds); + err = dsa_switch_probe(ds); mutex_unlock(&dsa2_mutex); return err; } EXPORT_SYMBOL_GPL(dsa_register_switch); -static void _dsa_unregister_switch(struct dsa_switch *ds) +static void dsa_switch_remove(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; unsigned int index = ds->index; @@ -782,7 +782,7 @@ static void _dsa_unregister_switch(struct dsa_switch *ds) void dsa_unregister_switch(struct dsa_switch *ds) { mutex_lock(&dsa2_mutex); - _dsa_unregister_switch(ds); + dsa_switch_remove(ds); mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_unregister_switch); From 2b52a283907b7f6d439444bfdde7f2241ddde735 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 10 Sep 2017 17:51:10 +0300 Subject: [PATCH 1983/2177] net/mlx5e: Rename VLAN related variables and functions Rename VLAN related symbols to better reflect the fact that they are associated to C-tag VLAN. Signed-off-by: Gal Pressman Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 ++-- .../net/ethernet/mellanox/mlx5/core/en_fs.c | 56 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 8 +-- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 751f62cae969..eba5db75214b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -655,12 +655,12 @@ struct mlx5e_tc_table { struct mlx5e_vlan_table { struct mlx5e_flow_table ft; - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct mlx5_flow_handle *active_vlans_rule[VLAN_N_VID]; + unsigned long active_cvlans[BITS_TO_LONGS(VLAN_N_VID)]; + struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; struct mlx5_flow_handle *untagged_rule; struct mlx5_flow_handle *any_cvlan_rule; struct mlx5_flow_handle *any_svlan_rule; - bool filter_disabled; + bool cvlan_filter_disabled; }; struct mlx5e_l2_table { @@ -887,8 +887,8 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); -void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); -void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); +void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); void mlx5e_timestamp_set(struct mlx5e_priv *priv); struct mlx5e_redirect_rqt_param { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index f0d11ad05ed2..53901537778b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -118,7 +118,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) int i; list_size = 0; - 
for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) + for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) list_size++; max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); @@ -135,7 +135,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) return -ENOMEM; i = 0; - for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) { + for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; @@ -154,7 +154,7 @@ enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, - MLX5E_VLAN_RULE_TYPE_MATCH_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, }; static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, @@ -190,8 +190,8 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, outer_headers.svlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); break; - default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - rule_p = &priv->fs.vlan.active_vlans_rule[vid]; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ + rule_p = &priv->fs.vlan.active_cvlans_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); @@ -223,7 +223,7 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, if (!spec) return -ENOMEM; - if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID) mlx5e_vport_context_update_vlans(priv); err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec); @@ -255,11 +255,11 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, priv->fs.vlan.any_svlan_rule = NULL; } break; - case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: mlx5e_vport_context_update_vlans(priv); - if (priv->fs.vlan.active_vlans_rule[vid]) { - mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]); - priv->fs.vlan.active_vlans_rule[vid] = NULL; + if (priv->fs.vlan.active_cvlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]); + priv->fs.vlan.active_cvlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); break; @@ -283,23 +283,23 @@ static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); } -void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) { - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) return; - priv->fs.vlan.filter_disabled = false; + priv->fs.vlan.cvlan_filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } -void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv) { - if (priv->fs.vlan.filter_disabled) + if (priv->fs.vlan.cvlan_filter_disabled) return; - priv->fs.vlan.filter_disabled = true; + priv->fs.vlan.cvlan_filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); @@ -310,9 +310,9 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); - set_bit(vid, priv->fs.vlan.active_vlans); + set_bit(vid, priv->fs.vlan.active_cvlans); - return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + return mlx5e_add_vlan_rule(priv, 
MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); } int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, @@ -320,9 +320,9 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); - clear_bit(vid, priv->fs.vlan.active_vlans); + clear_bit(vid, priv->fs.vlan.active_cvlans); - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); return 0; } @@ -333,11 +333,11 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - if (priv->fs.vlan.filter_disabled && + if (priv->fs.vlan.cvlan_filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) mlx5e_add_any_vid_rules(priv); } @@ -348,11 +348,11 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - if (priv->fs.vlan.filter_disabled && + if (priv->fs.vlan.cvlan_filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) mlx5e_del_any_vid_rules(priv); } @@ -546,7 +546,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) if (enable_promisc) { mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) mlx5e_add_any_vid_rules(priv); } if (enable_allmulti) @@ -561,7 +561,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) if (disable_allmulti) mlx5e_del_l2_flow_rule(priv, &ea->allmulti); if (disable_promisc) { - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) mlx5e_del_any_vid_rules(priv); mlx5e_del_l2_flow_rule(priv, &ea->promisc); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5d5d2e50e4bf..c21aa54122dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3260,14 +3260,14 @@ out: return err; } -static int set_feature_vlan_filter(struct net_device *netdev, bool enable) +static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); if (enable) - mlx5e_enable_vlan_filter(priv); + mlx5e_enable_cvlan_filter(priv); else - mlx5e_disable_vlan_filter(priv); + mlx5e_disable_cvlan_filter(priv); return 0; } @@ -3378,7 +3378,7 @@ static int mlx5e_set_features(struct net_device *netdev, set_feature_lro); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_FILTER, - set_feature_vlan_filter); + set_feature_cvlan_filter); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC, set_feature_tc_num_filters); err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, From 355368d530460aa19b9d4291e2da2c6fd8929c76 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 14 Sep 2017 16:24:19 +0300 Subject: [PATCH 1984/2177] net/mlx5e: Add rollback on add VLAN failure When add VLAN rule fails the active vlan bit 
should be cleared. Fixes: afb736e9330a ("net/mlx5: Ethernet resource handling files") Signed-off-by: Gal Pressman Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 53901537778b..d3d775a93183 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -309,10 +309,15 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid) { struct mlx5e_priv *priv = netdev_priv(dev); + int err; set_bit(vid, priv->fs.vlan.active_cvlans); - return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + if (err) + clear_bit(vid, priv->fs.vlan.active_cvlans); + + return err; } int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, From 375ef2b1f0d0b43b0d36ffdd521637ff59b0c13c Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 17 Sep 2017 13:43:58 +0300 Subject: [PATCH 1985/2177] net: Introduce netdev_*_once functions Extend the net device error logging with netdev_*_once macros. netdev_*_once are the equivalents of the dev_*_once macros which are useful for messages that should only be logged once. Also add netdev_WARN_ONCE, which is the "once" extension for the already existing netdev_WARN macro. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/netdevice.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 30f0f2928808..79518ede3170 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4336,6 +4336,31 @@ void netdev_notice(const struct net_device *dev, const char *format, ...); __printf(2, 3) void netdev_info(const struct net_device *dev, const char *format, ...); +#define netdev_level_once(level, dev, fmt, ...) \ +do { \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) { \ + __print_once = true; \ + netdev_printk(level, dev, fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define netdev_emerg_once(dev, fmt, ...) \ + netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__) +#define netdev_alert_once(dev, fmt, ...) \ + netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__) +#define netdev_crit_once(dev, fmt, ...) \ + netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__) +#define netdev_err_once(dev, fmt, ...) \ + netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__) +#define netdev_warn_once(dev, fmt, ...) \ + netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__) +#define netdev_notice_once(dev, fmt, ...) \ + netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__) +#define netdev_info_once(dev, fmt, ...) \ + netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__) + #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) @@ -4376,6 +4401,10 @@ do { \ WARN(1, "netdevice: %s%s\n" format, netdev_name(dev), \ netdev_reg_state(dev), ##args) +#define netdev_WARN_ONCE(dev, condition, format, arg...) \ + WARN_ONCE(1, "netdevice: %s%s\n" format, netdev_name(dev) \ + netdev_reg_state(dev), ##args) + /* netif printk helpers, similar to netdev_printk */ #define netif_printk(priv, type, level, dev, fmt, args...) 
\ From 03eda9541f351fb289fe18c87cb111f8bfca9837 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 18 Sep 2017 13:09:18 +0300 Subject: [PATCH 1986/2177] net/mlx5e: Declare bitmap using kernel macro Replace explicit declaration of bitmap with DECLARE_BITMAP kernel macro. Signed-off-by: Gal Pressman Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index eba5db75214b..f952796e9e82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -655,7 +655,7 @@ struct mlx5e_tc_table { struct mlx5e_vlan_table { struct mlx5e_flow_table ft; - unsigned long active_cvlans[BITS_TO_LONGS(VLAN_N_VID)]; + DECLARE_BITMAP(active_cvlans, VLAN_N_VID); struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; struct mlx5_flow_handle *untagged_rule; struct mlx5_flow_handle *any_cvlan_rule; From 7d92d580334a18800aaf66aaf2e103271c48bafb Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 10 Sep 2017 10:36:43 +0300 Subject: [PATCH 1987/2177] net/mlx5e: Add 802.1ad VLAN filter steering rules When a user chooses to use 802.1ad VLAN the proper steering rules will be added to the VLAN flow table (matching the specific S-tag VID). Due to current hardware limitation, when using 802.1ad, we must disable C-tag VLAN stripping on the RQs. Signed-off-by: Gal Pressman Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_fs.c | 102 +++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/en_main.c | 21 ++++ 3 files changed, 112 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f952796e9e82..2a0739d07a08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -656,7 +656,9 @@ struct mlx5e_tc_table { struct mlx5e_vlan_table { struct mlx5e_flow_table ft; DECLARE_BITMAP(active_cvlans, VLAN_N_VID); + DECLARE_BITMAP(active_svlans, VLAN_N_VID); struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID]; struct mlx5_flow_handle *untagged_rule; struct mlx5_flow_handle *any_cvlan_rule; struct mlx5_flow_handle *any_svlan_rule; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index d3d775a93183..f771be99329e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -155,6 +155,7 @@ enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, }; static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, @@ -174,6 +175,10 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ rule_p = &priv->fs.vlan.untagged_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); @@ -190,6 +195,16 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, outer_headers.svlan_tag); MLX5_SET(fte_match_param, spec->match_value, 
outer_headers.svlan_tag, 1); break; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + rule_p = &priv->fs.vlan.active_svlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ rule_p = &priv->fs.vlan.active_cvlans_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, @@ -255,6 +270,12 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, priv->fs.vlan.any_svlan_rule = NULL; } break; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + if (priv->fs.vlan.active_svlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]); + priv->fs.vlan.active_svlans_rule[vid] = NULL; + } + break; case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: mlx5e_vport_context_update_vlans(priv); if (priv->fs.vlan.active_cvlans_rule[vid]) { @@ -305,10 +326,8 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } -int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) +static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid) { - struct mlx5e_priv *priv = netdev_priv(dev); int err; set_bit(vid, priv->fs.vlan.active_cvlans); @@ -320,14 +339,48 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, return err; } -int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) +static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid) +{ + struct net_device *netdev = priv->netdev; + int err; + + set_bit(vid, priv->fs.vlan.active_svlans); + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + if (err) { + clear_bit(vid, priv->fs.vlan.active_svlans); + return err; + } + + /* Need to fix some features.. 
*/ + netdev_update_features(netdev); + return err; +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct mlx5e_priv *priv = netdev_priv(dev); - clear_bit(vid, priv->fs.vlan.active_cvlans); + if (be16_to_cpu(proto) == ETH_P_8021Q) + return mlx5e_vlan_rx_add_cvid(priv, vid); + else if (be16_to_cpu(proto) == ETH_P_8021AD) + return mlx5e_vlan_rx_add_svid(priv, vid); - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + return -EOPNOTSUPP; +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (be16_to_cpu(proto) == ETH_P_8021Q) { + clear_bit(vid, priv->fs.vlan.active_cvlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + } else if (be16_to_cpu(proto) == ETH_P_8021AD) { + clear_bit(vid, priv->fs.vlan.active_svlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_update_features(dev); + } return 0; } @@ -342,6 +395,9 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } + for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + if (priv->fs.vlan.cvlan_filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) mlx5e_add_any_vid_rules(priv); @@ -357,6 +413,9 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } + for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + if (priv->fs.vlan.cvlan_filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) mlx5e_del_any_vid_rules(priv); @@ -550,6 +609,9 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; if (enable_promisc) { + if (!priv->channels.params.vlan_strip_disable) + netdev_warn_once(ndev, + "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); if (!priv->fs.vlan.cvlan_filter_disabled) mlx5e_add_any_vid_rules(priv); @@ -1270,13 +1332,15 @@ err_destroy_flow_table: return err; } -#define MLX5E_NUM_VLAN_GROUPS 3 +#define MLX5E_NUM_VLAN_GROUPS 4 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) -#define MLX5E_VLAN_GROUP1_SIZE BIT(1) -#define MLX5E_VLAN_GROUP2_SIZE BIT(0) +#define MLX5E_VLAN_GROUP1_SIZE BIT(12) +#define MLX5E_VLAN_GROUP2_SIZE BIT(1) +#define MLX5E_VLAN_GROUP3_SIZE BIT(0) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ MLX5E_VLAN_GROUP1_SIZE +\ - MLX5E_VLAN_GROUP2_SIZE) + MLX5E_VLAN_GROUP2_SIZE +\ + MLX5E_VLAN_GROUP3_SIZE) static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, int inlen) @@ -1299,7 +1363,8 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -1310,7 +1375,7 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - 
MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -1319,6 +1384,17 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in goto err_destroy_groups; ft->num_groups++; + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + return 0; err_destroy_groups: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c21aa54122dd..59b8a2d62b8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3395,6 +3395,25 @@ static int mlx5e_set_features(struct net_device *netdev, return err ? -EINVAL : 0; } +static netdev_features_t mlx5e_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mutex_lock(&priv->state_lock); + if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) { + /* HW strips the outer C-tag header, this is a problem + * for S-tag traffic. + */ + features &= ~NETIF_F_HW_VLAN_CTAG_RX; + if (!priv->channels.params.vlan_strip_disable) + netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); + } + mutex_unlock(&priv->state_lock); + + return features; +} + static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3872,6 +3891,7 @@ static const struct net_device_ops mlx5e_netdev_ops = { .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, + .ndo_fix_features = mlx5e_fix_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, @@ -4231,6 +4251,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; netdev->priv_flags |= IFF_UNICAST_FLT; From 4382c7b92a1db397874ca62c73aa8b023af6dba8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 10 Sep 2017 13:22:51 +0300 Subject: [PATCH 1988/2177] net/mlx5e: Add 802.1ad VLAN insertion support Report VLAN insertion support for S-tagged packets and add support by choosing the correct VLAN type in the WQE. 
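A condensed sketch of the resulting TX descriptor logic (the helper name is made up for illustration; the statements are taken from the en_tx.c hunk below, where they sit inline in mlx5e_sq_xmit()):

    #include <linux/if_vlan.h>
    #include <linux/mlx5/qp.h>

    /* illustrative helper only: pick C-tag vs S-tag insertion for the WQE */
    static void sketch_wqe_vlan_insert(struct mlx5_wqe_eth_seg *eseg,
                                       const struct sk_buff *skb)
    {
            if (!skb_vlan_tag_present(skb))
                    return;

            eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
            if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
                    eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
            eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
    }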
Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 ++ include/linux/mlx5/qp.h | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 59b8a2d62b8d..c408b7efa42e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4194,6 +4194,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { netdev->hw_features |= NETIF_F_GSO_PARTIAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index de651de35c9b..c62305b214cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -369,6 +369,8 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); } else if (skb_vlan_tag_present(skb)) { eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) + eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); } diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 66d19b611fe4..62af7512dabb 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -221,6 +221,7 @@ enum { }; enum { + MLX5_ETH_WQE_SVLAN = 1 << 0, MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, }; From f24686e878914c260331b1067898a3925b598c6e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 10 Sep 2017 13:49:59 +0300 Subject: [PATCH 1989/2177] net/mlx5e: Add VLAN offloads statistics The following counters are now exposed through ethtool -S: rx[i]_removed_vlan_packets (per channel) rx_removed_vlan_packets tx[i]_added_vlan_packets (per channel) tx_added_vlan_packets rx_removed_vlan_packets: The number of packets that had their outer VLAN header stripped to the CQE by the hardware. tx_added_vlan_packets: The number of packets that had their outer VLAN header inserted by the hardware. 
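The counters follow the driver's usual descriptor pattern: each ethtool string is paired with the offset of a u64 field, so exposing a new statistic is one struct member plus one table entry. A standalone sketch of that pattern (types and names below are illustrative, not the driver's actual definitions):

    #include <stddef.h>

    struct sw_stats {
            unsigned long long tx_added_vlan_packets;
            unsigned long long rx_removed_vlan_packets;
    };

    struct counter_desc {
            const char *name;       /* string reported by ethtool -S */
            size_t offset;          /* where the value lives in sw_stats */
    };

    static const struct counter_desc sw_stats_table[] = {
            { "tx_added_vlan_packets",
              offsetof(struct sw_stats, tx_added_vlan_packets) },
            { "rx_removed_vlan_packets",
              offsetof(struct sw_stats, rx_removed_vlan_packets) },
    };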
Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 ++ 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c408b7efa42e..d2b057a3e512 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -196,6 +196,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_bytes += rq_stats->bytes; s->rx_lro_packets += rq_stats->lro_packets; s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; @@ -224,6 +225,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_tso_bytes += sq_stats->tso_bytes; s->tx_tso_inner_packets += sq_stats->tso_inner_packets; s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_added_vlan_packets += sq_stats->added_vlan_packets; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 6d7df4750e0f..d2b1549056d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -685,9 +685,11 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (likely(netdev->features & NETIF_F_RXHASH)) mlx5e_skb_set_hash(cqe, skb); - if (cqe_has_vlan(cqe)) + if (cqe_has_vlan(cqe)) { __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->vlan_info)); + rq->stats.removed_vlan_packets++; + } skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 8bc30484ecc1..b74ddc7984bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -42,8 +42,10 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, @@ -733,6 +735,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, 
wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, @@ -755,6 +758,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index d094663edd9b..d679e21f686e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -59,8 +59,10 @@ struct mlx5e_sw_stats { u64 tx_tso_bytes; u64 tx_tso_inner_packets; u64 tx_tso_inner_bytes; + u64 tx_added_vlan_packets; u64 rx_lro_packets; u64 rx_lro_bytes; + u64 rx_removed_vlan_packets; u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; @@ -153,6 +155,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 removed_vlan_packets; u64 xdp_drop; u64 xdp_tx; u64 xdp_tx_full; @@ -180,6 +183,7 @@ struct mlx5e_sq_stats { u64 tso_inner_bytes; u64 csum_partial; u64 csum_partial_inner; + u64 added_vlan_packets; u64 nop; /* less likely accessed in data path */ u64 csum_none; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index c62305b214cc..569b42a01026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -361,6 +361,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, if (skb_vlan_tag_present(skb)) { mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); ihs += VLAN_HLEN; + sq->stats.added_vlan_packets++; } else { memcpy(eseg->inline_hdr.start, skb_data, ihs); mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); @@ -372,6 +373,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + sq->stats.added_vlan_packets++; } headlen = skb_len - skb->data_len; From f938daeee95eb36ef6b431bf054a5cc6cdada112 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 30 Aug 2017 15:12:45 +0300 Subject: [PATCH 1990/2177] net/mlx5e: CHECKSUM_COMPLETE offload for VLAN/QinQ packets When the VLAN tag is present in the packet buffer (i.e VLAN stripping disabled, QinQ) the driver will currently report CHECKSUM_UNNECESSARY. Instead of using CHECKSUM_COMPLETE offload for packets with first ethertype of IPv4/6, use it for packets with last ethertype of IPv4/6 to cover the former cases as well. The checksum field present in the CQE is calculated from the IP header until the end of the packet. When the first ethertype is different than IPv4/6 (for ex. 802.1Q VLAN) a checksum of the VLAN header/s should be added. The small header/s checksum calculation will allow us to use CHECKSUM_COMPLETE instead of CHECKSUM_UNNECESSARY. 
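Condensed from the en_rx.c hunk below, with a worked QinQ case: for a frame laid out as [eth 14B][S-tag 4B][C-tag 4B][IP ...], network_depth is 22, so the 8 bytes of VLAN headers that the hardware checksum does not cover are folded into skb->csum before CHECKSUM_COMPLETE is reported:

    skb->ip_summed = CHECKSUM_COMPLETE;
    skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
    if (network_depth > ETH_HLEN)   /* VLAN header(s) sit between eth and IP */
            skb->csum = csum_partial(skb->data + ETH_HLEN,
                                     network_depth - ETH_HLEN, skb->csum);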
Testing bandwidth of one and 8 TCP streams to a single RQ, LRO and VLAN stripping offloads disabled: CPU: Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz NIC: Mellanox Technologies MT27710 Family [ConnectX-4 Lx] Before: +--------------+--------------------+---------------------+----------------------+ | Traffic type | 1 Stream BW [Mbps] | 8 Streams BW [Mbps] | Checksum offload | +--------------+--------------------+---------------------+----------------------+ | Untagged | 28,247.35 | 24,716.88 | CHECKSUM_COMPLETE | | VLAN | 27,516.69 | 23,752.26 | CHECKSUM_UNNECESSARY | | QinQ | 6,961.30 | 20,667.04 | CHECKSUM_UNNECESSARY | +--------------+--------------------+---------------------+----------------------+ Now: +--------------+--------------------+---------------------+-------------------+ | Traffic type | 1 Stream BW [Mbps] | 8 Streams BW [Mbps] | Checksum offload | +--------------+--------------------+---------------------+-------------------+ | Untagged | 28,521.28 | 24,926.32 | CHECKSUM_COMPLETE | | VLAN | 27,389.37 | 23,715.34 | CHECKSUM_COMPLETE | | QinQ | 6,901.77 | 20,845.73 | CHECKSUM_COMPLETE | +--------------+--------------------+---------------------+-------------------+ No performance degradation observed. Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d2b1549056d2..a9d08f292fbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -563,7 +563,6 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); - skb->mac_len = ETH_HLEN; proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); tot_len = cqe_bcnt - network_depth; @@ -610,10 +609,11 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); } -static inline bool is_first_ethertype_ip(struct sk_buff *skb) +static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth) { __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto; + ethertype = __vlan_get_protocol(skb, ethertype, network_depth); return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); } @@ -623,6 +623,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, struct sk_buff *skb, bool lro) { + int network_depth = 0; + if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) goto csum_none; @@ -632,9 +634,17 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (is_first_ethertype_ip(skb)) { + if (is_last_ethertype_ip(skb, &network_depth)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add + * the checksum manually. 
+ */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); rq->stats.csum_complete++; return; } @@ -664,6 +674,7 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, struct net_device *netdev = rq->netdev; int lro_num_seg; + skb->mac_len = ETH_HLEN; lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); From 72ad533acc22870156736c2fef4674c01307695e Mon Sep 17 00:00:00 2001 From: Mark Greer Date: Thu, 15 Jun 2017 20:34:21 -0700 Subject: [PATCH 1991/2177] NFC: digital: Abort cmd when deactivating target When deactivating an active target, the outstanding command should be aborted. Signed-off-by: Mark Greer Signed-off-by: Samuel Ortiz --- net/nfc/digital_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index de6dd37d04c7..ec0a8998e52d 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -650,6 +650,7 @@ static void digital_deactivate_target(struct nfc_dev *nfc_dev, return; } + digital_abort_cmd(ddev); ddev->curr_protocol = 0; } From 4d63adfe12dd9cb61ed8badb4d798955399048c2 Mon Sep 17 00:00:00 2001 From: Mark Greer Date: Thu, 15 Jun 2017 20:34:22 -0700 Subject: [PATCH 1992/2177] NFC: Add NFC_CMD_DEACTIVATE_TARGET support Once an NFC target (i.e., a tag) is found, it remains active until there is a failure reading or writing it (often caused by the target moving out of range). While the target is active, the NFC adapter and antenna must remain powered. This wastes power when the target remains in range but the client application no longer cares whether it is there or not. To mitigate this, add a new netlink command that allows userspace to deactivate an active target. When issued, this command will cause the NFC subsystem to act as though the target was moved out of range. Once the command has been executed, the client application can power off the NFC adapter to reduce power consumption. Signed-off-by: Mark Greer Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 2 ++ net/nfc/netlink.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 399f39ff8048..f6e3c8c9c744 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -89,6 +89,7 @@ * @NFC_CMD_ACTIVATE_TARGET: Request NFC controller to reactivate target. * @NFC_CMD_VENDOR: Vendor specific command, to be implemented directly * from the driver in order to support hardware specific operations. + * @NFC_CMD_DEACTIVATE_TARGET: Request NFC controller to deactivate target. 
*/ enum nfc_commands { NFC_CMD_UNSPEC, @@ -121,6 +122,7 @@ enum nfc_commands { NFC_CMD_SE_IO, NFC_CMD_ACTIVATE_TARGET, NFC_CMD_VENDOR, + NFC_CMD_DEACTIVATE_TARGET, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index b251fb936a27..f6359c277212 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -928,6 +928,30 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) return rc; } +static int nfc_genl_deactivate_target(struct sk_buff *skb, + struct genl_info *info) +{ + struct nfc_dev *dev; + u32 device_idx, target_idx; + int rc; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(device_idx); + if (!dev) + return -ENODEV; + + target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); + + rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); + + nfc_put_device(dev); + return rc; +} + static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; @@ -1751,6 +1775,11 @@ static const struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_vendor_cmd, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_DEACTIVATE_TARGET, + .doit = nfc_genl_deactivate_target, + .policy = nfc_genl_policy, + }, }; static struct genl_family nfc_genl_family __ro_after_init = { From 2fd3877b5bb7d39782c3205a1dcda02023b8514a Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 8 Nov 2017 14:36:31 +0100 Subject: [PATCH 1993/2177] brcmfmac: handle FWHALT mailbox indication The firmware uses a mailbox to communicate to the host what is going on. In the driver we validate the bit received. Various people seen the following message: brcmfmac: brcmf_sdio_hostmail: Unknown mailbox data content: 0x40012 Bit 4 is cause of this message, but this actually indicates the firmware has halted. Handle this bit by giving a more meaningful error message. 
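Decoding the reported value against the new flag names makes the point concrete: in 0x40012, bit 0x0010 (HMB_DATA_FWHALT) is set alongside 0x0002 (HMB_DATA_DEVREADY) and a value under HMB_DATA_VERSION_MASK; only the 0x0010 bit fell outside the old known-bits mask, which is why the whole word was logged as unknown. The handling added below reduces to:

    /* 0x40012 & HMB_DATA_FWHALT (0x0010) -> firmware halted/crashed */
    if (hmb_data & HMB_DATA_FWHALT)
            brcmf_err("mailbox indicates firmware halted\n");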
Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../wireless/broadcom/brcm80211/brcmfmac/sdio.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 5adce0e36fe5..00de73d9e152 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -260,10 +260,11 @@ struct rte_console { #define I_HMB_HOST_INT I_HMB_SW3 /* Miscellaneous Interrupt */ /* tohostmailboxdata */ -#define HMB_DATA_NAKHANDLED 1 /* retransmit NAK'd frame */ -#define HMB_DATA_DEVREADY 2 /* talk to host after enable */ -#define HMB_DATA_FC 4 /* per prio flowcontrol update flag */ -#define HMB_DATA_FWREADY 8 /* fw ready for protocol activity */ +#define HMB_DATA_NAKHANDLED 0x0001 /* retransmit NAK'd frame */ +#define HMB_DATA_DEVREADY 0x0002 /* talk to host after enable */ +#define HMB_DATA_FC 0x0004 /* per prio flowcontrol update flag */ +#define HMB_DATA_FWREADY 0x0008 /* fw ready for protocol activity */ +#define HMB_DATA_FWHALT 0x0010 /* firmware halted */ #define HMB_DATA_FCDATA_MASK 0xff000000 #define HMB_DATA_FCDATA_SHIFT 24 @@ -1094,6 +1095,10 @@ static u32 brcmf_sdio_hostmail(struct brcmf_sdio *bus) offsetof(struct sdpcmd_regs, tosbmailbox)); bus->sdcnt.f1regdata += 2; + /* dongle indicates the firmware has halted/crashed */ + if (hmb_data & HMB_DATA_FWHALT) + brcmf_err("mailbox indicates firmware halted\n"); + /* Dongle recomposed rx frames, accept them again */ if (hmb_data & HMB_DATA_NAKHANDLED) { brcmf_dbg(SDIO, "Dongle reports NAK handled, expect rtx of %d\n", @@ -1151,6 +1156,7 @@ static u32 brcmf_sdio_hostmail(struct brcmf_sdio *bus) HMB_DATA_NAKHANDLED | HMB_DATA_FC | HMB_DATA_FWREADY | + HMB_DATA_FWHALT | HMB_DATA_FCDATA_MASK | HMB_DATA_VERSION_MASK)) brcmf_err("Unknown mailbox data content: 0x%02x\n", hmb_data); From 6c219b0088158da839a5be63c5b3d96c145501d2 Mon Sep 17 00:00:00 2001 From: Franky Lin Date: Wed, 8 Nov 2017 14:36:32 +0100 Subject: [PATCH 1994/2177] brcmfmac: disable packet filtering in promiscuous mode Disable arp and nd offload to allow all packets sending to host. 
Reported-by: Phil Elwell Tested-by: Phil Elwell Reviewed-by: Arend Van Spriel Signed-off-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 41 ------------------- .../broadcom/brcm80211/brcmfmac/core.c | 38 +++++++++++++++++ .../broadcom/brcm80211/brcmfmac/core.h | 1 + 3 files changed, 39 insertions(+), 41 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 4bfd43a80273..051d51d7d935 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -472,47 +472,6 @@ send_key_to_dongle(struct brcmf_if *ifp, struct brcmf_wsec_key *key) return err; } -static s32 -brcmf_configure_arp_nd_offload(struct brcmf_if *ifp, bool enable) -{ - s32 err; - u32 mode; - - if (enable) - mode = BRCMF_ARP_OL_AGENT | BRCMF_ARP_OL_PEER_AUTO_REPLY; - else - mode = 0; - - /* Try to set and enable ARP offload feature, this may fail, then it */ - /* is simply not supported and err 0 will be returned */ - err = brcmf_fil_iovar_int_set(ifp, "arp_ol", mode); - if (err) { - brcmf_dbg(TRACE, "failed to set ARP offload mode to 0x%x, err = %d\n", - mode, err); - err = 0; - } else { - err = brcmf_fil_iovar_int_set(ifp, "arpoe", enable); - if (err) { - brcmf_dbg(TRACE, "failed to configure (%d) ARP offload err = %d\n", - enable, err); - err = 0; - } else - brcmf_dbg(TRACE, "successfully configured (%d) ARP offload to 0x%x\n", - enable, mode); - } - - err = brcmf_fil_iovar_int_set(ifp, "ndoe", enable); - if (err) { - brcmf_dbg(TRACE, "failed to configure (%d) ND offload err = %d\n", - enable, err); - err = 0; - } else - brcmf_dbg(TRACE, "successfully configured (%d) ND offload to 0x%x\n", - enable, mode); - - return err; -} - static void brcmf_cfg80211_update_proto_addr_mode(struct wireless_dev *wdev) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 5cc3a07dda9e..9c7536daf8f7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -71,6 +71,43 @@ struct brcmf_if *brcmf_get_ifp(struct brcmf_pub *drvr, int ifidx) return ifp; } +void brcmf_configure_arp_nd_offload(struct brcmf_if *ifp, bool enable) +{ + s32 err; + u32 mode; + + if (enable) + mode = BRCMF_ARP_OL_AGENT | BRCMF_ARP_OL_PEER_AUTO_REPLY; + else + mode = 0; + + /* Try to set and enable ARP offload feature, this may fail, then it */ + /* is simply not supported and err 0 will be returned */ + err = brcmf_fil_iovar_int_set(ifp, "arp_ol", mode); + if (err) { + brcmf_dbg(TRACE, "failed to set ARP offload mode to 0x%x, err = %d\n", + mode, err); + } else { + err = brcmf_fil_iovar_int_set(ifp, "arpoe", enable); + if (err) { + brcmf_dbg(TRACE, "failed to configure (%d) ARP offload err = %d\n", + enable, err); + } else { + brcmf_dbg(TRACE, "successfully configured (%d) ARP offload to 0x%x\n", + enable, mode); + } + } + + err = brcmf_fil_iovar_int_set(ifp, "ndoe", enable); + if (err) { + brcmf_dbg(TRACE, "failed to configure (%d) ND offload err = %d\n", + enable, err); + } else { + brcmf_dbg(TRACE, "successfully configured (%d) ND offload to 0x%x\n", + enable, mode); + } +} + static void _brcmf_set_multicast_list(struct work_struct *work) { struct brcmf_if *ifp; @@ -134,6 +171,7 @@ static void _brcmf_set_multicast_list(struct work_struct *work) if (err < 0) 
brcmf_err("Setting BRCMF_C_SET_PROMISC failed, %d\n", err); + brcmf_configure_arp_nd_offload(ifp, !cmd_value); } #if IS_ENABLED(CONFIG_IPV6) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index a4dd313140f3..17085712bae2 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -203,6 +203,7 @@ int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp); /* Return pointer to interface name */ char *brcmf_ifname(struct brcmf_if *ifp); struct brcmf_if *brcmf_get_ifp(struct brcmf_pub *drvr, int ifidx); +void brcmf_configure_arp_nd_offload(struct brcmf_if *ifp, bool enable); int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked); struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bsscfgidx, s32 ifidx, bool is_p2pdev, const char *name, u8 *mac_addr); From 8c6efda22f5f9f73fc948f517424466be01ae84d Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 8 Nov 2017 14:36:33 +0100 Subject: [PATCH 1995/2177] brcmfmac: cleanup brcmf_cfg80211_escan() function The function brcmf_cfg80211_escan() was always called with a non-null request parameter and null pointer for this_ssid parameter. Clean up the function removing the dead code path. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 76 +++---------------- 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 051d51d7d935..1283798f0b86 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1072,18 +1072,10 @@ brcmf_do_escan(struct brcmf_if *ifp, struct cfg80211_scan_request *request) static s32 brcmf_cfg80211_escan(struct wiphy *wiphy, struct brcmf_cfg80211_vif *vif, - struct cfg80211_scan_request *request, - struct cfg80211_ssid *this_ssid) + struct cfg80211_scan_request *request) { - struct brcmf_if *ifp = vif->ifp; struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); - struct cfg80211_ssid *ssids; - u32 passive_scan; - bool escan_req; - bool spec_scan; s32 err; - struct brcmf_ssid_le ssid_le; - u32 SSID_len; brcmf_dbg(SCAN, "START ESCAN\n"); @@ -1101,8 +1093,8 @@ brcmf_cfg80211_escan(struct wiphy *wiphy, struct brcmf_cfg80211_vif *vif, cfg->scan_status); return -EAGAIN; } - if (test_bit(BRCMF_VIF_STATUS_CONNECTING, &ifp->vif->sme_state)) { - brcmf_err("Connecting: status (%lu)\n", ifp->vif->sme_state); + if (test_bit(BRCMF_VIF_STATUS_CONNECTING, &vif->sme_state)) { + brcmf_err("Connecting: status (%lu)\n", vif->sme_state); return -EAGAIN; } @@ -1110,63 +1102,17 @@ brcmf_cfg80211_escan(struct wiphy *wiphy, struct brcmf_cfg80211_vif *vif, if (vif == cfg->p2p.bss_idx[P2PAPI_BSSCFG_DEVICE].vif) vif = cfg->p2p.bss_idx[P2PAPI_BSSCFG_PRIMARY].vif; - escan_req = false; - if (request) { - /* scan bss */ - ssids = request->ssids; - escan_req = true; - } else { - /* scan in ibss */ - /* we don't do escan in ibss */ - ssids = this_ssid; - } - cfg->scan_request = request; set_bit(BRCMF_SCAN_STATUS_BUSY, &cfg->scan_status); - if (escan_req) { - cfg->escan_info.run = brcmf_run_escan; - err = brcmf_p2p_scan_prep(wiphy, request, vif); - if (err) - goto scan_out; - err = brcmf_do_escan(vif->ifp, request); - if (err) - goto scan_out; - } 
else { - brcmf_dbg(SCAN, "ssid \"%s\", ssid_len (%d)\n", - ssids->ssid, ssids->ssid_len); - memset(&ssid_le, 0, sizeof(ssid_le)); - SSID_len = min_t(u8, sizeof(ssid_le.SSID), ssids->ssid_len); - ssid_le.SSID_len = cpu_to_le32(0); - spec_scan = false; - if (SSID_len) { - memcpy(ssid_le.SSID, ssids->ssid, SSID_len); - ssid_le.SSID_len = cpu_to_le32(SSID_len); - spec_scan = true; - } else - brcmf_dbg(SCAN, "Broadcast scan\n"); + cfg->escan_info.run = brcmf_run_escan; + err = brcmf_p2p_scan_prep(wiphy, request, vif); + if (err) + goto scan_out; - passive_scan = cfg->active_scan ? 0 : 1; - err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_PASSIVE_SCAN, - passive_scan); - if (err) { - brcmf_err("WLC_SET_PASSIVE_SCAN error (%d)\n", err); - goto scan_out; - } - brcmf_scan_config_mpc(ifp, 0); - err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SCAN, &ssid_le, - sizeof(ssid_le)); - if (err) { - if (err == -EBUSY) - brcmf_dbg(INFO, "BUSY: scan for \"%s\" canceled\n", - ssid_le.SSID); - else - brcmf_err("WLC_SCAN error (%d)\n", err); - - brcmf_scan_config_mpc(ifp, 1); - goto scan_out; - } - } + err = brcmf_do_escan(vif->ifp, request); + if (err) + goto scan_out; /* Arm scan timeout timer */ mod_timer(&cfg->escan_timeout, jiffies + @@ -1191,7 +1137,7 @@ brcmf_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) if (!check_vif_up(vif)) return -EIO; - err = brcmf_cfg80211_escan(wiphy, vif, request, NULL); + err = brcmf_cfg80211_escan(wiphy, vif, request); if (err) brcmf_err("scan error (%d)\n", err); From df2d8388bc96c0f29d27d121f2a4cd054f8b3900 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 8 Nov 2017 14:36:34 +0100 Subject: [PATCH 1996/2177] brcmfmac: use msecs_to_jiffies() instead of calculation using HZ Minor cleanup using provided macro to convert milliseconds interval to jiffies in brcmf_cfg80211_escan(). Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 1283798f0b86..afdf6bb62335 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1115,8 +1115,8 @@ brcmf_cfg80211_escan(struct wiphy *wiphy, struct brcmf_cfg80211_vif *vif, goto scan_out; /* Arm scan timeout timer */ - mod_timer(&cfg->escan_timeout, jiffies + - BRCMF_ESCAN_TIMER_INTERVAL_MS * HZ / 1000); + mod_timer(&cfg->escan_timeout, + jiffies + msecs_to_jiffies(BRCMF_ESCAN_TIMER_INTERVAL_MS)); return 0; From 588378f15cff285ac81c929239ccba01d7f71d50 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 8 Nov 2017 14:36:35 +0100 Subject: [PATCH 1997/2177] brcmfmac: get rid of brcmf_cfg80211_escan() function The function brcmf_cfg80211_escan() is only called by brcmf_cfg80211_scan() so there is no reason to split in two function especially since the latter does not do an awful lot. 
Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 34 ++++++------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index afdf6bb62335..31a80a6cf280 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1071,13 +1071,16 @@ brcmf_do_escan(struct brcmf_if *ifp, struct cfg80211_scan_request *request) } static s32 -brcmf_cfg80211_escan(struct wiphy *wiphy, struct brcmf_cfg80211_vif *vif, - struct cfg80211_scan_request *request) +brcmf_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); - s32 err; + struct brcmf_cfg80211_vif *vif; + s32 err = 0; - brcmf_dbg(SCAN, "START ESCAN\n"); + brcmf_dbg(TRACE, "Enter\n"); + vif = container_of(request->wdev, struct brcmf_cfg80211_vif, wdev); + if (!check_vif_up(vif)) + return -EIO; if (test_bit(BRCMF_SCAN_STATUS_BUSY, &cfg->scan_status)) { brcmf_err("Scanning already: status (%lu)\n", cfg->scan_status); @@ -1102,6 +1105,8 @@ brcmf_cfg80211_escan(struct wiphy *wiphy, struct brcmf_cfg80211_vif *vif, if (vif == cfg->p2p.bss_idx[P2PAPI_BSSCFG_DEVICE].vif) vif = cfg->p2p.bss_idx[P2PAPI_BSSCFG_PRIMARY].vif; + brcmf_dbg(SCAN, "START ESCAN\n"); + cfg->scan_request = request; set_bit(BRCMF_SCAN_STATUS_BUSY, &cfg->scan_status); @@ -1121,31 +1126,12 @@ brcmf_cfg80211_escan(struct wiphy *wiphy, struct brcmf_cfg80211_vif *vif, return 0; scan_out: + brcmf_err("scan error (%d)\n", err); clear_bit(BRCMF_SCAN_STATUS_BUSY, &cfg->scan_status); cfg->scan_request = NULL; return err; } -static s32 -brcmf_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) -{ - struct brcmf_cfg80211_vif *vif; - s32 err = 0; - - brcmf_dbg(TRACE, "Enter\n"); - vif = container_of(request->wdev, struct brcmf_cfg80211_vif, wdev); - if (!check_vif_up(vif)) - return -EIO; - - err = brcmf_cfg80211_escan(wiphy, vif, request); - - if (err) - brcmf_err("scan error (%d)\n", err); - - brcmf_dbg(TRACE, "Exit\n"); - return err; -} - static s32 brcmf_set_rts(struct net_device *ndev, u32 rts_threshold) { s32 err = 0; From bbf35414cd23a9d7230bfd7046e1e2c26020e7eb Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 8 Nov 2017 14:36:36 +0100 Subject: [PATCH 1998/2177] brcmfmac: get rid of struct brcmf_cfg80211_info::active_scan field The field struct brcmf_cfg80211_info::active_scan is set to true upon initializing the driver instance, but it is never changed so simply get rid of it. 
Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 10 +--------- .../wireless/broadcom/brcm80211/brcmfmac/cfg80211.h | 2 -- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 5 +---- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 31a80a6cf280..9b334e2b39fb 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1043,7 +1043,6 @@ brcmf_do_escan(struct brcmf_if *ifp, struct cfg80211_scan_request *request) { struct brcmf_cfg80211_info *cfg = ifp->drvr->config; s32 err; - u32 passive_scan; struct brcmf_scan_results *results; struct escan_info *escan = &cfg->escan_info; @@ -1051,13 +1050,7 @@ brcmf_do_escan(struct brcmf_if *ifp, struct cfg80211_scan_request *request) escan->ifp = ifp; escan->wiphy = cfg->wiphy; escan->escan_state = WL_ESCAN_STATE_SCANNING; - passive_scan = cfg->active_scan ? 0 : 1; - err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_PASSIVE_SCAN, - passive_scan); - if (err) { - brcmf_err("error (%d)\n", err); - return err; - } + brcmf_scan_config_mpc(ifp, 0); results = (struct brcmf_scan_results *)cfg->escan_info.escan_buf; results->version = 0; @@ -5767,7 +5760,6 @@ static s32 wl_init_priv(struct brcmf_cfg80211_info *cfg) cfg->scan_request = NULL; cfg->pwr_save = true; - cfg->active_scan = true; /* we do active scan per default */ cfg->dongle_up = false; /* dongle is not up yet */ err = brcmf_init_priv_mem(cfg); if (err) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h index 7b2835e5e434..b5b5f0f10b63 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h @@ -283,7 +283,6 @@ struct brcmf_cfg80211_wowl { * @scan_status: scan activity on the dongle. * @pub: common driver information. * @channel: current channel. - * @active_scan: current scan mode. * @int_escan_map: bucket map for which internal e-scan is done. * @ibss_starter: indicates this sta is ibss starter. * @pwr_save: indicate whether dongle to support power save mode. 
@@ -316,7 +315,6 @@ struct brcmf_cfg80211_info { unsigned long scan_status; struct brcmf_pub *pub; u32 channel; - bool active_scan; u32 int_escan_map; bool ibss_starter; bool pwr_save; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 1c450c0727cb..c6cccb8cc26b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -692,10 +692,7 @@ static s32 brcmf_p2p_escan(struct brcmf_p2p_info *p2p, u32 num_chans, /* determine the scan engine parameters */ sparams->bss_type = DOT11_BSSTYPE_ANY; - if (p2p->cfg->active_scan) - sparams->scan_type = 0; - else - sparams->scan_type = 1; + sparams->scan_type = BRCMF_SCANTYPE_ACTIVE; eth_broadcast_addr(sparams->bssid); sparams->home_time = cpu_to_le32(P2PAPI_SCAN_HOME_TIME_MS); From bd99a3013bdc00f8fc7534c657b39616792b4467 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 8 Nov 2017 14:36:37 +0100 Subject: [PATCH 1999/2177] brcmfmac: move configuration of probe request IEs The configuration of the IEs for probe requests was done in a P2P related function, which is not very obvious. Moving it to .scan callback function, ie. brcmf_cfg80211_scan(). Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 5 +++++ drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 6 ++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 9b334e2b39fb..6e70df978159 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1108,6 +1108,11 @@ brcmf_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) if (err) goto scan_out; + err = brcmf_vif_set_mgmt_ie(vif, BRCMF_VNDR_IE_PRBREQ_FLAG, + request->ie, request->ie_len); + if (err) + goto scan_out; + err = brcmf_do_escan(vif->ifp, request); if (err) goto scan_out; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index c6cccb8cc26b..2ee54133efa1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -881,7 +881,7 @@ int brcmf_p2p_scan_prep(struct wiphy *wiphy, { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_p2p_info *p2p = &cfg->p2p; - int err = 0; + int err; if (brcmf_p2p_scan_is_p2p_request(request)) { /* find my listen channel */ @@ -904,9 +904,7 @@ int brcmf_p2p_scan_prep(struct wiphy *wiphy, /* override .run_escan() callback. */ cfg->escan_info.run = brcmf_p2p_run_escan; } - err = brcmf_vif_set_mgmt_ie(vif, BRCMF_VNDR_IE_PRBREQ_FLAG, - request->ie, request->ie_len); - return err; + return 0; } From e294cbfda05640063a0da3ec69b9b19ed13ea2f8 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:47 -0700 Subject: [PATCH 2000/2177] qtnfmac: use per-band HT/VHT info from wireless device HT/VHT capabilities must be reported per each band supported by a radio, not for all bands on a radio. Furthermore, driver better not assume any capabilities and just use whetever is reported by device itself. To support this, convert "get channels" command into "get band info" command. 
Difference is that it may also carry HT/VHT capabilities along with channels information. While at it, also add "num_bitrates" field to "get band info" command, for future use. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 30 +------- .../net/wireless/quantenna/qtnfmac/commands.c | 77 +++++++++++++++---- .../net/wireless/quantenna/qtnfmac/commands.h | 4 +- drivers/net/wireless/quantenna/qtnfmac/core.c | 3 +- .../net/wireless/quantenna/qtnfmac/qlink.h | 29 +++---- 5 files changed, 84 insertions(+), 59 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index e70f5bd5e498..2f60331fc0dd 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -823,8 +823,7 @@ static void qtnf_cfg80211_reg_notifier(struct wiphy *wiphy_in, if (!wiphy->bands[band]) continue; - ret = qtnf_cmd_get_mac_chan_info(mac, - wiphy->bands[band]); + ret = qtnf_cmd_band_info_get(mac, wiphy->bands[band]); if (ret) pr_err("failed to get chan info for mac %u band %u\n", mac_idx, band); @@ -832,33 +831,6 @@ static void qtnf_cfg80211_reg_notifier(struct wiphy *wiphy_in, } } -void qtnf_band_setup_htvht_caps(struct qtnf_mac_info *macinfo, - struct ieee80211_supported_band *band) -{ - struct ieee80211_sta_ht_cap *ht_cap; - struct ieee80211_sta_vht_cap *vht_cap; - - ht_cap = &band->ht_cap; - ht_cap->ht_supported = true; - memcpy(&ht_cap->cap, &macinfo->ht_cap.cap_info, - sizeof(u16)); - ht_cap->ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K; - ht_cap->ampdu_density = IEEE80211_HT_MPDU_DENSITY_NONE; - memcpy(&ht_cap->mcs, &macinfo->ht_cap.mcs, - sizeof(ht_cap->mcs)); - - if (macinfo->phymode_cap & QLINK_PHYMODE_AC) { - vht_cap = &band->vht_cap; - vht_cap->vht_supported = true; - memcpy(&vht_cap->cap, - &macinfo->vht_cap.vht_cap_info, sizeof(u32)); - /* Update MCS support for VHT */ - memcpy(&vht_cap->vht_mcs, - &macinfo->vht_cap.supp_mcs, - sizeof(struct ieee80211_vht_mcs_info)); - } -} - struct wiphy *qtnf_wiphy_allocate(struct qtnf_bus *bus) { struct wiphy *wiphy; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index b81f81bd1411..a7422c5a150b 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -1115,19 +1115,50 @@ qtnf_cmd_resp_proc_mac_info(struct qtnf_wmac *mac, sizeof(mac_info->vht_cap)); } +static void qtnf_cmd_resp_band_fill_htcap(const u8 *info, + struct ieee80211_sta_ht_cap *bcap) +{ + const struct ieee80211_ht_cap *ht_cap = + (const struct ieee80211_ht_cap *)info; + + bcap->ht_supported = true; + bcap->cap = le16_to_cpu(ht_cap->cap_info); + bcap->ampdu_factor = + ht_cap->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR; + bcap->ampdu_density = + (ht_cap->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> + IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT; + memcpy(&bcap->mcs, &ht_cap->mcs, sizeof(bcap->mcs)); +} + +static void qtnf_cmd_resp_band_fill_vhtcap(const u8 *info, + struct ieee80211_sta_vht_cap *bcap) +{ + const struct ieee80211_vht_cap *vht_cap = + (const struct ieee80211_vht_cap *)info; + + bcap->vht_supported = true; + bcap->cap = le32_to_cpu(vht_cap->vht_cap_info); + memcpy(&bcap->vht_mcs, &vht_cap->supp_mcs, sizeof(bcap->vht_mcs)); +} + static int -qtnf_cmd_resp_fill_channels_info(struct ieee80211_supported_band *band, - struct qlink_resp_get_chan_info *resp, - size_t payload_len) 
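The band description arrives as a flat sequence of qlink TLVs; a simplified view of the parse loop extended in qtnf_cmd_resp_fill_band_info() below (error handling and the channel cases trimmed, locals assumed from the surrounding function):

    while (payload_len >= sizeof(*tlv)) {
            size_t dlen = le16_to_cpu(tlv->len);

            if (dlen + sizeof(*tlv) > payload_len)
                    return -EINVAL;                 /* malformed TLV */

            switch (le16_to_cpu(tlv->type)) {
            case WLAN_EID_HT_CAPABILITY:            /* new: fills band->ht_cap */
                    qtnf_cmd_resp_band_fill_htcap(tlv->val, &band->ht_cap);
                    break;
            case WLAN_EID_VHT_CAPABILITY:           /* new: fills band->vht_cap */
                    qtnf_cmd_resp_band_fill_vhtcap(tlv->val, &band->vht_cap);
                    break;
            default:                                /* channel TLVs, future bitrate TLVs */
                    break;
            }

            payload_len -= dlen + sizeof(*tlv);
            tlv = (struct qlink_tlv_hdr *)(tlv->val + dlen);
    }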
+qtnf_cmd_resp_fill_band_info(struct ieee80211_supported_band *band, + struct qlink_resp_band_info_get *resp, + size_t payload_len) { u16 tlv_type; size_t tlv_len; + size_t tlv_dlen; const struct qlink_tlv_hdr *tlv; const struct qlink_tlv_channel *qchan; struct ieee80211_channel *chan; unsigned int chidx = 0; u32 qflags; + memset(&band->ht_cap, 0, sizeof(band->ht_cap)); + memset(&band->vht_cap, 0, sizeof(band->vht_cap)); + if (band->channels) { if (band->n_channels == resp->num_chans) { memset(band->channels, 0, @@ -1155,7 +1186,8 @@ qtnf_cmd_resp_fill_channels_info(struct ieee80211_supported_band *band, while (payload_len >= sizeof(*tlv)) { tlv_type = le16_to_cpu(tlv->type); - tlv_len = le16_to_cpu(tlv->len) + sizeof(*tlv); + tlv_dlen = le16_to_cpu(tlv->len); + tlv_len = tlv_dlen + sizeof(*tlv); if (tlv_len > payload_len) { pr_warn("malformed TLV 0x%.2X; LEN: %zu\n", @@ -1241,13 +1273,32 @@ qtnf_cmd_resp_fill_channels_info(struct ieee80211_supported_band *band, chan->hw_value, chan->flags, chan->max_power, chan->max_reg_power); break; + case WLAN_EID_HT_CAPABILITY: + if (unlikely(tlv_dlen != + sizeof(struct ieee80211_ht_cap))) { + pr_err("bad HTCAP TLV len %zu\n", tlv_dlen); + goto error_ret; + } + + qtnf_cmd_resp_band_fill_htcap(tlv->val, &band->ht_cap); + break; + case WLAN_EID_VHT_CAPABILITY: + if (unlikely(tlv_dlen != + sizeof(struct ieee80211_vht_cap))) { + pr_err("bad VHTCAP TLV len %zu\n", tlv_dlen); + goto error_ret; + } + + qtnf_cmd_resp_band_fill_vhtcap(tlv->val, + &band->vht_cap); + break; default: pr_warn("unknown TLV type: %#x\n", tlv_type); break; } payload_len -= tlv_len; - tlv = (struct qlink_tlv_hdr *)((u8 *)tlv + tlv_len); + tlv = (struct qlink_tlv_hdr *)(tlv->val + tlv_dlen); } if (payload_len) { @@ -1469,13 +1520,13 @@ out: return ret; } -int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac, - struct ieee80211_supported_band *band) +int qtnf_cmd_band_info_get(struct qtnf_wmac *mac, + struct ieee80211_supported_band *band) { struct sk_buff *cmd_skb, *resp_skb = NULL; size_t info_len; - struct qlink_cmd_chans_info_get *cmd; - struct qlink_resp_get_chan_info *resp; + struct qlink_cmd_band_info_get *cmd; + struct qlink_resp_band_info_get *resp; u16 res_code = QLINK_CMD_RESULT_OK; int ret = 0; u8 qband; @@ -1495,12 +1546,12 @@ int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac, } cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, 0, - QLINK_CMD_CHANS_INFO_GET, + QLINK_CMD_BAND_INFO_GET, sizeof(*cmd)); if (!cmd_skb) return -ENOMEM; - cmd = (struct qlink_cmd_chans_info_get *)cmd_skb->data; + cmd = (struct qlink_cmd_band_info_get *)cmd_skb->data; cmd->band = qband; qtnf_bus_lock(mac->bus); @@ -1517,7 +1568,7 @@ int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac, goto out; } - resp = (struct qlink_resp_get_chan_info *)resp_skb->data; + resp = (struct qlink_resp_band_info_get *)resp_skb->data; if (resp->band != qband) { pr_err("MAC%u: reply band %u != cmd band %u\n", mac->macid, resp->band, qband); @@ -1525,7 +1576,7 @@ int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac, goto out; } - ret = qtnf_cmd_resp_fill_channels_info(band, resp, info_len); + ret = qtnf_cmd_resp_fill_band_info(band, resp, info_len); out: qtnf_bus_unlock(mac->bus); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index e87c4a484dd4..d6fe3cc09fba 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -30,8 +30,8 @@ int qtnf_cmd_send_add_intf(struct qtnf_vif *vif, enum 
nl80211_iftype iftype, int qtnf_cmd_send_change_intf_type(struct qtnf_vif *vif, enum nl80211_iftype iftype, u8 *mac_addr); int qtnf_cmd_send_del_intf(struct qtnf_vif *vif); -int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac, - struct ieee80211_supported_band *band); +int qtnf_cmd_band_info_get(struct qtnf_wmac *mac, + struct ieee80211_supported_band *band); int qtnf_cmd_send_regulatory_config(struct qtnf_wmac *mac, const char *alpha2); int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, const struct cfg80211_ap_settings *s); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 6a6e5ffb0348..2d2c1ea65cb2 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -171,7 +171,7 @@ static int qtnf_mac_init_single_band(struct wiphy *wiphy, wiphy->bands[band]->band = band; - ret = qtnf_cmd_get_mac_chan_info(mac, wiphy->bands[band]); + ret = qtnf_cmd_band_info_get(mac, wiphy->bands[band]); if (ret) { pr_err("MAC%u: band %u: failed to get chans info: %d\n", mac->macid, band, ret); @@ -179,7 +179,6 @@ static int qtnf_mac_init_single_band(struct wiphy *wiphy, } qtnf_band_init_rates(wiphy->bands[band]); - qtnf_band_setup_htvht_caps(&mac->macinfo, wiphy->bands[band]); return 0; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 0f582782682f..dfef7faa6fca 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -168,11 +168,12 @@ struct qlink_auth_encr { * Commands are QLINK messages of type @QLINK_MSG_TYPE_CMD, sent by driver to * wireless network device for processing. Device is expected to send back a * reply message of type &QLINK_MSG_TYPE_CMDRSP, containing at least command - * execution status (one of &enum qlink_cmd_result) at least. Reply message + * execution status (one of &enum qlink_cmd_result). Reply message * may also contain data payload specific to the command type. * - * @QLINK_CMD_CHANS_INFO_GET: for the specified MAC and specified band, get - * number of operational channels and information on each of the channel. + * @QLINK_CMD_BAND_INFO_GET: for the specified MAC and specified band, get + * the band's description including number of operational channels and + * info on each channel, HT/VHT capabilities, supported rates etc. * This command is generic to a specified MAC, interface index must be set * to QLINK_VIFID_RSVD in command header. * @QLINK_CMD_REG_NOTIFY: notify device about regulatory domain change. 
This @@ -194,7 +195,7 @@ enum qlink_cmd_type { QLINK_CMD_CHANGE_INTF = 0x0017, QLINK_CMD_UPDOWN_INTF = 0x0018, QLINK_CMD_REG_NOTIFY = 0x0019, - QLINK_CMD_CHANS_INFO_GET = 0x001A, + QLINK_CMD_BAND_INFO_GET = 0x001A, QLINK_CMD_CHAN_SWITCH = 0x001B, QLINK_CMD_CHAN_GET = 0x001C, QLINK_CMD_CONFIG_AP = 0x0020, @@ -477,11 +478,11 @@ enum qlink_band { }; /** - * struct qlink_cmd_chans_info_get - data for QLINK_CMD_CHANS_INFO_GET command + * struct qlink_cmd_band_info_get - data for QLINK_CMD_BAND_INFO_GET command * - * @band: a PHY band for which channels info is needed, one of @enum qlink_band + * @band: a PHY band for which information is queried, one of @enum qlink_band */ -struct qlink_cmd_chans_info_get { +struct qlink_cmd_band_info_get { struct qlink_cmd chdr; u8 band; } __packed; @@ -730,17 +731,19 @@ struct qlink_resp_get_sta_info { } __packed; /** - * struct qlink_resp_get_chan_info - response for QLINK_CMD_CHANS_INFO_GET cmd + * struct qlink_resp_band_info_get - response for QLINK_CMD_BAND_INFO_GET cmd * - * @band: frequency band to which channels belong to, one of @enum qlink_band. - * @num_chans: total number of channels info data contained in reply data. - * @info: variable-length channels info. + * @band: frequency band that the response describes, one of @enum qlink_band. + * @num_chans: total number of channels info TLVs contained in reply. + * @num_bitrates: total number of bitrate TLVs contained in reply. + * @info: variable-length info portion. */ -struct qlink_resp_get_chan_info { +struct qlink_resp_band_info_get { struct qlink_resp rhdr; u8 band; u8 num_chans; - u8 rsvd[2]; + u8 num_bitrates; + u8 rsvd[1]; u8 info[0]; } __packed; From d42df85f7d853ae155d7708cc57118a0d6925d4f Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:48 -0700 Subject: [PATCH 2001/2177] qtnfmac: initialize HT/VHT caps "can override" masks Information on which HT/VHT capabilities can be overridden is reported per-MAC by wireless device. 
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 2 ++ drivers/net/wireless/quantenna/qtnfmac/commands.c | 7 ++++--- drivers/net/wireless/quantenna/qtnfmac/core.h | 4 ++-- drivers/net/wireless/quantenna/qtnfmac/qlink.h | 8 ++++---- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 2f60331fc0dd..b6a25409d220 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -925,6 +925,8 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac) wiphy->available_antennas_rx = mac->macinfo.num_rx_chain; wiphy->max_ap_assoc_sta = mac->macinfo.max_ap_assoc_sta; + wiphy->ht_capa_mod_mask = &mac->macinfo.ht_cap_mod_mask; + wiphy->vht_capa_mod_mask = &mac->macinfo.vht_cap_mod_mask; ether_addr_copy(wiphy->perm_addr, mac->macaddr); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index a7422c5a150b..35ff7e87ca00 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -1110,9 +1110,10 @@ qtnf_cmd_resp_proc_mac_info(struct qtnf_wmac *mac, qlink_chan_width_mask_to_nl(le16_to_cpu( resp_info->radar_detect_widths)); - memcpy(&mac_info->ht_cap, &resp_info->ht_cap, sizeof(mac_info->ht_cap)); - memcpy(&mac_info->vht_cap, &resp_info->vht_cap, - sizeof(mac_info->vht_cap)); + memcpy(&mac_info->ht_cap_mod_mask, &resp_info->ht_cap_mod_mask, + sizeof(mac_info->ht_cap_mod_mask)); + memcpy(&mac_info->vht_cap_mod_mask, &resp_info->vht_cap_mod_mask, + sizeof(mac_info->vht_cap_mod_mask)); } static void qtnf_cmd_resp_band_fill_htcap(const u8 *info, diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index da2c24e2271d..74b94ce4f426 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -103,8 +103,8 @@ struct qtnf_mac_info { u8 sretry_limit; u8 coverage_class; u8 radar_detect_widths; - struct ieee80211_ht_cap ht_cap; - struct ieee80211_vht_cap vht_cap; + struct ieee80211_ht_cap ht_cap_mod_mask; + struct ieee80211_vht_cap vht_cap_mod_mask; struct ieee80211_iface_limit *limits; size_t n_limits; }; diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index dfef7faa6fca..b2c4e27d1dd2 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -636,8 +636,8 @@ struct qlink_resp { * specified WMAC). * @num_tx_chain: Number of transmit chains used by WMAC. * @num_rx_chain: Number of receive chains used by WMAC. - * @vht_cap: VHT capabilities. - * @ht_cap: HT capabilities. + * @vht_cap_mod_mask: mask specifying which VHT capabilities can be altered. + * @ht_cap_mod_mask: mask specifying which HT capabilities can be altered. * @bands_cap: wireless bands WMAC can operate in, bitmap of &enum qlink_band. * @phymode_cap: PHY modes WMAC can operate in, bitmap of &enum qlink_phy_mode. * @max_ap_assoc_sta: Maximum number of associations supported by WMAC. 
@@ -649,8 +649,8 @@ struct qlink_resp_get_mac_info { u8 dev_mac[ETH_ALEN]; u8 num_tx_chain; u8 num_rx_chain; - struct ieee80211_vht_cap vht_cap; - struct ieee80211_ht_cap ht_cap; + struct ieee80211_vht_cap vht_cap_mod_mask; + struct ieee80211_ht_cap ht_cap_mod_mask; u8 bands_cap; u8 phymode_cap; __le16 max_ap_assoc_sta; From d1398b5b34cca945cadf75f29833785cf6a675b1 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:49 -0700 Subject: [PATCH 2002/2177] qtnfmac: get rid of PHYMODE capabilities flags Supported WiFi operation modes are now identified per-band based on HT/VHT capabilities of each band. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 3 --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 1 - drivers/net/wireless/quantenna/qtnfmac/core.h | 1 - drivers/net/wireless/quantenna/qtnfmac/qlink.h | 11 ++--------- 4 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index b6a25409d220..1a0c538a38d2 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -891,9 +891,6 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac) if (ret) goto out; - pr_info("MAC%u: phymode=%#x radar=%#x\n", mac->macid, - mac->macinfo.phymode_cap, mac->macinfo.radar_detect_widths); - wiphy->frag_threshold = mac->macinfo.frag_thr; wiphy->rts_threshold = mac->macinfo.rts_thr; wiphy->retry_short = mac->macinfo.sretry_limit; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 35ff7e87ca00..503187a371ae 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -1090,7 +1090,6 @@ qtnf_cmd_resp_proc_mac_info(struct qtnf_wmac *mac, mac_info = &mac->macinfo; mac_info->bands_cap = resp_info->bands_cap; - mac_info->phymode_cap = resp_info->phymode_cap; memcpy(&mac_info->dev_mac, &resp_info->dev_mac, sizeof(mac_info->dev_mac)); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 74b94ce4f426..1b7bc0318f3e 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -92,7 +92,6 @@ struct qtnf_vif { struct qtnf_mac_info { u8 bands_cap; - u8 phymode_cap; u8 dev_mac[ETH_ALEN]; u8 num_tx_chain; u8 num_rx_chain; diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index b2c4e27d1dd2..c558d819a966 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -74,12 +74,6 @@ enum qlink_hw_capab { QLINK_HW_CAPAB_STA_INACT_TIMEOUT = BIT(1), }; -enum qlink_phy_mode { - QLINK_PHYMODE_BGN = BIT(0), - QLINK_PHYMODE_AN = BIT(1), - QLINK_PHYMODE_AC = BIT(2), -}; - enum qlink_iface_type { QLINK_IFTYPE_AP = 1, QLINK_IFTYPE_STATION = 2, @@ -639,7 +633,6 @@ struct qlink_resp { * @vht_cap_mod_mask: mask specifying which VHT capabilities can be altered. * @ht_cap_mod_mask: mask specifying which HT capabilities can be altered. * @bands_cap: wireless bands WMAC can operate in, bitmap of &enum qlink_band. - * @phymode_cap: PHY modes WMAC can operate in, bitmap of &enum qlink_phy_mode. * @max_ap_assoc_sta: Maximum number of associations supported by WMAC. 
* @radar_detect_widths: bitmask of channels BW for which WMAC can detect radar. * @var_info: variable-length WMAC info data. @@ -651,10 +644,10 @@ struct qlink_resp_get_mac_info { u8 num_rx_chain; struct ieee80211_vht_cap vht_cap_mod_mask; struct ieee80211_ht_cap ht_cap_mod_mask; - u8 bands_cap; - u8 phymode_cap; __le16 max_ap_assoc_sta; __le16 radar_detect_widths; + u8 bands_cap; + u8 rsvd[1]; u8 var_info[0]; } __packed; From 18b7470f92dfbea3e5ef82cab70edfc559d46735 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:50 -0700 Subject: [PATCH 2003/2177] qtnfmac: extend "IE set" TLV to include frame type info Specifying frame type for "IE set" TLV will allow to use several TLVs in a single message. Modify users accordingly. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/commands.c | 25 ++++++-- .../net/wireless/quantenna/qtnfmac/event.c | 58 +++++++++++-------- .../net/wireless/quantenna/qtnfmac/qlink.h | 27 +++++++++ 3 files changed, 80 insertions(+), 30 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 503187a371ae..8d3cdba99390 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -147,6 +147,21 @@ static struct sk_buff *qtnf_cmd_alloc_new_cmdskb(u8 macid, u8 vifid, u16 cmd_no, return cmd_skb; } +static void qtnf_cmd_tlv_ie_set_add(struct sk_buff *cmd_skb, u8 frame_type, + const u8 *buf, size_t len) +{ + struct qlink_tlv_ie_set *tlv; + + tlv = (struct qlink_tlv_ie_set *)skb_put(cmd_skb, sizeof(*tlv) + len); + tlv->hdr.type = cpu_to_le16(QTN_TLV_ID_IE_SET); + tlv->hdr.len = cpu_to_le16(len + sizeof(*tlv) - sizeof(tlv->hdr)); + tlv->type = frame_type; + tlv->flags = 0; + + if (len && buf) + memcpy(tlv->ie_data, buf, len); +} + int qtnf_cmd_send_start_ap(struct qtnf_vif *vif) { struct sk_buff *cmd_skb; @@ -2028,9 +2043,8 @@ int qtnf_cmd_send_scan(struct qtnf_wmac *mac) } if (scan_req->ie_len != 0) - qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_IE_SET, - scan_req->ie, - scan_req->ie_len); + qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_PROBE_REQ, + scan_req->ie, scan_req->ie_len); if (scan_req->n_channels) { n_channels = scan_req->n_channels; @@ -2154,9 +2168,8 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, sme->ssid_len); if (sme->ie_len != 0) - qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_IE_SET, - sme->ie, - sme->ie_len); + qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_ASSOC_REQ, + sme->ie, sme->ie_len); qtnf_bus_lock(vif->mac->bus); diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 4b29f9fb9c3c..b67a082eed69 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -65,34 +65,39 @@ qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif, sinfo.assoc_req_ies_len = 0; payload_len = len - sizeof(*sta_assoc); - tlv = (struct qlink_tlv_hdr *)sta_assoc->ies; + tlv = (const struct qlink_tlv_hdr *)sta_assoc->ies; - while (payload_len >= sizeof(struct qlink_tlv_hdr)) { + while (payload_len >= sizeof(*tlv)) { tlv_type = le16_to_cpu(tlv->type); tlv_value_len = le16_to_cpu(tlv->len); tlv_full_len = tlv_value_len + sizeof(struct qlink_tlv_hdr); - if (tlv_full_len > payload_len) { - pr_warn("VIF%u.%u: malformed TLV 0x%.2X; LEN: %u\n", - mac->macid, vif->vifid, tlv_type, - tlv_value_len); + if (tlv_full_len > payload_len) return -EINVAL; - } 
if (tlv_type == QTN_TLV_ID_IE_SET) { - sinfo.assoc_req_ies = tlv->val; - sinfo.assoc_req_ies_len = tlv_value_len; + const struct qlink_tlv_ie_set *ie_set; + unsigned int ie_len; + + if (payload_len < sizeof(*ie_set)) + return -EINVAL; + + ie_set = (const struct qlink_tlv_ie_set *)tlv; + ie_len = tlv_value_len - + (sizeof(*ie_set) - sizeof(ie_set->hdr)); + + if (ie_set->type == QLINK_IE_SET_ASSOC_REQ && ie_len) { + sinfo.assoc_req_ies = ie_set->ie_data; + sinfo.assoc_req_ies_len = ie_len; + } } payload_len -= tlv_full_len; tlv = (struct qlink_tlv_hdr *)(tlv->val + tlv_value_len); } - if (payload_len) { - pr_warn("VIF%u.%u: malformed TLV buf; bytes left: %zu\n", - mac->macid, vif->vifid, payload_len); + if (payload_len) return -EINVAL; - } cfg80211_new_sta(vif->netdev, sta_assoc->sta_addr, &sinfo, GFP_KERNEL); @@ -289,27 +294,32 @@ qtnf_event_handle_scan_results(struct qtnf_vif *vif, tlv_value_len = le16_to_cpu(tlv->len); tlv_full_len = tlv_value_len + sizeof(struct qlink_tlv_hdr); - if (tlv_full_len > payload_len) { - pr_warn("VIF%u.%u: malformed TLV 0x%.2X; LEN: %u\n", - vif->mac->macid, vif->vifid, tlv_type, - tlv_value_len); + if (tlv_full_len > payload_len) return -EINVAL; - } if (tlv_type == QTN_TLV_ID_IE_SET) { - ies = tlv->val; - ies_len = tlv_value_len; + const struct qlink_tlv_ie_set *ie_set; + unsigned int ie_len; + + if (payload_len < sizeof(*ie_set)) + return -EINVAL; + + ie_set = (const struct qlink_tlv_ie_set *)tlv; + ie_len = tlv_value_len - + (sizeof(*ie_set) - sizeof(ie_set->hdr)); + + if (ie_len) { + ies = ie_set->ie_data; + ies_len = ie_len; + } } payload_len -= tlv_full_len; tlv = (struct qlink_tlv_hdr *)(tlv->val + tlv_value_len); } - if (payload_len) { - pr_warn("VIF%u.%u: malformed TLV buf; bytes left: %zu\n", - vif->mac->macid, vif->vifid, payload_len); + if (payload_len) return -EINVAL; - } bss = cfg80211_inform_bss(wiphy, channel, frame_type, sr->bssid, get_unaligned_le64(&sr->tsf), diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index c558d819a966..f491942eb6d0 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -1147,6 +1147,33 @@ struct qlink_tlv_chandef { struct qlink_chandef chan; } __packed; +enum qlink_ie_set_type { + QLINK_IE_SET_UNKNOWN, + QLINK_IE_SET_ASSOC_REQ, + QLINK_IE_SET_ASSOC_RESP, + QLINK_IE_SET_PROBE_REQ, + QLINK_IE_SET_SCAN, + QLINK_IE_SET_BEACON_HEAD, + QLINK_IE_SET_BEACON_TAIL, + QLINK_IE_SET_BEACON_IES, + QLINK_IE_SET_PROBE_RESP, + QLINK_IE_SET_PROBE_RESP_IES, +}; + +/** + * struct qlink_tlv_ie_set - data for QTN_TLV_ID_IE_SET + * + * @type: type of MGMT frame IEs belong to, one of &enum qlink_ie_set_type. + * @flags: for future use. + * @ie_data: IEs data. + */ +struct qlink_tlv_ie_set { + struct qlink_tlv_hdr hdr; + u8 type; + u8 flags; + u8 ie_data[0]; +} __packed; + struct qlink_chan_stats { __le32 chan_num; __le32 cca_tx; From 5face518d4467868e7a82a321955236b2bb65bdc Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:51 -0700 Subject: [PATCH 2004/2177] qtnfmac: SCAN results: retreive frame type information from "IE set" TLV "IE set" TLV carries the same information as qlink_event_scan_result::frame_type. Convert the event to make use of TLV and drop frame_type member. While at it, make qlink_event_scan_result structure alignement-safe. 
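Here "alignment-safe" means that fields inside a __packed wire structure are ordered, with explicit reserved padding, so every multi-byte member still starts at its natural offset. The structure below is a hedged, made-up illustration of that layout rule, not a structure from this driver:

    struct example_wire_hdr {
            __le16 len;       /* offset 0, naturally 2-byte aligned */
            u8 type;          /* offset 2 */
            u8 rsvd[1];       /* offset 3, explicit padding */
            __le32 cookie;    /* offset 4, naturally 4-byte aligned */
    } __packed;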
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/event.c | 25 +++++++++---------- .../net/wireless/quantenna/qtnfmac/qlink.h | 11 ++------ 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index b67a082eed69..4abc6d9ed560 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -252,13 +252,12 @@ qtnf_event_handle_scan_results(struct qtnf_vif *vif, struct cfg80211_bss *bss; struct ieee80211_channel *channel; struct wiphy *wiphy = priv_to_wiphy(vif->mac); - enum cfg80211_bss_frame_type frame_type; + enum cfg80211_bss_frame_type frame_type = CFG80211_BSS_FTYPE_UNKNOWN; size_t payload_len; u16 tlv_type; u16 tlv_value_len; size_t tlv_full_len; const struct qlink_tlv_hdr *tlv; - const u8 *ies = NULL; size_t ies_len = 0; @@ -275,17 +274,6 @@ qtnf_event_handle_scan_results(struct qtnf_vif *vif, return -EINVAL; } - switch (sr->frame_type) { - case QLINK_BSS_FTYPE_BEACON: - frame_type = CFG80211_BSS_FTYPE_BEACON; - break; - case QLINK_BSS_FTYPE_PRESP: - frame_type = CFG80211_BSS_FTYPE_PRESP; - break; - default: - frame_type = CFG80211_BSS_FTYPE_UNKNOWN; - } - payload_len = len - sizeof(*sr); tlv = (struct qlink_tlv_hdr *)sr->payload; @@ -308,6 +296,17 @@ qtnf_event_handle_scan_results(struct qtnf_vif *vif, ie_len = tlv_value_len - (sizeof(*ie_set) - sizeof(ie_set->hdr)); + switch (ie_set->type) { + case QLINK_IE_SET_BEACON_IES: + frame_type = CFG80211_BSS_FTYPE_BEACON; + break; + case QLINK_IE_SET_PROBE_RESP_IES: + frame_type = CFG80211_BSS_FTYPE_PRESP; + break; + default: + frame_type = CFG80211_BSS_FTYPE_UNKNOWN; + } + if (ie_len) { ies = ie_set->ie_data; ies_len = ie_len; diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index f491942eb6d0..81effd21075c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -881,12 +881,6 @@ struct qlink_event_rxmgmt { u8 frame_data[0]; } __packed; -enum qlink_frame_type { - QLINK_BSS_FTYPE_UNKNOWN, - QLINK_BSS_FTYPE_BEACON, - QLINK_BSS_FTYPE_PRESP, -}; - /** * struct qlink_event_scan_result - data for QLINK_EVENT_SCAN_RESULTS event * @@ -896,7 +890,6 @@ enum qlink_frame_type { * @capab: capabilities field. * @bintval: beacon interval announced by discovered BSS. * @signal: signal strength. - * @frame_type: frame type used to get scan result, see &enum qlink_frame_type. * @bssid: BSSID announced by discovered BSS. * @ssid_len: length of SSID announced by BSS. * @ssid: SSID announced by discovered BSS. @@ -909,10 +902,10 @@ struct qlink_event_scan_result { __le16 capab; __le16 bintval; s8 signal; - u8 frame_type; - u8 bssid[ETH_ALEN]; u8 ssid_len; u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 bssid[ETH_ALEN]; + u8 rsvd[2]; u8 payload[0]; } __packed; From 4d1f0fabdc4515db1b57da2dfdfdeb931cd49e92 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:52 -0700 Subject: [PATCH 2005/2177] qtnfmac: convert "Append IEs" command to QTN_TLV_ID_IE_SET usage Data contained within "Append IEs" command (QLINK_CMD_MGMT_SET_APPIE) duplicates QTN_TLV_ID_IE_SET TLV. Convert the command to use that TLV instead. 
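The TLV length accounting is worth spelling out. The helper below is a hedged sketch with a hypothetical name that simply mirrors qtnf_cmd_tlv_ie_set_add() from earlier in this series: the TLV header's len field covers the type and flags octets plus the IE bytes, so a 7-byte IE buffer gives hdr.len = 2 + 7 = 9 while the command skb grows by sizeof(struct qlink_tlv_hdr) + 9 bytes.

    /* hedged sketch: room consumed on the command skb by one IE-set TLV */
    static size_t qlink_ie_set_tlv_room(size_t ie_len)
    {
            /* TLV header + type octet + flags octet + IE bytes */
            return sizeof(struct qlink_tlv_ie_set) + ie_len;
    }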
Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 12 ++++++------ .../net/wireless/quantenna/qtnfmac/commands.c | 17 ++++------------- drivers/net/wireless/quantenna/qtnfmac/qlink.h | 15 --------------- 3 files changed, 10 insertions(+), 34 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 1a0c538a38d2..27b11dc1e6fd 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -217,10 +217,10 @@ static int qtnf_mgmt_set_appie(struct qtnf_vif *vif, int ret = 0; if (!info->beacon_ies || !info->beacon_ies_len) { - ret = qtnf_cmd_send_mgmt_set_appie(vif, QLINK_MGMT_FRAME_BEACON, + ret = qtnf_cmd_send_mgmt_set_appie(vif, QLINK_IE_SET_BEACON_IES, NULL, 0); } else { - ret = qtnf_cmd_send_mgmt_set_appie(vif, QLINK_MGMT_FRAME_BEACON, + ret = qtnf_cmd_send_mgmt_set_appie(vif, QLINK_IE_SET_BEACON_IES, info->beacon_ies, info->beacon_ies_len); } @@ -230,11 +230,11 @@ static int qtnf_mgmt_set_appie(struct qtnf_vif *vif, if (!info->proberesp_ies || !info->proberesp_ies_len) { ret = qtnf_cmd_send_mgmt_set_appie(vif, - QLINK_MGMT_FRAME_PROBE_RESP, + QLINK_IE_SET_PROBE_RESP_IES, NULL, 0); } else { ret = qtnf_cmd_send_mgmt_set_appie(vif, - QLINK_MGMT_FRAME_PROBE_RESP, + QLINK_IE_SET_PROBE_RESP_IES, info->proberesp_ies, info->proberesp_ies_len); } @@ -244,11 +244,11 @@ static int qtnf_mgmt_set_appie(struct qtnf_vif *vif, if (!info->assocresp_ies || !info->assocresp_ies_len) { ret = qtnf_cmd_send_mgmt_set_appie(vif, - QLINK_MGMT_FRAME_ASSOC_RESP, + QLINK_IE_SET_ASSOC_RESP, NULL, 0); } else { ret = qtnf_cmd_send_mgmt_set_appie(vif, - QLINK_MGMT_FRAME_ASSOC_RESP, + QLINK_IE_SET_ASSOC_RESP, info->assocresp_ies, info->assocresp_ies_len); } diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 8d3cdba99390..294418c7e59b 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -395,11 +395,10 @@ int qtnf_cmd_send_mgmt_set_appie(struct qtnf_vif *vif, u8 frame_type, const u8 *buf, size_t len) { struct sk_buff *cmd_skb; - struct qlink_cmd_mgmt_append_ie *cmd; u16 res_code = QLINK_CMD_RESULT_OK; int ret; - if (sizeof(*cmd) + len > QTNF_MAX_CMD_BUF_SIZE) { + if (len > QTNF_MAX_CMD_BUF_SIZE) { pr_warn("VIF%u.%u: %u frame is too big: %zu\n", vif->mac->macid, vif->vifid, frame_type, len); return -E2BIG; @@ -407,22 +406,14 @@ int qtnf_cmd_send_mgmt_set_appie(struct qtnf_vif *vif, u8 frame_type, cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid, QLINK_CMD_MGMT_SET_APPIE, - sizeof(*cmd)); + sizeof(struct qlink_cmd)); if (unlikely(!cmd_skb)) return -ENOMEM; + qtnf_cmd_tlv_ie_set_add(cmd_skb, frame_type, buf, len); + qtnf_bus_lock(vif->mac->bus); - cmd = (struct qlink_cmd_mgmt_append_ie *)cmd_skb->data; - cmd->type = frame_type; - cmd->flags = 0; - - /* If len == 0 then IE buf for specified frame type - * should be cleared on EP. 
- */ - if (len && buf) - qtnf_cmd_skb_put_buffer(cmd_skb, buf, len); - ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code); if (unlikely(ret)) diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 81effd21075c..43ce45106aad 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -298,21 +298,6 @@ struct qlink_cmd_mgmt_frame_tx { u8 frame_data[0]; } __packed; -/** - * struct qlink_cmd_mgmt_append_ie - data for QLINK_CMD_MGMT_SET_APPIE command - * - * @type: type of MGMT frame to appent requested IEs to, one of - * &enum qlink_mgmt_frame_type. - * @flags: for future use. - * @ie_data: IEs data to append. - */ -struct qlink_cmd_mgmt_append_ie { - struct qlink_cmd chdr; - u8 type; - u8 flags; - u8 ie_data[0]; -} __packed; - /** * struct qlink_cmd_get_sta_info - data for QLINK_CMD_GET_STA_INFO command * From 17011da0b8f079b1f6458893d4140cf65a265536 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:53 -0700 Subject: [PATCH 2006/2177] qtnfmac: configure and start AP interface with a single command Current logic artificially divides "start AP" procedure into three stages: - generic interface configuration (security, channel etc) - IE's processing - enable AP mode on interface This separation would not allow to do a proper device configuration as first stage needs to use information from IEs that are processed on a second stage. Which means first and second stages have to be meged. In that case there is no point anymore to keep third stage either, so merge all three into a single command. This new command carries all the same info as contained in "struct cfg80211_ap_settings". Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 17 +---- .../net/wireless/quantenna/qtnfmac/commands.c | 72 +++++++++++-------- .../net/wireless/quantenna/qtnfmac/commands.h | 5 +- .../net/wireless/quantenna/qtnfmac/qlink.h | 5 +- 4 files changed, 47 insertions(+), 52 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 27b11dc1e6fd..7d6dc76c930a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -271,26 +271,11 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; - ret = qtnf_cmd_send_config_ap(vif, settings); - if (ret) { - pr_err("VIF%u.%u: failed to push config to FW\n", - vif->mac->macid, vif->vifid); - goto out; - } - - ret = qtnf_mgmt_set_appie(vif, &settings->beacon); - if (ret) { - pr_err("VIF%u.%u: failed to add IEs to beacon\n", - vif->mac->macid, vif->vifid); - goto out; - } - - ret = qtnf_cmd_send_start_ap(vif); + ret = qtnf_cmd_send_start_ap(vif, settings); if (ret) pr_err("VIF%u.%u: failed to start AP\n", vif->mac->macid, vif->vifid); -out: return ret; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 294418c7e59b..5186116d653f 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -162,56 +162,51 @@ static void qtnf_cmd_tlv_ie_set_add(struct sk_buff *cmd_skb, u8 frame_type, memcpy(tlv->ie_data, buf, len); } -int qtnf_cmd_send_start_ap(struct qtnf_vif *vif) +static bool qtnf_cmd_start_ap_can_fit(const struct qtnf_vif *vif, + const struct 
cfg80211_ap_settings *s) { - struct sk_buff *cmd_skb; - u16 res_code = QLINK_CMD_RESULT_OK; - int ret; + unsigned int len = sizeof(struct qlink_cmd_start_ap); - cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid, - QLINK_CMD_START_AP, - sizeof(struct qlink_cmd)); - if (unlikely(!cmd_skb)) - return -ENOMEM; + len += s->ssid_len; + len += s->beacon.head_len; + len += s->beacon.tail_len; + len += s->beacon.beacon_ies_len; + len += s->beacon.proberesp_ies_len; + len += s->beacon.assocresp_ies_len; + len += s->beacon.probe_resp_len; - qtnf_bus_lock(vif->mac->bus); + if (cfg80211_chandef_valid(&s->chandef)) + len += sizeof(struct qlink_tlv_chandef); - ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code); - - if (unlikely(ret)) - goto out; - - if (unlikely(res_code != QLINK_CMD_RESULT_OK)) { - pr_err("VIF%u.%u: CMD failed: %u\n", vif->mac->macid, - vif->vifid, res_code); - ret = -EFAULT; - goto out; + if (len > (sizeof(struct qlink_cmd) + QTNF_MAX_CMD_BUF_SIZE)) { + pr_err("VIF%u.%u: can not fit AP settings: %u\n", + vif->mac->macid, vif->vifid, len); + return false; } - netif_carrier_on(vif->netdev); - -out: - qtnf_bus_unlock(vif->mac->bus); - return ret; + return true; } -int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, - const struct cfg80211_ap_settings *s) +int qtnf_cmd_send_start_ap(struct qtnf_vif *vif, + const struct cfg80211_ap_settings *s) { struct sk_buff *cmd_skb; - struct qlink_cmd_config_ap *cmd; + struct qlink_cmd_start_ap *cmd; struct qlink_auth_encr *aen; u16 res_code = QLINK_CMD_RESULT_OK; int ret; int i; + if (!qtnf_cmd_start_ap_can_fit(vif, s)) + return -E2BIG; + cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid, - QLINK_CMD_CONFIG_AP, + QLINK_CMD_START_AP, sizeof(*cmd)); if (unlikely(!cmd_skb)) return -ENOMEM; - cmd = (struct qlink_cmd_config_ap *)cmd_skb->data; + cmd = (struct qlink_cmd_start_ap *)cmd_skb->data; cmd->dtim_period = s->dtim_period; cmd->beacon_interval = cpu_to_le16(s->beacon_interval); cmd->hidden_ssid = qlink_hidden_ssid_nl2q(s->hidden_ssid); @@ -256,6 +251,21 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, qlink_chandef_cfg2q(&s->chandef, &chtlv->chan); } + qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_BEACON_HEAD, + s->beacon.head, s->beacon.head_len); + qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_BEACON_TAIL, + s->beacon.tail, s->beacon.tail_len); + qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_BEACON_IES, + s->beacon.beacon_ies, s->beacon.beacon_ies_len); + qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_PROBE_RESP, + s->beacon.probe_resp, s->beacon.probe_resp_len); + qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_PROBE_RESP_IES, + s->beacon.proberesp_ies, + s->beacon.proberesp_ies_len); + qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_ASSOC_RESP, + s->beacon.assocresp_ies, + s->beacon.assocresp_ies_len); + qtnf_bus_lock(vif->mac->bus); ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code); @@ -270,6 +280,8 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, goto out; } + netif_carrier_on(vif->netdev); + out: qtnf_bus_unlock(vif->mac->bus); return ret; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index d6fe3cc09fba..d981a76e5835 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -33,9 +33,8 @@ int qtnf_cmd_send_del_intf(struct qtnf_vif *vif); int qtnf_cmd_band_info_get(struct qtnf_wmac *mac, struct ieee80211_supported_band *band); int qtnf_cmd_send_regulatory_config(struct 
qtnf_wmac *mac, const char *alpha2); -int qtnf_cmd_send_config_ap(struct qtnf_vif *vif, - const struct cfg80211_ap_settings *s); -int qtnf_cmd_send_start_ap(struct qtnf_vif *vif); +int qtnf_cmd_send_start_ap(struct qtnf_vif *vif, + const struct cfg80211_ap_settings *s); int qtnf_cmd_send_stop_ap(struct qtnf_vif *vif); int qtnf_cmd_send_register_mgmt(struct qtnf_vif *vif, u16 frame_type, bool reg); int qtnf_cmd_send_mgmt_frame(struct qtnf_vif *vif, u32 cookie, u16 flags, diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 43ce45106aad..e07f776fe928 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -192,7 +192,6 @@ enum qlink_cmd_type { QLINK_CMD_BAND_INFO_GET = 0x001A, QLINK_CMD_CHAN_SWITCH = 0x001B, QLINK_CMD_CHAN_GET = 0x001C, - QLINK_CMD_CONFIG_AP = 0x0020, QLINK_CMD_START_AP = 0x0021, QLINK_CMD_STOP_AP = 0x0022, QLINK_CMD_GET_STA_INFO = 0x0030, @@ -542,7 +541,7 @@ enum qlink_hidden_ssid { }; /** - * struct qlink_cmd_config_ap - data for QLINK_CMD_CONFIG_AP command + * struct qlink_cmd_start_ap - data for QLINK_CMD_START_AP command * * @beacon_interval: beacon interval * @inactivity_timeout: station's inactivity period in seconds @@ -554,7 +553,7 @@ enum qlink_hidden_ssid { * @aen: encryption info * @info: variable configurations */ -struct qlink_cmd_config_ap { +struct qlink_cmd_start_ap { struct qlink_cmd chdr; __le16 beacon_interval; __le16 inactivity_timeout; From a3945f43761c3e15bebe56e5eb5674caf00f0d32 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:54 -0700 Subject: [PATCH 2007/2177] qtnfmac: include HTCAP and VHTCAP into config AP command Include HT/VHT caps directly into command so that they won't have to be searched for in IEs. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/commands.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 5186116d653f..c2f0b750365b 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -266,6 +266,24 @@ int qtnf_cmd_send_start_ap(struct qtnf_vif *vif, s->beacon.assocresp_ies, s->beacon.assocresp_ies_len); + if (s->ht_cap) { + struct qlink_tlv_hdr *tlv = (struct qlink_tlv_hdr *) + skb_put(cmd_skb, sizeof(*tlv) + sizeof(*s->ht_cap)); + + tlv->type = cpu_to_le16(WLAN_EID_HT_CAPABILITY); + tlv->len = cpu_to_le16(sizeof(*s->ht_cap)); + memcpy(tlv->val, s->ht_cap, sizeof(*s->ht_cap)); + } + + if (s->vht_cap) { + struct qlink_tlv_hdr *tlv = (struct qlink_tlv_hdr *) + skb_put(cmd_skb, sizeof(*tlv) + sizeof(*s->vht_cap)); + + tlv->type = cpu_to_le16(WLAN_EID_VHT_CAPABILITY); + tlv->len = cpu_to_le16(sizeof(*s->vht_cap)); + memcpy(tlv->val, s->vht_cap, sizeof(*s->vht_cap)); + } + qtnf_bus_lock(vif->mac->bus); ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code); From c9889671736cbc6324aa31cffab5fed0e3f74a2f Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Mon, 30 Oct 2017 18:04:55 -0700 Subject: [PATCH 2008/2177] qtnfmac: pass all CONNECT cmd params to wireless card for processing Specifically, following parameters are needed for wireless device configuration but were not available to it before: - HT/VHT capabilities and capabilities masks. 
- full channel info (not just IEEE number) - BSSID hint - previous BSSID for reassoc request Move Management Frame Protection setting from common encr info structure into STA-specific .connect command parameters. Make sure that all new qlink structure definitions are alignment-safe. Signed-off-by: Igor Mitsyanko Reviewed-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/commands.c | 63 ++++++++++++------- .../net/wireless/quantenna/qtnfmac/qlink.h | 36 ++++++++--- 2 files changed, 67 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index c2f0b750365b..8bc8dd637315 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -221,7 +221,6 @@ int qtnf_cmd_send_start_ap(struct qtnf_vif *vif, aen = &cmd->aen; aen->auth_type = s->auth_type; aen->privacy = !!s->privacy; - aen->mfp = 0; aen->wpa_versions = cpu_to_le32(s->crypto.wpa_versions); aen->cipher_group = cpu_to_le32(s->crypto.cipher_group); aen->n_ciphers_pairwise = cpu_to_le32(s->crypto.n_ciphers_pairwise); @@ -2029,17 +2028,36 @@ out: return ret; } +static void qtnf_cmd_channel_tlv_add(struct sk_buff *cmd_skb, + const struct ieee80211_channel *sc) +{ + struct qlink_tlv_channel *qchan; + u32 flags = 0; + + qchan = skb_put_zero(cmd_skb, sizeof(*qchan)); + qchan->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANNEL); + qchan->hdr.len = cpu_to_le16(sizeof(*qchan) - sizeof(qchan->hdr)); + qchan->center_freq = cpu_to_le16(sc->center_freq); + qchan->hw_value = cpu_to_le16(sc->hw_value); + + if (sc->flags & IEEE80211_CHAN_NO_IR) + flags |= QLINK_CHAN_NO_IR; + + if (sc->flags & IEEE80211_CHAN_RADAR) + flags |= QLINK_CHAN_RADAR; + + qchan->flags = cpu_to_le32(flags); +} + int qtnf_cmd_send_scan(struct qtnf_wmac *mac) { struct sk_buff *cmd_skb; u16 res_code = QLINK_CMD_RESULT_OK; struct ieee80211_channel *sc; struct cfg80211_scan_request *scan_req = mac->scan_req; - struct qlink_tlv_channel *qchan; int n_channels; int count = 0; int ret; - u32 flags; if (scan_req->n_ssids > QTNF_MAX_SSID_LIST_LENGTH) { pr_err("MAC%u: too many SSIDs in scan request\n", mac->macid); @@ -2081,22 +2099,8 @@ int qtnf_cmd_send_scan(struct qtnf_wmac *mac) pr_debug("MAC%u: scan chan=%d, freq=%d, flags=%#x\n", mac->macid, sc->hw_value, sc->center_freq, sc->flags); - qchan = skb_put_zero(cmd_skb, sizeof(*qchan)); - flags = 0; - qchan->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANNEL); - qchan->hdr.len = cpu_to_le16(sizeof(*qchan) - - sizeof(struct qlink_tlv_hdr)); - qchan->center_freq = cpu_to_le16(sc->center_freq); - qchan->hw_value = cpu_to_le16(sc->hw_value); - - if (sc->flags & IEEE80211_CHAN_NO_IR) - flags |= QLINK_CHAN_NO_IR; - - if (sc->flags & IEEE80211_CHAN_RADAR) - flags |= QLINK_CHAN_RADAR; - - qchan->flags = cpu_to_le32(flags); + qtnf_cmd_channel_tlv_add(cmd_skb, sc); n_channels--; count++; } @@ -2140,10 +2144,15 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, ether_addr_copy(cmd->bssid, vif->bssid); - if (sme->channel) - cmd->channel = cpu_to_le16(sme->channel->hw_value); + if (sme->bssid_hint) + ether_addr_copy(cmd->bssid_hint, sme->bssid_hint); else - cmd->channel = 0; + eth_zero_addr(cmd->bssid_hint); + + if (sme->prev_bssid) + ether_addr_copy(cmd->prev_bssid, sme->prev_bssid); + else + eth_zero_addr(cmd->prev_bssid); if ((sme->bg_scan_period > 0) && (sme->bg_scan_period <= QTNF_MAX_BG_SCAN_PERIOD)) @@ -2161,11 +2170,18 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, 
connect_flags |= QLINK_STA_CONNECT_USE_RRM; cmd->flags = cpu_to_le32(connect_flags); + memcpy(&cmd->ht_capa, &sme->ht_capa, sizeof(cmd->ht_capa)); + memcpy(&cmd->ht_capa_mask, &sme->ht_capa_mask, + sizeof(cmd->ht_capa_mask)); + memcpy(&cmd->vht_capa, &sme->vht_capa, sizeof(cmd->vht_capa)); + memcpy(&cmd->vht_capa_mask, &sme->vht_capa_mask, + sizeof(cmd->vht_capa_mask)); + cmd->pbss = sme->pbss; aen = &cmd->aen; aen->auth_type = sme->auth_type; aen->privacy = !!sme->privacy; - aen->mfp = sme->mfp; + cmd->mfp = sme->mfp; aen->wpa_versions = cpu_to_le32(sme->crypto.wpa_versions); aen->cipher_group = cpu_to_le32(sme->crypto.cipher_group); aen->n_ciphers_pairwise = cpu_to_le32(sme->crypto.n_ciphers_pairwise); @@ -2192,6 +2208,9 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, qtnf_cmd_tlv_ie_set_add(cmd_skb, QLINK_IE_SET_ASSOC_REQ, sme->ie, sme->ie_len); + if (sme->channel) + qtnf_cmd_channel_tlv_add(cmd_skb, sme->channel); + qtnf_bus_lock(vif->mac->bus); ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code); diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index e07f776fe928..a432fb001c41 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -19,7 +19,7 @@ #include -#define QLINK_PROTO_VER 5 +#define QLINK_PROTO_VER 6 #define QLINK_MACID_RSVD 0xFF #define QLINK_VIFID_RSVD 0xFF @@ -148,9 +148,9 @@ struct qlink_auth_encr { __le16 control_port_ethertype; u8 auth_type; u8 privacy; - u8 mfp; u8 control_port; u8 control_port_no_encrypt; + u8 rsvd[2]; } __packed; /* QLINK Command messages related definitions @@ -404,20 +404,36 @@ enum qlink_sta_connect_flags { /** * struct qlink_cmd_connect - data for QLINK_CMD_CONNECT command * - * @flags: for future use. - * @channel: channel which should be used to connect. - * @bg_scan_period: period of background scan. - * @aen: authentication information. * @bssid: BSSID of the BSS to connect to. + * @bssid_hint: recommended AP BSSID for initial connection to the BSS or + * 00:00:00:00:00:00 if not specified. + * @prev_bssid: previous BSSID, if specified (not 00:00:00:00:00:00) indicates + * a request to reassociate. + * @bg_scan_period: period of background scan. + * @flags: one of &enum qlink_sta_connect_flags. + * @ht_capa: HT Capabilities overrides. + * @ht_capa_mask: The bits of ht_capa which are to be used. + * @vht_capa: VHT Capability overrides + * @vht_capa_mask: The bits of vht_capa which are to be used. + * @aen: authentication information. + * @mfp: whether to use management frame protection. * @payload: variable portion of connection request. */ struct qlink_cmd_connect { struct qlink_cmd chdr; - __le32 flags; - __le16 channel; - __le16 bg_scan_period; - struct qlink_auth_encr aen; u8 bssid[ETH_ALEN]; + u8 bssid_hint[ETH_ALEN]; + u8 prev_bssid[ETH_ALEN]; + __le16 bg_scan_period; + __le32 flags; + struct ieee80211_ht_cap ht_capa; + struct ieee80211_ht_cap ht_capa_mask; + struct ieee80211_vht_cap vht_capa; + struct ieee80211_vht_cap vht_capa_mask; + struct qlink_auth_encr aen; + u8 mfp; + u8 pbss; + u8 rsvd[2]; u8 payload[0]; } __packed; From bfa62a52cad93686bb8d8171ea5288813248a7c6 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 9 Nov 2017 11:59:24 +0100 Subject: [PATCH 2009/2177] rt2x00usb: mark device removed when get ENOENT usb error ENOENT usb error mean "specified interface or endpoint does not exist or is not enabled". Mark device not present when we encounter this error similar like we do with ENODEV error. 
Otherwise we can end up in an infinite loop in rt2x00usb_work_rxdone(), because we keep removing RX entries and putting them back on the queue forever. A similar situation can occur whenever URB submission keeps failing with some other error, so we should also consider limiting the number of entries processed by the rxdone work. But for now, since this patch fixes a reproducible soft lockup on single-processor systems and follows the documented meaning of the ENOENT error, let's apply it. The patch adds the additional ENOENT check not only in the RX kick routine, but also in the other places where we check for ENODEV.
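In practice the change boils down to treating -ENOENT exactly like -ENODEV wherever the driver decides whether the device is still present. A hedged sketch of that pattern is shown below; the helper name is made up, and the driver itself open-codes the comparison at each site, as the diff that follows does:

    #include <linux/errno.h>
    #include <linux/types.h>

    static bool rt2x00usb_device_gone(int status)
    {
            /* both errors mean the USB device is no longer reachable */
            return status == -ENODEV || status == -ENOENT;
    }

Each error path would then clear DEVICE_STATE_PRESENT when this returns true.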
Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00debug.c | 7 ++++--- drivers/net/wireless/ralink/rt2x00/rt2x00dump.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c index 51520a0e2138..f4fdad2ed319 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c @@ -164,13 +164,13 @@ void rt2x00debug_dump_frame(struct rt2x00_dev *rt2x00dev, struct skb_frame_desc *skbdesc = get_skb_frame_desc(skb); struct sk_buff *skbcopy; struct rt2x00dump_hdr *dump_hdr; - struct timeval timestamp; + struct timespec64 timestamp; u32 data_len; if (likely(!test_bit(FRAME_DUMP_FILE_OPEN, &intf->frame_dump_flags))) return; - do_gettimeofday(×tamp); + ktime_get_ts64(×tamp); if (skb_queue_len(&intf->frame_dump_skbqueue) > 20) { rt2x00_dbg(rt2x00dev, "txrx dump queue length exceeded\n"); @@ -200,7 +200,8 @@ void rt2x00debug_dump_frame(struct rt2x00_dev *rt2x00dev, dump_hdr->queue_index = entry->queue->qid; dump_hdr->entry_index = entry->entry_idx; dump_hdr->timestamp_sec = cpu_to_le32(timestamp.tv_sec); - dump_hdr->timestamp_usec = cpu_to_le32(timestamp.tv_usec); + dump_hdr->timestamp_usec = cpu_to_le32(timestamp.tv_nsec / + NSEC_PER_USEC); if (!(skbdesc->flags & SKBDESC_DESC_IN_SKB)) skb_put_data(skbcopy, skbdesc->desc, skbdesc->desc_len); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dump.h b/drivers/net/wireless/ralink/rt2x00/rt2x00dump.h index 4c0e01b5d515..3b14eef0b646 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dump.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dump.h @@ -106,7 +106,7 @@ enum rt2x00_dump_type { */ struct rt2x00dump_hdr { __le32 version; -#define DUMP_HEADER_VERSION 2 +#define DUMP_HEADER_VERSION 3 __le32 header_length; __le32 desc_length; From 4775ae7afec62e6ae1bc1f99ab57db2a36b8807e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 6 Nov 2017 15:53:21 -0600 Subject: [PATCH 2011/2177] rsi: rsi_91x_ps: remove redundant code in str_psstate "INVALID_STATE" is already being returned in the default case and this code cannot be reached. Addresses-Coverity-ID: 1398384 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_ps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_ps.c b/drivers/net/wireless/rsi/rsi_91x_ps.c index 523f5329d2b7..01472fac8b9a 100644 --- a/drivers/net/wireless/rsi/rsi_91x_ps.c +++ b/drivers/net/wireless/rsi/rsi_91x_ps.c @@ -36,7 +36,6 @@ char *str_psstate(enum ps_state state) default: return "INVALID_STATE"; } - return "INVALID_STATE"; } static inline void rsi_modify_ps_state(struct rsi_hw *adapter, From ff0fd34eaee9978f9ed7f6e2ac47f9590d4afac3 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 23:10:57 +0100 Subject: [PATCH 2012/2177] net: bridge: Rename mglist to host_joined The boolean mglist indicates the host has joined a particular multicast group on the bridge interface. It is badly named, obscuring what is means. Rename it. Signed-off-by: Andrew Lunn Acked-by: Nikolay Aleksandrov Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/bridge/br_input.c | 2 +- net/bridge/br_mdb.c | 2 +- net/bridge/br_multicast.c | 14 +++++++------- net/bridge/br_private.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index a096d3e189da..7f98a7d25866 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -137,7 +137,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(br, eth_hdr(skb))) { - if ((mdst && mdst->mglist) || + if ((mdst && mdst->host_joined) || br_multicast_is_router(br)) { local_rcv = true; br->dev->stats.multicast++; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 31ddff22563e..aa716a33cb71 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -655,7 +655,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 5f7f0e9d446c..bfe5adb1f51c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -249,7 +249,7 @@ static void br_multicast_group_expired(struct timer_list *t) if (!netif_running(br->dev) || timer_pending(&mp->timer)) goto out; - mp->mglist = false; + mp->host_joined = false; if (mp->ports) goto out; @@ -292,7 +292,7 @@ static void br_multicast_del_pg(struct net_bridge *br, p->flags); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -773,7 +773,7 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - mp->mglist = true; + mp->host_joined = true; mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -1477,7 +1477,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay *= br->multicast_last_member_count; - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1561,7 +1561,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, goto out; max_delay *= br->multicast_last_member_count; - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1622,7 +1622,7 @@ br_multicast_leave_group(struct net_bridge *br, br_mdb_notify(br->dev, port, group, RTM_DELMDB, p->flags); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1662,7 +1662,7 @@ br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? 
time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 40553d832b6e..1312b8d20ec3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -209,7 +209,7 @@ struct net_bridge_mdb_entry struct rcu_head rcu; struct timer_list timer; struct br_ip addr; - bool mglist; + bool host_joined; }; struct net_bridge_mdb_htable From 2a26028d119267a2386733dd71d256f269e70f52 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 23:10:58 +0100 Subject: [PATCH 2013/2177] net: bridge: Send notification when host join/leaves a group The host can join or leave a multicast group on the brX interface, as indicated by IGMP snooping. This is tracked within the bridge multicast code. Send a notification when this happens, in the same way a notification is sent when a port of the bridge joins/leaves a group because of IGMP snooping. Signed-off-by: Andrew Lunn Acked-by: Nikolay Aleksandrov Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 9 ++++++--- net/bridge/br_multicast.c | 6 +++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index aa716a33cb71..702408d2a93c 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -317,7 +317,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, #endif mdb.obj.orig_dev = port_dev; - if (port_dev && type == RTM_NEWMDB) { + if (p && port_dev && type == RTM_NEWMDB) { complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); if (complete_info) { complete_info->port = p; @@ -327,7 +327,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, if (switchdev_port_obj_add(port_dev, &mdb.obj)) kfree(complete_info); } - } else if (port_dev && type == RTM_DELMDB) { + } else if (p && port_dev && type == RTM_DELMDB) { switchdev_port_obj_del(port_dev, &mdb.obj); } @@ -353,7 +353,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_mdb_entry entry; memset(&entry, 0, sizeof(entry)); - entry.ifindex = port->dev->ifindex; + if (port) + entry.ifindex = port->dev->ifindex; + else + entry.ifindex = dev->ifindex; entry.addr.proto = group->proto; entry.addr.u.ip4 = group->u.ip4; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index bfe5adb1f51c..cb4729539b82 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -250,6 +250,7 @@ static void br_multicast_group_expired(struct timer_list *t) goto out; mp->host_joined = false; + br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0); if (mp->ports) goto out; @@ -773,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - mp->host_joined = true; + if (!mp->host_joined) { + mp->host_joined = true; + br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0); + } mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } From 47d5b6db2afa766d7af85db684d0b5f092e4fc46 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 23:10:59 +0100 Subject: [PATCH 2014/2177] net: bridge: Add/del switchdev object on host join/leave When the host joins or leaves a multicast group, use switchdev to add an object to the hardware to forward traffic for the group to the host. Signed-off-by: Andrew Lunn Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 1 + net/bridge/br_mdb.c | 43 +++++++++++++++++++++++++++++++++++++++ net/switchdev/switchdev.c | 2 ++ 3 files changed, 46 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d756fbe46625..39bc855d7fee 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -76,6 +76,7 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, SWITCHDEV_OBJ_ID_PORT_MDB, + SWITCHDEV_OBJ_ID_HOST_MDB, }; struct switchdev_obj { diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 702408d2a93c..b0f4c734900b 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -292,6 +292,46 @@ err: kfree(priv); } +static void br_mdb_switchdev_host_port(struct net_device *dev, + struct net_device *lower_dev, + struct br_mdb_entry *entry, int type) +{ + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_HOST_MDB, + .flags = SWITCHDEV_F_DEFER, + }, + .vid = entry->vid, + }; + + if (entry->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(entry->addr.u.ip4, mdb.addr); +#if IS_ENABLED(CONFIG_IPV6) + else + ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr); +#endif + + mdb.obj.orig_dev = dev; + switch (type) { + case RTM_NEWMDB: + switchdev_port_obj_add(lower_dev, &mdb.obj); + break; + case RTM_DELMDB: + switchdev_port_obj_del(lower_dev, &mdb.obj); + break; + } +} + +static void br_mdb_switchdev_host(struct net_device *dev, + struct br_mdb_entry *entry, int type) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) + br_mdb_switchdev_host_port(dev, lower_dev, entry, type); +} + static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, struct br_mdb_entry *entry, int type) { @@ -331,6 +371,9 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, switchdev_port_obj_del(port_dev, &mdb.obj); } + if (!p) + br_mdb_switchdev_host(dev, entry, type); + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 0531b41d1f2d..74b9d916a58b 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) return sizeof(struct switchdev_obj_port_vlan); case SWITCHDEV_OBJ_ID_PORT_MDB: return sizeof(struct switchdev_obj_port_mdb); + case SWITCHDEV_OBJ_ID_HOST_MDB: + return sizeof(struct switchdev_obj_port_mdb); default: BUG(); } From 5f4dbc50ce4d74b6f57a25fa114fcefe55acce17 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 23:11:00 +0100 Subject: [PATCH 2015/2177] net: dsa: slave: Handle switchdev host mdb add/del Add code to handle switchdev host mdb add/del. Since DSA uses one of the switch ports as a transport to the host, we just need to add an MDB on this port. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index cc7fe47dd4bf..35715fda84b2 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -304,6 +304,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev, case SWITCHDEV_OBJ_ID_PORT_MDB: err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); break; + case SWITCHDEV_OBJ_ID_HOST_MDB: + /* DSA can directly translate this to a normal MDB add, + * but on the CPU port. 
+ */ + err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); @@ -326,6 +333,12 @@ static int dsa_slave_port_obj_del(struct net_device *dev, case SWITCHDEV_OBJ_ID_PORT_MDB: err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; + case SWITCHDEV_OBJ_ID_HOST_MDB: + /* DSA can directly translate this to a normal MDB add, + * but on the CPU port. + */ + err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; From bb9f603174545c01aea92f116803aeb0e6478b28 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 23:11:01 +0100 Subject: [PATCH 2016/2177] net: dsa: add more const attributes The notify mechanism does not need to modify the port it is notifying. So make the parameter const. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 4 ++-- net/dsa/port.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index bb0218c1b570..507e1ce4d4d2 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -147,10 +147,10 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); -int dsa_port_mdb_add(struct dsa_port *dp, +int dsa_port_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans); -int dsa_port_mdb_del(struct dsa_port *dp, +int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, diff --git a/net/dsa/port.c b/net/dsa/port.c index bb30b1a7de3a..a85cd63a91c4 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -17,7 +17,7 @@ #include "dsa_priv.h" -static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v) +static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) { struct raw_notifier_head *nh = &dp->ds->dst->nh; int err; @@ -215,7 +215,7 @@ int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) return ds->ops->port_fdb_dump(ds, port, cb, data); } -int dsa_port_mdb_add(struct dsa_port *dp, +int dsa_port_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans) { @@ -229,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); } -int dsa_port_mdb_del(struct dsa_port *dp, +int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { From ae45102c9d247516e52f41b3381e195bbc25a362 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 23:11:02 +0100 Subject: [PATCH 2017/2177] net: dsa: switch: Don't add CPU port to an mdb by default Now that the host indicates when a multicast group should be forwarded from the switch to the host, don't do it by default. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e6c06aa349a6..1155e43c157f 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -121,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, if (ds->index == info->sw_index) set_bit(info->port, group); for (port = 0; port < ds->num_ports; port++) - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + if (dsa_is_dsa_port(ds, port)) set_bit(port, group); if (switchdev_trans_ph_prepare(trans)) { From 2ea7a679ca2abd251c1ec03f20508619707e1749 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 7 Nov 2017 00:04:24 +0100 Subject: [PATCH 2018/2177] net: dsa: Don't add vlans when vlan filtering is disabled The software bridge can be built with vlan filtering support included. However, by default it is turned off. In its turned-off state, it still passes VLANs via switchdev, even though they are not to be used. Don't pass these VLANs to the hardware. Only do so when vlan filtering is enabled. This fixes at least one corner case. There are still issues in other corners, such as when vlan_filtering is later enabled. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/port.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/dsa/port.c b/net/dsa/port.c index a85cd63a91c4..bb4be2679904 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -252,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp, .vlan = vlan, }; - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); + if (br_vlan_enabled(dp->bridge_dev)) + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); + + return 0; } int dsa_port_vlan_del(struct dsa_port *dp, @@ -264,7 +267,10 @@ int dsa_port_vlan_del(struct dsa_port *dp, .vlan = vlan, }; - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); + if (br_vlan_enabled(dp->bridge_dev)) + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); + + return 0; } int dsa_port_fixed_link_register_of(struct dsa_port *dp) From a3dcaf17ee54f1d01d22cc2b22cab0b4f60d78cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Nov 2017 00:29:27 -0800 Subject: [PATCH 2019/2177] net: allow per netns sysctl_rmem and sysctl_wmem for protos As we want to gradually implement per netns sysctl_rmem and sysctl_wmem on a per-protocol basis, add two new fields in struct proto, and two new helpers: sk_get_wmem0() and sk_get_rmem0(). The first user will be TCP. Then UDP and SCTP can be easily converted, while DECNET probably won't get this support. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 22 ++++++++++++++++++++++ include/trace/events/sock.h | 2 +- net/core/sock.c | 10 ++++++---- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 6f1be9726e02..688a823dccc3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1101,8 +1101,12 @@ struct proto { */ unsigned long *memory_pressure; long *sysctl_mem; + int *sysctl_wmem; int *sysctl_rmem; + u32 sysctl_wmem_offset; + u32 sysctl_rmem_offset; + int max_header; bool no_autobind; @@ -2390,4 +2394,22 @@ extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; +static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) +{ + /* Does this proto have per netns sysctl_wmem ?
*/ + if (proto->sysctl_wmem_offset) + return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); + + return *proto->sysctl_wmem; +} + +static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) +{ + /* Does this proto have per netns sysctl_rmem ? */ + if (proto->sysctl_rmem_offset) + return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); + + return *proto->sysctl_rmem; +} + #endif /* _SOCK_H */ diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 6d31c0520ef3..ec4dade24466 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -48,7 +48,7 @@ TRACE_EVENT(sock_exceed_buf_limit, strncpy(__entry->name, prot->name, 32); __entry->sysctl_mem = prot->sysctl_mem; __entry->allocated = allocated; - __entry->sysctl_rmem = prot->sysctl_rmem[0]; + __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); ), diff --git a/net/core/sock.c b/net/core/sock.c index 759400053110..c59bcf90d905 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2346,16 +2346,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) /* guarantee minimum buffer size under pressure */ if (kind == SK_MEM_RECV) { - if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) + if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) return 1; } else { /* SK_MEM_SEND */ + int wmem0 = sk_get_wmem0(sk, prot); + if (sk->sk_type == SOCK_STREAM) { - if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) + if (sk->sk_wmem_queued < wmem0) return 1; - } else if (refcount_read(&sk->sk_wmem_alloc) < - prot->sysctl_wmem[0]) + } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) { return 1; + } } if (sk_has_memory_pressure(sk)) { From 356d1833b638bd465672aefeb71def3ab93fc17d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Nov 2017 00:29:28 -0800 Subject: [PATCH 2020/2177] tcp: Namespace-ify sysctl_tcp_rmem and sysctl_tcp_wmem Note that when a new netns is created, it inherits its sysctl_tcp_rmem and sysctl_tcp_wmem from initial netns. This change is needed so that we can refine TCP rcvbuf autotuning, to take RTT into consideration. Signed-off-by: Eric Dumazet Cc: Wei Wang Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 2 -- net/ipv4/sysctl_net_ipv4.c | 32 ++++++++++++++++---------------- net/ipv4/tcp.c | 21 ++++++++------------- net/ipv4/tcp_input.c | 14 ++++++++------ net/ipv4/tcp_ipv4.c | 13 ++++++++++--- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 8 files changed, 47 insertions(+), 43 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 379550f8124a..5e12975fc658 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -155,6 +155,8 @@ struct netns_ipv4 { int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; + int sysctl_tcp_wmem[3]; + int sysctl_tcp_rmem[3]; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index babfd4da1515..2f2c69ad31b2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -242,8 +242,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; -extern int sysctl_tcp_wmem[3]; -extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a82b44038308..ef0ff3357a44 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -440,22 +440,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - { - .procname = "tcp_wmem", - .data = &sysctl_tcp_wmem, - .maxlen = sizeof(sysctl_tcp_wmem), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - }, - { - .procname = "tcp_rmem", - .data = &sysctl_tcp_rmem, - .maxlen = sizeof(sysctl_tcp_rmem), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - }, { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, @@ -1164,6 +1148,22 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &zero, .extra2 = &thousand, }, + { + .procname = "tcp_wmem", + .data = &init_net.ipv4.sysctl_tcp_wmem, + .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, + { + .procname = "tcp_rmem", + .data = &init_net.ipv4.sysctl_tcp_rmem, + .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c4cb19ed4628..bc71a27d5ad9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -289,12 +289,7 @@ struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; -int sysctl_tcp_wmem[3] __read_mostly; -int sysctl_tcp_rmem[3] __read_mostly; - EXPORT_SYMBOL(sysctl_tcp_mem); -EXPORT_SYMBOL(sysctl_tcp_rmem); -EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_long_t tcp_memory_allocated; /* Current allocated memory. 
*/ EXPORT_SYMBOL(tcp_memory_allocated); @@ -456,8 +451,8 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - sk->sk_sndbuf = sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sysctl_tcp_rmem[1]; + sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; + sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; sk_sockets_allocated_inc(sk); } @@ -3636,13 +3631,13 @@ void __init tcp_init(void) max_wshare = min(4UL*1024*1024, limit); max_rshare = min(6UL*1024*1024, limit); - sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - sysctl_tcp_wmem[1] = 16*1024; - sysctl_tcp_wmem[2] = max(64*1024, max_wshare); + init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; + init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare); - sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; - sysctl_tcp_rmem[1] = 87380; - sysctl_tcp_rmem[2] = max(87380, max_rshare); + init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_rmem[1] = 87380; + init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare); pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b54ee09cbcf7..9ceaa1fdc3ab 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -320,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk) sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) - sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); + sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -354,7 +354,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; - int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1; + int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -409,7 +409,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) rcvmem <<= 2; if (sk->sk_rcvbuf < rcvmem) - sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); + sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); } /* 4. Try to fixup all. 
It is made immediately after connection enters @@ -457,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); icsk->icsk_ack.quick = 0; - if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && + if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), - sysctl_tcp_rmem[2]); + net->ipv4.sysctl_tcp_rmem[2]); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -623,7 +624,8 @@ void tcp_rcv_space_adjust(struct sock *sk) while (tcp_win_from_space(sk, rcvmem) < tp->advmss) rcvmem += 128; - rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); + rcvbuf = min(rcvwin / tp->advmss * rcvmem, + sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { sk->sk_rcvbuf = rcvbuf; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0162c577bb9c..1eac84b8044e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2409,8 +2409,8 @@ struct proto tcp_prot = { .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, - .sysctl_wmem = sysctl_tcp_wmem, - .sysctl_rmem = sysctl_tcp_rmem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, @@ -2509,7 +2509,14 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; net->ipv4.sysctl_tcp_pacing_ss_ratio = 200; net->ipv4.sysctl_tcp_pacing_ca_ratio = 120; - + if (net != &init_net) { + memcpy(net->ipv4.sysctl_tcp_rmem, + init_net.ipv4.sysctl_tcp_rmem, + sizeof(init_net.ipv4.sysctl_tcp_rmem)); + memcpy(net->ipv4.sysctl_tcp_wmem, + init_net.ipv4.sysctl_tcp_wmem, + sizeof(init_net.ipv4.sysctl_tcp_wmem)); + } net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a9d917e4dad5..9b98d35aa0d8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -220,7 +220,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, (*rcv_wscale) = 0; if (wscale_ok) { /* Set window scaling on max possible window */ - space = max_t(u32, space, sysctl_tcp_rmem[2]); + space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0e2529958b52..6bb98c93edfe 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1940,8 +1940,8 @@ struct proto tcpv6_prot = { .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, - .sysctl_wmem = sysctl_tcp_wmem, - .sysctl_rmem = sysctl_tcp_rmem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, From dd9d598c6657e2d2eed4497ff2c5d744015201dc Mon Sep 
17 00:00:00 2001 From: Xin Long Date: Tue, 7 Nov 2017 16:33:08 +0800 Subject: [PATCH 2021/2177] ip_gre: add the support for i/o_flags update via netlink Now ip_gre is using ip_tunnel_changelink to update its properties, but ip_tunnel_changelink in ip_tunnel doesn't update i/o_flags as a common function. An o_flags update means that tunnel->tun_hlen / hlen and dev->mtu / needed_headroom need to be recalculated, and dev->(hw_)features need to be updated as well. Therefore, we can't just add the update into ip_tunnel_update called in ip_tunnel_changelink, and it's also better not to touch the ip_tunnel code. This patch updates i/o_flags and calls ipgre_link_update to recalculate these gre properties after ip_tunnel_changelink does the common update. Note that since ipgre_link_update doesn't know the lower dev, it will update gre->hlen, dev->mtu and dev->needed_headroom with the value of 'new tun_hlen - old tun_hlen'. In this way, we can avoid a lot of redundant code, unlike ip6_gre. Reported-by: Jianlin Shi Signed-off-by: Xin Long Acked-by: William Tu Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c105a315b1a3..81e1e20af33b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -773,6 +773,30 @@ free_skb: return NETDEV_TX_OK; } +static void ipgre_link_update(struct net_device *dev, bool set_mtu) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int len; + + len = tunnel->tun_hlen; + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); + len = tunnel->tun_hlen - len; + tunnel->hlen = tunnel->hlen + len; + + dev->needed_headroom = dev->needed_headroom + len; + if (set_mtu) + dev->mtu = max_t(int, dev->mtu - len, 68); + + if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { + if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || + tunnel->encap.type == TUNNEL_ENCAP_NONE) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + dev->features |= NETIF_F_LLTX; + } +} + static int ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -1307,9 +1331,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; __u32 fwmark = t->fwmark; + struct ip_tunnel_parm p; int err; if (ipgre_netlink_encap_parms(data, &ipencap)) { @@ -1322,7 +1346,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; - return ip_tunnel_changelink(dev, tb, &p, fwmark); + + err = ip_tunnel_changelink(dev, tb, &p, fwmark); + if (err < 0) + return err; + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; + + if (strcmp(dev->rtnl_link_ops->kind, "erspan")) + ipgre_link_update(dev, !tb[IFLA_MTU]); + + return 0; } static size_t ipgre_get_size(const struct net_device *dev) From a0efab67aeb9406e30b6a19dd6dd335e24f6bd66 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Nov 2017 16:33:09 +0800 Subject: [PATCH 2022/2177] ip_gre: add the support for i/o_flags update via ioctl As patch 'ip_gre: add the support for i/o_flags update via netlink' did for netlink, we also need to do the same job for updates via ioctl. This patch updates i/o_flags and calls ipgre_link_update to recalculate these gre properties after ip_tunnel_ioctl does the common update.
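To make the ioctl path concrete, here is a minimal user-space sketch (an illustration only, not part of the patch; it assumes a GRE device named "gre1" already exists) of the get/change sequence this code serves, turning on sequence numbering by setting GRE_SEQ in o_flags:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/if_tunnel.h>

/* Read the current tunnel parameters, flip GRE_SEQ in o_flags and write
 * them back; with this patch the kernel then recomputes tun_hlen/hlen,
 * needed_headroom and the MTU for the new header size.
 */
static int gre_enable_seq(int fd, const char *name)
{
	struct ip_tunnel_parm p;
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	memset(&p, 0, sizeof(p));
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	ifr.ifr_ifru.ifru_data = (void *)&p;

	if (ioctl(fd, SIOCGETTUNNEL, &ifr) < 0)
		return -1;

	p.o_flags |= GRE_SEQ;
	return ioctl(fd, SIOCCHGTUNNEL, &ifr);
}

/* usage: int fd = socket(AF_INET, SOCK_DGRAM, 0); gre_enable_seq(fd, "gre1"); */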
Signed-off-by: Xin Long Acked-by: William Tu Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 81e1e20af33b..bb6239169b1a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -800,17 +800,19 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) static int ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - int err; struct ip_tunnel_parm p; + int err; if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || + ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } + p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); @@ -818,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (err) return err; + if (cmd == SIOCCHGTUNNEL) { + struct ip_tunnel *t = netdev_priv(dev); + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; + + if (strcmp(dev->rtnl_link_ops->kind, "erspan")) + ipgre_link_update(dev, true); + } + p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) return -EFAULT; + return 0; } From e4effc094c91706c5737530ef9ae6298e1d67512 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 7 Nov 2017 15:47:24 +0000 Subject: [PATCH 2023/2177] net: vxge: remove redundant assignments and pointers There are several pointers that are being assigned but never read so remove these as they are redundant. Also remove an assignment to function_mode that is never read. Cleans up several clang warnings: vxge-main.c:1139:2: warning: Value stored to 'hldev' is never read vxge-main.c:1294:2: warning: Value stored to 'hldev' is never read vxge-main.c:2188:2: warning: Value stored to 'dev' is never read vxge-main.c:2188:2: warning: Value stored to 'dev' is never read vxge-main.c:2723:2: warning: Value stored to 'function_mode' is never read Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/net/ethernet/neterion/vxge/vxge-main.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 5d5b9855e24e..426c9a946eb4 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -1122,7 +1122,6 @@ static void vxge_set_multicast(struct net_device *dev) struct netdev_hw_addr *ha; struct vxgedev *vdev; int i, mcast_cnt = 0; - struct __vxge_hw_device *hldev; struct vxge_vpath *vpath; enum vxge_hw_status status = VXGE_HW_OK; struct macInfo mac_info; @@ -1136,7 +1135,6 @@ static void vxge_set_multicast(struct net_device *dev) "%s:%d", __func__, __LINE__); vdev = netdev_priv(dev); - hldev = vdev->devh; if (unlikely(!is_vxge_card_up(vdev))) return; @@ -1283,7 +1281,6 @@ static int vxge_set_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; struct vxgedev *vdev; - struct __vxge_hw_device *hldev; enum vxge_hw_status status = VXGE_HW_OK; struct macInfo mac_info_new, mac_info_old; int vpath_idx = 0; @@ -1291,7 +1288,6 @@ static int vxge_set_mac_addr(struct net_device *dev, void *p) vxge_debug_entryexit(VXGE_TRACE, "%s:%d", __func__, __LINE__); vdev = netdev_priv(dev); - hldev = vdev->devh; if (!is_valid_ether_addr(addr->sa_data)) return -EINVAL; @@ -2177,7 +2173,6 @@ static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring) */ static irqreturn_t vxge_isr_napi(int irq, void *dev_id) { - struct net_device *dev; struct __vxge_hw_device *hldev; u64 reason; enum vxge_hw_status status; @@ -2185,7 +2180,6 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id) vxge_debug_intr(VXGE_TRACE, "%s:%d", __func__, __LINE__); - dev = vdev->ndev; hldev = pci_get_drvdata(vdev->pdev); if (pci_channel_offline(vdev->pdev)) @@ -2713,14 +2707,13 @@ static int vxge_open(struct net_device *dev) struct vxge_vpath *vpath; int ret = 0; int i; - u64 val64, function_mode; + u64 val64; vxge_debug_entryexit(VXGE_TRACE, "%s: %s:%d", dev->name, __func__, __LINE__); vdev = netdev_priv(dev); hldev = pci_get_drvdata(vdev->pdev); - function_mode = vdev->config.device_hw_info.function_mode; /* make sure you have link off by default every time Nic is * initialized */ From cbad52e92ad7f01f0be4ca58bde59462dc1afe3a Mon Sep 17 00:00:00 2001 From: Robert Stonehouse Date: Tue, 7 Nov 2017 17:30:30 +0000 Subject: [PATCH 2024/2177] sfc: don't warn on successful change of MAC Fixes: 535a61777f44e ("sfc: suppress handled MCDI failures when changing the MAC address") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 19a91881fbf9..46d60013564c 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5734,7 +5734,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) * MCFW do not support VFs. */ rc = efx_ef10_vport_set_mac_address(efx); - } else { + } else if (rc) { efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC, sizeof(inbuf), NULL, 0, rc); } From e0496cbbf8fb5d831e74b8cedb54236a49413679 Mon Sep 17 00:00:00 2001 From: Manish Kurup Date: Tue, 7 Nov 2017 15:48:15 -0500 Subject: [PATCH 2025/2177] act_vlan: Change stats update to use per-core stats The VLAN action maintains one set of stats across all cores, and uses a spinlock to synchronize updates to it from the same. 
Changed this to use a per-CPU stats context instead. This change will result in better performance. Acked-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Manish Kurup Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 16eb067a8d8f..b093badc1450 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -30,9 +30,10 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, int err; u16 tci; - spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); - bstats_update(&v->tcf_bstats, skb); + bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb); + + spin_lock(&v->tcf_lock); action = v->tcf_action; /* Ensure 'data' points at mac_header prior calling vlan manipulating @@ -85,7 +86,8 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, drop: action = TC_ACT_SHOT; - v->tcf_qstats.drops++; + qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats)); + unlock: if (skb_at_tc_ingress(skb)) skb_pull_rcsum(skb, skb->mac_len); @@ -172,7 +174,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, parm->index, est, a, - &act_vlan_ops, bind, false); + &act_vlan_ops, bind, true); if (ret) return ret; From bf068bdd3c1e29c516ef0dc5cfb3c2b95fd450d1 Mon Sep 17 00:00:00 2001 From: Manish Kurup Date: Tue, 7 Nov 2017 15:48:45 -0500 Subject: [PATCH 2026/2177] nfp flower action: Modified to use VLAN helper functions Modified netronome nfp flower action to use VLAN helper functions instead of accessing/referencing TC act_vlan private structures directly. Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Manish Kurup Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index de64cedf8b26..c1c595f8bb87 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -58,7 +58,6 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, const struct tc_action *action) { size_t act_size = sizeof(struct nfp_fl_push_vlan); - struct tcf_vlan *vlan = to_vlan(action); u16 tmp_push_vlan_tci; push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN; @@ -67,8 +66,8 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, push_vlan->vlan_tpid = tcf_vlan_push_proto(action); tmp_push_vlan_tci = - FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio) | - FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid) | + FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) | + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) | NFP_FL_PUSH_VLAN_CFI; push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } From 4c5b9d9642c859f7369338fc42c0f62f4151bef3 Mon Sep 17 00:00:00 2001 From: Manish Kurup Date: Tue, 7 Nov 2017 15:49:05 -0500 Subject: [PATCH 2027/2177] act_vlan: VLAN action rewrite to use RCU lock/unlock and update Using a spinlock in the VLAN action causes performance issues when the VLAN action is used on multiple cores. Rewrote the VLAN action to use RCU read locking for reads and updates instead. All functions now use an RCU dereferenced pointer to access the VLAN action context. Modified helper functions used by other modules, to use the RCU as opposed to directly accessing the structure. 
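In outline, the locking scheme follows the usual RCU swap-a-parameter-block idiom; a stripped-down sketch of that idiom is shown here (a generic illustration with made-up example_* names, not the actual act_vlan code in the diff below):

#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

struct example_params {
	u16 vid;
	struct rcu_head rcu;
};

struct example_action {
	struct example_params __rcu *params;
};

/* datapath: no spinlock, just an RCU read-side critical section */
static u16 example_get_vid(struct example_action *act)
{
	struct example_params *p;
	u16 vid;

	rcu_read_lock();
	p = rcu_dereference(act->params);
	vid = p->vid;
	rcu_read_unlock();

	return vid;
}

/* control path (under RTNL): publish a new block, free the old one only
 * after a grace period so in-flight readers never see it disappear
 */
static int example_set_vid(struct example_action *act, u16 vid)
{
	struct example_params *new_p, *old_p;

	new_p = kzalloc(sizeof(*new_p), GFP_KERNEL);
	if (!new_p)
		return -ENOMEM;
	new_p->vid = vid;

	old_p = rtnl_dereference(act->params);
	rcu_assign_pointer(act->params, new_p);
	if (old_p)
		kfree_rcu(old_p, rcu);

	return 0;
}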
Acked-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Manish Kurup Signed-off-by: David S. Miller --- include/net/tc_act/tc_vlan.h | 46 +++++++++++++++++----- net/sched/act_vlan.c | 75 ++++++++++++++++++++++++------------ 2 files changed, 88 insertions(+), 33 deletions(-) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index c2090df944ff..22ae260d6869 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -13,12 +13,17 @@ #include #include +struct tcf_vlan_params { + int tcfv_action; + u16 tcfv_push_vid; + __be16 tcfv_push_proto; + u8 tcfv_push_prio; + struct rcu_head rcu; +}; + struct tcf_vlan { struct tc_action common; - int tcfv_action; - u16 tcfv_push_vid; - __be16 tcfv_push_proto; - u8 tcfv_push_prio; + struct tcf_vlan_params __rcu *vlan_p; }; #define to_vlan(a) ((struct tcf_vlan *)a) @@ -33,22 +38,45 @@ static inline bool is_tcf_vlan(const struct tc_action *a) static inline u32 tcf_vlan_action(const struct tc_action *a) { - return to_vlan(a)->tcfv_action; + u32 tcfv_action; + + rcu_read_lock(); + tcfv_action = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_action; + rcu_read_unlock(); + + return tcfv_action; } static inline u16 tcf_vlan_push_vid(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_vid; + u16 tcfv_push_vid; + + rcu_read_lock(); + tcfv_push_vid = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_vid; + rcu_read_unlock(); + + return tcfv_push_vid; } static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_proto; + __be16 tcfv_push_proto; + + rcu_read_lock(); + tcfv_push_proto = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_proto; + rcu_read_unlock(); + + return tcfv_push_proto; } static inline u8 tcf_vlan_push_prio(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_prio; -} + u8 tcfv_push_prio; + rcu_read_lock(); + tcfv_push_prio = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_prio; + rcu_read_unlock(); + + return tcfv_push_prio; +} #endif /* __NET_TC_VLAN_H */ diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index b093badc1450..97f717a13ad5 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -26,6 +26,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p; int action; int err; u16 tci; @@ -33,24 +34,27 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&v->tcf_tm); bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb); - spin_lock(&v->tcf_lock); - action = v->tcf_action; - /* Ensure 'data' points at mac_header prior calling vlan manipulating * functions. 
*/ if (skb_at_tc_ingress(skb)) skb_push_rcsum(skb, skb->mac_len); - switch (v->tcfv_action) { + rcu_read_lock(); + + action = READ_ONCE(v->tcf_action); + + p = rcu_dereference(v->vlan_p); + + switch (p->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); if (err) goto drop; break; case TCA_VLAN_ACT_PUSH: - err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | - (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); + err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid | + (p->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; @@ -69,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, goto drop; } /* replace the vid */ - tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid; /* replace prio bits, if tcfv_push_prio specified */ - if (v->tcfv_push_prio) { + if (p->tcfv_push_prio) { tci &= ~VLAN_PRIO_MASK; - tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT; } /* put updated tci as hwaccel tag */ - __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci); break; default: BUG(); @@ -89,10 +93,10 @@ drop: qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats)); unlock: + rcu_read_unlock(); if (skb_at_tc_ingress(skb)) skb_pull_rcsum(skb, skb->mac_len); - spin_unlock(&v->tcf_lock); return action; } @@ -109,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tcf_vlan_params *p, *p_old; struct tc_vlan *parm; struct tcf_vlan *v; int action; @@ -187,46 +192,67 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, v = to_vlan(*a); - spin_lock_bh(&v->tcf_lock); - - v->tcfv_action = action; - v->tcfv_push_vid = push_vid; - v->tcfv_push_prio = push_prio; - v->tcfv_push_proto = push_proto; + ASSERT_RTNL(); + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + if (ovr) + tcf_idr_release(*a, bind); + return -ENOMEM; + } v->tcf_action = parm->action; - spin_unlock_bh(&v->tcf_lock); + p_old = rtnl_dereference(v->vlan_p); + + p->tcfv_action = action; + p->tcfv_push_vid = push_vid; + p->tcfv_push_prio = push_prio; + p->tcfv_push_proto = push_proto; + + rcu_assign_pointer(v->vlan_p, p); + + if (p_old) + kfree_rcu(p_old, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; } +static void tcf_vlan_cleanup(struct tc_action *a, int bind) +{ + struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p; + + p = rcu_dereference_protected(v->vlan_p, 1); + kfree_rcu(p, rcu); +} + static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p); struct tc_vlan opt = { .index = v->tcf_index, .refcnt = v->tcf_refcnt - ref, .bindcnt = v->tcf_bindcnt - bind, .action = v->tcf_action, - .v_action = v->tcfv_action, + .v_action = p->tcfv_action, }; struct tcf_t t; if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || - v->tcfv_action == TCA_VLAN_ACT_MODIFY) && - (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || + if ((p->tcfv_action == TCA_VLAN_ACT_PUSH || + p->tcfv_action == TCA_VLAN_ACT_MODIFY) && + (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, - v->tcfv_push_proto) || + p->tcfv_push_proto) || 
(nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, - v->tcfv_push_prio)))) + p->tcfv_push_prio)))) goto nla_put_failure; tcf_tm_dump(&t, &v->tcf_tm); @@ -262,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = { .act = tcf_vlan, .dump = tcf_vlan_dump, .init = tcf_vlan_init, + .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, .lookup = tcf_vlan_search, .size = sizeof(struct tcf_vlan), From 54985120a1c461b74f9510e5d730971f2a2383b1 Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Tue, 7 Nov 2017 11:32:11 -0800 Subject: [PATCH 2028/2177] net: fix incorrect comment with regard to VLAN packet handling The commit bcc6d4790361 ("net: vlan: make non-hw-accel rx path similar to hw-accel") unified accel and non-accel path for VLAN RX. With that fix we do not register any packet_type handler for VLANs anymore, so fix the incorrect comment. Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 79518ede3170..6b274bfe489f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4479,15 +4479,7 @@ do { \ * Why 16. Because with 16 the only overlap we get on a hash of the * low nibble of the protocol value is RARP/SNAP/X.25. * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. - * --BLG - * * 0800 IP - * 8100 802.1Q VLAN * 0001 802.3 * 0002 AX.25 * 0004 802.2 From fdd0bd88ceaecf729db103ac8836af5805dd2dc1 Mon Sep 17 00:00:00 2001 From: Chung-Hsien Hsu Date: Fri, 10 Nov 2017 17:27:15 +0800 Subject: [PATCH 2029/2177] brcmfmac: add CLM download support The firmware for brcmfmac devices includes information regarding regulatory constraints. For certain devices this information is kept separately in a binary form that needs to be downloaded to the device. This patch adds support to download this so-called CLM blob file. It uses the same naming scheme as the other firmware files with extension of .clm_blob. The CLM blob file is optional. If the file does not exist, the download process will be bypassed. It will not affect the driver loading. Reviewed-by: Arend van Spriel Signed-off-by: Chung-Hsien Hsu Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/bus.h | 10 ++ .../broadcom/brcm80211/brcmfmac/common.c | 157 ++++++++++++++++++ .../broadcom/brcm80211/brcmfmac/core.c | 2 + .../broadcom/brcm80211/brcmfmac/core.h | 2 + .../broadcom/brcm80211/brcmfmac/fwil_types.h | 31 ++++ .../broadcom/brcm80211/brcmfmac/pcie.c | 19 +++ .../broadcom/brcm80211/brcmfmac/sdio.c | 19 +++ .../broadcom/brcm80211/brcmfmac/usb.c | 18 ++ 8 files changed, 258 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h index 163ddc49f951..0b76a615708e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h @@ -71,6 +71,7 @@ struct brcmf_bus_dcmd { * @wowl_config: specify if dongle is configured for wowl when going to suspend * @get_ramsize: obtain size of device memory. * @get_memdump: obtain device memory dump in provided buffer. + * @get_fwname: obtain firmware name. 
* * This structure provides an abstract interface towards the * bus specific driver. For control messages to common driver @@ -87,6 +88,8 @@ struct brcmf_bus_ops { void (*wowl_config)(struct device *dev, bool enabled); size_t (*get_ramsize)(struct device *dev); int (*get_memdump)(struct device *dev, void *data, size_t len); + int (*get_fwname)(struct device *dev, uint chip, uint chiprev, + unsigned char *fw_name); }; @@ -224,6 +227,13 @@ int brcmf_bus_get_memdump(struct brcmf_bus *bus, void *data, size_t len) return bus->ops->get_memdump(bus->dev, data, len); } +static inline +int brcmf_bus_get_fwname(struct brcmf_bus *bus, uint chip, uint chiprev, + unsigned char *fw_name) +{ + return bus->ops->get_fwname(bus->dev, chip, chiprev, fw_name); +} + /* * interface functions from common layer */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index 7a2b49587b4d..6a59d0609d30 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -28,6 +29,7 @@ #include "tracepoint.h" #include "common.h" #include "of.h" +#include "firmware.h" MODULE_AUTHOR("Broadcom Corporation"); MODULE_DESCRIPTION("Broadcom 802.11 wireless LAN fullmac driver."); @@ -104,12 +106,140 @@ void brcmf_c_set_joinpref_default(struct brcmf_if *ifp) brcmf_err("Set join_pref error (%d)\n", err); } +static int brcmf_c_download(struct brcmf_if *ifp, u16 flag, + struct brcmf_dload_data_le *dload_buf, + u32 len) +{ + s32 err; + + flag |= (DLOAD_HANDLER_VER << DLOAD_FLAG_VER_SHIFT); + dload_buf->flag = cpu_to_le16(flag); + dload_buf->dload_type = cpu_to_le16(DL_TYPE_CLM); + dload_buf->len = cpu_to_le32(len); + dload_buf->crc = cpu_to_le32(0); + len = sizeof(*dload_buf) + len - 1; + + err = brcmf_fil_iovar_data_set(ifp, "clmload", dload_buf, len); + + return err; +} + +static int brcmf_c_get_clm_name(struct brcmf_if *ifp, u8 *clm_name) +{ + struct brcmf_bus *bus = ifp->drvr->bus_if; + struct brcmf_rev_info *ri = &ifp->drvr->revinfo; + u8 fw_name[BRCMF_FW_NAME_LEN]; + u8 *ptr; + size_t len; + s32 err; + + memset(fw_name, 0, BRCMF_FW_NAME_LEN); + err = brcmf_bus_get_fwname(bus, ri->chipnum, ri->chiprev, fw_name); + if (err) { + brcmf_err("get firmware name failed (%d)\n", err); + goto done; + } + + /* generate CLM blob file name */ + ptr = strrchr(fw_name, '.'); + if (!ptr) { + err = -ENOENT; + goto done; + } + + len = ptr - fw_name + 1; + if (len + strlen(".clm_blob") > BRCMF_FW_NAME_LEN) { + err = -E2BIG; + } else { + strlcpy(clm_name, fw_name, len); + strlcat(clm_name, ".clm_blob", BRCMF_FW_NAME_LEN); + } +done: + return err; +} + +static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) +{ + struct device *dev = ifp->drvr->bus_if->dev; + struct brcmf_dload_data_le *chunk_buf; + const struct firmware *clm = NULL; + u8 clm_name[BRCMF_FW_NAME_LEN]; + u32 chunk_len; + u32 datalen; + u32 cumulative_len; + u16 dl_flag = DL_BEGIN; + u32 status; + s32 err; + + brcmf_dbg(TRACE, "Enter\n"); + + memset(clm_name, 0, BRCMF_FW_NAME_LEN); + err = brcmf_c_get_clm_name(ifp, clm_name); + if (err) { + brcmf_err("get CLM blob file name failed (%d)\n", err); + return err; + } + + err = request_firmware(&clm, clm_name, dev); + if (err) { + if (err == -ENOENT) { + brcmf_dbg(INFO, "continue with CLM data currently present in firmware\n"); + return 0; + } + brcmf_err("request CLM blob file failed (%d)\n", err); + 
return err; + } + + chunk_buf = kzalloc(sizeof(*chunk_buf) + MAX_CHUNK_LEN - 1, GFP_KERNEL); + if (!chunk_buf) { + err = -ENOMEM; + goto done; + } + + datalen = clm->size; + cumulative_len = 0; + do { + if (datalen > MAX_CHUNK_LEN) { + chunk_len = MAX_CHUNK_LEN; + } else { + chunk_len = datalen; + dl_flag |= DL_END; + } + memcpy(chunk_buf->data, clm->data + cumulative_len, chunk_len); + + err = brcmf_c_download(ifp, dl_flag, chunk_buf, chunk_len); + + dl_flag &= ~DL_BEGIN; + + cumulative_len += chunk_len; + datalen -= chunk_len; + } while ((datalen > 0) && (err == 0)); + + if (err) { + brcmf_err("clmload (%zu byte file) failed (%d); ", + clm->size, err); + /* Retrieve clmload_status and print */ + err = brcmf_fil_iovar_int_get(ifp, "clmload_status", &status); + if (err) + brcmf_err("get clmload_status failed (%d)\n", err); + else + brcmf_dbg(INFO, "clmload_status=%d\n", status); + err = -EIO; + } + + kfree(chunk_buf); +done: + release_firmware(clm); + return err; +} + int brcmf_c_preinit_dcmds(struct brcmf_if *ifp) { s8 eventmask[BRCMF_EVENTING_MASK_LEN]; u8 buf[BRCMF_DCMD_SMLEN]; struct brcmf_rev_info_le revinfo; struct brcmf_rev_info *ri; + char *clmver; char *ptr; s32 err; @@ -148,6 +278,13 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp) } ri->result = err; + /* Do any CLM downloading */ + err = brcmf_c_process_clm_blob(ifp); + if (err < 0) { + brcmf_err("download CLM blob file failed, %d\n", err); + goto done; + } + /* query for 'ver' to get version info from firmware */ memset(buf, 0, sizeof(buf)); strcpy(buf, "ver"); @@ -167,6 +304,26 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp) ptr = strrchr(buf, ' ') + 1; strlcpy(ifp->drvr->fwver, ptr, sizeof(ifp->drvr->fwver)); + /* Query for 'clmver' to get CLM version info from firmware */ + memset(buf, 0, sizeof(buf)); + err = brcmf_fil_iovar_data_get(ifp, "clmver", buf, sizeof(buf)); + if (err) { + brcmf_dbg(TRACE, "retrieving clmver failed, %d\n", err); + } else { + clmver = (char *)buf; + /* store CLM version for adding it to revinfo debugfs file */ + memcpy(ifp->drvr->clmver, clmver, sizeof(ifp->drvr->clmver)); + + /* Replace all newline/linefeed characters with space + * character + */ + ptr = clmver; + while ((ptr = strnchr(ptr, '\n', sizeof(buf))) != NULL) + *ptr = ' '; + + brcmf_dbg(INFO, "CLM version = %s\n", clmver); + } + /* set mpc */ err = brcmf_fil_iovar_int_set(ifp, "mpc", 1); if (err) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 9c7536daf8f7..930e423f83a8 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -988,6 +988,8 @@ static int brcmf_revinfo_read(struct seq_file *s, void *data) seq_printf(s, "anarev: %u\n", ri->anarev); seq_printf(s, "nvramrev: %08x\n", ri->nvramrev); + seq_printf(s, "clmver: %s\n", bus_if->drvr->clmver); + return 0; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index 17085712bae2..df8a1ecb9924 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -141,6 +141,8 @@ struct brcmf_pub { struct notifier_block inetaddr_notifier; struct notifier_block inet6addr_notifier; struct brcmf_mp_device *settings; + + u8 clmver[BRCMF_DCMD_SMLEN]; }; /* forward declarations */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h 
b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h index e0d22fedb2b4..4b290705e3e6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h @@ -155,6 +155,21 @@ #define BRCMF_MFP_CAPABLE 1 #define BRCMF_MFP_REQUIRED 2 +/* MAX_CHUNK_LEN is the maximum length for data passing to firmware in each + * ioctl. It is relatively small because firmware has small maximum size input + * playload restriction for ioctls. + */ +#define MAX_CHUNK_LEN 1400 + +#define DLOAD_HANDLER_VER 1 /* Downloader version */ +#define DLOAD_FLAG_VER_MASK 0xf000 /* Downloader version mask */ +#define DLOAD_FLAG_VER_SHIFT 12 /* Downloader version shift */ + +#define DL_BEGIN 0x0002 +#define DL_END 0x0004 + +#define DL_TYPE_CLM 2 + /* join preference types for join_pref iovar */ enum brcmf_join_pref_types { BRCMF_JOIN_PREF_RSSI = 1, @@ -826,6 +841,22 @@ struct brcmf_pno_macaddr_le { u8 mac[ETH_ALEN]; }; +/** + * struct brcmf_dload_data_le - data passing to firmware for downloading + * @flag: flags related to download data. + * @dload_type: type of download data. + * @len: length in bytes of download data. + * @crc: crc of download data. + * @data: download data. + */ +struct brcmf_dload_data_le { + __le16 flag; + __le16 dload_type; + __le32 len; + __le32 crc; + u8 data[1]; +}; + /** * struct brcmf_pno_bssid_le - bssid configuration for PNO scan. * diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index e6e9b00b79d7..3c87157f5b85 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -1350,6 +1350,24 @@ static int brcmf_pcie_get_memdump(struct device *dev, void *data, size_t len) return 0; } +static int brcmf_pcie_get_fwname(struct device *dev, u32 chip, u32 chiprev, + u8 *fw_name) +{ + struct brcmf_bus *bus_if = dev_get_drvdata(dev); + struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie; + struct brcmf_pciedev_info *devinfo = buspub->devinfo; + int ret = 0; + + if (devinfo->fw_name[0] != '\0') + strlcpy(fw_name, devinfo->fw_name, BRCMF_FW_NAME_LEN); + else + ret = brcmf_fw_map_chip_to_name(chip, chiprev, + brcmf_pcie_fwnames, + ARRAY_SIZE(brcmf_pcie_fwnames), + fw_name, NULL); + + return ret; +} static const struct brcmf_bus_ops brcmf_pcie_bus_ops = { .txdata = brcmf_pcie_tx, @@ -1359,6 +1377,7 @@ static const struct brcmf_bus_ops brcmf_pcie_bus_ops = { .wowl_config = brcmf_pcie_wowl_config, .get_ramsize = brcmf_pcie_get_ramsize, .get_memdump = brcmf_pcie_get_memdump, + .get_fwname = brcmf_pcie_get_fwname, }; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 00de73d9e152..b2256aa76eb6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -3985,6 +3985,24 @@ brcmf_sdio_watchdog(unsigned long data) } } +static int brcmf_sdio_get_fwname(struct device *dev, u32 chip, u32 chiprev, + u8 *fw_name) +{ + struct brcmf_bus *bus_if = dev_get_drvdata(dev); + struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio; + int ret = 0; + + if (sdiodev->fw_name[0] != '\0') + strlcpy(fw_name, sdiodev->fw_name, BRCMF_FW_NAME_LEN); + else + ret = brcmf_fw_map_chip_to_name(chip, chiprev, + brcmf_sdio_fwnames, + ARRAY_SIZE(brcmf_sdio_fwnames), + fw_name, NULL); + + return ret; +} + static const struct brcmf_bus_ops brcmf_sdio_bus_ops = { 
.stop = brcmf_sdio_bus_stop, .preinit = brcmf_sdio_bus_preinit, @@ -3995,6 +4013,7 @@ static const struct brcmf_bus_ops brcmf_sdio_bus_ops = { .wowl_config = brcmf_sdio_wowl_config, .get_ramsize = brcmf_sdio_bus_get_ramsize, .get_memdump = brcmf_sdio_bus_get_memdump, + .get_fwname = brcmf_sdio_get_fwname, }; static void brcmf_sdio_firmware_callback(struct device *dev, int err, diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 11ffaa01599e..b27170c12482 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1128,12 +1128,30 @@ static void brcmf_usb_wowl_config(struct device *dev, bool enabled) device_set_wakeup_enable(devinfo->dev, false); } +static int brcmf_usb_get_fwname(struct device *dev, u32 chip, u32 chiprev, + u8 *fw_name) +{ + struct brcmf_usbdev_info *devinfo = brcmf_usb_get_businfo(dev); + int ret = 0; + + if (devinfo->fw_name[0] != '\0') + strlcpy(fw_name, devinfo->fw_name, BRCMF_FW_NAME_LEN); + else + ret = brcmf_fw_map_chip_to_name(chip, chiprev, + brcmf_usb_fwnames, + ARRAY_SIZE(brcmf_usb_fwnames), + fw_name, NULL); + + return ret; +} + static const struct brcmf_bus_ops brcmf_usb_bus_ops = { .txdata = brcmf_usb_tx, .stop = brcmf_usb_down, .txctl = brcmf_usb_tx_ctlpkt, .rxctl = brcmf_usb_rx_ctlpkt, .wowl_config = brcmf_usb_wowl_config, + .get_fwname = brcmf_usb_get_fwname, }; static int brcmf_usb_bus_setup(struct brcmf_usbdev_info *devinfo) From dd0bb688eaa241b5655d396d45366cba9225aed9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Nov 2017 15:28:42 -0500 Subject: [PATCH 2030/2177] bpf: add a bpf_override_function helper Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with its kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about, and only in very specific situations. Accomplish this with the bpf_override_function helper. This will modify the probed function's return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov Signed-off-by: Josef Bacik Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 1 + arch/x86/include/asm/kprobes.h | 4 ++++ arch/x86/include/asm/ptrace.h | 5 ++++ arch/x86/kernel/kprobes/ftrace.c | 14 +++++++++++ include/linux/filter.h | 3 ++- include/linux/trace_events.h | 1 + include/uapi/linux/bpf.h | 7 +++++- kernel/bpf/core.c | 3 +++ kernel/bpf/verifier.c | 2 ++ kernel/events/core.c | 7 ++++++ kernel/trace/Kconfig | 11 +++++++++ kernel/trace/bpf_trace.c | 35 ++++++++++++++++++++++++++++ kernel/trace/trace_kprobe.c | 40 ++++++++++++++++++++++++++------ kernel/trace/trace_probe.h | 6 +++++ 15 files changed, 133 insertions(+), 9 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 057370a0ac4e..6e8520f09bc1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -196,6 +196,9 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool +config HAVE_KPROBE_OVERRIDE + bool + config HAVE_NMI bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fdb23313dd5..51458c1a0b4a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -153,6 +153,7 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE + select HAVE_KPROBE_OVERRIDE select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 6cf65437b5e5..c6c3b1f4306a 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); +#ifdef CONFIG_KPROBES_ON_FTRACE +extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs); +#endif + /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index c0e3c45cf6ab..2370bb0149cc 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->ax; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->ax = rc; +} + /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 041f7b6dfa0f..3c455bf490cb 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) p->ainsn.boostable = false; return 0; } + +asmlinkage void override_func(void); +asm( + ".type override_func, @function\n" + "override_func:\n" + " ret\n" + ".size override_func, .-override_func\n" +); + +void arch_ftrace_kprobe_override_function(struct pt_regs *regs) +{ + regs->ip = (unsigned long)&override_func; +} +NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function); diff --git a/include/linux/filter.h b/include/linux/filter.h index 0cd02ff4ae30..eaec066f99e8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -459,7 +459,8 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1; /* Do we need dst entry? */ + dst_needed:1, /* Do we need dst entry? */ + kprobe_override:1; /* Do we override a kprobe? 
*/ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 84014ecfa67f..17e5e820a84c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -523,6 +523,7 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); +DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e880ae6434ee..adb66f78b674 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8a6c37762330..271daad31f37 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1326,6 +1326,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + if (fp->kprobe_override) + return false; + if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4a942e2e753d..bc464b8ec91e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4357,6 +4357,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_override_return) + prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can diff --git a/kernel/events/core.c b/kernel/events/core.c index 42d24bd64ea4..ac240d31b5bf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8171,6 +8171,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } + /* Kprobe override only works for kprobes, not uprobes. */ + if (prog->kprobe_override && + !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { + bpf_prog_put(prog); + return -EINVAL; + } + if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 434c840e2d82..9dc0deeaad2b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -518,6 +518,17 @@ config FUNCTION_PROFILER If in doubt, say N. +config BPF_KPROBE_OVERRIDE + bool "Enable BPF programs to override a kprobed function" + depends on BPF_EVENTS + depends on KPROBES_ON_FTRACE + depends on HAVE_KPROBE_OVERRIDE + depends on DYNAMIC_FTRACE_WITH_REGS + default n + help + Allows BPF to override the execution of a probed function and + set a different return value. This is used for error injection. 
+ config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 506efe6e8ed9..1865b0d4cdeb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,6 +13,10 @@ #include #include #include +#include +#include + +#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -76,6 +80,29 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); +#ifdef CONFIG_BPF_KPROBE_OVERRIDE +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + __this_cpu_write(bpf_kprobe_override, 1); + regs_set_return_value(regs, rc); + arch_ftrace_kprobe_override_function(regs); + return 0; +} +#else +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + return -EINVAL; +} +#endif + +static const struct bpf_func_proto bpf_override_return_proto = { + .func = bpf_override_return, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -551,6 +578,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; + case BPF_FUNC_override_return: + pr_warn_ratelimited("%s[%d] is installing a program with bpf_override_return helper that may cause unexpected behavior!", + current->comm, task_pid_nr(current)); + return &bpf_override_return_proto; default: return tracing_func_proto(func_id); } @@ -766,6 +797,10 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; + /* Kprobe override only works for ftrace based kprobes. 
*/ + if (prog->kprobe_override && !trace_kprobe_ftrace(event->tp_event)) + return -EINVAL; + mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index abf92e478cfb..8e3c9ec1faf7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,6 +42,7 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) +DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -87,6 +88,12 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } +int trace_kprobe_ftrace(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + return kprobe_ftrace(&tk->rp.kp); +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1170,7 +1177,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static void +static int kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1179,12 +1186,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) - return; + if (bpf_prog_array_valid(call)) { + int ret; + + ret = trace_call_bpf(call, regs); + + /* + * We need to check and see if we modified the pc of the + * pt_regs, and if so clear the kprobe and return 1 so that we + * don't do the instruction skipping. Also reset our state so + * we are clean the next pass through. + */ + if (__this_cpu_read(bpf_kprobe_override)) { + __this_cpu_write(bpf_kprobe_override, 0); + reset_current_kprobe(); + return 1; + } + if (!ret) + return 0; + } head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return; + return 0; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1193,13 +1217,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return; + return 0; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL, NULL); + return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1275,6 +1300,7 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1282,9 +1308,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - kprobe_perf_func(tk, regs); + ret = kprobe_perf_func(tk, regs); #endif - return 0; /* We don't tweek kernel, so just return 0 */ + return ret; } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 903273c93e61..adbb3f7d1fb5 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -253,6 +253,7 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const 
char *sym, long offset); +int trace_kprobe_ftrace(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -278,6 +279,11 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } + +static inline int trace_kprobe_ftrace(struct trace_event_call *call) +{ + return 0; +} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { From eafb3401faf243f7dca0e23325242cb8c2269ee9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Nov 2017 15:28:43 -0500 Subject: [PATCH 2031/2177] samples/bpf: add a test for bpf_override_return This adds a basic test for bpf_override_return to verify it works. We override the main function for mounting a btrfs fs so it'll return -ENOMEM and then make sure that trying to mount a btrfs fs will fail. Acked-by: Alexei Starovoitov Signed-off-by: Josef Bacik Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 ++++ samples/bpf/test_override_return.sh | 15 ++++++++++++ samples/bpf/tracex7_kern.c | 16 +++++++++++++ samples/bpf/tracex7_user.c | 28 +++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++- tools/testing/selftests/bpf/bpf_helpers.h | 3 ++- 6 files changed, 71 insertions(+), 2 deletions(-) create mode 100755 samples/bpf/test_override_return.sh create mode 100644 samples/bpf/tracex7_kern.c create mode 100644 samples/bpf/tracex7_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3b4945c1eab0..87db0f9a4c15 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -15,6 +15,7 @@ hostprogs-y += tracex3 hostprogs-y += tracex4 hostprogs-y += tracex5 hostprogs-y += tracex6 +hostprogs-y += tracex7 hostprogs-y += test_probe_write_user hostprogs-y += trace_output hostprogs-y += lathist @@ -61,6 +62,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o +tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o @@ -104,6 +106,7 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o +always += tracex7_kern.o always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o @@ -158,6 +161,7 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf +HOSTLOADLIBES_tracex7 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh new file mode 100755 index 000000000000..e68b9ee6814b --- /dev/null +++ b/samples/bpf/test_override_return.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +rm -f testfile.img +dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 +DEVICE=$(losetup --show -f testfile.img) +mkfs.btrfs -f $DEVICE +mkdir tmpmnt +./tracex7 $DEVICE +if [ $? -eq 0 ] +then + echo "SUCCESS!" +else + echo "FAILED!" 
+fi +losetup -d $DEVICE diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c new file mode 100644 index 000000000000..1ab308a43e0f --- /dev/null +++ b/samples/bpf/tracex7_kern.c @@ -0,0 +1,16 @@ +#include +#include +#include +#include "bpf_helpers.h" + +SEC("kprobe/open_ctree") +int bpf_prog1(struct pt_regs *ctx) +{ + unsigned long rc = -12; + + bpf_override_return(ctx, rc); + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c new file mode 100644 index 000000000000..8a52ac492e8b --- /dev/null +++ b/samples/bpf/tracex7_user.c @@ -0,0 +1,28 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" + +int main(int argc, char **argv) +{ + FILE *f; + char filename[256]; + char command[256]; + int ret; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + snprintf(command, 256, "mount %s tmpmnt/", argv[1]); + f = popen(command, "r"); + ret = pclose(f); + + return ret ? 0 : 1; +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e880ae6434ee..adb66f78b674 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index fd9a17fa8a8b..33cb00e46c49 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -82,7 +82,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; - +static int (*bpf_override_return)(void *ctx, unsigned long rc) = + (void *) BPF_FUNC_override_return; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions From 1852719658c0f853b5481c9eaed862f1a9355edc Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Wed, 8 Nov 2017 13:55:47 +0900 Subject: [PATCH 2032/2177] tools: bpftool: open pinned object without type check This was needed for opening any file in bpf-fs without knowing its object type Signed-off-by: Prashant Bhole Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/common.c | 15 +++++++++++++-- tools/bpf/bpftool/main.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index aa7017098b2a..6b3d25d6a782 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -122,9 +122,8 @@ static int mnt_bpffs(const char *target, char *buff, size_t bufflen) return 0; } -int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) +int open_obj_pinned(char *path) { - enum bpf_obj_type type; int fd; fd = bpf_obj_get(path); @@ -136,6 +135,18 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return -1; } + return fd; +} + +int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) +{ + enum bpf_obj_type type; + int fd; + + fd = open_obj_pinned(path); + if (fd < 0) + return -1; + type = get_fd_type(fd); if (type < 0) { close(fd); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index ff5ad05b137b..2ff2a361af0d 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -89,6 +89,7 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv, int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); +int open_obj_pinned(char *path); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); From 4990f1f4610b483a60397ed2768d268df228a551 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Wed, 8 Nov 2017 13:55:48 +0900 Subject: [PATCH 2033/2177] tools: bpftool: show filenames of pinned objects Added support to show filenames of pinned objects. For example: root@test# ./bpftool prog 3: tracepoint name tracepoint__irq tag f677a7dd722299a3 loaded_at Oct 26/11:39 uid 0 xlated 160B not jited memlock 4096B map_ids 4 pinned /sys/fs/bpf/softirq_prog 4: tracepoint name tracepoint__irq tag ea5dc530d00b92b6 loaded_at Oct 26/11:39 uid 0 xlated 392B not jited memlock 4096B map_ids 4,6 root@test# ./bpftool --json --pretty prog [{ "id": 3, "type": "tracepoint", "name": "tracepoint__irq", "tag": "f677a7dd722299a3", "loaded_at": "Oct 26/11:39", "uid": 0, "bytes_xlated": 160, "jited": false, "bytes_memlock": 4096, "map_ids": [4 ], "pinned": ["/sys/fs/bpf/softirq_prog" ] },{ "id": 4, "type": "tracepoint", "name": "tracepoint__irq", "tag": "ea5dc530d00b92b6", "loaded_at": "Oct 26/11:39", "uid": 0, "bytes_xlated": 392, "jited": false, "bytes_memlock": 4096, "map_ids": [4,6 ], "pinned": [] } ] root@test# ./bpftool map 4: hash name start flags 0x0 key 4B value 16B max_entries 10240 memlock 1003520B pinned /sys/fs/bpf/softirq_map1 5: hash name iptr flags 0x0 key 4B value 8B max_entries 10240 memlock 921600B root@test# ./bpftool --json --pretty map [{ "id": 4, "type": "hash", "name": "start", "flags": 0, "bytes_key": 4, "bytes_value": 16, "max_entries": 10240, "bytes_memlock": 1003520, "pinned": ["/sys/fs/bpf/softirq_map1" ] },{ "id": 5, "type": "hash", "name": "iptr", "flags": 0, "bytes_key": 4, "bytes_value": 8, "max_entries": 10240, "bytes_memlock": 921600, "pinned": [] } ] Signed-off-by: Prashant Bhole Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/common.c | 82 ++++++++++++++++++++++++++++++++++++++ tools/bpf/bpftool/main.c | 8 ++++ tools/bpf/bpftool/main.h | 17 ++++++++ tools/bpf/bpftool/map.c | 21 ++++++++++ tools/bpf/bpftool/prog.c | 24 +++++++++++ 5 files changed, 152 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 6b3d25d6a782..2bd3b280e6dd 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -34,7 +34,9 @@ /* Author: Jakub Kicinski */ #include +#include #include +#include #include #include #include @@ -321,3 +323,83 @@ void print_hex_data_json(uint8_t *data, size_t len) jsonw_printf(json_wtr, "\"0x%02hhx\"", data[i]); jsonw_end_array(json_wtr); } + +int build_pinned_obj_table(struct pinned_obj_table *tab, + enum bpf_obj_type type) +{ + struct bpf_prog_info pinned_info = {}; + struct pinned_obj *obj_node = NULL; + __u32 len = sizeof(pinned_info); + struct mntent *mntent = NULL; + enum bpf_obj_type objtype; + FILE *mntfile = NULL; + FTSENT *ftse = NULL; + FTS *fts = NULL; + int fd, err; + + mntfile = setmntent("/proc/mounts", "r"); + if (!mntfile) + return -1; + + while ((mntent = getmntent(mntfile))) { + char *path[] = { mntent->mnt_dir, NULL }; + + if (strncmp(mntent->mnt_type, "bpf", 3) != 0) + continue; + + fts = fts_open(path, 0, NULL); + if (!fts) + continue; + + while ((ftse = fts_read(fts))) { + if (!(ftse->fts_info & FTS_F)) + continue; + fd = open_obj_pinned(ftse->fts_path); + if (fd < 0) + continue; + + objtype = get_fd_type(fd); + if (objtype != type) { + close(fd); + continue; + } + memset(&pinned_info, 0, sizeof(pinned_info)); + err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len); + if (err) { + close(fd); + continue; + } + + obj_node = malloc(sizeof(*obj_node)); + if (!obj_node) { + close(fd); + fts_close(fts); + fclose(mntfile); + return -1; + } + + memset(obj_node, 0, sizeof(*obj_node)); + obj_node->id = pinned_info.id; + obj_node->path = strdup(ftse->fts_path); + hash_add(tab->table, &obj_node->hash, obj_node->id); + + close(fd); + } + fts_close(fts); + } + fclose(mntfile); + return 0; +} + +void delete_pinned_obj_table(struct pinned_obj_table *tab) +{ + struct pinned_obj *obj; + struct hlist_node *tmp; + unsigned int bkt; + + hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { + hash_del(&obj->hash); + free(obj->path); + free(obj); + } +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 78d9afb74ef4..6ad53f1797fa 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -54,6 +54,8 @@ static int (*last_do_help)(int argc, char **argv); json_writer_t *json_wtr; bool pretty_output; bool json_output; +struct pinned_obj_table prog_table; +struct pinned_obj_table map_table; void usage(void) { @@ -272,6 +274,9 @@ int main(int argc, char **argv) json_output = false; bin_name = argv[0]; + hash_init(prog_table.table); + hash_init(map_table.table); + while ((opt = getopt_long(argc, argv, "Vhpj", options, NULL)) >= 0) { switch (opt) { @@ -311,5 +316,8 @@ int main(int argc, char **argv) if (json_output) jsonw_destroy(&json_wtr); + delete_pinned_obj_table(&prog_table); + delete_pinned_obj_table(&map_table); + return ret; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 2ff2a361af0d..13453de2e570 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -42,6 +42,7 @@ #include #include #include +#include #include "json_writer.h" @@ -70,6 +71,8 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; +extern struct 
pinned_obj_table prog_table; +extern struct pinned_obj_table map_table; void p_err(const char *fmt, ...); void p_info(const char *fmt, ...); @@ -78,6 +81,20 @@ bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __attribute__((noreturn)); +struct pinned_obj_table { + DECLARE_HASHTABLE(table, 16); +}; + +struct pinned_obj { + __u32 id; + char *path; + struct hlist_node hash; +}; + +int build_pinned_obj_table(struct pinned_obj_table *table, + enum bpf_obj_type type); +void delete_pinned_obj_table(struct pinned_obj_table *tab); + struct cmd { const char *cmd; int (*func)(int argc, char **argv); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e978ab23a77f..de0980657cef 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -436,6 +436,18 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock)); free(memlock); + if (!hash_empty(map_table.table)) { + struct pinned_obj *obj; + + jsonw_name(json_wtr, "pinned"); + jsonw_start_array(json_wtr); + hash_for_each_possible(map_table.table, obj, hash, info->id) { + if (obj->id == info->id) + jsonw_string(json_wtr, obj->path); + } + jsonw_end_array(json_wtr); + } + jsonw_end_object(json_wtr); return 0; @@ -466,7 +478,14 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) free(memlock); printf("\n"); + if (!hash_empty(map_table.table)) { + struct pinned_obj *obj; + hash_for_each_possible(map_table.table, obj, hash, info->id) { + if (obj->id == info->id) + printf("\tpinned %s\n", obj->path); + } + } return 0; } @@ -478,6 +497,8 @@ static int do_show(int argc, char **argv) int err; int fd; + build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + if (argc == 2) { fd = map_parse_fd_and_info(&argc, &argv, &info, &len); if (fd < 0) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d3ab808dc882..8f94b8ac2e63 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -272,6 +272,18 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); + if (!hash_empty(prog_table.table)) { + struct pinned_obj *obj; + + jsonw_name(json_wtr, "pinned"); + jsonw_start_array(json_wtr); + hash_for_each_possible(prog_table.table, obj, hash, info->id) { + if (obj->id == info->id) + jsonw_string(json_wtr, obj->path); + } + jsonw_end_array(json_wtr); + } + jsonw_end_object(json_wtr); } @@ -331,6 +343,16 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); + if (!hash_empty(prog_table.table)) { + struct pinned_obj *obj; + + printf("\n"); + hash_for_each_possible(prog_table.table, obj, hash, info->id) { + if (obj->id == info->id) + printf("\tpinned %s\n", obj->path); + } + } + printf("\n"); } @@ -360,6 +382,8 @@ static int do_show(int argc, char **argv) int err; int fd; + build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + if (argc == 2) { fd = prog_parse_fd(&argc, &argv); if (fd < 0) From c541b73466549c4aa4ee20ccd04ba52e4c95d6eb Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Wed, 8 Nov 2017 13:55:49 +0900 Subject: [PATCH 2034/2177] tools: bpftool: optionally show filenames of pinned objects Making it optional to show file names of pinned objects because it scans complete bpf-fs filesystem which is costly. Added option -f|--bpffs. Documentation updated. Signed-off-by: Prashant Bhole Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 5 ++++- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 5 ++++- tools/bpf/bpftool/main.c | 14 +++++++++++--- tools/bpf/bpftool/main.h | 3 ++- tools/bpf/bpftool/map.c | 3 ++- tools/bpf/bpftool/prog.c | 3 ++- 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index abb9ee940b15..9f51a268eb06 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **map** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** @@ -86,6 +86,9 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -f, --bpffs + Show file names of pinned maps. + EXAMPLES ======== **# bpftool map show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 0f25d3c39e05..36e8d1c3c40d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **prog** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **dump xlated** | **dump jited** | **pin** | **help** } @@ -75,6 +75,9 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -f, --bpffs + Show file names of pinned programs. 
+ EXAMPLES ======== **# bpftool prog show** diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 6ad53f1797fa..d6e4762170a4 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -54,6 +54,7 @@ static int (*last_do_help)(int argc, char **argv); json_writer_t *json_wtr; bool pretty_output; bool json_output; +bool show_pinned; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; @@ -265,6 +266,7 @@ int main(int argc, char **argv) { "help", no_argument, NULL, 'h' }, { "pretty", no_argument, NULL, 'p' }, { "version", no_argument, NULL, 'V' }, + { "bpffs", no_argument, NULL, 'f' }, { 0 } }; int opt, ret; @@ -272,12 +274,13 @@ int main(int argc, char **argv) last_do_help = do_help; pretty_output = false; json_output = false; + show_pinned = false; bin_name = argv[0]; hash_init(prog_table.table); hash_init(map_table.table); - while ((opt = getopt_long(argc, argv, "Vhpj", + while ((opt = getopt_long(argc, argv, "Vhpjf", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -290,6 +293,9 @@ int main(int argc, char **argv) case 'j': json_output = true; break; + case 'f': + show_pinned = true; + break; default: usage(); } @@ -316,8 +322,10 @@ int main(int argc, char **argv) if (json_output) jsonw_destroy(&json_wtr); - delete_pinned_obj_table(&prog_table); - delete_pinned_obj_table(&map_table); + if (show_pinned) { + delete_pinned_obj_table(&prog_table); + delete_pinned_obj_table(&map_table); + } return ret; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 13453de2e570..9c191e222d6f 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -59,7 +59,7 @@ #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] }" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }" enum bpf_obj_type { BPF_OBJ_UNKNOWN, @@ -71,6 +71,7 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; +extern bool show_pinned; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index de0980657cef..e2450c8e88e6 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -497,7 +497,8 @@ static int do_show(int argc, char **argv) int err; int fd; - build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + if (show_pinned) + build_pinned_obj_table(&map_table, BPF_OBJ_MAP); if (argc == 2) { fd = map_parse_fd_and_info(&argc, &argv, &info, &len); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 8f94b8ac2e63..f45c44ef9bec 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -382,7 +382,8 @@ static int do_show(int argc, char **argv) int err; int fd; - build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + if (show_pinned) + build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); if (argc == 2) { fd = prog_parse_fd(&argc, &argv); From 9ef8690be13d8ae3130749fbcc0cc21e4e3f738c Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Wed, 8 Nov 2017 16:30:44 +1100 Subject: [PATCH 2035/2177] net/ncsi: Improve general state logging The NCSI driver is mostly silent which becomes a headache when trying to determine what has occurred on the NCSI connection. This adds additional logging in a few key areas such as state transitions and calling out certain errors more visibly. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. 
Miller --- net/ncsi/ncsi-aen.c | 15 ++++++++- net/ncsi/ncsi-manage.c | 76 +++++++++++++++++++++++++++++++----------- net/ncsi/ncsi-rsp.c | 10 +++++- 3 files changed, 80 insertions(+), 21 deletions(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index f135938bf781..67e708e98ccf 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, ncm->data[2] = data; ncm->data[4] = ntohl(lsc->oem_status); + netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n", + nc->id, data & 0x1 ? "up" : "down"); + chained = !list_empty(&nc->link); state = nc->state; spin_unlock_irqrestore(&nc->lock, flags); @@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, ncm = &nc->modes[NCSI_MODE_LINK]; hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; ncm->data[3] = ntohl(hncdsc->status); + netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n", + nc->id, ncm->data[3] & 0x3 ? "up" : "down"); if (!list_empty(&nc->link) || nc->state != NCSI_CHANNEL_ACTIVE) { spin_unlock_irqrestore(&nc->lock, flags); @@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) } ret = ncsi_validate_aen_pkt(h, nah->payload); - if (ret) + if (ret) { + netdev_warn(ndp->ndev.dev, + "NCSI: 'bad' packet ignored for AEN type 0x%x\n", + h->type); goto out; + } ret = nah->handler(ndp, h); + if (ret) + netdev_err(ndp->ndev.dev, + "NCSI: Handler for AEN type 0x%x returned %d\n", + h->type, ret); out: consume_skb(skb); return ret; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 47baf914eec2..a2b904a718c6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -229,6 +229,8 @@ static void ncsi_channel_monitor(unsigned long data) case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; default: + netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n", + nc->id); if (!(ndp->flags & NCSI_DEV_HWA)) { ncsi_report_link(ndp, true); ndp->flags |= NCSI_DEV_RESHUFFLE; @@ -682,7 +684,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index); if (!data) { netdev_err(ndp->ndev.dev, - "ncsi: failed to retrieve filter %d\n", index); + "NCSI: failed to retrieve filter %d\n", index); /* Set the VLAN id to 0 - this will still disable the entry in * the filter table, but we won't know what it was. 
*/ @@ -692,7 +694,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, } netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "ncsi: removed vlan tag %u at index %d\n", + "NCSI: removed vlan tag %u at index %d\n", vid, index + 1); ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index); @@ -718,7 +720,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, if (index < 0) { /* New tag to add */ netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "ncsi: new vlan id to set: %u\n", + "NCSI: new vlan id to set: %u\n", vlan->vid); break; } @@ -745,7 +747,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, } netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "ncsi: set vid %u in packet, index %u\n", + "NCSI: set vid %u in packet, index %u\n", vlan->vid, index + 1); nca->type = NCSI_PKT_CMD_SVF; nca->words[1] = vlan->vid; @@ -784,8 +786,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD_SP\n"); goto error; + } nd->state = ncsi_dev_state_config_cis; break; @@ -797,8 +802,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; nca.channel = nc->id; ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD_CIS\n"); goto error; + } nd->state = ncsi_dev_state_config_clear_vids; break; @@ -895,10 +903,16 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) } ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD %x\n", + nca.type); goto error; + } break; case ncsi_dev_state_config_done: + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: channel %u config done\n", nc->id); spin_lock_irqsave(&nc->lock, flags); if (nc->reconfigure_needed) { /* This channel's configuration has been updated @@ -925,6 +939,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) } else { hot_nc = NULL; nc->state = NCSI_CHANNEL_INACTIVE; + netdev_warn(ndp->ndev.dev, + "NCSI: channel %u link down after config\n", + nc->id); } spin_unlock_irqrestore(&nc->lock, flags); @@ -937,8 +954,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) ncsi_process_next_channel(ndp); break; default: - netdev_warn(dev, "Wrong NCSI state 0x%x in config\n", - nd->state); + netdev_alert(dev, "Wrong NCSI state 0x%x in config\n", + nd->state); } return; @@ -990,10 +1007,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) } if (!found) { + netdev_warn(ndp->ndev.dev, + "NCSI: No channel found with link\n"); ncsi_report_link(ndp, true); return -ENODEV; } + ncm = &found->modes[NCSI_MODE_LINK]; + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: Channel %u added to queue (link %s)\n", + found->id, ncm->data[2] & 0x1 ? 
"up" : "down"); + out: spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&found->link, &ndp->channel_queue); @@ -1055,6 +1079,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp) /* We can have no channels in extremely case */ if (list_empty(&ndp->channel_queue)) { + netdev_err(ndp->ndev.dev, + "NCSI: No available channels for HWA\n"); ncsi_report_link(ndp, false); return -ENOENT; } @@ -1223,6 +1249,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) return; error: + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit cmd 0x%x during probe\n", + nca.type); ncsi_report_link(ndp, true); } @@ -1276,10 +1305,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) switch (old_state) { case NCSI_CHANNEL_INACTIVE: ndp->ndev.state = ncsi_dev_state_config; + netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n", + nc->id); ncsi_configure_channel(ndp); break; case NCSI_CHANNEL_ACTIVE: ndp->ndev.state = ncsi_dev_state_suspend; + netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n", + nc->id); ncsi_suspend_channel(ndp); break; default: @@ -1299,6 +1332,8 @@ out: return ncsi_choose_active_channel(ndp); } + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: No more channels to process\n"); ncsi_report_link(ndp, false); return -ENODEV; } @@ -1390,7 +1425,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) ncsi_dev_state_config || !list_empty(&nc->link)) { netdev_printk(KERN_DEBUG, nd->dev, - "ncsi: channel %p marked dirty\n", + "NCSI: channel %p marked dirty\n", nc); nc->reconfigure_needed = true; } @@ -1410,7 +1445,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) spin_unlock_irqrestore(&ndp->lock, flags); netdev_printk(KERN_DEBUG, nd->dev, - "ncsi: kicked channel %p\n", nc); + "NCSI: kicked channel %p\n", nc); n++; } } @@ -1431,7 +1466,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) nd = ncsi_find_dev(dev); if (!nd) { - netdev_warn(dev, "ncsi: No net_device?\n"); + netdev_warn(dev, "NCSI: No net_device?\n"); return 0; } @@ -1442,7 +1477,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) n_vids++; if (vlan->vid == vid) { netdev_printk(KERN_DEBUG, dev, - "vid %u already registered\n", vid); + "NCSI: vid %u already registered\n", vid); return 0; } } @@ -1461,7 +1496,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) vlan->vid = vid; list_add_rcu(&vlan->list, &ndp->vlan_vids); - netdev_printk(KERN_DEBUG, dev, "Added new vid %u\n", vid); + netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid); found = ncsi_kick_channels(ndp) != 0; @@ -1481,7 +1516,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) nd = ncsi_find_dev(dev); if (!nd) { - netdev_warn(dev, "ncsi: no net_device?\n"); + netdev_warn(dev, "NCSI: no net_device?\n"); return 0; } @@ -1491,14 +1526,14 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) if (vlan->vid == vid) { netdev_printk(KERN_DEBUG, dev, - "vid %u found, removing\n", vid); + "NCSI: vid %u found, removing\n", vid); list_del_rcu(&vlan->list); found = true; kfree(vlan); } if (!found) { - netdev_err(dev, "ncsi: vid %u wasn't registered!\n", vid); + netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid); return -EINVAL; } @@ -1581,10 +1616,12 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - if (ndp->flags & NCSI_DEV_HWA) + if (ndp->flags & NCSI_DEV_HWA) { + netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n"); ret 
= ncsi_enable_hwa(ndp); - else + } else { ret = ncsi_choose_active_channel(ndp); + } return ret; } @@ -1615,6 +1652,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd) } } + netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n"); ncsi_report_link(ndp, true); } EXPORT_SYMBOL_GPL(ncsi_stop_dev); diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 927dad4759d1..58186c4102f0 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1032,11 +1032,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, if (payload < 0) payload = ntohs(hdr->length); ret = ncsi_validate_rsp_pkt(nr, payload); - if (ret) + if (ret) { + netdev_warn(ndp->ndev.dev, + "NCSI: 'bad' packet ignored for type 0x%x\n", + hdr->type); goto out; + } /* Process the packet */ ret = nrh->handler(nr); + if (ret) + netdev_err(ndp->ndev.dev, + "NCSI: Handler for packet type 0x%x returned %d\n", + hdr->type, ret); out: ncsi_free_request(nr); return ret; From 04bad8bda9e25afe676a6f4452f3b304c1fdea16 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Wed, 8 Nov 2017 16:30:45 +1100 Subject: [PATCH 2036/2177] net/ncsi: Don't return error on normal response Several response handlers return EBUSY if the data corresponding to the command/response pair is already set. There is no reason to return an error here; the channel is advertising something as enabled because we told it to enable it, and it's possible that the feature has been enabled previously. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-rsp.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 58186c4102f0..efd933ff5570 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_ENABLE]; if (ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_ENABLE]; if (!ncm->enable) - return -EBUSY; + return 0; ncm->enable = 0; return 0; @@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; if (ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; if (!ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr) /* Check if the AEN has been enabled */ ncm = &nc->modes[NCSI_MODE_AEN]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to AEN configuration */ cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd); @@ -382,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr) /* Check if VLAN mode has been enabled */ ncm = &nc->modes[NCSI_MODE_VLAN]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to VLAN mode */ cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd); @@ -409,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr) /* Check if VLAN mode has been enabled */ ncm = &nc->modes[NCSI_MODE_VLAN]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to VLAN mode */ ncm->enable = 0; @@ -455,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr) bitmap = &ncf->bitmap; if (cmd->at_e & 0x1) { - if (test_and_set_bit(cmd->index, bitmap)) - return -EBUSY; + 
set_bit(cmd->index, bitmap); memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6); } else { - if (!test_and_clear_bit(cmd->index, bitmap)) - return -EBUSY; - + clear_bit(cmd->index, bitmap); memset(ncf->data + 6 * cmd->index, 0, 6); } @@ -485,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr) /* Check if broadcast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_BC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to broadcast filter mode */ cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd); @@ -511,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr) /* Check if broadcast filter isn't enabled */ ncm = &nc->modes[NCSI_MODE_BC]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to broadcast filter mode */ ncm->enable = 0; @@ -538,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr) /* Check if multicast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_MC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to multicast filter mode */ cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd); @@ -564,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr) /* Check if multicast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_MC]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to multicast filter mode */ ncm->enable = 0; @@ -591,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) /* Check if flow control has been enabled */ ncm = &nc->modes[NCSI_MODE_FC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to flow control mode */ cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd); From 2210d6b2f287d738eddf6b75f432126ce05450f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 7 Nov 2017 21:52:09 -0800 Subject: [PATCH 2037/2177] net: ipv6: sysctl to specify IPv6 ND traffic class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a per-device sysctl to specify the default traffic class to use for kernel originated IPv6 Neighbour Discovery packets. Currently this includes: - Router Solicitation (ICMPv6 type 133) ndisc_send_rs() -> ndisc_send_skb() -> ip6_nd_hdr() - Neighbour Solicitation (ICMPv6 type 135) ndisc_send_ns() -> ndisc_send_skb() -> ip6_nd_hdr() - Neighbour Advertisement (ICMPv6 type 136) ndisc_send_na() -> ndisc_send_skb() -> ip6_nd_hdr() - Redirect (ICMPv6 type 137) ndisc_send_redirect() -> ndisc_send_skb() -> ip6_nd_hdr() and if the kernel ever gets around to generating RA's, it would presumably also include: - Router Advertisement (ICMPv6 type 134) (radvd daemon could pick up on the kernel setting and use it) Interface drivers may examine the Traffic Class value and translate the DiffServ Code Point into a link-layer appropriate traffic prioritization scheme. An example of mapping IETF DSCP values to IEEE 802.11 User Priority values can be found here: https://tools.ietf.org/html/draft-ietf-tsvwg-ieee-802-11 The expected primary use case is to properly prioritize ND over wifi. 
Testing: jzem22:~# cat /proc/sys/net/ipv6/conf/eth0/ndisc_tclass 0 jzem22:~# echo -1 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass -bash: echo: write error: Invalid argument jzem22:~# echo 256 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass -bash: echo: write error: Invalid argument jzem22:~# echo 0 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass jzem22:~# echo 255 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass jzem22:~# cat /proc/sys/net/ipv6/conf/eth0/ndisc_tclass 255 jzem22:~# echo 34 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass jzem22:~# cat /proc/sys/net/ipv6/conf/eth0/ndisc_tclass 34 jzem22:~# echo $[0xDC] > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass jzem22:~# tcpdump -v -i eth0 icmp6 and src host jzem22.pgc and dst host fe80::1 tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes IP6 (class 0xdc, hlim 255, next-header ICMPv6 (58) payload length: 24) jzem22.pgc > fe80::1: [icmp6 sum ok] ICMP6, neighbor advertisement, length 24, tgt is jzem22.pgc, Flags [solicited] (based on original change written by Erik Kline, with minor changes) v2: fix 'suspicious rcu_dereference_check() usage' by explicitly grabbing the rcu_read_lock. Cc: Lorenzo Colitti Signed-off-by: Erik Kline Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 9 +++++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 11 +++++++++++ net/ipv6/ndisc.c | 9 ++++++++- 5 files changed, 30 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 54410a1d4065..d8676dda7fa6 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1732,6 +1732,15 @@ ndisc_notify - BOOLEAN 1 - Generate unsolicited neighbour advertisements when device is brought up or hardware address changes. +ndisc_tclass - INTEGER + The IPv6 Traffic Class to use by default when sending IPv6 Neighbor + Discovery (Router Solicitation, Router Advertisement, Neighbor + Solicitation, Neighbor Advertisement, Redirect) messages. + These 8 bits can be interpreted as 6 high order bits holding the DSCP + value and 2 low order bits representing ECN (which you probably want + to leave cleared). + 0 - (default) + mldv1_unsolicited_report_interval - INTEGER The interval in milliseconds in which the next unsolicited MLDv1 report retransmit will take place. 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ea04ca024f0d..cb18c6290ca8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -73,6 +73,7 @@ struct ipv6_devconf { __u32 enhanced_dad; __u32 addr_gen_mode; __s32 disable_policy; + __s32 ndisc_tclass; struct ctl_table_header *sysctl_header; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index b22a9c4e1b12..9c0f4a92bcff 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -186,6 +186,7 @@ enum { DEVCONF_ADDR_GEN_MODE, DEVCONF_DISABLE_POLICY, DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, + DEVCONF_NDISC_TCLASS, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6233e06fa35c..a6dffd65eb9d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5059,6 +5059,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; + array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; } static inline size_t inet6_ifla6_size(void) @@ -5986,6 +5987,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, } static int minus_one = -1; +static const int zero = 0; static const int one = 1; static const int two_five_five = 255; @@ -6356,6 +6358,15 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = addrconf_sysctl_disable_policy, }, + { + .procname = "ndisc_tclass", + .data = &ipv6_devconf.ndisc_tclass, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&zero, + .extra2 = (void *)&two_five_five, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f9c3ffe04382..b3cea200c85e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -427,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb, int hop_limit, int len) { struct ipv6hdr *hdr; + struct inet6_dev *idev; + unsigned tclass; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + tclass = idev ? idev->cnf.ndisc_tclass : 0; + rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, 0, 0); + ip6_flow_hdr(hdr, tclass, 0); hdr->payload_len = htons(len); hdr->nexthdr = IPPROTO_ICMPV6; From 28033ae4e0f59782b3f3357d9c28852a7e84a894 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 7 Nov 2017 21:59:40 -0800 Subject: [PATCH 2038/2177] net: netlink: Update attr validation to require exact length for some types Attributes using NLA_U* and NLA_S* (where * is 8, 16,32 and 64) are expected to be an exact length. Split these data types from nla_attr_minlen into nla_attr_len and update validate_nla to require the attribute to have exact length for them. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- lib/nlattr.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/nlattr.c b/lib/nlattr.c index 3d8295c85505..8bf78b4b78f0 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -15,19 +15,23 @@ #include #include -static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { +/* for these data types attribute length must be exactly given size */ +static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), - [NLA_MSECS] = sizeof(u64), - [NLA_NESTED] = NLA_HDRLEN, [NLA_S8] = sizeof(s8), [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), }; +static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_MSECS] = sizeof(u64), + [NLA_NESTED] = NLA_HDRLEN, +}; + static int validate_nla_bitfield32(const struct nlattr *nla, u32 *valid_flags_allowed) { @@ -65,6 +69,13 @@ static int validate_nla(const struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); + /* for data types NLA_U* and NLA_S* require exact length */ + if (nla_attr_len[pt->type]) { + if (attrlen != nla_attr_len[pt->type]) + return -ERANGE; + return 0; + } + switch (pt->type) { case NLA_FLAG: if (attrlen > 0) @@ -191,6 +202,8 @@ nla_policy_len(const struct nla_policy *p, int n) for (i = 0; i < n; i++, p++) { if (p->len) len += nla_total_size(p->len); + else if (nla_attr_len[p->type]) + len += nla_total_size(nla_attr_len[p->type]); else if (nla_attr_minlen[p->type]) len += nla_total_size(nla_attr_minlen[p->type]); } From 39e2151f1012eb4163b3a9f414c27d92798e4cbe Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Wed, 8 Nov 2017 15:52:22 +0800 Subject: [PATCH 2039/2177] net: hns3: fix a bug when getting phy address from NCL_config file Driver gets phy address from NCL_config file and uses the phy address to initialize phydev. There are 5 bits for phy address. And C22 phy address has 5 bits. So 0-31 are all valid address for phy. If there is no phy, it will crash. Because driver always get a valid phy address. This patch fixes the phy address to 8 bits, and use 0xff to indicate invalid phy address. Fixes: 46a3df9f9718 (net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support) Signed-off-by: Fuyun Liang Signed-off-by: Lipeng Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 844c83ea549e..ce5ed8845042 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -390,7 +390,7 @@ struct hclge_pf_res_cmd { #define HCLGE_CFG_TQP_DESC_N_S 16 #define HCLGE_CFG_TQP_DESC_N_M GENMASK(31, 16) #define HCLGE_CFG_PHY_ADDR_S 0 -#define HCLGE_CFG_PHY_ADDR_M GENMASK(4, 0) +#define HCLGE_CFG_PHY_ADDR_M GENMASK(7, 0) #define HCLGE_CFG_MEDIA_TP_S 8 #define HCLGE_CFG_MEDIA_TP_M GENMASK(15, 8) #define HCLGE_CFG_RX_BUF_LEN_S 16 From c040366bc4a58f719e61111dea4b550b71b2a0b4 Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Wed, 8 Nov 2017 15:52:23 +0800 Subject: [PATCH 2040/2177] net: hns3: cleanup mac auto-negotiation state query in hclge_update_speed_duplex When checking whether auto-negotiation is on, driver only needs to check the value of mac.autoneg(SW) directly, and does not need to query it from hardware. Because this value is always synchronized with the auto-negotiation state of hardware. 
This patch removes mac auto-negotiation state query in hclge_update_speed_duplex(). Fixes: 46a3df9f9718 (net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support) Signed-off-by: Fuyun Liang Signed-off-by: Lipeng Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c6ba89089ef3..781d5a8cbb6a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2325,18 +2325,7 @@ static int hclge_update_speed_duplex(struct hclge_dev *hdev) /* get the speed and duplex as autoneg'result from mac cmd when phy * doesn't exit. */ - if (mac.phydev) - return 0; - - /* update mac->antoneg. */ - ret = hclge_query_autoneg_result(hdev); - if (ret) { - dev_err(&hdev->pdev->dev, - "autoneg result query failed %d\n", ret); - return ret; - } - - if (!mac.autoneg) + if (mac.phydev || !mac.autoneg) return 0; ret = hclge_query_mac_an_speed_dup(hdev, &speed, &duplex); From e5c500eb298a9f5ef9b80d16fcea9662c89467b7 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 8 Nov 2017 08:59:40 +0100 Subject: [PATCH 2041/2177] net: mvpp2: fix GOP statistics loop start and stop conditions GOP statistics from all ports of one instance of the driver are gathered with one work recalled in loop in a workqueue. The loop is started when a port is up, and stopped when a port is down. This last condition is obviously wrong. Fix this by having a work per port. This way, starting and stoping it when the port is up or down will be fine, while minimizing unnecessary CPU usage. Fixes: 118d6298f6f0 ("net: mvpp2: add ethtool GOP statistics") Reported-by: Stefan Chulski Signed-off-by: Miquel Raynal Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvpp2.c | 62 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index a79d2ff4f86e..6c20e811f973 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -885,9 +885,7 @@ struct mvpp2 { /* Maximum number of RXQs per port */ unsigned int max_port_rxqs; - /* Workqueue to gather hardware statistics with its lock */ - struct mutex gather_stats_lock; - struct delayed_work stats_work; + /* Workqueue to gather hardware statistics */ char queue_name[30]; struct workqueue_struct *stats_queue; }; @@ -955,6 +953,10 @@ struct mvpp2_port { struct mvpp2_pcpu_stats __percpu *stats; u64 *ethtool_stats; + /* Per-port work and its lock to gather hardware statistics */ + struct mutex gather_stats_lock; + struct delayed_work stats_work; + phy_interface_t phy_interface; struct device_node *phy_node; struct phy *comphy; @@ -4895,32 +4897,25 @@ static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset, static void mvpp2_gather_hw_statistics(struct work_struct *work) { struct delayed_work *del_work = to_delayed_work(work); - struct mvpp2 *priv = container_of(del_work, struct mvpp2, stats_work); - struct mvpp2_port *port; + struct mvpp2_port *port = container_of(del_work, struct mvpp2_port, + stats_work); u64 *pstats; - int i, j; + int i; - mutex_lock(&priv->gather_stats_lock); + mutex_lock(&port->gather_stats_lock); - for (i = 0; i < priv->port_count; i++) { - if (!priv->port_list[i]) - continue; - - port = priv->port_list[i]; - pstats = port->ethtool_stats; - for (j = 0; j < ARRAY_SIZE(mvpp2_ethtool_regs); j++) - *pstats++ += mvpp2_read_count(port, - &mvpp2_ethtool_regs[j]); - } + pstats = port->ethtool_stats; + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + *pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_regs[i]); /* No need to read again the counters right after this function if it * was called asynchronously by the user (ie. use of ethtool). */ - cancel_delayed_work(&priv->stats_work); - queue_delayed_work(priv->stats_queue, &priv->stats_work, + cancel_delayed_work(&port->stats_work); + queue_delayed_work(port->priv->stats_queue, &port->stats_work, MVPP2_MIB_COUNTERS_STATS_DELAY); - mutex_unlock(&priv->gather_stats_lock); + mutex_unlock(&port->gather_stats_lock); } static void mvpp2_ethtool_get_stats(struct net_device *dev, @@ -4928,13 +4923,15 @@ static void mvpp2_ethtool_get_stats(struct net_device *dev, { struct mvpp2_port *port = netdev_priv(dev); - /* Update statistics for all ports, copy only those actually needed */ - mvpp2_gather_hw_statistics(&port->priv->stats_work.work); + /* Update statistics for the given port, then take the lock to avoid + * concurrent accesses on the ethtool_stats structure during its copy. 
+ */ + mvpp2_gather_hw_statistics(&port->stats_work.work); - mutex_lock(&port->priv->gather_stats_lock); + mutex_lock(&port->gather_stats_lock); memcpy(data, port->ethtool_stats, sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs)); - mutex_unlock(&port->priv->gather_stats_lock); + mutex_unlock(&port->gather_stats_lock); } static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset) @@ -7089,7 +7086,7 @@ static int mvpp2_open(struct net_device *dev) mvpp22_init_rss(port); /* Start hardware statistics gathering */ - queue_delayed_work(priv->stats_queue, &priv->stats_work, + queue_delayed_work(priv->stats_queue, &port->stats_work, MVPP2_MIB_COUNTERS_STATS_DELAY); return 0; @@ -7136,8 +7133,7 @@ static int mvpp2_stop(struct net_device *dev) mvpp2_cleanup_rxqs(port); mvpp2_cleanup_txqs(port); - cancel_delayed_work_sync(&priv->stats_work); - flush_workqueue(priv->stats_queue); + cancel_delayed_work_sync(&port->stats_work); return 0; } @@ -7889,6 +7885,9 @@ static int mvpp2_port_probe(struct platform_device *pdev, goto err_free_stats; } + mutex_init(&port->gather_stats_lock); + INIT_DELAYED_WORK(&port->stats_work, mvpp2_gather_hw_statistics); + mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from); port->tx_ring_size = MVPP2_MAX_TXD; @@ -8356,7 +8355,6 @@ static int mvpp2_probe(struct platform_device *pdev) * smallest packets (64B) will overflow a 32-bit counter in less than * 30 seconds. Then, use a workqueue to fill 64-bit counters. */ - mutex_init(&priv->gather_stats_lock); snprintf(priv->queue_name, sizeof(priv->queue_name), "stats-wq-%s%s", netdev_name(priv->port_list[0]->dev), priv->port_count > 1 ? "+" : ""); @@ -8366,8 +8364,6 @@ static int mvpp2_probe(struct platform_device *pdev) goto err_mg_clk; } - INIT_DELAYED_WORK(&priv->stats_work, mvpp2_gather_hw_statistics); - platform_set_drvdata(pdev, priv); return 0; @@ -8389,12 +8385,14 @@ static int mvpp2_remove(struct platform_device *pdev) struct device_node *port_node; int i = 0; + flush_workqueue(priv->stats_queue); destroy_workqueue(priv->stats_queue); - mutex_destroy(&priv->gather_stats_lock); for_each_available_child_of_node(dn, port_node) { - if (priv->port_list[i]) + if (priv->port_list[i]) { + mutex_destroy(&priv->port_list[i]->gather_stats_lock); mvpp2_port_remove(priv->port_list[i]); + } i++; } From 9ce981401cce7852542cab267702b6e89f37a4b8 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 8 Nov 2017 09:56:34 +0100 Subject: [PATCH 2042/2177] net: macb: add of_phy_deregister_fixed_link to error paths We add the call of_phy_deregister_fixed_link to all associated error paths for memory clean up. Signed-off-by: Michael Grzeschik Acked-by: Nicolas Ferre Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5dafcde67e45..cc3f36a5c6e1 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -611,6 +611,8 @@ static int macb_mii_init(struct macb *bp) err_out_unregister_bus: mdiobus_unregister(bp->mii_bus); err_out_free_mdiobus: + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); mdiobus_free(bp->mii_bus); err_out: return err; @@ -3550,6 +3552,8 @@ static int macb_probe(struct platform_device *pdev) err_out_unregister_mdio: phy_disconnect(dev->phydev); mdiobus_unregister(bp->mii_bus); + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); mdiobus_free(bp->mii_bus); /* Shutdown the PHY if there is a GPIO reset */ @@ -3572,6 +3576,7 @@ static int macb_remove(struct platform_device *pdev) { struct net_device *dev; struct macb *bp; + struct device_node *np = pdev->dev.of_node; dev = platform_get_drvdata(pdev); @@ -3580,6 +3585,8 @@ static int macb_remove(struct platform_device *pdev) if (dev->phydev) phy_disconnect(dev->phydev); mdiobus_unregister(bp->mii_bus); + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); dev->phydev = NULL; mdiobus_free(bp->mii_bus); From 66ee6a06e620740b706b8dbde161492f6a405b26 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 8 Nov 2017 09:56:35 +0100 Subject: [PATCH 2043/2177] net: macb: add of_node_put to error paths We add the call of_node_put(bp->phy_node) to all associated error paths for memory clean up. Signed-off-by: Michael Grzeschik Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index cc3f36a5c6e1..72a67f74b97b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -611,6 +611,7 @@ static int macb_mii_init(struct macb *bp) err_out_unregister_bus: mdiobus_unregister(bp->mii_bus); err_out_free_mdiobus: + of_node_put(bp->phy_node); if (np && of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); mdiobus_free(bp->mii_bus); @@ -3552,6 +3553,7 @@ static int macb_probe(struct platform_device *pdev) err_out_unregister_mdio: phy_disconnect(dev->phydev); mdiobus_unregister(bp->mii_bus); + of_node_put(bp->phy_node); if (np && of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); mdiobus_free(bp->mii_bus); From 8d6e79d3ce13e34957de87f7584cbf1bcde74c57 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 8 Nov 2017 09:59:26 +0100 Subject: [PATCH 2044/2177] tipc: improve link resiliency when rps is activated Currently, the TIPC RPS dissector is based only on the incoming packets' source node address, hence steering all traffic from a node to the same core. We have seen that this makes the links vulnerable to starvation and unnecessary resets when we turn down the link tolerance to very low values. To reduce the risk of this happening, we exempt probe and probe replies packets from the convergence to one core per source node. Instead, we do the opposite, - we try to diverge those packets across as many cores as possible, by randomizing the flow selector key. To make such packets identifiable to the dissector, we add a new 'is_keepalive' bit to word 0 of the LINK_PROTOCOL header. 
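For clarity, the dissector-side key selection this enables is the tipc_hdr_rps_key() helper added in include/net/tipc.h further down in this patch; in sketch form it reads:

static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr)
{
	u32 w0 = ntohl(hdr->w[0]);
	bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK;
	int key;

	/* Ordinary traffic: keep steering everything from one source node
	 * to the same core by returning word 3 (the source node address).
	 */
	if (likely(!keepalive_msg))
		return hdr->w[3];

	/* PROBE/PROBE_REPLY: return a random key so keepalives are spread
	 * across cores instead of queueing behind bulk traffic.
	 */
	get_random_bytes(&key, sizeof(key));
	return key;
}

The helper only needs the new header bit to tell keepalives apart.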
This bit is set both for PROBE and PROBE_REPLY messages, and only for those. It should be noted that these packets are not part of any flow anyway, and only constitute a minuscule fraction of all packets sent across a link. Hence, there is no risk that this will affect overall performance. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 12 +++---- include/net/tipc.h | 62 ++++++++++++++++++++++++++++++++++++ net/core/flow_dissector.c | 30 ++++++++--------- net/tipc/link.c | 26 ++++++++------- net/tipc/msg.h | 10 ++++++ 5 files changed, 108 insertions(+), 32 deletions(-) create mode 100644 include/net/tipc.h diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 22aba321282d..9a074776f70b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -84,11 +84,11 @@ struct flow_dissector_key_ipv6_addrs { }; /** - * struct flow_dissector_key_tipc_addrs: - * @srcnode: source node address + * struct flow_dissector_key_tipc: + * @key: source node address combined with selector */ -struct flow_dissector_key_tipc_addrs { - __be32 srcnode; +struct flow_dissector_key_tipc { + __be32 key; }; /** @@ -100,7 +100,7 @@ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; - struct flow_dissector_key_tipc_addrs tipcaddrs; + struct flow_dissector_key_tipc tipckey; }; }; @@ -192,7 +192,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ - FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ + FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ diff --git a/include/net/tipc.h b/include/net/tipc.h new file mode 100644 index 000000000000..07670ec022a7 --- /dev/null +++ b/include/net/tipc.h @@ -0,0 +1,62 @@ +/* + * include/net/tipc.h: Include file for TIPC message header routines + * + * Copyright (c) 2017 Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_HDR_H +#define _TIPC_HDR_H + +#include + +#define KEEPALIVE_MSG_MASK 0x0e080000 /* LINK_PROTOCOL + MSG_IS_KEEPALIVE */ + +struct tipc_basic_hdr { + __be32 w[4]; +}; + +static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr) +{ + u32 w0 = ntohl(hdr->w[0]); + bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK; + int key; + + /* Return source node identity as key */ + if (likely(!keepalive_msg)) + return hdr->w[3]; + + /* Spread PROBE/PROBE_REPLY messages across the cores */ + get_random_bytes(&key, sizeof(key)); + return key; +} + +#endif diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1f5caafb4492..15ce30063765 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -772,23 +773,22 @@ proto_again: break; } case htons(ETH_P_TIPC): { - struct { - __be32 pre[3]; - __be32 srcnode; - } *hdr, _hdr; - hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + struct tipc_basic_hdr *hdr, _hdr; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), + data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_TIPC_ADDRS)) { + FLOW_DISSECTOR_KEY_TIPC)) { key_addrs = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_TIPC_ADDRS, + FLOW_DISSECTOR_KEY_TIPC, target_container); - key_addrs->tipcaddrs.srcnode = hdr->srcnode; - key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS; + key_addrs->tipckey.key = tipc_hdr_rps_key(hdr); + key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC; } fdret = FLOW_DISSECT_RET_OUT_GOOD; break; @@ -1024,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow) case FLOW_DISSECTOR_KEY_IPV6_ADDRS: diff -= sizeof(flow->addrs.v6addrs); break; - case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - diff -= sizeof(flow->addrs.tipcaddrs); + case FLOW_DISSECTOR_KEY_TIPC: + diff -= sizeof(flow->addrs.tipckey); break; } return (sizeof(*flow) - diff) / sizeof(u32); @@ -1039,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow) case FLOW_DISSECTOR_KEY_IPV6_ADDRS: return (__force __be32)ipv6_addr_hash( &flow->addrs.v6addrs.src); - case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - return flow->addrs.tipcaddrs.srcnode; + case FLOW_DISSECTOR_KEY_TIPC: + return flow->addrs.tipckey.key; default: return 0; } @@ -1321,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .offset = offsetof(struct flow_keys, addrs.v6addrs), }, { - .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS, - .offset = offsetof(struct flow_keys, addrs.tipcaddrs), + .key_id = FLOW_DISSECTOR_KEY_TIPC, + .offset = offsetof(struct flow_keys, addrs.tipckey), }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, diff --git a/net/tipc/link.c b/net/tipc/link.c index 870b9b8f877a..6bce0b1117bd 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l) 
static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - u16 rcvgap, int tolerance, int priority, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); static int tipc_link_build_nack_msg(struct tipc_link *l, @@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) } if (state || probe || setup) - tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); return rc; } @@ -1174,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1188,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; - tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq); /* Inform peer that this endpoint is going down if applicable */ skb = skb_peek_tail(xmitq); @@ -1215,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, } if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1289,7 +1290,8 @@ drop: } static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - u16 rcvgap, int tolerance, int priority, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, struct sk_buff_head *xmitq) { struct tipc_link *bcl = l->bc_rcvlink; @@ -1337,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_seq_gap(hdr, rcvgap); msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); + msg_set_is_keepalive(hdr, probe || probe_reply); tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + dlen); skb_trim(skb, INT_H_SIZE + dlen); @@ -1442,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 rcv_nxt = l->rcv_nxt; u16 dlen = msg_data_sz(hdr); int mtyp = msg_type(hdr); + bool reply = msg_probe(hdr); void *data; char *if_name; int rc = 0; @@ -1528,9 +1532,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) rcvgap = peers_snd_nxt - l->rcv_nxt; - if (rcvgap || (msg_probe(hdr))) - tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, - 0, 0, xmitq); + if (rcvgap || reply) + tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, + rcvgap, 0, 0, xmitq); tipc_link_release_pkts(l, ack); /* If NACK, retransmit will now start at right position */ @@ -2122,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } void tipc_link_set_prio(struct tipc_link *l, u32 prio, struct sk_buff_head *xmitq) { l->priority = prio; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, 
xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); } void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index cedf811317fb..bf8f57ccc70c 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -226,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 19, 1, d); } +static inline int msg_is_keepalive(struct tipc_msg *m) +{ + return msg_bits(m, 0, 19, 1); +} + +static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 19, 1, d); +} + static inline int msg_src_droppable(struct tipc_msg *m) { return msg_bits(m, 0, 18, 1); From 7863f46bac3a1716f7d547c53f367ddf509f031e Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 10 Nov 2017 22:19:50 -0800 Subject: [PATCH 2045/2177] bpf: Fix tcp_synrto_kern.c sample program The program was returning -1 in some cases which is not allowed by the verifier any longer. Fixes: 390ee7e29fc8 ("bpf: enforce return code for cgroup-bpf programs") Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- samples/bpf/tcp_synrto_kern.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c index 3c3fc83d81cb..232bb242823e 100644 --- a/samples/bpf/tcp_synrto_kern.c +++ b/samples/bpf/tcp_synrto_kern.c @@ -38,8 +38,10 @@ int bpf_synrto(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; From 016e661bb0610a98b1c9ac1250e3269236fabe19 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 10 Nov 2017 22:19:51 -0800 Subject: [PATCH 2046/2177] bpf: Fix tcp_rwnd_kern.c sample program The program was returning -1 in some cases which is not allowed by the verifier any longer. Fixes: 390ee7e29fc8 ("bpf: enforce return code for cgroup-bpf programs") Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- samples/bpf/tcp_rwnd_kern.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c index 3f2a228f81ce..09ff65b40b31 100644 --- a/samples/bpf/tcp_rwnd_kern.c +++ b/samples/bpf/tcp_rwnd_kern.c @@ -38,8 +38,10 @@ int bpf_rwnd(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != - 55601 && skops->local_port != 55601) - return -1; + 55601 && skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; From a4174f0560f849317239478b1b22afbf03a6eda2 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 10 Nov 2017 22:19:52 -0800 Subject: [PATCH 2047/2177] bpf: Fix tcp_bufs_kern.c sample program The program was returning -1 in some cases which is not allowed by the verifier any longer. Fixes: 390ee7e29fc8 ("bpf: enforce return code for cgroup-bpf programs") Signed-off-by: Lawrence Brakmo Signed-off-by: David S. 
Miller --- samples/bpf/tcp_bufs_kern.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c index ee83bbabd17c..0566b7fa38a1 100644 --- a/samples/bpf/tcp_bufs_kern.c +++ b/samples/bpf/tcp_bufs_kern.c @@ -41,8 +41,10 @@ int bpf_bufs(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; @@ -61,8 +63,8 @@ int bpf_bufs(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of active connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: /* Nothing to do */ @@ -71,8 +73,8 @@ int bpf_bufs(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of passive connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; default: rv = -1; From 2ff969fbe2bfa4486b66226917352d4bb12ec1cb Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 10 Nov 2017 22:19:53 -0800 Subject: [PATCH 2048/2177] bpf: Fix tcp_cong_kern.c sample program The program was returning -1 in some cases which is not allowed by the verifier any longer. Fixes: 390ee7e29fc8 ("bpf: enforce return code for cgroup-bpf programs") Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- samples/bpf/tcp_cong_kern.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c index dac15bce1fa9..ad0f1ba8206a 100644 --- a/samples/bpf/tcp_cong_kern.c +++ b/samples/bpf/tcp_cong_kern.c @@ -39,8 +39,10 @@ int bpf_cong(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; From e1853319fc4c7279a3561b1844af4a02487c93ef Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 10 Nov 2017 22:19:54 -0800 Subject: [PATCH 2049/2177] bpf: Fix tcp_iw_kern.c sample program The program was returning -1 in some cases which is not allowed by the verifier any longer. Fixes: 390ee7e29fc8 ("bpf: enforce return code for cgroup-bpf programs") Signed-off-by: Lawrence Brakmo Signed-off-by: David S. 
Miller --- samples/bpf/tcp_iw_kern.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c index 23c5122ef819..4ca5ecc9f580 100644 --- a/samples/bpf/tcp_iw_kern.c +++ b/samples/bpf/tcp_iw_kern.c @@ -42,8 +42,10 @@ int bpf_iw(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; @@ -62,8 +64,8 @@ int bpf_iw(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of active connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw, @@ -73,8 +75,8 @@ int bpf_iw(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of passive connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; default: rv = -1; From 03e982eed419da37e9cac1d759097dbe10447190 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 10 Nov 2017 22:19:55 -0800 Subject: [PATCH 2050/2177] bpf: Fix tcp_clamp_kern.c sample program The program was returning -1 in some cases which is not allowed by the verifier any longer. Fixes: 390ee7e29fc8 ("bpf: enforce return code for cgroup-bpf programs") Signed-off-by: Lawrence Brakmo Signed-off-by: David S. 
Miller --- samples/bpf/tcp_clamp_kern.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c index d68eadd9ca2d..f4225c9d2c0c 100644 --- a/samples/bpf/tcp_clamp_kern.c +++ b/samples/bpf/tcp_clamp_kern.c @@ -41,8 +41,10 @@ int bpf_clamp(struct bpf_sock_ops *skops) /* For testing purposes, only execute rest of BPF program * if neither port numberis 55601 */ - if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) - return -1; + if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) { + skops->reply = -1; + return 0; + } op = (int) skops->op; @@ -66,9 +68,9 @@ int bpf_clamp(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of active connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_RCVBUF, &bufsize, - sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: rv = bpf_setsockopt(skops, SOL_TCP, @@ -80,12 +82,12 @@ int bpf_clamp(struct bpf_sock_ops *skops) rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SNDCWND_CLAMP, &clamp, sizeof(clamp)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_SNDBUF, &bufsize, - sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_RCVBUF, &bufsize, - sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); break; default: rv = -1; From f3edacbd697f94a743fff1a3d26910ab99948ba7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Nov 2017 18:24:55 +0900 Subject: [PATCH 2051/2177] bpf: Revert bpf_overrid_function() helper changes. NACK'd by x86 maintainer. Signed-off-by: David S. 
Miller --- arch/Kconfig | 3 -- arch/x86/Kconfig | 1 - arch/x86/include/asm/kprobes.h | 4 --- arch/x86/include/asm/ptrace.h | 5 --- arch/x86/kernel/kprobes/ftrace.c | 14 -------- include/linux/filter.h | 3 +- include/linux/trace_events.h | 1 - include/uapi/linux/bpf.h | 7 +--- kernel/bpf/core.c | 3 -- kernel/bpf/verifier.c | 2 -- kernel/events/core.c | 7 ---- kernel/trace/Kconfig | 11 ------- kernel/trace/bpf_trace.c | 35 -------------------- kernel/trace/trace_kprobe.c | 40 ++++------------------- kernel/trace/trace_probe.h | 6 ---- samples/bpf/Makefile | 4 --- samples/bpf/test_override_return.sh | 15 --------- samples/bpf/tracex7_kern.c | 16 --------- samples/bpf/tracex7_user.c | 28 ---------------- tools/include/uapi/linux/bpf.h | 7 +--- tools/testing/selftests/bpf/bpf_helpers.h | 3 +- 21 files changed, 11 insertions(+), 204 deletions(-) delete mode 100755 samples/bpf/test_override_return.sh delete mode 100644 samples/bpf/tracex7_kern.c delete mode 100644 samples/bpf/tracex7_user.c diff --git a/arch/Kconfig b/arch/Kconfig index 6e8520f09bc1..057370a0ac4e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -196,9 +196,6 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool -config HAVE_KPROBE_OVERRIDE - bool - config HAVE_NMI bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 51458c1a0b4a..2fdb23313dd5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -153,7 +153,6 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE - select HAVE_KPROBE_OVERRIDE select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index c6c3b1f4306a..6cf65437b5e5 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,10 +67,6 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); -#ifdef CONFIG_KPROBES_ON_FTRACE -extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs); -#endif - /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 2370bb0149cc..c0e3c45cf6ab 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -109,11 +109,6 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->ax; } -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->ax = rc; -} - /* * user_mode(regs) determines whether a register set came from user * mode. 
On x86_32, this is true if V8086 mode was enabled OR if the diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 3c455bf490cb..041f7b6dfa0f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -97,17 +97,3 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) p->ainsn.boostable = false; return 0; } - -asmlinkage void override_func(void); -asm( - ".type override_func, @function\n" - "override_func:\n" - " ret\n" - ".size override_func, .-override_func\n" -); - -void arch_ftrace_kprobe_override_function(struct pt_regs *regs) -{ - regs->ip = (unsigned long)&override_func; -} -NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function); diff --git a/include/linux/filter.h b/include/linux/filter.h index eaec066f99e8..0cd02ff4ae30 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -459,8 +459,7 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - kprobe_override:1; /* Do we override a kprobe? */ + dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 17e5e820a84c..84014ecfa67f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -523,7 +523,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index adb66f78b674..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,10 +677,6 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -740,8 +736,7 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 271daad31f37..8a6c37762330 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1326,9 +1326,6 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { - if (fp->kprobe_override) - return false; - if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bc464b8ec91e..4a942e2e753d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4357,8 +4357,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_override_return) - prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can diff --git a/kernel/events/core.c b/kernel/events/core.c index ac240d31b5bf..42d24bd64ea4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8171,13 +8171,6 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } - /* Kprobe override only works for kprobes, not uprobes. */ - if (prog->kprobe_override && - !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { - bpf_prog_put(prog); - return -EINVAL; - } - if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9dc0deeaad2b..434c840e2d82 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -518,17 +518,6 @@ config FUNCTION_PROFILER If in doubt, say N. -config BPF_KPROBE_OVERRIDE - bool "Enable BPF programs to override a kprobed function" - depends on BPF_EVENTS - depends on KPROBES_ON_FTRACE - depends on HAVE_KPROBE_OVERRIDE - depends on DYNAMIC_FTRACE_WITH_REGS - default n - help - Allows BPF to override the execution of a probed function and - set a different return value. This is used for error injection. - config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1865b0d4cdeb..506efe6e8ed9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,10 +13,6 @@ #include #include #include -#include -#include - -#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -80,29 +76,6 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) -{ - __this_cpu_write(bpf_kprobe_override, 1); - regs_set_return_value(regs, rc); - arch_ftrace_kprobe_override_function(regs); - return 0; -} -#else -BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) -{ - return -EINVAL; -} -#endif - -static const struct bpf_func_proto bpf_override_return_proto = { - .func = bpf_override_return, - .gpl_only = true, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_ANYTHING, -}; - BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -578,10 +551,6 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; - case BPF_FUNC_override_return: - pr_warn_ratelimited("%s[%d] is installing a program with bpf_override_return helper that may cause unexpected behavior!", - current->comm, task_pid_nr(current)); - return &bpf_override_return_proto; default: return tracing_func_proto(func_id); } @@ -797,10 +766,6 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; - /* Kprobe override only works for ftrace based kprobes. 
*/ - if (prog->kprobe_override && !trace_kprobe_ftrace(event->tp_event)) - return -EINVAL; - mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8e3c9ec1faf7..abf92e478cfb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,7 +42,6 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) -DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -88,12 +87,6 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } -int trace_kprobe_ftrace(struct trace_event_call *call) -{ - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; - return kprobe_ftrace(&tk->rp.kp); -} - static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1177,7 +1170,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static int +static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1186,29 +1179,12 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call)) { - int ret; - - ret = trace_call_bpf(call, regs); - - /* - * We need to check and see if we modified the pc of the - * pt_regs, and if so clear the kprobe and return 1 so that we - * don't do the instruction skipping. Also reset our state so - * we are clean the next pass through. - */ - if (__this_cpu_read(bpf_kprobe_override)) { - __this_cpu_write(bpf_kprobe_override, 0); - reset_current_kprobe(); - return 1; - } - if (!ret) - return 0; - } + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) + return; head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return 0; + return; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1217,14 +1193,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return 0; + return; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL, NULL); - return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1300,7 +1275,6 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); - int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1308,9 +1282,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - ret = kprobe_perf_func(tk, regs); + kprobe_perf_func(tk, regs); #endif - return ret; + return 0; /* We don't tweek kernel, so just return 0 */ } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index adbb3f7d1fb5..903273c93e61 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -253,7 +253,6 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const 
char *sym, long offset); -int trace_kprobe_ftrace(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -279,11 +278,6 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } - -static inline int trace_kprobe_ftrace(struct trace_event_call *call) -{ - return 0; -} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 87db0f9a4c15..3b4945c1eab0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -15,7 +15,6 @@ hostprogs-y += tracex3 hostprogs-y += tracex4 hostprogs-y += tracex5 hostprogs-y += tracex6 -hostprogs-y += tracex7 hostprogs-y += test_probe_write_user hostprogs-y += trace_output hostprogs-y += lathist @@ -62,7 +61,6 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o -tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o @@ -106,7 +104,6 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o -always += tracex7_kern.o always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o @@ -161,7 +158,6 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf -HOSTLOADLIBES_tracex7 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh deleted file mode 100755 index e68b9ee6814b..000000000000 --- a/samples/bpf/test_override_return.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -rm -f testfile.img -dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 -DEVICE=$(losetup --show -f testfile.img) -mkfs.btrfs -f $DEVICE -mkdir tmpmnt -./tracex7 $DEVICE -if [ $? -eq 0 ] -then - echo "SUCCESS!" -else - echo "FAILED!" -fi -losetup -d $DEVICE diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c deleted file mode 100644 index 1ab308a43e0f..000000000000 --- a/samples/bpf/tracex7_kern.c +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include -#include -#include "bpf_helpers.h" - -SEC("kprobe/open_ctree") -int bpf_prog1(struct pt_regs *ctx) -{ - unsigned long rc = -12; - - bpf_override_return(ctx, rc); - return 0; -} - -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c deleted file mode 100644 index 8a52ac492e8b..000000000000 --- a/samples/bpf/tracex7_user.c +++ /dev/null @@ -1,28 +0,0 @@ -#define _GNU_SOURCE - -#include -#include -#include -#include "libbpf.h" -#include "bpf_load.h" - -int main(int argc, char **argv) -{ - FILE *f; - char filename[256]; - char command[256]; - int ret; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } - - snprintf(command, 256, "mount %s tmpmnt/", argv[1]); - f = popen(command, "r"); - ret = pclose(f); - - return ret ? 
0 : 1; -} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index adb66f78b674..e880ae6434ee 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -677,10 +677,6 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -740,8 +736,7 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 33cb00e46c49..fd9a17fa8a8b 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -82,8 +82,7 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; -static int (*bpf_override_return)(void *ctx, unsigned long rc) = - (void *) BPF_FUNC_override_return; + /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions From 37798d0211315d60d92452eb54b22af199cce11d Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 8 Nov 2017 11:23:56 -0600 Subject: [PATCH 2052/2177] ibmvnic: Add vnic client data to login buffer Update the login buffer to include client data for the vnic driver, this includes the OS name, LPAR name, and device name. This update allows this information to be available in the VIOS. Signed-off-by: Nathan Fontenot Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 68 +++++++++++++++++++++++++++++- drivers/net/ethernet/ibm/ibmvnic.h | 2 + 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d0cff2807d0b..b918bc2f2e4f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -75,6 +75,7 @@ #include #include #include +#include #include "ibmvnic.h" @@ -2813,6 +2814,55 @@ static int send_version_xchg(struct ibmvnic_adapter *adapter) return ibmvnic_send_crq(adapter, &crq); } +struct vnic_login_client_data { + u8 type; + __be16 len; + char name; +} __packed; + +static int vnic_client_data_len(struct ibmvnic_adapter *adapter) +{ + int len; + + /* Calculate the amount of buffer space needed for the + * vnic client data in the login buffer. There are four entries, + * OS name, LPAR name, device name, and a null last entry. 
+ */ + len = 4 * sizeof(struct vnic_login_client_data); + len += 6; /* "Linux" plus NULL */ + len += strlen(utsname()->nodename) + 1; + len += strlen(adapter->netdev->name) + 1; + + return len; +} + +static void vnic_add_client_data(struct ibmvnic_adapter *adapter, + struct vnic_login_client_data *vlcd) +{ + const char *os_name = "Linux"; + int len; + + /* Type 1 - LPAR OS */ + vlcd->type = 1; + len = strlen(os_name) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, os_name, len); + vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len); + + /* Type 2 - LPAR name */ + vlcd->type = 2; + len = strlen(utsname()->nodename) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, utsname()->nodename, len); + vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len); + + /* Type 3 - device name */ + vlcd->type = 3; + len = strlen(adapter->netdev->name) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, adapter->netdev->name, len); +} + static void send_login(struct ibmvnic_adapter *adapter) { struct ibmvnic_login_rsp_buffer *login_rsp_buffer; @@ -2825,13 +2875,18 @@ static void send_login(struct ibmvnic_adapter *adapter) size_t buffer_size; __be64 *tx_list_p; __be64 *rx_list_p; + int client_data_len; + struct vnic_login_client_data *vlcd; int i; + client_data_len = vnic_client_data_len(adapter); + buffer_size = sizeof(struct ibmvnic_login_buffer) + - sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues); + sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) + + client_data_len; - login_buffer = kmalloc(buffer_size, GFP_ATOMIC); + login_buffer = kzalloc(buffer_size, GFP_ATOMIC); if (!login_buffer) goto buf_alloc_failed; @@ -2898,6 +2953,15 @@ static void send_login(struct ibmvnic_adapter *adapter) } } + /* Insert vNIC login client data */ + vlcd = (struct vnic_login_client_data *) + ((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues)); + login_buffer->client_data_offset = + cpu_to_be32((char *)vlcd - (char *)login_buffer); + login_buffer->client_data_len = cpu_to_be32(client_data_len); + + vnic_add_client_data(adapter, vlcd); + netdev_dbg(adapter->netdev, "Login Buffer:\n"); for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 4670af80d612..8ed829c5b026 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -57,6 +57,8 @@ struct ibmvnic_login_buffer { __be32 off_rxcomp_subcrqs; __be32 login_rsp_ioba; __be32 login_rsp_len; + __be32 client_data_offset; + __be32 client_data_len; } __packed __aligned(8); struct ibmvnic_login_rsp_buffer { From d9b9c0e027c93160c94dae07b6d29acc5cdd6d54 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Nov 2017 11:24:57 -0600 Subject: [PATCH 2053/2177] net: ethernet: bgmac: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 1397972 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bgmac-platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index d937083db9a4..894eda5b13cf 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -131,6 +131,7 @@ static void bgmac_nicpm_speed_set(struct net_device *net_dev) switch (bgmac->net_dev->phydev->speed) { default: netdev_err(net_dev, "Unsupported speed. Defaulting to 1000Mb\n"); + /* fall through */ case SPEED_1000: val |= NICPM_IOMUX_CTRL_SPD_1000M << NICPM_IOMUX_CTRL_SPD_SHIFT; break; From e4ec1384132ead18e972f1180e958aa0b69abd11 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Nov 2017 11:57:13 -0600 Subject: [PATCH 2054/2177] fsl/fman_port: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 1397960 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_port.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 1789b206be58..6552d68ea6e1 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1339,8 +1339,10 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params) switch (port->port_type) { case FMAN_PORT_TYPE_RX: set_rx_dflt_cfg(port, params); + /* fall through */ case FMAN_PORT_TYPE_TX: set_tx_dflt_cfg(port, params, &port->dts_params); + /* fall through */ default: set_dflt_cfg(port, params); } From 713bafea92920103cd3d361657406cf04d0e22dd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 8 Nov 2017 13:01:26 -0800 Subject: [PATCH 2055/2177] tcp: retire FACK loss detection FACK loss detection has been disabled by default and the successor RACK subsumed FACK and can handle reordering better. This patch removes FACK to simplify TCP loss recovery. Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Soheil Hassas Yeganeh Reviewed-by: Priyaranjan Jha Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 3 +- include/linux/tcp.h | 1 - include/net/tcp.h | 14 +------ include/uapi/linux/snmp.h | 1 - net/ipv4/proc.c | 1 - net/ipv4/tcp.c | 2 - net/ipv4/tcp_input.c | 53 ++++---------------------- net/ipv4/tcp_metrics.c | 4 +- net/ipv4/tcp_minisocks.c | 5 +-- net/ipv4/tcp_output.c | 5 +-- 10 files changed, 12 insertions(+), 77 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d8676dda7fa6..46c7e1085efc 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -289,8 +289,7 @@ tcp_ecn_fallback - BOOLEAN Default: 1 (fallback enabled) tcp_fack - BOOLEAN - Enable FACK congestion avoidance and fast retransmission. - The value is not used, if tcp_sack is not enabled. + This is a legacy option, it has no effect anymore. 
tcp_fin_timeout - INTEGER The length of time an orphaned (no longer referenced by any diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 22f40c96a15b..9574936fe041 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -85,7 +85,6 @@ struct tcp_sack_block { /*These are used to set the sack_ok field in struct tcp_options_received */ #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ -#define TCP_FACK_ENABLED (1 << 1) /*1 = FACK is enabled locally*/ #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ struct tcp_options_received { diff --git a/include/net/tcp.h b/include/net/tcp.h index 2f2c69ad31b2..ed71511e67a6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -384,7 +384,6 @@ void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); -void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); @@ -776,7 +775,7 @@ struct tcp_skb_cb { }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ - __u8 sacked; /* State flags for SACK/FACK. */ + __u8 sacked; /* State flags for SACK. */ #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ #define TCPCB_LOST 0x04 /* SKB is lost */ @@ -1066,7 +1065,6 @@ void tcp_rate_check_app_limited(struct sock *sk); * * tcp_is_sack - SACK enabled * tcp_is_reno - No SACK - * tcp_is_fack - FACK enabled, implies SACK enabled */ static inline int tcp_is_sack(const struct tcp_sock *tp) { @@ -1078,16 +1076,6 @@ static inline bool tcp_is_reno(const struct tcp_sock *tp) return !tcp_is_sack(tp); } -static inline bool tcp_is_fack(const struct tcp_sock *tp) -{ - return tp->rx_opt.sack_ok & TCP_FACK_ENABLED; -} - -static inline void tcp_enable_fack(struct tcp_sock *tp) -{ - tp->rx_opt.sack_ok |= TCP_FACK_ENABLED; -} - static inline unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 0d941cdd8e8c..33a70ece462f 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -191,7 +191,6 @@ enum LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ - LINUX_MIB_TCPFACKREORDER, /* TCPFACKReorder */ LINUX_MIB_TCPSACKREORDER, /* TCPSACKReorder */ LINUX_MIB_TCPRENOREORDER, /* TCPRenoReorder */ LINUX_MIB_TCPTSREORDER, /* TCPTSReorder */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 127153f1ed8a..9f37c4727861 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -212,7 +212,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), - SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER), SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER), SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER), SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bc71a27d5ad9..337555076043 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2509,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk, return -EINVAL; tp->rx_opt.sack_ok |= TCP_SACK_SEEN; - if 
(sock_net(sk)->ipv4.sysctl_tcp_fack) - tcp_enable_fack(tp); break; case TCPOPT_TIMESTAMP: if (opt.opt_val != 0) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9ceaa1fdc3ab..487e181cff86 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -842,18 +842,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -/* - * Packet counting of FACK is based on in-order assumptions, therefore TCP - * disables it when reordering is detected - */ -void tcp_disable_fack(struct tcp_sock *tp) -{ - /* RFC3517 uses different metric in lost marker => reset on change */ - if (tcp_is_fack(tp)) - tp->lost_skb_hint = NULL; - tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; -} - /* Take a notice that peer is sending D-SACKs */ static void tcp_dsack_seen(struct tcp_sock *tp) { @@ -881,7 +869,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif - tcp_disable_fack(tp); } tp->rack.reord = 1; @@ -891,8 +878,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric, mib_idx = LINUX_MIB_TCPTSREORDER; else if (tcp_is_reno(tp)) mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; else mib_idx = LINUX_MIB_TCPSACKREORDER; @@ -970,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) * 3. Loss detection event of two flavors: * A. Scoreboard estimator decided the packet is lost. * A'. Reno "three dupacks" marks head of queue lost. - * A''. Its FACK modification, head until snd.fack is lost. * B. SACK arrives sacking SND.NXT at the moment, when the * segment was retransmitted. * 4. D-SACK added new rule: D-SACK changes any tag to S. @@ -1248,7 +1232,7 @@ static u8 tcp_sacktag_one(struct sock *sk, fack_count += pcount; /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ - if (!tcp_is_fack(tp) && tp->lost_skb_hint && + if (tp->lost_skb_hint && before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) tp->lost_cnt_hint += pcount; @@ -2051,10 +2035,6 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp) * counter when SACK is enabled (without SACK, sacked_out is used for * that purpose). * - * Instead, with FACK TCP uses fackets_out that includes both SACKed - * segments up to the highest received SACK block so far and holes in - * between them. - * * With reordering, holes may still be in flight, so RFC3517 recovery * uses pure sacked_out (total number of SACKed segments) even though * it violates the RFC that uses duplicate ACKs, often these are equal @@ -2064,10 +2044,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp) */ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) { - return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; + return tp->sacked_out + 1; } -/* Linux NewReno/SACK/FACK/ECN state machine. +/* Linux NewReno/SACK/ECN state machine. * -------------------------------------- * * "Open" Normal state, no dubious events, fast path. @@ -2132,16 +2112,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * dynamically measured and adjusted. This is implemented in * tcp_rack_mark_lost. * - * FACK (Disabled by default. Subsumbed by RACK): - * It is the simplest heuristics. As soon as we decided - * that something is lost, we decide that _all_ not SACKed - * packets until the most forward SACK are lost. I.e. - * lost_out = fackets_out - sacked_out and left_out = fackets_out. 
- * It is absolutely correct estimate, if network does not reorder - * packets. And it loses any connection to reality when reordering - * takes place. We use FACK by default until reordering - * is suspected on the path to this destination. - * * If the receiver does not support SACK: * * NewReno (RFC6582): in Recovery we assume that one segment @@ -2190,7 +2160,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) } /* Detect loss in event "A" above by marking head of queue up as lost. - * For FACK or non-SACK(Reno) senders, the first "packets" number of segments + * For non-SACK(Reno) senders, the first "packets" number of segments * are considered lost. For RFC3517 SACK, a segment is considered lost if it * has at least tp->reordering SACKed seqments above it; "packets" refers to * the maximum SACKed segments to pass before reaching this limit. @@ -2226,12 +2196,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) break; oldcnt = cnt; - if (tcp_is_fack(tp) || tcp_is_reno(tp) || + if (tcp_is_reno(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) cnt += tcp_skb_pcount(skb); if (cnt > packets) { - if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + if (tcp_is_sack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || (oldcnt >= packets)) break; @@ -2262,11 +2232,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) if (tcp_is_reno(tp)) { tcp_mark_head_lost(sk, 1, 1); - } else if (tcp_is_fack(tp)) { - int lost = tp->fackets_out - tp->reordering; - if (lost <= 0) - lost = 1; - tcp_mark_head_lost(sk, lost, 0); } else { int sacked_upto = tp->sacked_out - tp->reordering; if (sacked_upto >= 0) @@ -3199,8 +3164,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (reord < prior_fackets && reord <= tp->fackets_out) tcp_update_reordering(sk, tp->fackets_out - reord, 0); - delta = tcp_is_fack(tp) ? 
pkts_acked : - prior_sacked - tp->sacked_out; + delta = prior_sacked - tp->sacked_out; tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } @@ -5708,9 +5672,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack) - tcp_enable_fack(tp); - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9d5ddebfd831..7097f92d16e5 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -470,10 +470,8 @@ void tcp_init_metrics(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } val = tcp_metric_get(tm, TCP_METRIC_REORDERING); - if (val && tp->reordering != val) { - tcp_disable_fack(tp); + if (val && tp->reordering != val) tp->reordering = val; - } crtt = tcp_metric_get(tm, TCP_METRIC_RTT); rcu_read_unlock(); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4bb86580decd..326c9282bf94 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -509,10 +509,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; - if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { - if (sock_net(sk)->ipv4.sysctl_tcp_fack) - tcp_enable_fack(newtp); - } + newtp->rx_opt.sack_ok = ireq->sack_ok; newtp->window_clamp = req->rsk_window_clamp; newtp->rcv_ssthresh = req->rsk_rcv_wnd; newtp->rcv_wnd = req->rsk_rcv_wnd; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9b98d35aa0d8..094c429b4401 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1257,7 +1257,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de if (tp->lost_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) tp->lost_cnt_hint -= decr; tcp_verify_left_out(tp); @@ -2961,9 +2961,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) * retransmitted data is acknowledged. It tries to continue * resending the rest of the retransmit queue, until either * we've sent it all or the congestion window limit is reached. - * If doing SACK, the first ACK which comes back for a timeout - * based retransmit packet might feed us FACK information again. - * If so, we use it to avoid unnecessarily retransmissions. */ void tcp_xmit_retransmit_queue(struct sock *sk) { From 737ff314563ca27f044f9a3a041e9d42491ef7ce Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 8 Nov 2017 13:01:27 -0800 Subject: [PATCH 2056/2177] tcp: use sequence distance to detect reordering Replace the reordering distance measurement in packet units with a sequence-based approach. Previously it tracked the number of "packets" toward the forward ACK (i.e. highest sacked sequence) in a state variable "fackets_out". Precisely measuring the reordering degree by packet distance has little benefit, as the degree constantly changes with factors like path, load, and congestion window. It is also complicated and prone to arcane bugs. This patch replaces it with a sequence-based approach that is much simpler. Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Soheil Hassas Yeganeh Reviewed-by: Priyaranjan Jha Signed-off-by: David S.
Miller --- include/linux/tcp.h | 1 - net/ipv4/tcp.c | 1 - net/ipv4/tcp_input.c | 155 +++++++++++++++++---------------------- net/ipv4/tcp_minisocks.c | 1 - net/ipv4/tcp_output.c | 17 ----- 5 files changed, 68 insertions(+), 107 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9574936fe041..df5d97a85e1a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -293,7 +293,6 @@ struct tcp_sock { u32 pushed_seq; /* Last pushed seq, required to talk to windows */ u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ - u32 fackets_out; /* FACK'd packets */ struct hrtimer pacing_timer; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 337555076043..bf97317e6c97 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2977,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; - info->tcpi_fackets = tp->fackets_out; now = tcp_jiffies32; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 487e181cff86..94d729be42a9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -849,39 +849,39 @@ static void tcp_dsack_seen(struct tcp_sock *tp) tp->rack.dsack_seen = 1; } -static void tcp_update_reordering(struct sock *sk, const int metric, - const int ts) +/* It's reordering when higher sequence was delivered (i.e. sacked) before + * some lower never-retransmitted sequence ("low_seq"). The maximum reordering + * distance is approximated in full-mss packet distance ("reordering"). + */ +static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, + const int ts) { struct tcp_sock *tp = tcp_sk(sk); - int mib_idx; + const u32 mss = tp->mss_cache; + u32 fack, metric; - if (WARN_ON_ONCE(metric < 0)) + fack = tcp_highest_sack_seq(tp); + if (!before(low_seq, fack)) return; - if (metric > tp->reordering) { - tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric); - + metric = fack - low_seq; + if ((metric > tp->reordering * mss) && mss) { #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, tp->reordering, - tp->fackets_out, + 0, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif + tp->reordering = min_t(u32, (metric + mss - 1) / mss, + sock_net(sk)->ipv4.sysctl_tcp_max_reordering); } tp->rack.reord = 1; - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), + ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); } /* This must be called before lost_out is incremented */ @@ -1097,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } struct tcp_sacktag_state { - int reord; - int fack_count; + u32 reord; /* Timestamps for earliest and latest never-retransmitted segment * that was SACKed. RTO needs the earliest RTT to stay conservative, * but congestion control should still get an accurate delay signal. @@ -1174,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk, u64 xmit_time) { struct tcp_sock *tp = tcp_sk(sk); - int fack_count = state->fack_count; /* Account D-SACK for retransmitted packet. 
*/ if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) tp->undo_retrans--; - if (sacked & TCPCB_SACKED_ACKED) - state->reord = min(fack_count, state->reord); + if ((sacked & TCPCB_SACKED_ACKED) && + before(start_seq, state->reord)) + state->reord = start_seq; } /* Nothing to do; acked frame is about to be dropped (was ACKed). */ @@ -1208,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk, * which was in hole. It is reordering. */ if (before(start_seq, - tcp_highest_sack_seq(tp))) - state->reord = min(fack_count, - state->reord); + tcp_highest_sack_seq(tp)) && + before(start_seq, state->reord)) + state->reord = start_seq; + if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; if (state->first_sackt == 0) @@ -1229,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk, tp->sacked_out += pcount; tp->delivered += pcount; /* Out-of-order packets delivered */ - fack_count += pcount; - /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ if (tp->lost_skb_hint && before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) tp->lost_cnt_hint += pcount; - - if (fack_count > tp->fackets_out) - tp->fackets_out = fack_count; } /* D-SACK. We can detect redundant retransmission in S|R and plain R @@ -1484,7 +1479,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, } out: - state->fack_count += pcount; return prev; noop: @@ -1563,8 +1557,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, tcp_highest_sack_seq(tp))) tcp_advance_highest_sack(sk, skb); } - - state->fack_count += tcp_skb_pcount(skb); } return skb; } @@ -1575,7 +1567,6 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, { struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node; struct sk_buff *skb; - int unack_bytes; while (*p) { parent = *p; @@ -1588,12 +1579,6 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, p = &parent->rb_right; continue; } - - state->fack_count = 0; - unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una; - if (state->mss_now && unack_bytes > 0) - state->fack_count = unack_bytes / state->mss_now; - return skb; } return NULL; @@ -1651,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, int first_sack_index; state->flag = 0; - state->reord = tp->packets_out; + state->reord = tp->snd_nxt; - if (!tp->sacked_out) { - if (WARN_ON(tp->fackets_out)) - tp->fackets_out = 0; + if (!tp->sacked_out) tcp_highest_sack_reset(sk); - } found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); @@ -1729,7 +1711,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } state->mss_now = tcp_current_mss(sk); - state->fack_count = 0; skb = NULL; i = 0; @@ -1787,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state->fack_count = tp->fackets_out; cache++; goto walk; } @@ -1802,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state->fack_count = tp->fackets_out; } skb = tcp_sacktag_skip(skb, sk, state, start_seq); @@ -1822,9 +1801,8 @@ advance_sp: for (j = 0; j < used_sacks; j++) tp->recv_sack_cache[i++] = sp[j]; - if ((state->reord < tp->fackets_out) && - ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) - tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); + if (inet_csk(sk)->icsk_ca_state != 
TCP_CA_Loss || tp->undo_marker) + tcp_check_sack_reordering(sk, state->reord, 0); tcp_verify_left_out(tp); out: @@ -1862,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp) static void tcp_check_reno_reordering(struct sock *sk, const int addend) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_limit_reno_sacked(tp)) - tcp_update_reordering(sk, tp->packets_out + addend, 0); + + if (!tcp_limit_reno_sacked(tp)) + return; + + tp->reordering = min_t(u32, tp->packets_out + addend, + sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } /* Emulate SACKs for SACKless connection: account for a new dupack. */ @@ -1909,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp) tp->lost_out = 0; tp->undo_marker = 0; tp->undo_retrans = -1; - tp->fackets_out = 0; tp->sacked_out = 0; } @@ -1959,7 +1941,6 @@ void tcp_enter_loss(struct sock *sk) if (is_reneg) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; - tp->fackets_out = 0; } tcp_clear_all_retrans_hints(tp); @@ -2026,11 +2007,6 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag) return false; } -static inline int tcp_fackets_out(const struct tcp_sock *tp) -{ - return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; -} - /* Heurestics to calculate number of duplicate ACKs. There's no dupACKs * counter when SACK is enabled (without SACK, sacked_out is used for * that purpose). @@ -2701,15 +2677,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, } /* Undo during fast recovery after partial ACK. */ -static bool tcp_try_undo_partial(struct sock *sk, const int acked) +static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && tcp_packet_delayed(tp)) { /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. + * packet, rather than with a retransmit. Check reordering. */ - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + tcp_check_sack_reordering(sk, prior_snd_una, 1); /* We are getting evidence that the reordering degree is higher * than we realized. If there are no retransmits out then we @@ -2745,6 +2721,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) } } +static bool tcp_force_fast_retransmit(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + return after(tcp_highest_sack_seq(tp), + tp->snd_una + tp->reordering * tp->mss_cache); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2757,19 +2741,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, const int acked, +static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, bool is_dupack, int *ack_flag, int *rexmit) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int fast_rexmit = 0, flag = *ack_flag; bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && - (tcp_fackets_out(tp) > tp->reordering)); + tcp_force_fast_retransmit(sk)); if (!tp->packets_out && tp->sacked_out) tp->sacked_out = 0; - if (!tp->sacked_out && tp->fackets_out) - tp->fackets_out = 0; /* Now state machine starts. * A. 
ECE, hence prohibit cwnd undoing, the reduction is required. */ @@ -2816,11 +2798,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { - if (tcp_try_undo_partial(sk, acked)) + if (tcp_try_undo_partial(sk, prior_snd_una)) return; /* Partial ACK arrived. Force fast retransmit. */ do_lost = tcp_is_reno(tp) || - tcp_fackets_out(tp) > tp->reordering; + tcp_force_fast_retransmit(sk); } if (tcp_try_undo_dsack(sk)) { tcp_try_keep_open(sk); @@ -3030,15 +3012,15 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, int *acked, +static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, + u32 prior_snd_una, struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); u64 first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; - u32 reord = tp->packets_out; + u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */ struct sk_buff *skb, *next; bool fully_acked = true; long sack_rtt_us = -1L; @@ -3053,6 +3035,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + const u32 start_seq = scb->seq; u8 sacked = scb->sacked; u32 acked_pcount; @@ -3083,7 +3066,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, first_ackt = last_ackt; last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; - reord = min(pkts_acked, reord); + if (before(start_seq, reord)) + reord = start_seq; if (!after(scb->end_seq, tp->high_seq)) flag |= FLAG_ORIG_SACK_ACKED; } @@ -3161,15 +3145,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, int delta; /* Non-retransmitted hole got filled? 
That's reordering */ - if (reord < prior_fackets && reord <= tp->fackets_out) - tcp_update_reordering(sk, tp->fackets_out - reord, 0); + if (before(reord, prior_fack)) + tcp_check_sack_reordering(sk, reord, 0); delta = prior_sacked - tp->sacked_out; tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } - - tp->fackets_out -= min(pkts_acked, tp->fackets_out); - } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent @@ -3210,7 +3191,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } } #endif - *acked = pkts_acked; return flag; } @@ -3519,12 +3499,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_fackets; int prior_packets = tp->packets_out; u32 delivered = tp->delivered; u32 lost = tp->lost; - int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + u32 prior_fack; sack_state.first_sackt = 0; sack_state.rate = &rs; @@ -3556,7 +3535,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) icsk->icsk_retransmits = 0; } - prior_fackets = tp->fackets_out; + prior_fack = tcp_highest_sack_seq(tp); rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet @@ -3612,8 +3591,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state); + flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state); tcp_rack_update_reo_wnd(sk, &rs); @@ -3625,7 +3603,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) @@ -3641,7 +3620,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. 
@@ -3663,7 +3643,8 @@ old_ack: if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); tcp_xmit_recovery(sk, rexmit); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 326c9282bf94..e36eff0403f4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -475,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->packets_out = 0; newtp->retrans_out = 0; newtp->sacked_out = 0; - newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; newtp->tlp_high_seq = 0; newtp->lsndtime = tcp_jiffies32; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 094c429b4401..0256f7a41041 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1218,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) } } -/* When a modification to fackets out becomes necessary, we need to check - * skb is counted to fackets_out or not. - */ -static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb, - int decr) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (!tp->sacked_out || tcp_is_reno(tp)) - return; - - if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq)) - tp->fackets_out -= decr; -} - /* Pcount in the middle of the write queue got changed, we need to do various * tweaks to fix counters */ @@ -1253,8 +1238,6 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de if (tcp_is_reno(tp) && decr > 0) tp->sacked_out -= min_t(u32, tp->sacked_out, decr); - tcp_adjust_fackets_out(sk, skb, decr); - if (tp->lost_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) From 765924e362d12f87786060b98a49abd91e11ea96 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 9 Nov 2017 08:29:52 +0900 Subject: [PATCH 2057/2177] l2tp: don't close sessions in l2tp_tunnel_destruct() Sessions are already removed by the proto ->destroy() handlers, and since commit f3c66d4e144a ("l2tp: prevent creation of sessions on terminated tunnels"), we're guaranteed that no new session can be created afterwards. Furthermore, l2tp_tunnel_closeall() can sleep when there are sessions left to close. So we really shouldn't call it in a ->sk_destruct() handler, as it can be used from atomic context. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7c8d1eb757a5..350fcd39ebd8 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1246,8 +1246,6 @@ static void l2tp_tunnel_destruct(struct sock *sk) list_del_rcu(&tunnel->list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - l2tp_tunnel_closeall(tunnel); - tunnel->sock = NULL; l2tp_tunnel_dec_refcount(tunnel); From 2d919149686e2da001e58e8a2ed49a0200c9db78 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Nov 2017 21:38:28 -0600 Subject: [PATCH 2058/2177] net: decnet: dn_table: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 115106 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/decnet/dn_table.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 08667f68e601..f0710b5d037d 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -156,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz) default: printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", old_divisor); + /* fall through */ case 256: new_divisor = 1024; new_hashmask = 0x3FF; From 75d28f461ed700fe527312cc0a8b5de86d1c09c1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Nov 2017 21:44:38 -0600 Subject: [PATCH 2059/2177] net: 8390: pcnet_cs: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 114891 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/pcnet_cs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index eae9827035dc..bcad4a7fac9f 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -1107,6 +1107,7 @@ static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) switch (cmd) { case SIOCGMIIPHY: data->phy_id = info->phy_id; + /* fall through */ case SIOCGMIIREG: /* Read MII PHY register. */ data->val_out = mdio_read(mii_addr, data->phy_id, data->reg_num & 0x1f); return 0; From 0aa3b413f68dfe4c883c8164d152a1a03ce789bf Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Nov 2017 21:49:33 -0600 Subject: [PATCH 2060/2177] net: 3com: 3c574_cs: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 114888 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c574_cs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 48bc7fa0258c..3044a6f35f04 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -1046,6 +1046,7 @@ static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) switch(cmd) { case SIOCGMIIPHY: /* Get the address of the PHY in use. */ data->phy_id = phy; + /* fall through */ case SIOCGMIIREG: /* Read the specified MII register. */ { int saved_window; From a3e2ecbae0198e4cb9a63db5ac3131cbd58ec2b2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Nov 2017 22:25:08 -0600 Subject: [PATCH 2061/2177] net: wan: x25_asy: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 114928 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- drivers/net/wan/x25_asy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 40ee80c03c94..74c06a5f586f 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -324,6 +324,7 @@ static netdev_tx_t x25_asy_xmit(struct sk_buff *skb, if (err != LAPB_OK) netdev_err(dev, "lapb_disconnect_request error: %d\n", err); + /* fall through */ default: kfree_skb(skb); return NETDEV_TX_OK; From 98b07e3ed019cbea5ad049df3892957d5fa90b9e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 9 Nov 2017 07:52:15 +0000 Subject: [PATCH 2062/2177] qlge: remove duplicated assignment to mbcp The assignment to mbcp is identical to the initialized value assigned to mbcp at declaration time a few lines earlier, hence we can remove the second redundant assignment. Cleans up clang warning: drivers/net/ethernet/qlogic/qlge/qlge_mpi.c:209:22: warning: Value stored to 'mbcp' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_mpi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c index 384c8bc874f3..4be65d6761b3 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c @@ -213,7 +213,6 @@ static int ql_idc_req_aen(struct ql_adapter *qdev) /* Get the status data and start up a thread to * handle the request. */ - mbcp = &qdev->idc_mbc; mbcp->out_count = 4; status = ql_get_mb_sts(qdev, mbcp); if (status) { From 492d070f2495d4b200124ed44a35ab3d8f74ac93 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 9 Nov 2017 08:01:22 +0000 Subject: [PATCH 2063/2177] net: sfc: remove redundant variable start Variable start is assigned but never read, hence it is redundant and can be removed. Cleans up clang warning: drivers/net/ethernet/sfc/ptp.c:655:2: warning: Value stored to 'start' is never read Signed-off-by: Colin Ian King Acked-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ptp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 60cdb97f58e2..4f54245df0ec 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -648,11 +648,9 @@ static void efx_ptp_send_times(struct efx_nic *efx, struct pps_event_time now; struct timespec64 limit; struct efx_ptp_data *ptp = efx->ptp_data; - struct timespec64 start; int *mc_running = ptp->start.addr; pps_get_ts(&now); - start = now.ts_real; limit = now.ts_real; timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS); From 5290ada4a2e6a9dec00e849a49af8f7bf7462449 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Thu, 9 Nov 2017 00:03:15 -0800 Subject: [PATCH 2064/2177] sock: Remove the global prot_inuse counter. The per-cpu counter for init_net is prepared in core_initcall. The patches 7d720c3e ("percpu: add __percpu sparse annotations to net") and d6d9ca0fe ("net: this_cpu_xxx conversions") optimized the routines. Then remove the old counter. Cc: Pavel Emelyanov Signed-off-by: Tonghao Zhang Signed-off-by: David S.
Miller --- net/core/sock.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index c59bcf90d905..57bbd6040eb6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3044,7 +3044,6 @@ struct prot_inuse { static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); -#ifdef CONFIG_NET_NS void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); @@ -3088,27 +3087,6 @@ static __init int net_inuse_init(void) } core_initcall(net_inuse_init); -#else -static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); - -void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) -{ - __this_cpu_add(prot_inuse.val[prot->inuse_idx], val); -} -EXPORT_SYMBOL_GPL(sock_prot_inuse_add); - -int sock_prot_inuse_get(struct net *net, struct proto *prot) -{ - int cpu, idx = prot->inuse_idx; - int res = 0; - - for_each_possible_cpu(cpu) - res += per_cpu(prot_inuse, cpu).val[idx]; - - return res >= 0 ? res : 0; -} -EXPORT_SYMBOL_GPL(sock_prot_inuse_get); -#endif static void assign_proto_idx(struct proto *prot) { From 86dabda426ac33f4782854ef71f0c9adafd24134 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 9 Nov 2017 13:46:28 +0300 Subject: [PATCH 2065/2177] net: thunderbolt: Clear finished Tx frame bus address in tbnet_tx_callback() When Thunderbolt network interface is disabled or when the cable is unplugged the driver releases all allocated buffers by calling tbnet_free_buffers() for each ring. This function then calls dma_unmap_page() for each buffer it finds where bus address is non-zero. Now, we only clear this bus address when the Tx buffer is sent to the hardware so it is possible that the function finds an entry that has already been unmapped. Enabling DMA-API debugging catches this as well: thunderbolt 0000:06:00.0: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x0000000068321000] [size=4096 bytes] Fix this by clearing the bus address of a Tx frame right after we have unmapped the buffer. Signed-off-by: Mika Westerberg Signed-off-by: David S. Miller --- drivers/net/thunderbolt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 435854688a7a..228d4aa6d9ae 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -536,6 +536,7 @@ static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame, dma_unmap_page(dma_dev, tf->frame.buffer_phy, tbnet_frame_size(tf), DMA_TO_DEVICE); + tf->frame.buffer_phy = 0; /* Return buffer to the ring */ net->tx_ring.prod++; From 3d67a5075295982fb055be6a5d5c78b0e0be3591 Mon Sep 17 00:00:00 2001 From: Aleksey Makarov Date: Thu, 9 Nov 2017 14:58:57 +0300 Subject: [PATCH 2066/2177] net: thunderx: fix double free error This patch fixes an error in memory allocation/freeing in ThunderX PF driver. I moved the allocation to the probe() function and made it managed. >From the Colin's email: While running static analysis on linux-next with CoverityScan I found 3 double free errors in the Cavium thunder driver. The issue occurs on the err_disable_device: label of function nic_probe when nic_free_lmacmem(nic) is called and a double free occurs on nic->duplex, nic->link and nic->speed. This occurs when nic_init_hw() fails: /* Initialize hardware */ err = nic_init_hw(nic); if (err) goto err_release_regions; nic_init_hw() calls nic_get_hw_info() and this calls nic_free_lmacmem() if any of the allocations fail. 
This free'ing occurs again by the call to nic_free_lmacmem() on the err_release_regions exit path in nic_probe(). Reported-by: Colin Ian King Signed-off-by: Aleksey Makarov Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nic_main.c | 82 +++++++------------ 1 file changed, 30 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 988c06a28e5e..8f1dd55b3e08 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -361,17 +361,8 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) } } -static void nic_free_lmacmem(struct nicpf *nic) +static void nic_get_hw_info(struct nicpf *nic) { - kfree(nic->vf_lmac_map); - kfree(nic->link); - kfree(nic->duplex); - kfree(nic->speed); -} - -static int nic_get_hw_info(struct nicpf *nic) -{ - u8 max_lmac; u16 sdevid; struct hw_info *hw = nic->hw; @@ -419,41 +410,16 @@ static int nic_get_hw_info(struct nicpf *nic) break; } hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev); - - /* Allocate memory for LMAC tracking elements */ - max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX; - nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->vf_lmac_map) - goto error; - nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->link) - goto error; - nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->duplex) - goto error; - nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL); - if (!nic->speed) - goto error; - return 0; - -error: - nic_free_lmacmem(nic); - return -ENOMEM; } #define BGX0_BLOCK 8 #define BGX1_BLOCK 9 -static int nic_init_hw(struct nicpf *nic) +static void nic_init_hw(struct nicpf *nic) { - int i, err; + int i; u64 cqm_cfg; - /* Get HW capability info */ - err = nic_get_hw_info(nic); - if (err) - return err; - /* Enable NIC HW block */ nic_reg_write(nic, NIC_PF_CFG, 0x3); @@ -498,8 +464,6 @@ static int nic_init_hw(struct nicpf *nic) cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG); if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL) nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL); - - return 0; } /* Channel parse index configuration */ @@ -1269,6 +1233,7 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; struct nicpf *nic; + u8 max_lmac; int err; BUILD_BUG_ON(sizeof(union nic_mbx) > 16); @@ -1278,10 +1243,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL); - if (!nic->hw) { - devm_kfree(dev, nic); + if (!nic->hw) return -ENOMEM; - } pci_set_drvdata(pdev, nic); @@ -1322,11 +1285,33 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->node = nic_get_node_id(pdev); - /* Initialize hardware */ - err = nic_init_hw(nic); - if (err) + /* Get HW capability info */ + nic_get_hw_info(nic); + + /* Allocate memory for LMAC tracking elements */ + err = -ENOMEM; + max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX; + + nic->vf_lmac_map = devm_kmalloc_array(dev, max_lmac, sizeof(u8), + GFP_KERNEL); + if (!nic->vf_lmac_map) goto err_release_regions; + nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->link) + goto err_release_regions; + + nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->duplex) + goto err_release_regions; + + nic->speed = 
devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL); + if (!nic->speed) + goto err_release_regions; + + /* Initialize hardware */ + nic_init_hw(nic); + nic_set_lmac_vf_mapping(nic); /* Register interrupts */ @@ -1360,9 +1345,6 @@ err_unregister_interrupts: err_release_regions: pci_release_regions(pdev); err_disable_device: - nic_free_lmacmem(nic); - devm_kfree(dev, nic->hw); - devm_kfree(dev, nic); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; @@ -1384,10 +1366,6 @@ static void nic_remove(struct pci_dev *pdev) nic_unregister_interrupts(nic); pci_release_regions(pdev); - nic_free_lmacmem(nic); - devm_kfree(&pdev->dev, nic->hw); - devm_kfree(&pdev->dev, nic); - pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } From ca29fd7cce5a6444d57fb86517589a1a31c759e1 Mon Sep 17 00:00:00 2001 From: Keefe Liu Date: Thu, 9 Nov 2017 20:09:31 +0800 Subject: [PATCH 2067/2177] ipvlan: fix ipv6 outbound device When processing the outbound packet of ipv6, we should assign the master device as the output device rather than as the input device. Signed-off-by: Keefe Liu Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 034ae4c57196..f2a7e929316e 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -409,7 +409,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) struct dst_entry *dst; int err, ret = NET_XMIT_DROP; struct flowi6 fl6 = { - .flowi6_iif = dev->ifindex, + .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowi6_flags = FLOWI_FLAG_ANYSRC, From 3ec26c7944a405e9dbd21b31ff46a3b4e6095adb Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 9 Nov 2017 18:09:26 +0100 Subject: [PATCH 2068/2177] bindings: net: stmmac: correctify note about LPI interrupt There are two different combined signals for various interrupt events: In EQOS-CORE and EQOS-MTL configurations, mci_intr_o is the interrupt signal. In EQOS-DMA, EQOS-AHB and EQOS-AXI configurations, these interrupt events are combined with the events in the DMA on the sbd_intr_o signal. Depending on configuration, the device tree irq "macirq" will refer to either mci_intr_o or sbd_intr_o. The databook states: "The MAC generates the LPI interrupt when the Tx or Rx side enters or exits the LPI state. The interrupt mci_intr_o (sbd_intr_o in certain configurations) is asserted when the LPI interrupt status is set. When the MAC exits the Rx LPI state, then in addition to the mci_intr_o (sbd_intr_o in certain configurations), the sideband signal lpi_intr_o is asserted. If you do not want to gate-off the application clock during the Rx LPI state, you can leave the lpi_intr_o signal unconnected and use the mci_intr_o (sbd_intr_o in certain configurations) signal to detect Rx LPI exit." Since the "macirq" is always raised when Tx or Rx enters/exits the LPI state, "eth_lpi" must therefore refer to lpi_intr_o, which is only raised when Rx exits the LPI state. Update the DT binding description to reflect reality. Signed-off-by: Niklas Cassel Acked-by: Alexandre TORGUE Signed-off-by: David S.
Miller --- Documentation/devicetree/bindings/net/stmmac.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index c3a7be6615c5..3a28a5d8857d 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -12,7 +12,7 @@ Required properties: Valid interrupt names are: - "macirq" (combined signal for various interrupt events) - "eth_wake_irq" (the interrupt to manage the remote wake-up packet detection) - - "eth_lpi" (the interrupt that occurs when Tx or Rx enters/exits LPI state) + - "eth_lpi" (the interrupt that occurs when Rx exits the LPI state) - phy-mode: See ethernet.txt file in the same directory. - snps,reset-gpio gpio number for phy reset. - snps,reset-active-low boolean flag to indicate if phy reset is active low. From a42c8e33f2044d6b56f167b5506c7e09e5b702c2 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 22:29:51 +0100 Subject: [PATCH 2069/2177] net: dsa: Fix SWITCHDEV_ATTR_ID_PORT_PARENT_ID SWITCHDEV_ATTR_ID_PORT_PARENT_ID is used by the software bridge when determining which ports to flood a packet out. If the packet originated from a switch, it assumes the switch has already flooded the packet out the switches ports, so the bridge should not flood the packet itself out switch ports. Ports on the same switch are expected to return the same parent ID when SWITCHDEV_ATTR_ID_PORT_PARENT_ID is called. DSA gets this wrong with clusters of switches. As far as the software bridge is concerned, the cluster is all one switch. A packet from any switch in the cluster can be assumed to have been flooded as needed out of all ports of the cluster, not just the switch it originated from. Hence all ports of a cluster should return the same parent. The old implementation did not, each switch in the cluster had its own ID. Also wrong was that the ID was not unique if multiple DSA instances are in operation. Use the tree ID as the parent ID, which is the same for all switches in a cluster and unique across switch clusters. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 35715fda84b2..d6e7a642493b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -355,11 +355,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev, { struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(ds->index); - memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); + attr->u.ppid.id_len = sizeof(dst->index); + memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = 0; From 13edbdb6ed8b829ab3198a8c7b978c37184317b5 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 22:29:52 +0100 Subject: [PATCH 2070/2177] net: dsa: {e}dsa: set offload_fwd_mark on received packets The software bridge needs to know if a packet has already been bridged by hardware offload to ports in the same hardware offload, in order that it does not re-flood them, causing duplicates. This is particularly true for broadcast and multicast traffic which the host has requested. 
By setting offload_fwd_mark in the skb the bridge will only flood to ports in other offloads and other netifs. Set this flag in the DSA and EDSA tag driver. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/tag_dsa.c | 2 ++ net/dsa/tag_edsa.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index dbbcdafed8c3..cd13cfc542ce 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -141,6 +141,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } + skb->offload_fwd_mark = 1; + return skb; } diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index f38a626b3a05..4083326b806e 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -160,6 +160,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } + skb->offload_fwd_mark = 1; + return skb; } From cd88646994bc0b537e0e7c89c53e674b58ba22b0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 22:29:53 +0100 Subject: [PATCH 2071/2177] net: dsa: mv88e6xxx: Fixed port netdev check for VLANs Having the same VLAN on multiple bridges is currently unsupported as an offload. mv88e6xxx_port_check_hw_vlan() is used to ensure that a VLAN is not on multiple bridges when adding a VLAN range to a port. It loops the ports and checks to see if there are ports in a different bridge with the same VLAN. While walking all switch ports, the code was checking if the new port has a netdev slave attached to it. If not, skip checking the port being walked. This seems like a typ0. If the new port does not have a slave, how has a VLAN been added to it in the first place, requiring this check be performed at all? More likely, we should be checking if the port being walked has a slave. Without the port having a slave, it cannot have a VLAN on it, so there is no need to check further for that particular port. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 09a66d4d9492..420621c759e4 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1137,7 +1137,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) continue; - if (!ds->ports[port].slave) + if (!ds->ports[i].slave) continue; if (vlan.member[i] == From 743fcc283edd5d3a9d61abf96d4443329c826d28 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 22:29:54 +0100 Subject: [PATCH 2072/2177] net: dsa: mv88e6xxx: Print offending port when vlan check fails When testing if a VLAN is one more than one bridge, we print an error message that the VLAN is already in use somewhere else. Print both the new port which would like the VLAN, and the port which already has it, to aid debugging. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 420621c759e4..0a4a740ebea6 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1151,8 +1151,8 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (!dsa_to_port(ds, i)->bridge_dev) continue; - dev_err(ds->dev, "p%d: hw VLAN %d already used by %s\n", - port, vlan.vid, + dev_err(ds->dev, "p%d: hw VLAN %d already used by port %d in %s\n", + port, vlan.vid, i, netdev_name(dsa_to_port(ds, i)->bridge_dev)); err = -EOPNOTSUPP; goto unlock; From a4c93ae1bb675c875c5bee2fa817680cd6588240 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 22:29:55 +0100 Subject: [PATCH 2073/2177] net: dsa: mv88e6xxx: Move mv88e6xxx_port_db_load_purge() This function is going to be needed by a soon to be added new function. Move it earlier so we can avoid a forward declaration. No functional changes. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 88 ++++++++++++++++---------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0a4a740ebea6..263b714a5ae3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1208,6 +1208,50 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, return 0; } +static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + const unsigned char *addr, u16 vid, + u8 state) +{ + struct mv88e6xxx_vtu_entry vlan; + struct mv88e6xxx_atu_entry entry; + int err; + + /* Null VLAN ID corresponds to the port private database */ + if (vid == 0) + err = mv88e6xxx_port_get_fid(chip, port, &vlan.fid); + else + err = mv88e6xxx_vtu_get(chip, vid, &vlan, false); + if (err) + return err; + + entry.state = MV88E6XXX_G1_ATU_DATA_STATE_UNUSED; + ether_addr_copy(entry.mac, addr); + eth_addr_dec(entry.mac); + + err = mv88e6xxx_g1_atu_getnext(chip, vlan.fid, &entry); + if (err) + return err; + + /* Initialize a fresh ATU entry if it isn't found */ + if (entry.state == MV88E6XXX_G1_ATU_DATA_STATE_UNUSED || + !ether_addr_equal(entry.mac, addr)) { + memset(&entry, 0, sizeof(entry)); + ether_addr_copy(entry.mac, addr); + } + + /* Purge the ATU entry only if no port is using it anymore */ + if (state == MV88E6XXX_G1_ATU_DATA_STATE_UNUSED) { + entry.portvec &= ~BIT(port); + if (!entry.portvec) + entry.state = MV88E6XXX_G1_ATU_DATA_STATE_UNUSED; + } else { + entry.portvec |= BIT(port); + entry.state = state; + } + + return mv88e6xxx_g1_atu_loadpurge(chip, vlan.fid, &entry); +} + static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port, u16 vid, u8 member) { @@ -1324,50 +1368,6 @@ unlock: return err; } -static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, - const unsigned char *addr, u16 vid, - u8 state) -{ - struct mv88e6xxx_vtu_entry vlan; - struct mv88e6xxx_atu_entry entry; - int err; - - /* Null VLAN ID corresponds to the port private database */ - if (vid == 0) - err = mv88e6xxx_port_get_fid(chip, port, &vlan.fid); - else - err = mv88e6xxx_vtu_get(chip, vid, &vlan, false); - if (err) - return err; - - entry.state = MV88E6XXX_G1_ATU_DATA_STATE_UNUSED; - ether_addr_copy(entry.mac, addr); - eth_addr_dec(entry.mac); - - err = mv88e6xxx_g1_atu_getnext(chip, vlan.fid, &entry); - if (err) - return 
err; - - /* Initialize a fresh ATU entry if it isn't found */ - if (entry.state == MV88E6XXX_G1_ATU_DATA_STATE_UNUSED || - !ether_addr_equal(entry.mac, addr)) { - memset(&entry, 0, sizeof(entry)); - ether_addr_copy(entry.mac, addr); - } - - /* Purge the ATU entry only if no port is using it anymore */ - if (state == MV88E6XXX_G1_ATU_DATA_STATE_UNUSED) { - entry.portvec &= ~BIT(port); - if (!entry.portvec) - entry.state = MV88E6XXX_G1_ATU_DATA_STATE_UNUSED; - } else { - entry.portvec |= BIT(port); - entry.state = state; - } - - return mv88e6xxx_g1_atu_loadpurge(chip, vlan.fid, &entry); -} - static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { From 87fa886e1fb7d08a35be2f39d15225d249daeea2 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Nov 2017 22:29:56 +0100 Subject: [PATCH 2074/2177] net: dsa: mv88e6xxx: Flood broadcast frames in hardware By default, the switch does not flood broadcast frames. Instead the broadcast address is unknown in the ATU, so the frame gets forwarded out the cpu port. The software bridge then floods it back to the individual switch ports which are members of the bridge. Add an ATU entry in the switch so that it floods broadcast frames out ports, rather than have the software bridge do it. Also, send a copy out the cpu port and any dsa ports. Rely on the port vectors to prevent broadcast frames leaking between bridges, and separated ports. Additionally, when a VLAN is added, a new FID is allocated. This represents a new table of ATU entries. A broadcast entry is added to the new FID. With offload_fwd_mark being set, the software bridge will not flood the frames it receives back to the switch. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 33 +++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 263b714a5ae3..6dd5fdfeafcf 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1252,6 +1252,29 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, return mv88e6xxx_g1_atu_loadpurge(chip, vlan.fid, &entry); } +static int mv88e6xxx_port_add_broadcast(struct mv88e6xxx_chip *chip, int port, + u16 vid) +{ + const char broadcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + u8 state = MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC; + + return mv88e6xxx_port_db_load_purge(chip, port, broadcast, vid, state); +} + +static int mv88e6xxx_broadcast_setup(struct mv88e6xxx_chip *chip, u16 vid) +{ + int port; + int err; + + for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { + err = mv88e6xxx_port_add_broadcast(chip, port, vid); + if (err) + return err; + } + + return 0; +} + static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port, u16 vid, u8 member) { @@ -1264,7 +1287,11 @@ static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port, vlan.member[port] = member; - return mv88e6xxx_vtu_loadpurge(chip, &vlan); + err = mv88e6xxx_vtu_loadpurge(chip, &vlan); + if (err) + return err; + + return mv88e6xxx_broadcast_setup(chip, vid); } static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, @@ -2049,6 +2076,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; + err = mv88e6xxx_broadcast_setup(chip, 0); + if (err) + goto unlock; + err = mv88e6xxx_pot_setup(chip); if (err) goto unlock; From 40cff8fca9e3a7f005a44d011141ec83501013be Mon Sep 17 
00:00:00 2001 From: Andrew Lunn Date: Fri, 10 Nov 2017 00:36:41 +0100 Subject: [PATCH 2075/2177] net: dsa: mv88e6xxx: Fix stats histogram mode The statistics histogram mode was not being explicitly initialized on devices other than the 6390 family. Clearing the statistics then overwrote the default setting, setting the histogram to a reserved mode. Explicitly set the histogram mode for all devices. Change the statistics clear into a read/modify/write, and since it is now more complex, move it into global1.c. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 34 +++++++++++++++++----------- drivers/net/dsa/mv88e6xxx/global1.c | 35 +++++++++++++++++++++++++++++ drivers/net/dsa/mv88e6xxx/global1.h | 2 ++ 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 6dd5fdfeafcf..d6c3a22c8789 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2004,19 +2004,7 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) if (err) return err; - /* Clear the statistics counters for all ports */ - err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_STATS_OP, - MV88E6XXX_G1_STATS_OP_BUSY | - MV88E6XXX_G1_STATS_OP_FLUSH_ALL); - if (err) - return err; - - /* Wait for the flush to complete. */ - err = mv88e6xxx_g1_stats_wait(chip); - if (err) - return err; - - return 0; + return mv88e6xxx_g1_stats_clear(chip); } static int mv88e6xxx_setup(struct dsa_switch *ds) @@ -2299,6 +2287,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2326,6 +2315,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = { .port_set_egress_floods = mv88e6185_port_set_egress_floods, .port_set_upstream_port = mv88e6095_port_set_upstream_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2356,6 +2346,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2383,6 +2374,7 @@ static const struct mv88e6xxx_ops mv88e6123_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2413,6 +2405,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, 
.stats_snapshot = mv88e6xxx_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2449,6 +2442,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6390_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, .stats_get_strings = mv88e6320_stats_get_strings, .stats_get_stats = mv88e6390_stats_get_stats, @@ -2481,6 +2475,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2506,6 +2501,7 @@ static const struct mv88e6xxx_ops mv88e6165_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2539,6 +2535,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2574,6 +2571,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2608,6 +2606,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2643,6 +2642,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2670,6 +2670,7 @@ static const struct mv88e6xxx_ops 
mv88e6185_ops = { .port_egress_rate_limiting = mv88e6095_port_egress_rate_limiting, .port_set_upstream_port = mv88e6095_port_set_upstream_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2811,6 +2812,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -2882,6 +2884,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, .stats_get_strings = mv88e6320_stats_get_strings, .stats_get_stats = mv88e6320_stats_get_stats, @@ -2915,6 +2918,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, .stats_get_strings = mv88e6320_stats_get_strings, .stats_get_stats = mv88e6320_stats_get_stats, @@ -2947,6 +2951,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6390_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, .stats_get_strings = mv88e6320_stats_get_strings, .stats_get_stats = mv88e6390_stats_get_stats, @@ -2980,6 +2985,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -3013,6 +3019,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, @@ -3048,6 +3055,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, 
.stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index d76d7c7ea819..b43bd6476632 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -374,6 +374,22 @@ int mv88e6xxx_g1_stats_wait(struct mv88e6xxx_chip *chip) MV88E6XXX_G1_STATS_OP_BUSY); } +int mv88e6095_g1_stats_set_histogram(struct mv88e6xxx_chip *chip) +{ + u16 val; + int err; + + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STATS_OP, &val); + if (err) + return err; + + val |= MV88E6XXX_G1_STATS_OP_HIST_RX_TX; + + err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_STATS_OP, val); + + return err; +} + int mv88e6xxx_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port) { int err; @@ -444,3 +460,22 @@ void mv88e6xxx_g1_stats_read(struct mv88e6xxx_chip *chip, int stat, u32 *val) *val = value | reg; } + +int mv88e6xxx_g1_stats_clear(struct mv88e6xxx_chip *chip) +{ + int err; + u16 val; + + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STATS_OP, &val); + if (err) + return err; + + val |= MV88E6XXX_G1_STATS_OP_BUSY | MV88E6XXX_G1_STATS_OP_FLUSH_ALL; + + err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_STATS_OP, val); + if (err) + return err; + + /* Wait for the flush to complete. */ + return mv88e6xxx_g1_stats_wait(chip); +} diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 950b914f9251..b0dc7518b47f 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -235,8 +235,10 @@ int mv88e6xxx_g1_stats_wait(struct mv88e6xxx_chip *chip); int mv88e6xxx_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port); int mv88e6320_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port); int mv88e6390_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port); +int mv88e6095_g1_stats_set_histogram(struct mv88e6xxx_chip *chip); int mv88e6390_g1_stats_set_histogram(struct mv88e6xxx_chip *chip); void mv88e6xxx_g1_stats_read(struct mv88e6xxx_chip *chip, int stat, u32 *val); +int mv88e6xxx_g1_stats_clear(struct mv88e6xxx_chip *chip); int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip, int port); int mv88e6390_g1_set_egress_port(struct mv88e6xxx_chip *chip, int port); int mv88e6095_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port); From 940c9c458866725e0ade96d5c1c7dbe5fcf1cf85 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Fri, 10 Nov 2017 13:03:37 +0530 Subject: [PATCH 2076/2177] cxgb4: collect vpd info directly from hardware Collect vpd information directly from hardware instead of software adapter context. Move EEPROM physical address to virtual address translation logic to t4_hw.c and update relevant files. Fixes: 6f92a6544f1a ("cxgb4: collect hardware misc dumps") Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 6 ++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 77 ++++++++++++++++--- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 33 +------- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 29 +++++++ 5 files changed, 104 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 239c43084e77..1de1d811fde3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -166,6 +166,12 @@ struct cudbg_mps_tcam { u8 reserved[2]; }; +#define CUDBG_VPD_PF_SIZE 0x800 +#define CUDBG_SCFG_VER_ADDR 0x06 +#define CUDBG_SCFG_VER_LEN 4 +#define CUDBG_VPD_VER_ADDR 0x18c7 +#define CUDBG_VPD_VER_LEN 2 + struct cudbg_vpd_data { u8 sn[SERNUM_LEN + 1]; u8 bn[PN_LEN + 1]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index fe3a9ef0ec3f..32c9858da110 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -68,6 +68,22 @@ struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i) (sizeof(struct cudbg_entity_hdr) * (i - 1))); } +static int cudbg_read_vpd_reg(struct adapter *padap, u32 addr, u32 len, + void *dest) +{ + int vaddr, rc; + + vaddr = t4_eeprom_ptov(addr, padap->pf, EEPROMPFSIZE); + if (vaddr < 0) + return vaddr; + + rc = pci_read_vpd(padap->pdev, vaddr, len, dest); + if (rc < 0) + return rc; + + return 0; +} + int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) @@ -1289,8 +1305,47 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, { struct adapter *padap = pdbg_init->adap; struct cudbg_buffer temp_buff = { 0 }; + char vpd_str[CUDBG_VPD_VER_LEN + 1]; + u32 scfg_vers, vpd_vers, fw_vers; struct cudbg_vpd_data *vpd_data; - int rc; + struct vpd_params vpd = { 0 }; + int rc, ret; + + rc = t4_get_raw_vpd_params(padap, &vpd); + if (rc) + return rc; + + rc = t4_get_fw_version(padap, &fw_vers); + if (rc) + return rc; + + /* Serial Configuration Version is located beyond the PF's vpd size. + * Temporarily give access to entire EEPROM to get it. 
+ */ + rc = pci_set_vpd_size(padap->pdev, EEPROMVSIZE); + if (rc < 0) + return rc; + + ret = cudbg_read_vpd_reg(padap, CUDBG_SCFG_VER_ADDR, CUDBG_SCFG_VER_LEN, + &scfg_vers); + + /* Restore back to original PF's vpd size */ + rc = pci_set_vpd_size(padap->pdev, CUDBG_VPD_PF_SIZE); + if (rc < 0) + return rc; + + if (ret) + return ret; + + rc = cudbg_read_vpd_reg(padap, CUDBG_VPD_VER_ADDR, CUDBG_VPD_VER_LEN, + vpd_str); + if (rc) + return rc; + + vpd_str[CUDBG_VPD_VER_LEN] = '\0'; + rc = kstrtouint(vpd_str, 0, &vpd_vers); + if (rc) + return rc; rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_vpd_data), &temp_buff); @@ -1298,16 +1353,16 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, return rc; vpd_data = (struct cudbg_vpd_data *)temp_buff.data; - memcpy(vpd_data->sn, padap->params.vpd.sn, SERNUM_LEN + 1); - memcpy(vpd_data->bn, padap->params.vpd.pn, PN_LEN + 1); - memcpy(vpd_data->na, padap->params.vpd.na, MACADDR_LEN + 1); - memcpy(vpd_data->mn, padap->params.vpd.id, ID_LEN + 1); - vpd_data->scfg_vers = padap->params.scfg_vers; - vpd_data->vpd_vers = padap->params.vpd_vers; - vpd_data->fw_major = FW_HDR_FW_VER_MAJOR_G(padap->params.fw_vers); - vpd_data->fw_minor = FW_HDR_FW_VER_MINOR_G(padap->params.fw_vers); - vpd_data->fw_micro = FW_HDR_FW_VER_MICRO_G(padap->params.fw_vers); - vpd_data->fw_build = FW_HDR_FW_VER_BUILD_G(padap->params.fw_vers); + memcpy(vpd_data->sn, vpd.sn, SERNUM_LEN + 1); + memcpy(vpd_data->bn, vpd.pn, PN_LEN + 1); + memcpy(vpd_data->na, vpd.na, MACADDR_LEN + 1); + memcpy(vpd_data->mn, vpd.id, ID_LEN + 1); + vpd_data->scfg_vers = scfg_vers; + vpd_data->vpd_vers = vpd_vers; + vpd_data->fw_major = FW_HDR_FW_VER_MAJOR_G(fw_vers); + vpd_data->fw_minor = FW_HDR_FW_VER_MINOR_G(fw_vers); + vpd_data->fw_micro = FW_HDR_FW_VER_MICRO_G(fw_vers); + vpd_data->fw_build = FW_HDR_FW_VER_BUILD_G(fw_vers); cudbg_write_and_release_buff(&temp_buff, dbg_buff); return rc; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0c83ceb5a1a6..0de1a4b2223e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1459,6 +1459,7 @@ static inline int t4_memory_write(struct adapter *adap, int mtype, u32 addr, unsigned int t4_get_regs_len(struct adapter *adapter); void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size); +int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz); int t4_seeprom_wp(struct adapter *adapter, bool enable); int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p); int t4_get_vpd_params(struct adapter *adapter, struct vpd_params *p); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 1b7f6b9ccc8b..eb338212f5af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -1064,40 +1064,11 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) return 0; } -/** - * eeprom_ptov - translate a physical EEPROM address to virtual - * @phys_addr: the physical EEPROM address - * @fn: the PCI function number - * @sz: size of function-specific area - * - * Translate a physical EEPROM address to virtual. The first 1K is - * accessed through virtual addresses starting at 31K, the rest is - * accessed through virtual addresses starting at 0. 
- * - * The mapping is as follows: - * [0..1K) -> [31K..32K) - * [1K..1K+A) -> [31K-A..31K) - * [1K+A..ES) -> [0..ES-A-1K) - * - * where A = @fn * @sz, and ES = EEPROM size. - */ -static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz) -{ - fn *= sz; - if (phys_addr < 1024) - return phys_addr + (31 << 10); - if (phys_addr < 1024 + fn) - return 31744 - fn + phys_addr - 1024; - if (phys_addr < EEPROMSIZE) - return phys_addr - 1024 - fn; - return -EINVAL; -} - /* The next two routines implement eeprom read/write from physical addresses. */ static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v) { - int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE); + int vaddr = t4_eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE); if (vaddr >= 0) vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v); @@ -1106,7 +1077,7 @@ static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v) static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v) { - int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE); + int vaddr = t4_eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE); if (vaddr >= 0) vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index efe9d3a20135..b4fad081ac78 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2638,6 +2638,35 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) #define VPD_LEN 1024 #define CHELSIO_VPD_UNIQUE_ID 0x82 +/** + * t4_eeprom_ptov - translate a physical EEPROM address to virtual + * @phys_addr: the physical EEPROM address + * @fn: the PCI function number + * @sz: size of function-specific area + * + * Translate a physical EEPROM address to virtual. The first 1K is + * accessed through virtual addresses starting at 31K, the rest is + * accessed through virtual addresses starting at 0. + * + * The mapping is as follows: + * [0..1K) -> [31K..32K) + * [1K..1K+A) -> [31K-A..31K) + * [1K+A..ES) -> [0..ES-A-1K) + * + * where A = @fn * @sz, and ES = EEPROM size. + */ +int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz) +{ + fn *= sz; + if (phys_addr < 1024) + return phys_addr + (31 << 10); + if (phys_addr < 1024 + fn) + return 31744 - fn + phys_addr - 1024; + if (phys_addr < EEPROMSIZE) + return phys_addr - 1024 - fn; + return -EINVAL; +} + /** * t4_seeprom_wp - enable/disable EEPROM write protection * @adapter: the adapter From 2aee43078afc24fe8073829159292664c1eed424 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Fri, 10 Nov 2017 12:54:34 +0100 Subject: [PATCH 2077/2177] net: dsa: lan9303: Set up trapping of IGMP to CPU port IGMP packets should be trapped to the CPU port. The SW bridge knows whether to forward to other ports. With "IGMP snooping for local traffic" merged, IGMP trapping is also required for stable IGMPv2 operation. LAN9303 does not trap IGMP packets by default. Enable IGMP trapping in lan9303_setup. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303-core.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 320651a57c6f..6d7dee67d822 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -153,6 +153,8 @@ # define LAN9303_SWE_VLAN_UNTAG_PORT0 BIT(12) #define LAN9303_SWE_VLAN_CMD_STS 0x1810 #define LAN9303_SWE_GLB_INGRESS_CFG 0x1840 +# define LAN9303_SWE_GLB_INGR_IGMP_TRAP BIT(7) +# define LAN9303_SWE_GLB_INGR_IGMP_PORT(p) BIT(10 + p) #define LAN9303_SWE_PORT_STATE 0x1843 # define LAN9303_SWE_PORT_STATE_FORWARDING_PORT2 (0) # define LAN9303_SWE_PORT_STATE_LEARNING_PORT2 BIT(5) @@ -450,6 +452,21 @@ on_error: return ret; } +static int lan9303_write_switch_reg_mask(struct lan9303 *chip, u16 regnum, + u32 val, u32 mask) +{ + int ret; + u32 reg; + + ret = lan9303_read_switch_reg(chip, regnum, &reg); + if (ret) + return ret; + + reg = (reg & ~mask) | val; + + return lan9303_write_switch_reg(chip, regnum, reg); +} + static int lan9303_write_switch_port(struct lan9303 *chip, int port, u16 regnum, u32 val) { @@ -905,6 +922,15 @@ static int lan9303_setup(struct dsa_switch *ds) if (ret) dev_err(chip->dev, "failed to re-enable switching %d\n", ret); + /* Trap IGMP to port 0 */ + ret = lan9303_write_switch_reg_mask(chip, LAN9303_SWE_GLB_INGRESS_CFG, + LAN9303_SWE_GLB_INGR_IGMP_TRAP | + LAN9303_SWE_GLB_INGR_IGMP_PORT(0), + LAN9303_SWE_GLB_INGR_IGMP_PORT(1) | + LAN9303_SWE_GLB_INGR_IGMP_PORT(2)); + if (ret) + dev_err(chip->dev, "failed to setup IGMP trap %d\n", ret); + return 0; } From 4672cd36053e4a8480f7ca796758d56ef23ccb78 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Fri, 10 Nov 2017 12:54:35 +0100 Subject: [PATCH 2078/2177] net: dsa: lan9303: Clear offload_fwd_mark for IGMP Now that IGMP packets are no longer flooded in HW, we want the SW bridge to forward packets based on bridge configuration. To make that happen, IGMP packets must have skb->offload_fwd_mark = 0. Signed-off-by: Egil Hjelmeland Signed-off-by: David S. Miller --- net/dsa/tag_lan9303.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 5ba01fc3c6ba..b8c5e52b2eff 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -92,6 +92,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, { u16 *lan9303_tag; unsigned int source_port; + u16 ether_type_nw; + u8 ip_protocol; if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) { dev_warn_ratelimited(&dev->dev, @@ -129,6 +131,17 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, skb->offload_fwd_mark = !ether_addr_equal(skb->data - ETH_HLEN, eth_stp_addr); + /* We also need IGMP packets to have skb->offload_fwd_mark = 0. + * Solving this for all conceivable situations would add more cost to + * every packet. Instead we handle just the common case: + * No VLAN tag + Ethernet II framing. + * Test least probable term first.
+ */ + ether_type_nw = lan9303_tag[2]; + ip_protocol = *(skb->data + 9); + if (ip_protocol == IPPROTO_IGMP && ether_type_nw == htons(ETH_P_IP)) + skb->offload_fwd_mark = 0; + return skb; } From 4d215ae730786f51b108873227c67dc7815dbdef Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 11:33:25 -0800 Subject: [PATCH 2079/2177] net: bgmac: Pad packets to a minimum size In preparation for enabling Broadcom tags with b53, pad packets to a minimum size of 64 bytes (sans FCS) in order for the Broadcom switch to accept ingressing frames. Without this, we would typically be able to DHCP, but not resolve with ARP because packets are too small and get rejected by the switch. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 48d672b204a4..1d96cd594ade 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "bgmac.h" static bool bgmac_wait_value(struct bgmac *bgmac, u16 reg, u32 mask, @@ -127,6 +128,8 @@ bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring, dma_desc->ctl1 = cpu_to_le32(ctl1); } +#define ENET_BRCM_TAG_LEN 4 + static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac, struct bgmac_dma_ring *ring, struct sk_buff *skb) @@ -139,6 +142,18 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac, u32 flags; int i; + /* The Ethernet switch we are interfaced with needs packets to be at + * least 64 bytes (including FCS) otherwise they will be discarded when + * they enter the switch port logic. When Broadcom tags are enabled, we + * need to make sure that packets are at least 68 bytes + * (including FCS and tag) because the length verification is done after + * the Broadcom tag is stripped off the ingress packet. + */ + if (netdev_uses_dsa(net_dev)) { + if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) + goto err_stats; + } + if (skb->len > BGMAC_DESC_CTL1_LEN) { netdev_err(bgmac->net_dev, "Too long skb (%d)\n", skb->len); goto err_drop; @@ -225,6 +240,7 @@ err_dma_head: err_drop: dev_kfree_skb(skb); +err_stats: net_dev->stats.tx_dropped++; net_dev->stats.tx_errors++; return NETDEV_TX_OK; From c499696e7901bda18385ac723b7bd27c3a4af624 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 11:33:26 -0800 Subject: [PATCH 2080/2177] net: dsa: b53: Stop using dev->cpu_port incorrectly dev->cpu_port is the driver local information that should only be used to look up register offsets for a particular port, when they differ (e.g: IMP port override), but it should certainly not be used in place of the DSA configured CPU port. Since the DSA switch layer calls port_vlan_{add,del}() on the CPU port as well, we can remove the specific setting of the CPU port within port_vlan_{add,del}. Fixes: ff39c2d68679 ("net: dsa: b53: Add bridge support") Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a7ca62ba27b7..17f12484ce24 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -505,7 +505,7 @@ EXPORT_SYMBOL(b53_imp_vlan_setup); int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; - unsigned int cpu_port = dev->cpu_port; + unsigned int cpu_port = ds->ports[port].cpu_dp->index; u16 pvlan; /* Clear the Rx and Tx disable bits and set to no spanning tree */ @@ -1054,7 +1054,6 @@ void b53_vlan_add(struct dsa_switch *ds, int port, struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - unsigned int cpu_port = dev->cpu_port; struct b53_vlan *vl; u16 vid; @@ -1063,12 +1062,11 @@ void b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); - vl->members |= BIT(port) | BIT(cpu_port); + vl->members |= BIT(port); if (untagged) vl->untag |= BIT(port); else vl->untag &= ~BIT(port); - vl->untag &= ~BIT(cpu_port); b53_set_vlan_entry(dev, vid, vl); b53_fast_age_vlan(dev, vid); @@ -1432,8 +1430,8 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br) b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); } else { b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(dev->cpu_port); - vl->untag |= BIT(port) | BIT(dev->cpu_port); + vl->members |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port) | BIT(cpu_port); b53_set_vlan_entry(dev, pvid, vl); } } From 7edc58d614d49c3b47ec4b9441d5e95758d0f711 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 11:33:27 -0800 Subject: [PATCH 2081/2177] net: dsa: b53: Turn on Broadcom tags Enable Broadcom tags for b53 devices, except 5325 and 5365 which use a different Broadcom tag format not yet supported by net/dsa/tag_brcm.c. We also make sure that we can turn on Broadcom tags on a CPU port number that is capable of that: 5, 7 or 8. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/b53/Kconfig | 1 + drivers/net/dsa/b53/b53_common.c | 48 ++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/b53/Kconfig b/drivers/net/dsa/b53/Kconfig index 27f32a50df57..b413d100c6b3 100644 --- a/drivers/net/dsa/b53/Kconfig +++ b/drivers/net/dsa/b53/Kconfig @@ -1,6 +1,7 @@ menuconfig B53 tristate "Broadcom BCM53xx managed switch support" depends on NET_DSA + select NET_DSA_TAG_BRCM help This driver adds support for Broadcom managed switch chips. 
It supports BCM5325E, BCM5365, BCM539x, BCM53115 and BCM53125 as well as BCM63XX diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 17f12484ce24..44a9a03bff55 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -325,7 +325,6 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid, static void b53_set_forwarding(struct b53_device *dev, int enable) { - struct dsa_switch *ds = dev->ds; u8 mgmt; b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt); @@ -337,14 +336,11 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt); - /* Include IMP port in dumb forwarding mode when no tagging protocol is - * set + /* Include IMP port in dumb forwarding mode */ - if (ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_NONE) { - b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt); - mgmt |= B53_MII_DUMB_FWDG_EN; - b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt); - } + b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt); + mgmt |= B53_MII_DUMB_FWDG_EN; + b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt); } static void b53_enable_vlan(struct b53_device *dev, bool enable) @@ -612,6 +608,8 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port) PORT_CTRL_RX_MCST_EN | PORT_CTRL_RX_UCST_EN; b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl); + + b53_brcm_hdr_setup(dev->ds, port); } static void b53_enable_mib(struct b53_device *dev) @@ -1480,9 +1478,41 @@ void b53_br_fast_age(struct dsa_switch *ds, int port) } EXPORT_SYMBOL(b53_br_fast_age); +static bool b53_can_enable_brcm_tags(struct dsa_switch *ds) +{ + unsigned int brcm_tag_mask; + unsigned int i; + + /* Broadcom switches will accept enabling Broadcom tags on the + * following ports: 5, 7 and 8, any other port is not supported + */ + brcm_tag_mask = BIT(B53_CPU_PORT_25) | BIT(7) | BIT(B53_CPU_PORT); + + for (i = 0; i < ds->num_ports; i++) { + if (dsa_is_cpu_port(ds, i)) { + if (!(BIT(i) & brcm_tag_mask)) { + dev_warn(ds->dev, + "Port %d is not Broadcom tag capable\n", + i); + return false; + } + } + } + + return true; +} + static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) { - return DSA_TAG_PROTO_NONE; + struct b53_device *dev = ds->priv; + + /* Older models support a different tag format that we do not + * support in net/dsa/tag_brcm.c yet. + */ + if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds)) + return DSA_TAG_PROTO_NONE; + else + return DSA_TAG_PROTO_BRCM; } int b53_mirror_add(struct dsa_switch *ds, int port, From 8fdfd6595bd739d8338fd1f3d553a9a34ed9824c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 11 Nov 2017 06:06:29 +0900 Subject: [PATCH 2082/2177] l2tp: remove .tunnel_sock from struct l2tp_eth This field has never been used. Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_eth.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 3e2dec1fb0f5..5c366ecfa1cb 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -41,7 +41,6 @@ /* via netdev_priv() */ struct l2tp_eth { - struct sock *tunnel_sock; struct l2tp_session *session; atomic_long_t tx_bytes; atomic_long_t tx_packets; @@ -313,7 +312,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, priv = netdev_priv(dev); priv->session = session; - priv->tunnel_sock = tunnel->sock; session->recv_skb = l2tp_eth_dev_recv; session->session_close = l2tp_eth_delete; #if IS_ENABLED(CONFIG_L2TP_DEBUGFS) From 7198c77aa05560c257ee377ec1f4796812121580 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 11 Nov 2017 06:06:31 +0900 Subject: [PATCH 2083/2177] l2tp: avoid using ->tunnel_sock for getting session's parent tunnel Sessions don't need to use l2tp_sock_to_tunnel(xxx->tunnel_sock) for accessing their parent tunnel. They have the .tunnel field in the l2tp_session structure for that. Furthermore, in all these cases, the session is registered, so we're guaranteed that .tunnel isn't NULL and that the session properly holds a reference on the tunnel. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 66 +++++++++------------------------------------ 1 file changed, 12 insertions(+), 54 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 5f5c78b632d0..88b4cb1b7cde 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -295,7 +295,6 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, int error; struct l2tp_session *session; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; int uhlen; error = -ENOTCONN; @@ -308,10 +307,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, if (session == NULL) goto error; - ps = l2tp_session_priv(session); - tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); - if (tunnel == NULL) - goto error_put_sess; + tunnel = session->tunnel; uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; @@ -322,7 +318,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ 0, GFP_KERNEL); if (!skb) - goto error_put_sess_tun; + goto error_put_sess; /* Reserve space for headers. 
*/ skb_reserve(skb, NET_SKB_PAD); @@ -340,20 +336,17 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, error = memcpy_from_msg(skb_put(skb, total_len), m, total_len); if (error < 0) { kfree_skb(skb); - goto error_put_sess_tun; + goto error_put_sess; } local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); local_bh_enable(); - sock_put(ps->tunnel_sock); sock_put(sk); return total_len; -error_put_sess_tun: - sock_put(ps->tunnel_sock); error_put_sess: sock_put(sk); error: @@ -377,10 +370,8 @@ error: static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = (struct sock *) chan->private; - struct sock *sk_tun; struct l2tp_session *session; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; int uhlen, headroom; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) @@ -391,13 +382,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (session == NULL) goto abort; - ps = l2tp_session_priv(session); - sk_tun = ps->tunnel_sock; - if (sk_tun == NULL) - goto abort_put_sess; - tunnel = l2tp_sock_to_tunnel(sk_tun); - if (tunnel == NULL) - goto abort_put_sess; + tunnel = session->tunnel; uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; headroom = NET_SKB_PAD + @@ -406,7 +391,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) session->hdr_len + /* L2TP header */ 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ if (skb_cow_head(skb, headroom)) - goto abort_put_sess_tun; + goto abort_put_sess; /* Setup PPP header */ __skb_push(skb, 2); @@ -417,12 +402,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) l2tp_xmit_skb(session, skb, session->hdr_len); local_bh_enable(); - sock_put(sk_tun); sock_put(sk); + return 1; -abort_put_sess_tun: - sock_put(sk_tun); abort_put_sess: sock_put(sk); abort: @@ -919,9 +902,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, goto end; pls = l2tp_session_priv(session); - tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; + tunnel = session->tunnel; inet = inet_sk(tunnel->sock); if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) { @@ -1001,8 +982,6 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, *usockaddr_len = len; error = 0; - sock_put(pls->tunnel_sock); -end_put_sess: sock_put(sk); end: return error; @@ -1241,7 +1220,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, struct sock *sk = sock->sk; struct l2tp_session *session; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; int err; if (!sk) @@ -1265,16 +1243,10 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, /* Special case: if session's session_id is zero, treat ioctl as a * tunnel ioctl */ - ps = l2tp_session_priv(session); if ((session->session_id == 0) && (session->peer_session_id == 0)) { - err = -EBADF; - tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - + tunnel = session->tunnel; err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg); - sock_put(ps->tunnel_sock); goto end_put_sess; } @@ -1400,7 +1372,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct l2tp_session *session; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; int val; int err; @@ -1425,20 +1396,14 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, /* Special case: if session_id == 
0x0000, treat as operation on tunnel */ - ps = l2tp_session_priv(session); if ((session->session_id == 0) && (session->peer_session_id == 0)) { - err = -EBADF; - tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - + tunnel = session->tunnel; err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val); - sock_put(ps->tunnel_sock); - } else + } else { err = pppol2tp_session_setsockopt(sk, session, optname, val); + } -end_put_sess: sock_put(sk); end: return err; @@ -1526,7 +1491,6 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, struct l2tp_tunnel *tunnel; int val, len; int err; - struct pppol2tp_session *ps; if (level != SOL_PPPOL2TP) return -EINVAL; @@ -1550,16 +1514,10 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, goto end; /* Special case: if session_id == 0x0000, treat as operation on tunnel */ - ps = l2tp_session_priv(session); if ((session->session_id == 0) && (session->peer_session_id == 0)) { - err = -EBADF; - tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - + tunnel = session->tunnel; err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); - sock_put(ps->tunnel_sock); if (err) goto end_put_sess; } else { From da9ca825ef79fd1c64297b5267d594b11a9e5b16 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 11 Nov 2017 06:06:37 +0900 Subject: [PATCH 2084/2177] l2tp: remove the .tunnel_sock field from struct pppol2tp_session The last user of .tunnel_sock is pppol2tp_connect() which defensively uses it to verify internal data consistency. This check isn't necessary: l2tp_session_get() guarantees that the returned session belongs to the tunnel passed as parameter. And .tunnel_sock is never updated, so checking that it still points to the parent tunnel socket is useless; that test can never fail. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 88b4cb1b7cde..b412fc3351dc 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -127,8 +127,6 @@ struct pppol2tp_session { * PPPoX socket */ struct sock *__sk; /* Copy of .sk, for cleanup */ struct rcu_head rcu; /* For asynchronous release */ - struct sock *tunnel_sock; /* Pointer to the tunnel UDP - * socket */ int flags; /* accessed by PPPIOCGFLAGS. * Unused. */ }; @@ -592,7 +590,6 @@ static void pppol2tp_session_init(struct l2tp_session *session) ps = l2tp_session_priv(session); mutex_init(&ps->sk_lock); - ps->tunnel_sock = session->tunnel->sock; ps->owner = current->pid; /* If PMTU discovery was enabled, use the MTU that was discovered */ @@ -743,13 +740,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, error = -EEXIST; goto end; } - - /* consistency checks */ - if (ps->tunnel_sock != tunnel->sock) { - mutex_unlock(&ps->sk_lock); - error = -EEXIST; - goto end; - } } else { /* Default MTU must allow space for UDP/L2TP/PPP headers */ cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; From 39b175211053c7a6a4d794c42e225994f1c069c2 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 10 Nov 2017 14:03:51 -0800 Subject: [PATCH 2085/2177] net: Remove unused skb_shared_info member ip6_frag_id was only used by UFO, which has been removed. ipv6_proxy_select_ident() only existed to set ip6_frag_id and has no in-tree callers. Signed-off-by: Mat Martineau Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 - include/net/ipv6.h | 1 - net/ipv6/output_core.c | 31 ------------------------------- 3 files changed, 33 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 57d712671081..54fe91183a8e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -500,7 +500,6 @@ struct skb_shared_info { struct skb_shared_hwtstamps hwtstamps; unsigned int gso_type; u32 tskey; - __be32 ip6_frag_id; /* * Warning : all fields before dataref are cleared in __alloc_skb() diff --git a/include/net/ipv6.h b/include/net/ipv6.h index fb6d67012de6..ec14f0d5a3a1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -767,7 +767,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr); -void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index a338bbc33cf3..4a7e5ffa5108 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -31,37 +31,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, return id; } -/* This function exists only for tap drivers that must support broken - * clients requesting UFO without specifying an IPv6 fragment ID. - * - * This is similar to ipv6_select_ident() but we use an independent hash - * seed to limit information leakage. - * - * The network header must be set before calling this. - */ -void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) -{ - static u32 ip6_proxy_idents_hashrnd __read_mostly; - struct in6_addr buf[2]; - struct in6_addr *addrs; - u32 id; - - addrs = skb_header_pointer(skb, - skb_network_offset(skb) + - offsetof(struct ipv6hdr, saddr), - sizeof(buf), buf); - if (!addrs) - return; - - net_get_random_once(&ip6_proxy_idents_hashrnd, - sizeof(ip6_proxy_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, - &addrs[1], &addrs[0]); - skb_shinfo(skb)->ip6_frag_id = htonl(id); -} -EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); - __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) From 30482e4e2885bc71d058241d5aa1691480c8f938 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Wed, 8 Nov 2017 11:44:36 +0100 Subject: [PATCH 2086/2177] net: dsa: lan9303: Fix lan9303_alr_del_port() Fix embarrassing bug in lan9303_alr_del_port(): Instead of zeroing entr->mac_addr, I destroyed the next cache entry. Affected .port_fdb_del and .port_mdb_del. Fixes: 0620427ea0d6 ("net: dsa: lan9303: Add fdb/mdb manipulation") Signed-off-by: Egil Hjelmeland Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 6d7dee67d822..a2610085e7ba 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -719,7 +719,7 @@ static int lan9303_alr_del_port(struct lan9303 *chip, const u8 *mac, int port) entr->port_map &= ~BIT(port); if (entr->port_map == 0) /* zero means its free again */ - eth_zero_addr(&entr->port_map); + eth_zero_addr(entr->mac_addr); lan9303_alr_set_entry(chip, mac, entr->port_map, entr->stp_override); return 0; From 4b33709d894f2f058566ac06d9769dbc5617268d Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Wed, 8 Nov 2017 11:55:14 +0100 Subject: [PATCH 2087/2177] net: dsa: lan9303: Documentation: Add missing word "Mbps" Signed-off-by: Egil Hjelmeland Signed-off-by: David S. Miller --- Documentation/networking/dsa/lan9303.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/dsa/lan9303.txt b/Documentation/networking/dsa/lan9303.txt index ec28683d107d..144b02b95207 100644 --- a/Documentation/networking/dsa/lan9303.txt +++ b/Documentation/networking/dsa/lan9303.txt @@ -1,9 +1,9 @@ LAN9303 Ethernet switch driver ============================== -The LAN9303 is a three port 10/100 ethernet switch with integrated phys for the -two external ethernet ports. The third port is an RMII/MII interface to a host -master network interface (e.g. fixed link). +The LAN9303 is a three port 10/100 Mbps ethernet switch with integrated phys for +the two external ethernet ports. The third port is an RMII/MII interface to a +host master network interface (e.g. fixed link). Driver details From 07842561a873e8f25272bcebe7f6c7f1af7c1a2d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 8 Nov 2017 13:23:23 +0000 Subject: [PATCH 2088/2177] net: realtek: r8169: remove redundant assignment to giga_ctrl The variable giga_ctrl is being assigned to zero however this is never read and hence the assignment is redundant, so remove it. Cleans up clang warning: drivers/net/ethernet/realtek/r8169.c:1978:3: warning: Value stored to 'giga_ctrl' is never read Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index fd218fd9ef3c..dcb8c39382e7 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1982,8 +1982,6 @@ static int rtl8169_set_speed_xmii(struct net_device *dev, rtl_writephy(tp, MII_ADVERTISE, auto_nego); rtl_writephy(tp, MII_CTRL1000, giga_ctrl); } else { - giga_ctrl = 0; - if (speed == SPEED_10) bmcr = 0; else if (speed == SPEED_100) From 338d182fa542b3ca05456ad1ce9cebe6580083b1 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Wed, 8 Nov 2017 11:23:46 -0800 Subject: [PATCH 2089/2177] ipv6: try not to take rtnl_lock in ip6mr_sk_done Avoid traversing the list of mr6_tables (which requires the rtnl_lock) in ip6mr_sk_done(), when we know in advance that a match will not be found. This can happen when rawv6_close()/ip6mr_sk_done() is invoked on non-mroute6 sockets. This patch helps reduce rtnl_lock contention when destroying a large number of net namespaces, each having a non-mroute6 raw socket. v2: same patch, only fixed subject line and expanded comment. Signed-off-by: Francesco Ruggeri Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 59fad81e5f7a..9c24b85949c1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1617,6 +1617,10 @@ int ip6mr_sk_done(struct sock *sk) struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return err; + rtnl_lock(); ip6mr_for_each_table(mrt, net) { if (sk == mrt->mroute6_sk) { From 112f9cb65643caf7b922e1a66dc752bfab40aeb1 Mon Sep 17 00:00:00 2001 From: Dave Taht Date: Wed, 8 Nov 2017 15:12:26 -0800 Subject: [PATCH 2090/2177] netem: convert to qdisc_watchdog_schedule_ns Upgrade the internal netem scheduler to use nanoseconds rather than ticks throughout. Convert to and from the std "ticks" userspace api automatically, while allowing for finer grained scheduling to take place. Signed-off-by: Dave Taht Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index db0228a65e8c..e64e0e0d94ff 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -77,8 +77,8 @@ struct netem_sched_data { struct qdisc_watchdog watchdog; - psched_tdiff_t latency; - psched_tdiff_t jitter; + s64 latency; + s64 jitter; u32 loss; u32 ecn; @@ -145,7 +145,7 @@ struct netem_sched_data { * we save skb->tstamp value in skb->cb[] before destroying it. */ struct netem_skb_cb { - psched_time_t time_to_send; + u64 time_to_send; }; static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) @@ -305,11 +305,11 @@ static bool loss_event(struct netem_sched_data *q) * std deviation sigma. Uses table lookup to approximate the desired * distribution, and a uniformly-distributed pseudo-random source. 
*/ -static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, - struct crndstate *state, - const struct disttable *dist) +static s64 tabledist(s64 mu, s64 sigma, + struct crndstate *state, + const struct disttable *dist) { - psched_tdiff_t x; + s64 x; long t; u32 rnd; @@ -332,10 +332,10 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } -static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) +static u64 packet_len_2_sched_time(unsigned int len, + struct netem_sched_data *q) { - u64 ticks; - + u64 offset; len += q->packet_overhead; if (q->cell_size) { @@ -345,11 +345,9 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche cells++; len = cells * (q->cell_size + q->cell_overhead); } - - ticks = (u64)len * NSEC_PER_SEC; - - do_div(ticks, q->rate); - return PSCHED_NS2TICKS(ticks); + offset = (u64)len * NSEC_PER_SEC; + do_div(offset, q->rate); + return offset; } static void tfifo_reset(struct Qdisc *sch) @@ -369,7 +367,7 @@ static void tfifo_reset(struct Qdisc *sch) static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; + u64 tnext = netem_skb_cb(nskb)->time_to_send; struct rb_node **p = &q->t_root.rb_node, *parent = NULL; while (*p) { @@ -515,13 +513,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->gap == 0 || /* not doing reordering */ q->counter < q->gap - 1 || /* inside last reordering gap */ q->reorder < get_crandom(&q->reorder_cor)) { - psched_time_t now; - psched_tdiff_t delay; + u64 now; + s64 delay; delay = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); - now = psched_get_time(); + now = ktime_get_ns(); if (q->rate) { struct netem_skb_cb *last = NULL; @@ -547,7 +545,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * from delay. */ delay -= last->time_to_send - now; - delay = max_t(psched_tdiff_t, 0, delay); + delay = max_t(s64, 0, delay); now = last->time_to_send; } @@ -562,7 +560,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * Do re-ordering by putting one out of N packets at the front * of the queue. */ - cb->time_to_send = psched_get_time(); + cb->time_to_send = ktime_get_ns(); q->counter = 0; netem_enqueue_skb_head(&sch->q, skb); @@ -609,13 +607,13 @@ deliver: } p = rb_first(&q->t_root); if (p) { - psched_time_t time_to_send; + u64 time_to_send; skb = rb_to_skb(p); /* if more time remaining? 
*/ time_to_send = netem_skb_cb(skb)->time_to_send; - if (time_to_send <= psched_get_time()) { + if (time_to_send <= ktime_get_ns()) { rb_erase(p, &q->t_root); sch->q.qlen--; @@ -659,7 +657,7 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule(&q->watchdog, time_to_send); + qdisc_watchdog_schedule_ns(&q->watchdog, time_to_send); } if (q->qdisc) { @@ -888,8 +886,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) sch->limit = qopt->limit; - q->latency = qopt->latency; - q->jitter = qopt->jitter; + q->latency = PSCHED_TICKS2NS(qopt->latency); + q->jitter = PSCHED_TICKS2NS(qopt->jitter); q->limit = qopt->limit; q->gap = qopt->gap; q->counter = 0; @@ -1011,8 +1009,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_corrupt corrupt; struct tc_netem_rate rate; - qopt.latency = q->latency; - qopt.jitter = q->jitter; + qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), + UINT_MAX); + qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), + UINT_MAX); qopt.limit = q->limit; qopt.loss = q->loss; qopt.gap = q->gap; From 99803171ef04037092bf5eb29ae801e8b4d49a75 Mon Sep 17 00:00:00 2001 From: Dave Taht Date: Wed, 8 Nov 2017 15:12:27 -0800 Subject: [PATCH 2091/2177] netem: add uapi to express delay and jitter in nanoseconds netem userspace has long relied on a horrible /proc/net/psched hack to translate the current notion of "ticks" to nanoseconds. Expressing latency and jitter instead, in well defined nanoseconds, increases the dynamic range of emulated delays and jitter in netem. It will also ease a transition where reducing a tick to nsec equivalence would constrain the max delay in prior versions of netem to only 4.3 seconds. Signed-off-by: Dave Taht Suggested-by: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 2 ++ net/sched/sch_netem.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 6a2c5ea7e9c4..8fe6d1842bee 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -537,6 +537,8 @@ enum { TCA_NETEM_ECN, TCA_NETEM_RATE64, TCA_NETEM_PAD, + TCA_NETEM_LATENCY64, + TCA_NETEM_JITTER64, __TCA_NETEM_MAX, }; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index e64e0e0d94ff..47d6decba0ea 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -819,6 +819,8 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, [TCA_NETEM_ECN] = { .type = NLA_U32 }, [TCA_NETEM_RATE64] = { .type = NLA_U64 }, + [TCA_NETEM_LATENCY64] = { .type = NLA_S64 }, + [TCA_NETEM_JITTER64] = { .type = NLA_S64 }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -916,6 +918,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) q->rate = max_t(u64, q->rate, nla_get_u64(tb[TCA_NETEM_RATE64])); + if (tb[TCA_NETEM_LATENCY64]) + q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]); + + if (tb[TCA_NETEM_JITTER64]) + q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]); + if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); @@ -1020,6 +1028,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) goto nla_put_failure; + if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency)) + goto nla_put_failure; + + if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter)) + goto nla_put_failure; + cor.delay_corr = q->delay_cor.rho; cor.loss_corr = q->loss_cor.rho; cor.dup_corr = q->dup_cor.rho; From 836af83b54e3e285c4a0cc06c24aeb737d3e0e18 Mon Sep 17 00:00:00 2001 From: Dave Taht Date: Wed, 8 Nov 2017 15:12:28 -0800 Subject: [PATCH 2092/2177] netem: support delivering packets in delayed time slots Slotting is a crude approximation of the behaviors of shared media such as cable, wifi, and LTE, which gather up a bunch of packets within a varying delay window and deliver them, relative to that, nearly all at once. It works within the existing loss, duplication, jitter and delay parameters of netem. Some amount of inherent latency must be specified, regardless. The new "slot" parameter specifies a minimum and maximum delay between transmission attempts. The "bytes" and "packets" parameters can be used to limit the amount of information transferred per slot. Examples of use: tc qdisc add dev eth0 root netem delay 200us \ slot 800us 10ms bytes 64k packets 42 A more correct example, using stacked netem instances and a packet limit to emulate a tail drop wifi queue with slots and variable packet delivery, with a 200Mbit isochronous underlying rate, and 20ms path delay: tc qdisc add dev eth0 root handle 1: netem delay 20ms rate 200mbit \ limit 10000 tc qdisc add dev eth0 parent 1:1 handle 10:1 netem delay 200us \ slot 800us 10ms bytes 64k packets 42 limit 512 Signed-off-by: Dave Taht Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 8 ++++ net/sched/sch_netem.c | 74 ++++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 8fe6d1842bee..af3cc2f4e1ad 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -539,6 +539,7 @@ enum { TCA_NETEM_PAD, TCA_NETEM_LATENCY64, TCA_NETEM_JITTER64, + TCA_NETEM_SLOT, __TCA_NETEM_MAX, }; @@ -576,6 +577,13 @@ struct tc_netem_rate { __s32 cell_overhead; }; +struct tc_netem_slot { + __s64 min_delay; /* nsec */ + __s64 max_delay; + __s32 max_packets; + __s32 max_bytes; +}; + enum { NETEM_LOSS_UNSPEC, NETEM_LOSS_GI, /* General Intuitive - 4 state model */ diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 47d6decba0ea..b686e755fda9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -135,6 +135,13 @@ struct netem_sched_data { u32 a5; /* p23 used only in 4-states */ } clg; + struct tc_netem_slot slot_config; + struct slotstate { + u64 slot_next; + s32 packets_left; + s32 bytes_left; + } slot; + }; /* Time stamp put into socket buffer control block @@ -591,6 +598,20 @@ finish_segs: return NET_XMIT_SUCCESS; } +/* Delay the next round with a new future slot with a + * correct number of bytes and packets. + */ + +static void get_slot_next(struct netem_sched_data *q, u64 now) +{ + q->slot.slot_next = now + q->slot_config.min_delay + + (prandom_u32() * + (q->slot_config.max_delay - + q->slot_config.min_delay) >> 32); + q->slot.packets_left = q->slot_config.max_packets; + q->slot.bytes_left = q->slot_config.max_bytes; +} + static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -608,14 +629,17 @@ deliver: p = rb_first(&q->t_root); if (p) { u64 time_to_send; + u64 now = ktime_get_ns(); skb = rb_to_skb(p); /* if more time remaining? */ time_to_send = netem_skb_cb(skb)->time_to_send; - if (time_to_send <= ktime_get_ns()) { - rb_erase(p, &q->t_root); + if (q->slot.slot_next && q->slot.slot_next < time_to_send) + get_slot_next(q, now); + if (time_to_send <= now && q->slot.slot_next <= now) { + rb_erase(p, &q->t_root); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; @@ -634,6 +658,14 @@ deliver: skb->tstamp = 0; #endif + if (q->slot.slot_next) { + q->slot.packets_left--; + q->slot.bytes_left -= qdisc_pkt_len(skb); + if (q->slot.packets_left <= 0 || + q->slot.bytes_left <= 0) + get_slot_next(q, now); + } + if (q->qdisc) { unsigned int pkt_len = qdisc_pkt_len(skb); struct sk_buff *to_free = NULL; @@ -657,7 +689,10 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule_ns(&q->watchdog, time_to_send); + + qdisc_watchdog_schedule_ns(&q->watchdog, + max(time_to_send, + q->slot.slot_next)); } if (q->qdisc) { @@ -688,6 +723,7 @@ static void dist_free(struct disttable *d) * Distribution data is a variable size payload containing * signed 16 bit values. 
*/ + static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); @@ -718,6 +754,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) return 0; } +static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) +{ + const struct tc_netem_slot *c = nla_data(attr); + + q->slot_config = *c; + if (q->slot_config.max_packets == 0) + q->slot_config.max_packets = INT_MAX; + if (q->slot_config.max_bytes == 0) + q->slot_config.max_bytes = INT_MAX; + q->slot.packets_left = q->slot_config.max_packets; + q->slot.bytes_left = q->slot_config.max_bytes; + if (q->slot_config.min_delay | q->slot_config.max_delay) + q->slot.slot_next = ktime_get_ns(); + else + q->slot.slot_next = 0; +} + static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_corr *c = nla_data(attr); @@ -821,6 +874,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_RATE64] = { .type = NLA_U64 }, [TCA_NETEM_LATENCY64] = { .type = NLA_S64 }, [TCA_NETEM_JITTER64] = { .type = NLA_S64 }, + [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -927,6 +981,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); + if (tb[TCA_NETEM_SLOT]) + get_slot(q, tb[TCA_NETEM_SLOT]); + return ret; } @@ -1016,6 +1073,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_reorder reorder; struct tc_netem_corrupt corrupt; struct tc_netem_rate rate; + struct tc_netem_slot slot; qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), UINT_MAX); @@ -1070,6 +1128,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (dump_loss_model(q, skb) != 0) goto nla_put_failure; + if (q->slot_config.min_delay | q->slot_config.max_delay) { + slot = q->slot_config; + if (slot.max_packets == INT_MAX) + slot.max_packets = 0; + if (slot.max_bytes == INT_MAX) + slot.max_bytes = 0; + if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot)) + goto nla_put_failure; + } + return nla_nest_end(skb, nla); nla_put_failure: From 0642840b8bb008528dbdf929cec9f65ac4231ad0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 9 Nov 2017 13:04:44 +0900 Subject: [PATCH 2093/2177] af_netlink: ensure that NLMSG_DONE never fails in dumps The way people generally use netlink_dump is that they fill in the skb as much as possible, breaking when nla_put returns an error. Then, they get called again and start filling out the next skb, and again, and so forth. The mechanism at work here is the ability for the iterative dumping function to detect when the skb is filled up and not fill it past the brim, waiting for a fresh skb for the rest of the data. However, if the attributes are small and nicely packed, it is possible that a dump callback function successfully fills in attributes until the skb is of size 4080 (libmnl's default page-sized receive buffer size). The dump function completes, satisfied, and then, if it happens to be that this is actually the last skb, and no further ones are to be sent, then netlink_dump will add on the NLMSG_DONE part: nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); It is very important that netlink_dump does this, of course. 
However, in this example, that call to nlmsg_put_answer will fail, because the previous filling by the dump function did not leave it enough room. And how could it possibly have done so? All of the nla_put variety of functions simply check to see if the skb has enough tailroom, independent of the context it is in. In order to keep the important assumptions of all netlink dump users, it is therefore important to give them an skb that has this end part of the tail already reserved, so that the call to nlmsg_put_answer does not fail. Otherwise, library authors are forced to find some bizarre sized receive buffer that has a large modulo relative to the common sizes of messages received, which is ugly and buggy. This patch thus saves the NLMSG_DONE for an additional message, for the case that things are dangerously close to the brim. This requires keeping track of the errno from ->dump() across calls. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 17 +++++++++++------ net/netlink/af_netlink.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 26ded4239611..a3eab903a9cd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2136,7 +2136,7 @@ static int netlink_dump(struct sock *sk) struct sk_buff *skb = NULL; struct nlmsghdr *nlh; struct module *module; - int len, err = -ENOBUFS; + int err = -ENOBUFS; int alloc_min_size; int alloc_size; @@ -2183,9 +2183,11 @@ static int netlink_dump(struct sock *sk) skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); - len = cb->dump(skb, cb); + if (nlk->dump_done_errno > 0) + nlk->dump_done_errno = cb->dump(skb, cb); - if (len > 0) { + if (nlk->dump_done_errno > 0 || + skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { mutex_unlock(nlk->cb_mutex); if (sk_filter(sk, skb)) @@ -2195,13 +2197,15 @@ static int netlink_dump(struct sock *sk) return 0; } - nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); - if (!nlh) + nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, + sizeof(nlk->dump_done_errno), NLM_F_MULTI); + if (WARN_ON(!nlh)) goto errout_skb; nl_dump_check_consistent(cb, nlh); - memcpy(nlmsg_data(nlh), &len, sizeof(len)); + memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, + sizeof(nlk->dump_done_errno)); if (sk_filter(sk, skb)) kfree_skb(skb); @@ -2273,6 +2277,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, } nlk->cb_running = true; + nlk->dump_done_errno = INT_MAX; mutex_unlock(nlk->cb_mutex); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 028188597eaa..962de7b3c023 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -34,6 +34,7 @@ struct netlink_sock { wait_queue_head_t wait; bool bound; bool cb_running; + int dump_done_errno; struct netlink_callback cb; struct mutex *cb_mutex; struct mutex cb_def_mutex; From a1b8714593b67d0f783a9d82ed7e5800d02776bb Mon Sep 17 00:00:00 2001 From: Slava Shwartsman Date: Fri, 10 Nov 2017 09:10:29 +0200 Subject: [PATCH 2094/2177] net/mlx4: Use Kconfig flag to remove support of old gen2 Mellanox devices Since Mellanox focus is on newer adapters, we would like to have the ability to disable the support for old gen2 adapters. This can be done by turning off the MLX4_CORE_GEN2 Kconfig flag. We keep it turned on by default. Signed-off-by: Slava Shwartsman Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/Kconfig | 8 ++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 22b1cc012bc9..36054e6fb9d3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -38,3 +38,11 @@ config MLX4_DEBUG mlx4_core driver. The output can be turned on via the debug_level module parameter (which can also be set after the driver is loaded through sysfs). + +config MLX4_CORE_GEN2 + bool "Support for old gen2 Mellanox PCI IDs" if (MLX4_CORE) + depends on MLX4_CORE + default y + ---help--- + Say Y here if you want to use old gen2 Mellanox devices in the + driver. diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index e61c99ef741d..4d84cab77105 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4066,6 +4066,7 @@ int mlx4_restart_one(struct pci_dev *pdev) #define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 } static const struct pci_device_id mlx4_pci_table[] = { +#ifdef CONFIG_MLX4_CORE_GEN2 /* MT25408 "Hermon" */ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_SDR), /* SDR */ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR), /* DDR */ @@ -4085,6 +4086,7 @@ static const struct pci_device_id mlx4_pci_table[] = { MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX2), /* MT25400 Family [ConnectX-2] */ MLX_VF(0x1002), /* Virtual Function */ +#endif /* CONFIG_MLX4_CORE_GEN2 */ /* MT27500 Family [ConnectX-3] */ MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3), MLX_VF(0x1004), /* Virtual Function */ From ee9d3429c0e47a57e3e73b638785cafa33773639 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 10 Nov 2017 15:09:53 -0800 Subject: [PATCH 2095/2177] net/sched/sch_red.c: work around gcc-4.4.4 anon union initializer issue gcc-4.4.4 (at lest) has issues with initializers and anonymous unions: net/sched/sch_red.c: In function 'red_dump_offload': net/sched/sch_red.c:282: error: unknown field 'stats' specified in initializer net/sched/sch_red.c:282: warning: initialization makes integer from pointer without a cast net/sched/sch_red.c:283: error: unknown field 'stats' specified in initializer net/sched/sch_red.c:283: warning: initialization makes integer from pointer without a cast net/sched/sch_red.c: In function 'red_dump_stats': net/sched/sch_red.c:352: error: unknown field 'xstats' specified in initializer net/sched/sch_red.c:352: warning: initialization makes integer from pointer without a cast Work around this. Fixes: 602f3baf2218 ("net_sch: red: Add offload ability to RED qdisc") Cc: Nogah Frankel Cc: Jiri Pirko Cc: Simon Horman Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- net/sched/sch_red.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 007dd8ef8aac..7f8ea9e297c3 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -276,11 +276,13 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) { struct net_device *dev = qdisc_dev(sch); struct tc_red_qopt_offload hw_stats = { + .command = TC_RED_STATS, .handle = sch->handle, .parent = sch->parent, - .command = TC_RED_STATS, - .stats.bstats = &sch->bstats, - .stats.qstats = &sch->qstats, + { + .stats.bstats = &sch->bstats, + .stats.qstats = &sch->qstats, + }, }; int err; @@ -346,10 +348,12 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) { struct red_stats hw_stats = {0}; struct tc_red_qopt_offload hw_stats_request = { + .command = TC_RED_XSTATS, .handle = sch->handle, .parent = sch->parent, - .command = TC_RED_XSTATS, - .xstats = &hw_stats, + { + .xstats = &hw_stats, + }, }; if (!dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, From 5ed4e3eb021762fee584ce65620bc822131c7aa0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 15:22:52 -0800 Subject: [PATCH 2096/2177] net: dsa: Pass a port to get_tag_protocol() A number of drivers want to check whether the configured CPU port is a possible configuration for enabling tagging, pass down the CPU port number so they verify that. Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 32 +++++++++++--------------- drivers/net/dsa/bcm_sf2.c | 3 ++- drivers/net/dsa/dsa_loop.c | 3 ++- drivers/net/dsa/lan9303-core.c | 3 ++- drivers/net/dsa/microchip/ksz_common.c | 3 ++- drivers/net/dsa/mt7530.c | 4 ++-- drivers/net/dsa/mv88e6060.c | 3 ++- drivers/net/dsa/mv88e6xxx/chip.c | 3 ++- drivers/net/dsa/qca8k.c | 2 +- include/net/dsa.h | 3 ++- net/dsa/dsa2.c | 2 +- net/dsa/legacy.c | 2 +- 12 files changed, 32 insertions(+), 31 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 44a9a03bff55..f72aeb9ed12a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -541,7 +541,8 @@ EXPORT_SYMBOL(b53_disable_port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) { - bool tag_en = !!(ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_BRCM); + bool tag_en = !!(ds->ops->get_tag_protocol(ds, port) == + DSA_TAG_PROTO_BRCM); struct b53_device *dev = ds->priv; u8 hdr_ctl, val; u16 reg; @@ -1478,38 +1479,31 @@ void b53_br_fast_age(struct dsa_switch *ds, int port) } EXPORT_SYMBOL(b53_br_fast_age); -static bool b53_can_enable_brcm_tags(struct dsa_switch *ds) +static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port) { - unsigned int brcm_tag_mask; - unsigned int i; - /* Broadcom switches will accept enabling Broadcom tags on the * following ports: 5, 7 and 8, any other port is not supported */ - brcm_tag_mask = BIT(B53_CPU_PORT_25) | BIT(7) | BIT(B53_CPU_PORT); - - for (i = 0; i < ds->num_ports; i++) { - if (dsa_is_cpu_port(ds, i)) { - if (!(BIT(i) & brcm_tag_mask)) { - dev_warn(ds->dev, - "Port %d is not Broadcom tag capable\n", - i); - return false; - } - } + switch (port) { + case B53_CPU_PORT_25: + case 7: + case B53_CPU_PORT: + return true; } - return true; + dev_warn(ds->dev, "Port %d is not Broadcom tag capable\n", port); + return false; } -static enum dsa_tag_protocol 
b53_get_tag_protocol(struct dsa_switch *ds) +static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, + int port) { struct b53_device *dev = ds->priv; /* Older models support a different tag format that we do not * support in net/dsa/tag_brcm.c yet. */ - if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds)) + if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port)) return DSA_TAG_PROTO_NONE; else return DSA_TAG_PROTO_BRCM; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 2d6867f4008c..93faa1fed6f2 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -35,7 +35,8 @@ #include "b53/b53_priv.h" #include "b53/b53_regs.h" -static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds) +static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds, + int port) { return DSA_TAG_PROTO_BRCM; } diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index 3a3f4f7ba364..bb71d3d6f65b 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -64,7 +64,8 @@ struct dsa_loop_priv { static struct phy_device *phydevs[PHY_MAX_ADDR]; -static enum dsa_tag_protocol dsa_loop_get_protocol(struct dsa_switch *ds) +static enum dsa_tag_protocol dsa_loop_get_protocol(struct dsa_switch *ds, + int port) { dev_dbg(ds->dev, "%s\n", __func__); diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index a2610085e7ba..fdfdb0edfe62 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -894,7 +894,8 @@ static int lan9303_check_device(struct lan9303 *chip) /* ---------------------------- DSA -----------------------------------*/ -static enum dsa_tag_protocol lan9303_get_tag_protocol(struct dsa_switch *ds) +static enum dsa_tag_protocol lan9303_get_tag_protocol(struct dsa_switch *ds, + int port) { return DSA_TAG_PROTO_LAN9303; } diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 56cd6d365352..b5be93a1e0df 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -394,7 +394,8 @@ static int ksz_setup(struct dsa_switch *ds) return 0; } -static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds) +static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds, + int port) { return DSA_TAG_PROTO_KSZ; } diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 627c039f12ca..2820d69810b3 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -907,11 +907,11 @@ err: } static enum dsa_tag_protocol -mtk_get_tag_protocol(struct dsa_switch *ds) +mtk_get_tag_protocol(struct dsa_switch *ds, int port) { struct mt7530_priv *priv = ds->priv; - if (!dsa_is_cpu_port(ds, MT7530_CPU_PORT)) { + if (port != MT7530_CPU_PORT) { dev_warn(priv->dev, "port not matched with tagging CPU port\n"); return DSA_TAG_PROTO_NONE; diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 45768e3c5bc5..65f10fec25b3 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -70,7 +70,8 @@ static const char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr) return NULL; } -static enum dsa_tag_protocol mv88e6060_get_tag_protocol(struct dsa_switch *ds) +static enum dsa_tag_protocol mv88e6060_get_tag_protocol(struct dsa_switch *ds, + int port) { return DSA_TAG_PROTO_TRAILER; } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d6c3a22c8789..8171055fde7a 
100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3731,7 +3731,8 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, return 0; } -static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds) +static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds, + int port) { struct mv88e6xxx_chip *chip = ds->priv; diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index cf72e274275f..9df22ebee822 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -823,7 +823,7 @@ qca8k_port_fdb_dump(struct dsa_switch *ds, int port, } static enum dsa_tag_protocol -qca8k_get_tag_protocol(struct dsa_switch *ds) +qca8k_get_tag_protocol(struct dsa_switch *ds, int port) { return DSA_TAG_PROTO_QCA; } diff --git a/include/net/dsa.h b/include/net/dsa.h index 6c239257309b..68e232fd4b0f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -321,7 +321,8 @@ struct dsa_switch_ops { struct device *host_dev, int sw_addr, void **priv); - enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds); + enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, + int port); int (*setup)(struct dsa_switch *ds); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index fd54a8e17986..44e3fb7dec8c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -539,7 +539,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; - tag_protocol = ds->ops->get_tag_protocol(ds); + tag_protocol = ds->ops->get_tag_protocol(ds, dp->index); tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 4863e3e398b6..84611d7fcfa2 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -151,7 +151,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; - tag_protocol = ops->get_tag_protocol(ds); + tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index); tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(tag_ops)) return PTR_ERR(tag_ops); From f7c39e3d1e094af1c3e0676b76c00d8c1f8e7774 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 15:22:53 -0800 Subject: [PATCH 2097/2177] net: dsa: tag_brcm: Prepare for supporting prepended tag In preparation for supporting the same Broadcom tag format, but instead of inserted between the MAC SA and EtherType, prepended to the Ethernet frame, restructure the code a little bit to make that possible and take an offset parameter. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/tag_brcm.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 9e082bae3cb0..771409a1e65c 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -59,7 +59,9 @@ #define BRCM_EG_TC_MASK 0x7 #define BRCM_EG_PID_MASK 0x1f -static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, + struct net_device *dev, + unsigned int offset) { struct dsa_port *dp = dsa_slave_to_port(dev); u16 queue = skb_get_queue_mapping(skb); @@ -70,10 +72,10 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev skb_push(skb, BRCM_TAG_LEN); - memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN); + if (offset) + memmove(skb->data, skb->data + BRCM_TAG_LEN, offset); - /* Build the tag after the MAC Source Address */ - brcm_tag = skb->data + 2 * ETH_ALEN; + brcm_tag = skb->data + offset; /* Set the ingress opcode, traffic class, tag enforcment is * deprecated @@ -94,8 +96,17 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev return skb; } -static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + /* Build the tag after the MAC Source Address */ + return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN); +} + +static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *pt, + unsigned int offset) { int source_port; u8 *brcm_tag; @@ -103,8 +114,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) return NULL; - /* skb->data points to the EtherType, the tag is right before it */ - brcm_tag = skb->data - 2; + brcm_tag = skb->data - offset; /* The opcode should never be different than 0b000 */ if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK)) @@ -126,12 +136,25 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); + return skb; +} + +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt) +{ + struct sk_buff *nskb; + + /* skb->data points to the EtherType, the tag is right before it */ + nskb = brcm_tag_rcv_ll(skb, dev, pt, 2); + if (!nskb) + return nskb; + /* Move the Ethernet DA and SA */ - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - BRCM_TAG_LEN, + memmove(nskb->data - ETH_HLEN, + nskb->data - ETH_HLEN - BRCM_TAG_LEN, 2 * ETH_ALEN); - return skb; + return nskb; } const struct dsa_device_ops brcm_netdev_ops = { From b74b70c44986dee87881fbed3d912e02c5dcf78c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 15:22:54 -0800 Subject: [PATCH 2098/2177] net: dsa: Support prepended Broadcom tag Add a new type: DSA_TAG_PROTO_PREPEND which allows us to support for the 4-bytes Broadcom tag that we already support, but in a format where it is pre-pended to the packet instead of located between the MAC SA and the Ethertyper (DSA_TAG_PROTO_BRCM). Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/net/dsa.h | 1 + net/dsa/Kconfig | 3 +++ net/dsa/Makefile | 1 + net/dsa/dsa.c | 3 +++ net/dsa/dsa_priv.h | 1 + net/dsa/tag_brcm.c | 39 ++++++++++++++++++++++++++++++++------- 6 files changed, 41 insertions(+), 7 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 68e232fd4b0f..2a05738570d8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -29,6 +29,7 @@ struct fixed_phy_status; enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, DSA_TAG_PROTO_BRCM, + DSA_TAG_PROTO_BRCM_PREPEND, DSA_TAG_PROTO_DSA, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_KSZ, diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index cc5f8f971689..2fed892094bc 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -19,6 +19,9 @@ if NET_DSA config NET_DSA_TAG_BRCM bool +config NET_DSA_TAG_BRCM_PREPEND + bool + config NET_DSA_TAG_DSA bool diff --git a/net/dsa/Makefile b/net/dsa/Makefile index e9a4a0f33e86..0e13c1f95d13 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -5,6 +5,7 @@ dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o +dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index b8f2d9f7c3ed..6a9d0f50fbee 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -44,6 +44,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #ifdef CONFIG_NET_DSA_TAG_BRCM [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, #endif +#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND + [DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops, +#endif #ifdef CONFIG_NET_DSA_TAG_DSA [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops, #endif diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 507e1ce4d4d2..7d036696e8c4 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -191,6 +191,7 @@ void dsa_switch_unregister_notifier(struct dsa_switch *ds); /* tag_brcm.c */ extern const struct dsa_device_ops brcm_netdev_ops; +extern const struct dsa_device_ops brcm_prepend_netdev_ops; /* tag_dsa.c */ extern const struct dsa_device_ops dsa_netdev_ops; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 771409a1e65c..e6e0b7b6025c 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -96,13 +96,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, return skb; } -static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - /* Build the tag after the MAC Source Address */ - return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN); -} - static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, @@ -139,6 +132,15 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, return skb; } +#ifdef CONFIG_NET_DSA_TAG_BRCM +static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + /* Build the tag after the MAC Source Address */ + return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN); +} + + static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { @@ -161,3 +163,26 @@ const struct dsa_device_ops brcm_netdev_ops = { .xmit = brcm_tag_xmit, .rcv = brcm_tag_rcv, }; +#endif + +#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND +static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, + struct net_device *dev) +{ + /* tag is prepended to the packet */ + return brcm_tag_xmit_ll(skb, dev, 0); +} + +static struct 
sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *pt) +{ + /* tag is prepended to the packet */ + return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN); +} + +const struct dsa_device_ops brcm_prepend_netdev_ops = { + .xmit = brcm_tag_xmit_prepend, + .rcv = brcm_tag_rcv_prepend, +}; +#endif From 11606039604c4ce2d3c5045e30efb0c687a6a0de Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 10 Nov 2017 15:22:55 -0800 Subject: [PATCH 2099/2177] net: dsa: b53: Support prepended Broadcom tags On BCM58xx devices (Northstar Plus), there is an accelerator attached to port 8 which would only work if we use prepended Broadcom tags. Resolve that difference in our get_tag_protocol() function by setting the appropriate tagging protocol in that case. We need to change b53_brcm_hdr_setup() a little bit now since we can deal with two types of Broadcom tags. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/b53/Kconfig | 1 + drivers/net/dsa/b53/b53_common.c | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/Kconfig b/drivers/net/dsa/b53/Kconfig index b413d100c6b3..2f988216dab9 100644 --- a/drivers/net/dsa/b53/Kconfig +++ b/drivers/net/dsa/b53/Kconfig @@ -2,6 +2,7 @@ menuconfig B53 tristate "Broadcom BCM53xx managed switch support" depends on NET_DSA select NET_DSA_TAG_BRCM + select NET_DSA_TAG_BRCM_PREPEND help This driver adds support for Broadcom managed switch chips. It supports BCM5325E, BCM5365, BCM539x, BCM53115 and BCM53125 as well as BCM63XX diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index f72aeb9ed12a..f5a8dd96fd75 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -541,8 +541,8 @@ EXPORT_SYMBOL(b53_disable_port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) { - bool tag_en = !!(ds->ops->get_tag_protocol(ds, port) == - DSA_TAG_PROTO_BRCM); + bool tag_en = !(ds->ops->get_tag_protocol(ds, port) == + DSA_TAG_PROTO_NONE); struct b53_device *dev = ds->priv; u8 hdr_ctl, val; u16 reg; @@ -1505,8 +1505,14 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, */ if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port)) return DSA_TAG_PROTO_NONE; - else - return DSA_TAG_PROTO_BRCM; + + /* Broadcom BCM58xx chips have a flow accelerator on Port 8 + * which requires us to use the prepended Broadcom tag type + */ + if (dev->chip_id == BCM58XX_DEVICE_ID && port == B53_CPU_PORT) + return DSA_TAG_PROTO_BRCM_PREPEND; + + return DSA_TAG_PROTO_BRCM; } int b53_mirror_add(struct dsa_switch *ds, int port, From 5794040647de4011598a6d005fdad95d24fd385b Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 10 Nov 2017 12:09:40 -0800 Subject: [PATCH 2100/2177] openvswitch: Add meter netlink definitions Meter has its own netlink family. Define netlink messages and attributes for communicating with the user space programs. Signed-off-by: Andy Zhou Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index ec75a685f1dd..d60b9a4cf3d1 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -883,4 +883,55 @@ enum ovs_action_attr { #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) +/* Meters. 
*/ +#define OVS_METER_FAMILY "ovs_meter" +#define OVS_METER_MCGROUP "ovs_meter" +#define OVS_METER_VERSION 0x1 + +enum ovs_meter_cmd { + OVS_METER_CMD_UNSPEC, + OVS_METER_CMD_FEATURES, /* Get features supported by the datapath. */ + OVS_METER_CMD_SET, /* Add or modify a meter. */ + OVS_METER_CMD_DEL, /* Delete a meter. */ + OVS_METER_CMD_GET /* Get meter stats. */ +}; + +enum ovs_meter_attr { + OVS_METER_ATTR_UNSPEC, + OVS_METER_ATTR_ID, /* u32 meter ID within datapath. */ + OVS_METER_ATTR_KBPS, /* No argument. If set, units in kilobits + * per second. Otherwise, units in + * packets per second. + */ + OVS_METER_ATTR_STATS, /* struct ovs_flow_stats for the meter. */ + OVS_METER_ATTR_BANDS, /* Nested attributes for meter bands. */ + OVS_METER_ATTR_USED, /* u64 msecs last used in monotonic time. */ + OVS_METER_ATTR_CLEAR, /* Flag to clear stats, used. */ + OVS_METER_ATTR_MAX_METERS, /* u32 number of meters supported. */ + OVS_METER_ATTR_MAX_BANDS, /* u32 max number of bands per meter. */ + OVS_METER_ATTR_PAD, + __OVS_METER_ATTR_MAX +}; + +#define OVS_METER_ATTR_MAX (__OVS_METER_ATTR_MAX - 1) + +enum ovs_band_attr { + OVS_BAND_ATTR_UNSPEC, + OVS_BAND_ATTR_TYPE, /* u32 OVS_METER_BAND_TYPE_* constant. */ + OVS_BAND_ATTR_RATE, /* u32 band rate in meter units (see above). */ + OVS_BAND_ATTR_BURST, /* u32 burst size in meter units. */ + OVS_BAND_ATTR_STATS, /* struct ovs_flow_stats for the band. */ + __OVS_BAND_ATTR_MAX +}; + +#define OVS_BAND_ATTR_MAX (__OVS_BAND_ATTR_MAX - 1) + +enum ovs_meter_band_type { + OVS_METER_BAND_TYPE_UNSPEC, + OVS_METER_BAND_TYPE_DROP, /* Drop exceeding packets. */ + __OVS_METER_BAND_TYPE_MAX +}; + +#define OVS_METER_BAND_TYPE_MAX (__OVS_METER_BAND_TYPE_MAX - 1) + #endif /* _LINUX_OPENVSWITCH_H */ From 9602c01e57f7b868d748c2ba2aef0efa64b71ffc Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 10 Nov 2017 12:09:41 -0800 Subject: [PATCH 2101/2177] openvswitch: export get_dp() API. Later patches will invoke get_dp() outside of datapath.c. Export it. Signed-off-by: Andy Zhou Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 29 ----------------------------- net/openvswitch/datapath.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4d38ac044cee..6e098035bb8f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -142,35 +142,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *, uint32_t cutlen); -/* Must be called with rcu_read_lock. */ -static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) -{ - struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); - - if (dev) { - struct vport *vport = ovs_internal_dev_get_vport(dev); - if (vport) - return vport->dp; - } - - return NULL; -} - -/* The caller must hold either ovs_mutex or rcu_read_lock to keep the - * returned dp pointer valid. - */ -static inline struct datapath *get_dp(struct net *net, int dp_ifindex) -{ - struct datapath *dp; - - WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); - rcu_read_lock(); - dp = get_dp_rcu(net, dp_ifindex); - rcu_read_unlock(); - - return dp; -} - /* Must be called with rcu_read_lock or ovs_mutex. 
*/ const char *ovs_dp_name(const struct datapath *dp) { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4a104ef9e12c..954c4ed465a5 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -30,6 +30,7 @@ #include "conntrack.h" #include "flow.h" #include "flow_table.h" +#include "vport-internal_dev.h" #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 @@ -190,6 +191,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n return ovs_lookup_vport(dp, port_no); } +/* Must be called with rcu_read_lock. */ +static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) +{ + struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); + + if (dev) { + struct vport *vport = ovs_internal_dev_get_vport(dev); + + if (vport) + return vport->dp; + } + + return NULL; +} + +/* The caller must hold either ovs_mutex or rcu_read_lock to keep the + * returned dp pointer valid. + */ +static inline struct datapath *get_dp(struct net *net, int dp_ifindex) +{ + struct datapath *dp; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); + rcu_read_lock(); + dp = get_dp_rcu(net, dp_ifindex); + rcu_read_unlock(); + + return dp; +} + extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; From 96fbc13d7e770b542d2d1fcf700d0baadc6e8063 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 10 Nov 2017 12:09:42 -0800 Subject: [PATCH 2102/2177] openvswitch: Add meter infrastructure OVS kernel datapath so far does not support Openflow meter action. This is the first stab at adding kernel datapath meter support. This implementation supports only drop band type. Signed-off-by: Andy Zhou Signed-off-by: David S. Miller --- net/openvswitch/Makefile | 1 + net/openvswitch/datapath.c | 14 +- net/openvswitch/datapath.h | 3 + net/openvswitch/meter.c | 604 +++++++++++++++++++++++++++++++++++++ net/openvswitch/meter.h | 54 ++++ 5 files changed, 674 insertions(+), 2 deletions(-) create mode 100644 net/openvswitch/meter.c create mode 100644 net/openvswitch/meter.h diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 299f4476cf44..41109c326f3a 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -12,6 +12,7 @@ openvswitch-y := \ flow.o \ flow_netlink.o \ flow_table.o \ + meter.o \ vport.o \ vport-internal_dev.o \ vport-netdev.o diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6e098035bb8f..0dab33fb9844 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -55,6 +55,7 @@ #include "flow.h" #include "flow_table.h" #include "flow_netlink.h" +#include "meter.h" #include "vport-internal_dev.h" #include "vport-netdev.h" @@ -174,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); kfree(dp->ports); + ovs_meters_exit(dp); kfree(dp); } @@ -1572,6 +1574,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) INIT_HLIST_HEAD(&dp->ports[i]); + err = ovs_meters_init(dp); + if (err) + goto err_destroy_ports_array; + /* Set up our datapath device. 
*/ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.type = OVS_VPORT_TYPE_INTERNAL; @@ -1600,7 +1606,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } - goto err_destroy_ports_array; + goto err_destroy_meters; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, @@ -1615,8 +1621,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_ports_array: +err_destroy_meters: ovs_unlock(); + ovs_meters_exit(dp); +err_destroy_ports_array: kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); @@ -2265,6 +2273,7 @@ static struct genl_family * const dp_genl_families[] = { &dp_vport_genl_family, &dp_flow_genl_family, &dp_packet_genl_family, + &dp_meter_genl_family, }; static void dp_unregister_genl(int n_families) @@ -2445,3 +2454,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 954c4ed465a5..5d2997b42460 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -92,6 +92,9 @@ struct datapath { u32 user_features; u32 max_headroom; + + /* Switch meters. */ + struct hlist_head *meters; }; /** diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c new file mode 100644 index 000000000000..2a5ba356c472 --- /dev/null +++ b/net/openvswitch/meter.c @@ -0,0 +1,604 @@ +/* + * Copyright (c) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "datapath.h" +#include "meter.h" + +#define METER_HASH_BUCKETS 1024 + +static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { + [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, + [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, + [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, + [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED }, + [OVS_METER_ATTR_USED] = { .type = NLA_U64 }, + [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG }, + [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 }, + [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { + [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, +}; + +static void rcu_free_ovs_meter_callback(struct rcu_head *rcu) +{ + struct dp_meter *meter = container_of(rcu, struct dp_meter, rcu); + + kfree(meter); +} + +static void ovs_meter_free(struct dp_meter *meter) +{ + if (!meter) + return; + + call_rcu(&meter->rcu, rcu_free_ovs_meter_callback); +} + +static struct hlist_head *meter_hash_bucket(const struct datapath *dp, + u32 meter_id) +{ + return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)]; +} + +/* Call with ovs_mutex or RCU read lock. 
*/ +static struct dp_meter *lookup_meter(const struct datapath *dp, + u32 meter_id) +{ + struct dp_meter *meter; + struct hlist_head *head; + + head = meter_hash_bucket(dp, meter_id); + hlist_for_each_entry_rcu(meter, head, dp_hash_node) { + if (meter->id == meter_id) + return meter; + } + return NULL; +} + +static void attach_meter(struct datapath *dp, struct dp_meter *meter) +{ + struct hlist_head *head = meter_hash_bucket(dp, meter->id); + + hlist_add_head_rcu(&meter->dp_hash_node, head); +} + +static void detach_meter(struct dp_meter *meter) +{ + ASSERT_OVSL(); + if (meter) + hlist_del_rcu(&meter->dp_hash_node); +} + +static struct sk_buff * +ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, + struct ovs_header **ovs_reply_header) +{ + struct sk_buff *skb; + struct ovs_header *ovs_header = info->userhdr; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + + *ovs_reply_header = genlmsg_put(skb, info->snd_portid, + info->snd_seq, + &dp_meter_genl_family, 0, cmd); + if (!ovs_reply_header) { + nlmsg_free(skb); + return ERR_PTR(-EMSGSIZE); + } + (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; + + return skb; +} + +static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, + struct dp_meter *meter) +{ + struct nlattr *nla; + struct dp_meter_band *band; + u16 i; + + if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) + goto error; + + if (!meter) + return 0; + + if (nla_put(reply, OVS_METER_ATTR_STATS, + sizeof(struct ovs_flow_stats), &meter->stats) || + nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, + OVS_METER_ATTR_PAD)) + goto error; + + nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + if (!nla) + goto error; + + band = meter->bands; + + for (i = 0; i < meter->n_bands; ++i, ++band) { + struct nlattr *band_nla; + + band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, + sizeof(struct ovs_flow_stats), + &band->stats)) + goto error; + nla_nest_end(reply, band_nla); + } + nla_nest_end(reply, nla); + + return 0; +error: + return -EMSGSIZE; +} + +static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *reply; + struct ovs_header *ovs_reply_header; + struct nlattr *nla, *band_nla; + int err; + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, + &ovs_reply_header); + if (!reply) + return PTR_ERR(reply); + + if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || + nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) + goto nla_put_failure; + + nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + if (!nla) + goto nla_put_failure; + + band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + if (!band_nla) + goto nla_put_failure; + /* Currently only DROP band type is supported. */ + if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP)) + goto nla_put_failure; + nla_nest_end(reply, band_nla); + nla_nest_end(reply, nla); + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +nla_put_failure: + nlmsg_free(reply); + err = -EMSGSIZE; + return err; +} + +static struct dp_meter *dp_meter_create(struct nlattr **a) +{ + struct nlattr *nla; + int rem; + u16 n_bands = 0; + struct dp_meter *meter; + struct dp_meter_band *band; + int err; + + /* Validate attributes, count the bands. 
*/ + if (!a[OVS_METER_ATTR_BANDS]) + return ERR_PTR(-EINVAL); + + nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) + if (++n_bands > DP_MAX_BANDS) + return ERR_PTR(-EINVAL); + + /* Allocate and set up the meter before locking anything. */ + meter = kzalloc(n_bands * sizeof(struct dp_meter_band) + + sizeof(*meter), GFP_KERNEL); + if (!meter) + return ERR_PTR(-ENOMEM); + + meter->used = div_u64(ktime_get_ns(), 1000 * 1000); + meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; + meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; + spin_lock_init(&meter->lock); + if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) { + meter->stats = *(struct ovs_flow_stats *) + nla_data(a[OVS_METER_ATTR_STATS]); + } + meter->n_bands = n_bands; + + /* Set up meter bands. */ + band = meter->bands; + nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) { + struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; + u32 band_max_delta_t; + + err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX, + nla_data(nla), nla_len(nla), band_policy, + NULL); + if (err) + goto exit_free_meter; + + if (!attr[OVS_BAND_ATTR_TYPE] || + !attr[OVS_BAND_ATTR_RATE] || + !attr[OVS_BAND_ATTR_BURST]) { + err = -EINVAL; + goto exit_free_meter; + } + + band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); + band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); + band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); + /* Figure out max delta_t that is enough to fill any bucket. + * Keep max_delta_t size to the bucket units: + * pkts => 1/1000 packets, kilobits => bits. + */ + band_max_delta_t = (band->burst_size + band->rate) * 1000; + /* Start with a full bucket. */ + band->bucket = band_max_delta_t; + if (band_max_delta_t > meter->max_delta_t) + meter->max_delta_t = band_max_delta_t; + band++; + } + + return meter; + +exit_free_meter: + kfree(meter); + return ERR_PTR(err); +} + +static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct dp_meter *meter, *old_meter; + struct sk_buff *reply; + struct ovs_header *ovs_reply_header; + struct ovs_header *ovs_header = info->userhdr; + struct datapath *dp; + int err; + u32 meter_id; + bool failed; + + meter = dp_meter_create(a); + if (IS_ERR_OR_NULL(meter)) + return PTR_ERR(meter); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, + &ovs_reply_header); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + goto exit_free_meter; + } + + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + if (!a[OVS_METER_ATTR_ID]) { + err = -ENODEV; + goto exit_unlock; + } + + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + /* Cannot fail after this. */ + old_meter = lookup_meter(dp, meter_id); + detach_meter(old_meter); + attach_meter(dp, meter); + ovs_unlock(); + + /* Build response with the meter_id and stats from + * the old meter, if any. 
+ */ + failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id); + WARN_ON(failed); + if (old_meter) { + spin_lock_bh(&old_meter->lock); + if (old_meter->keep_stats) { + err = ovs_meter_cmd_reply_stats(reply, meter_id, + old_meter); + WARN_ON(err); + } + spin_unlock_bh(&old_meter->lock); + ovs_meter_free(old_meter); + } + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); +exit_free_meter: + kfree(meter); + return err; +} + +static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + u32 meter_id; + struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_reply_header; + struct datapath *dp; + int err; + struct sk_buff *reply; + struct dp_meter *meter; + + if (!a[OVS_METER_ATTR_ID]) + return -EINVAL; + + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET, + &ovs_reply_header); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + ovs_lock(); + + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + /* Locate meter, copy stats. */ + meter = lookup_meter(dp, meter_id); + if (!meter) { + err = -ENOENT; + goto exit_unlock; + } + + spin_lock_bh(&meter->lock); + err = ovs_meter_cmd_reply_stats(reply, meter_id, meter); + spin_unlock_bh(&meter->lock); + if (err) + goto exit_unlock; + + ovs_unlock(); + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); + return err; +} + +static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + u32 meter_id; + struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_reply_header; + struct datapath *dp; + int err; + struct sk_buff *reply; + struct dp_meter *old_meter; + + if (!a[OVS_METER_ATTR_ID]) + return -EINVAL; + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, + &ovs_reply_header); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + ovs_lock(); + + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + old_meter = lookup_meter(dp, meter_id); + if (old_meter) { + spin_lock_bh(&old_meter->lock); + err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); + WARN_ON(err); + spin_unlock_bh(&old_meter->lock); + detach_meter(old_meter); + } + ovs_unlock(); + ovs_meter_free(old_meter); + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); + return err; +} + +/* Meter action execution. + * + * Return true 'meter_id' drop band is triggered. The 'skb' should be + * dropped by the caller'. + */ +bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 meter_id) +{ + struct dp_meter *meter; + struct dp_meter_band *band; + long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); + long long int long_delta_ms; + u32 delta_ms; + u32 cost; + int i, band_exceeded_max = -1; + u32 band_exceeded_rate = 0; + + meter = lookup_meter(dp, meter_id); + /* Do not drop the packet when there is no meter. */ + if (!meter) + return false; + + /* Lock the meter while using it. */ + spin_lock(&meter->lock); + + long_delta_ms = (now_ms - meter->used); /* ms */ + + /* Make sure delta_ms will not be too large, so that bucket will not + * wrap around below. 
+ */ + delta_ms = (long_delta_ms > (long long int)meter->max_delta_t) + ? meter->max_delta_t : (u32)long_delta_ms; + + /* Update meter statistics. + */ + meter->used = now_ms; + meter->stats.n_packets += 1; + meter->stats.n_bytes += skb->len; + + /* Bucket rate is either in kilobits per second, or in packets per + * second. We maintain the bucket in the units of either bits or + * 1/1000th of a packet, correspondingly. + * Then, when rate is multiplied with milliseconds, we get the + * bucket units: + * msec * kbps = bits, and + * msec * packets/sec = 1/1000 packets. + * + * 'cost' is the number of bucket units in this packet. + */ + cost = (meter->kbps) ? skb->len * 8 : 1000; + + /* Update all bands and find the one hit with the highest rate. */ + for (i = 0; i < meter->n_bands; ++i) { + long long int max_bucket_size; + + band = &meter->bands[i]; + max_bucket_size = (band->burst_size + band->rate) * 1000; + + band->bucket += delta_ms * band->rate; + if (band->bucket > max_bucket_size) + band->bucket = max_bucket_size; + + if (band->bucket >= cost) { + band->bucket -= cost; + } else if (band->rate > band_exceeded_rate) { + band_exceeded_rate = band->rate; + band_exceeded_max = i; + } + } + + if (band_exceeded_max >= 0) { + /* Update band statistics. */ + band = &meter->bands[band_exceeded_max]; + band->stats.n_packets += 1; + band->stats.n_bytes += skb->len; + + /* Drop band triggered, let the caller drop the 'skb'. */ + if (band->type == OVS_METER_BAND_TYPE_DROP) { + spin_unlock(&meter->lock); + return true; + } + } + + spin_unlock(&meter->lock); + return false; +} + +static struct genl_ops dp_meter_genl_ops[] = { + { .cmd = OVS_METER_CMD_FEATURES, + .flags = 0, /* OK for unprivileged users. */ + .policy = meter_policy, + .doit = ovs_meter_cmd_features + }, + { .cmd = OVS_METER_CMD_SET, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ + .policy = meter_policy, + .doit = ovs_meter_cmd_set, + }, + { .cmd = OVS_METER_CMD_GET, + .flags = 0, /* OK for unprivileged users. */ + .policy = meter_policy, + .doit = ovs_meter_cmd_get, + }, + { .cmd = OVS_METER_CMD_DEL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. 
+ */ + .policy = meter_policy, + .doit = ovs_meter_cmd_del + }, +}; + +static const struct genl_multicast_group ovs_meter_multicast_group = { + .name = OVS_METER_MCGROUP, +}; + +struct genl_family dp_meter_genl_family __ro_after_init = { + .hdrsize = sizeof(struct ovs_header), + .name = OVS_METER_FAMILY, + .version = OVS_METER_VERSION, + .maxattr = OVS_METER_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_meter_genl_ops, + .n_ops = ARRAY_SIZE(dp_meter_genl_ops), + .mcgrps = &ovs_meter_multicast_group, + .n_mcgrps = 1, + .module = THIS_MODULE, +}; + +int ovs_meters_init(struct datapath *dp) +{ + int i; + + dp->meters = kmalloc_array(METER_HASH_BUCKETS, + sizeof(struct hlist_head), GFP_KERNEL); + + if (!dp->meters) + return -ENOMEM; + + for (i = 0; i < METER_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->meters[i]); + + return 0; +} + +void ovs_meters_exit(struct datapath *dp) +{ + int i; + + for (i = 0; i < METER_HASH_BUCKETS; i++) { + struct hlist_head *head = &dp->meters[i]; + struct dp_meter *meter; + struct hlist_node *n; + + hlist_for_each_entry_safe(meter, n, head, dp_hash_node) + kfree(meter); + } + + kfree(dp->meters); +} diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h new file mode 100644 index 000000000000..964ace2650f8 --- /dev/null +++ b/net/openvswitch/meter.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#ifndef METER_H +#define METER_H 1 + +#include +#include +#include +#include +#include +#include +#include + +#include "flow.h" +struct datapath; + +#define DP_MAX_BANDS 1 + +struct dp_meter_band { + u32 type; + u32 rate; + u32 burst_size; + u32 bucket; /* 1/1000 packets, or in bits */ + struct ovs_flow_stats stats; +}; + +struct dp_meter { + spinlock_t lock; /* Per meter lock */ + struct rcu_head rcu; + struct hlist_node dp_hash_node; /*Element in datapath->meters + * hash table. + */ + u32 id; + u16 kbps:1, keep_stats:1; + u16 n_bands; + u32 max_delta_t; + u64 used; + struct ovs_flow_stats stats; + struct dp_meter_band bands[]; +}; + +extern struct genl_family dp_meter_genl_family; +int ovs_meters_init(struct datapath *dp); +void ovs_meters_exit(struct datapath *dp); +bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 meter_id); + +#endif /* meter.h */ From cd8a6c33693c1b89d2737ffdbf9611564e9ac907 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 10 Nov 2017 12:09:43 -0800 Subject: [PATCH 2103/2177] openvswitch: Add meter action support Implements OVS kernel meter action support. Signed-off-by: Andy Zhou Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 3 +++ net/openvswitch/actions.c | 6 ++++++ net/openvswitch/datapath.h | 1 + net/openvswitch/flow_netlink.c | 6 ++++++ 4 files changed, 16 insertions(+) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d60b9a4cf3d1..4265d7f9e1f2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -838,6 +838,8 @@ struct ovs_action_push_eth { * @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet. * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet. * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet. 
+ * @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the + * packet, or modify the packet (e.g., change the DSCP field). * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -870,6 +872,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */ OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */ OVS_ACTION_ATTR_POP_NSH, /* No argument. */ + OVS_ACTION_ATTR_METER, /* u32 meter ID. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 9a6a6d51e421..30a5df27116e 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1330,6 +1330,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_POP_NSH: err = pop_nsh(skb, key); break; + + case OVS_ACTION_ATTR_METER: + if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) { + consume_skb(skb); + return 0; + } } if (unlikely(err)) { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 5d2997b42460..523d65526766 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -30,6 +30,7 @@ #include "conntrack.h" #include "flow.h" #include "flow_table.h" +#include "meter.h" #include "vport-internal_dev.h" #define DP_MAX_PORTS USHRT_MAX diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4201f9293af3..bb4dae198c78 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -90,6 +90,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_SET: case OVS_ACTION_ATTR_SET_MASKED: + case OVS_ACTION_ATTR_METER: default: return true; } @@ -2844,6 +2845,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_POP_ETH] = 0, [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1, [OVS_ACTION_ATTR_POP_NSH] = 0, + [OVS_ACTION_ATTR_METER] = sizeof(u32), }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3029,6 +3031,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, break; } + case OVS_ACTION_ATTR_METER: + /* Non-existent meters are simply ignored. */ + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; From 0d728b844c2dd8dd3875ed304eee43967c5d14f6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 10 Nov 2017 21:10:00 -0500 Subject: [PATCH 2104/2177] forcedeth: remove redudant assignments in xmit In xmit process, the variables are set many times. In fact, it is enough for these variables to be set once. After a long time test, the throughput performance is better than before. CC: Srinivas Eeda CC: Joe Jin CC: Junxiao Bi Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 28 ++++++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 31a943860f32..ac8439ceea10 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2226,8 +2226,6 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) /* setup the header buffer */ do { - prev_tx = put_tx; - prev_tx_ctx = np->put_tx_ctx; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? 
NV_TX2_TSO_MAX_SIZE : size; np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data + offset, bcnt, @@ -2262,8 +2260,6 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) offset = 0; do { - prev_tx = put_tx; - prev_tx_ctx = np->put_tx_ctx; if (!start_tx_ctx) start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx; @@ -2304,6 +2300,16 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) } while (frag_size); } + if (unlikely(put_tx == np->first_tx.orig)) + prev_tx = np->last_tx.orig; + else + prev_tx = put_tx - 1; + + if (unlikely(np->put_tx_ctx == np->first_tx_ctx)) + prev_tx_ctx = np->last_tx_ctx; + else + prev_tx_ctx = np->put_tx_ctx - 1; + /* set last fragment flag */ prev_tx->flaglen |= cpu_to_le32(tx_flags_extra); @@ -2377,8 +2383,6 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, /* setup the header buffer */ do { - prev_tx = put_tx; - prev_tx_ctx = np->put_tx_ctx; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data + offset, bcnt, @@ -2414,8 +2418,6 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, offset = 0; do { - prev_tx = put_tx; - prev_tx_ctx = np->put_tx_ctx; bcnt = (frag_size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : frag_size; if (!start_tx_ctx) start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx; @@ -2456,6 +2458,16 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, } while (frag_size); } + if (unlikely(put_tx == np->first_tx.ex)) + prev_tx = np->last_tx.ex; + else + prev_tx = put_tx - 1; + + if (unlikely(np->put_tx_ctx == np->first_tx_ctx)) + prev_tx_ctx = np->last_tx_ctx; + else + prev_tx_ctx = np->put_tx_ctx - 1; + /* set last fragment flag */ prev_tx->flaglen |= cpu_to_le32(NV_TX2_LASTPACKET); From 929fc0327569aa745c9c3cb68a213c22fad3f3f9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 11 Nov 2017 19:06:49 +0800 Subject: [PATCH 2105/2177] ip6_gre: add the process for redirect in ip6gre_err This patch is to add redirect icmp packet process for ip6gre by calling ip6_redirect() in ip6gre_err(), as in vti6_err. Prior to this patch, there's even no route cache generated after receiving redirect. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/ipv6/ip6_gre.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3e10c51e7e0c..0684d0ccaaa5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); const struct gre_base_hdr *greh; const struct ipv6hdr *ipv6h; int grehlen = sizeof(*greh); @@ -442,6 +443,10 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; return; + case NDISC_REDIRECT: + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); + return; } if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) From fe1a4ca0a2b7884672661284666a0b8c183b0b1e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 11 Nov 2017 19:06:50 +0800 Subject: [PATCH 2106/2177] ip6_gre: process toobig in a better way Now ip6gre processes toobig icmp packet by setting gre dev's mtu in ip6gre_err, which would cause few things not good: - It couldn't set mtu with dev_set_mtu due to it's not in user context, which causes route cache and idev->cnf.mtu6 not to be updated. - It has to update sk dst pmtu in tx path according to gredev->mtu for ip6gre, while it updates pmtu again according to lower dst pmtu in ip6_tnl_xmit. - To change dev->mtu by toobig icmp packet is not a good idea, it should only work on pmtu. This patch is to process toobig by updating the lower dst's pmtu, as later sk dst pmtu will be updated in ip6_tnl_xmit, the same way as in ip4gre. Note that gre dev's mtu will not be updated any more, it doesn't make any sense to change dev's mtu after receiving a toobig packet. Signed-off-by: Xin Long Signed-off-by: David S. 
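The two ip6_gre changes above follow the same principle: an ICMPv6 error delivered to the tunnel's err handler should update the routing layer (the route's PMTU, or its gateway on a redirect) rather than the tunnel netdevice itself. A minimal sketch of that pattern, built only from the calls these patches add (ip6_redirect() in the hunk above, ip6_update_pmtu() in the hunk that follows); the handler skeleton around them is illustrative, not the real ip6gre_err():

    /* Illustrative skeleton only -- the real handlers (ip6gre_err,
     * ip6_tnl_err) perform additional parsing and checks around this.
     */
    static void example_tnl_err(struct sk_buff *skb, u8 type, __be32 info)
    {
            struct net *net = dev_net(skb->dev);

            switch (type) {
            case ICMPV6_PKT_TOOBIG:
                    /* Update the lower route's PMTU; leave dev->mtu alone. */
                    ip6_update_pmtu(skb, net, info, 0, 0,
                                    sock_net_uid(net, NULL));
                    break;
            case NDISC_REDIRECT:
                    /* Record the new gateway for the lower route. */
                    ip6_redirect(skb, net, skb->dev->ifindex, 0,
                                 sock_net_uid(net, NULL));
                    break;
            }
    }
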
Miller --- net/ipv6/ip6_gre.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0684d0ccaaa5..b90bad7a4e56 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -403,9 +403,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; switch (type) { - __u32 teli; struct ipv6_tlv_tnl_enc_lim *tel; - __u32 mtu; + __u32 teli; case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); @@ -436,12 +435,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } return; case ICMPV6_PKT_TOOBIG: - mtu = be32_to_cpu(info) - offset - t->tun_hlen; - if (t->dev->type == ARPHRD_ETHER) - mtu -= ETH_HLEN; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - t->dev->mtu = mtu; + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); return; case NDISC_REDIRECT: ip6_redirect(skb, net, skb->dev->ifindex, 0, @@ -508,7 +502,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); - struct dst_entry *dst = skb_dst(skb); __be16 protocol; if (dev->type == ARPHRD_ETHER) @@ -527,10 +520,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); - /* TooBig packet may have updated dst->dev's mtu */ - if (dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); - return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); } From 383c1f88759bba7f387b7b705c31081e5df40b38 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 11 Nov 2017 19:06:51 +0800 Subject: [PATCH 2107/2177] ip6_tunnel: add the process for redirect in ip6_tnl_err The same process for redirect in "ip6_gre: add the process for redirect in ip6gre_err" is needed by ip4ip6 and ip6ip6 as well. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 439d65f7e094..a1f704c799d9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -471,15 +471,16 @@ static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, u8 *type, u8 *code, int *msg, __u32 *info, int offset) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data; - struct ip6_tnl *t; - int rel_msg = 0; + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; + struct net *net = dev_net(skb->dev); u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH; - u8 tproto; __u32 rel_info = 0; - __u16 len; + struct ip6_tnl *t; int err = -ENOENT; + int rel_msg = 0; + u8 tproto; + __u16 len; /* If the packet doesn't contain the original IPv6 header we are in trouble since we might need the source address for further @@ -543,6 +544,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, rel_msg = 1; } break; + case NDISC_REDIRECT: + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); + break; } *type = rel_type; From b00f543240b9423eda73ad06c57becdd5478bc26 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 11 Nov 2017 19:06:52 +0800 Subject: [PATCH 2108/2177] ip6_tunnel: process toobig in a better way The same improvement in "ip6_gre: process toobig in a better way" is needed by ip4ip6 and ip6ip6 as well. 
Note that ip4ip6 and ip6ip6 will also update sk dst pmtu in their err_handlers. Like I said before, gre6 could not do this as it's inner proto is not certain. But for all of them, sk dst pmtu will be updated in tx path if in need. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a1f704c799d9..7e9e205b69d9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -498,9 +498,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, err = 0; switch (*type) { - __u32 teli; struct ipv6_tlv_tnl_enc_lim *tel; - __u32 mtu; + __u32 mtu, teli; case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); @@ -531,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, } break; case ICMPV6_PKT_TOOBIG: + ip6_update_pmtu(skb, net, htonl(*info), 0, 0, + sock_net_uid(net, NULL)); mtu = *info - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - t->dev->mtu = mtu; - len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); if (len > mtu) { rel_type = ICMPV6_PKT_TOOBIG; From 77552cfa39c48e695c39d0553afc8c6018e411ce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 11 Nov 2017 19:06:53 +0800 Subject: [PATCH 2109/2177] ip6_tunnel: clean up ip4ip6 and ip6ip6's err_handlers This patch is to remove some useless codes of redirect and fix some indents on ip4ip6 and ip6ip6's err_handlers. Note that redirect icmp packet is already processed in ip6_tnl_err, the old redirect codes in ip4ip6_err actually never worked even before this patch. Besides, there's no need to send redirect to user's sk, it's for lower dst, so just remove it in this patch. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 7e9e205b69d9..00882fdb1223 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -563,13 +563,12 @@ static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - int rel_msg = 0; + __u32 rel_info = ntohl(info); + const struct iphdr *eiph; + struct sk_buff *skb2; + int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; - __u32 rel_info = ntohl(info); - int err; - struct sk_buff *skb2; - const struct iphdr *eiph; struct rtable *rt; struct flowi4 fl4; @@ -594,10 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; - case NDISC_REDIRECT: - rel_type = ICMP_REDIRECT; - rel_code = ICMP_REDIR_HOST; - /* fall through */ default: return 0; } @@ -616,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, eiph = ip_hdr(skb2); /* Try to guess incoming interface */ - rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, - eiph->saddr, 0, - 0, 0, - IPPROTO_IPIP, RT_TOS(eiph->tos), 0); + rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr, + 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt)) goto out; skb2->dev = rt->dst.dev; + ip_rt_put(rt); /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, - eiph->daddr, eiph->saddr, - 0, 0, - IPPROTO_IPIP, - RT_TOS(eiph->tos), 0); - if (IS_ERR(rt) || - rt->dst.dev->type != ARPHRD_TUNNEL) { + eiph->daddr, eiph->saddr, 0, 0, + IPPROTO_IPIP, RT_TOS(eiph->tos), 0); + if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; } skb_dst_set(skb2, &rt->dst); } else { - ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) @@ -654,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, + rel_info); } - if (rel_type == ICMP_REDIRECT) - skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -670,11 +657,10 @@ static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - int rel_msg = 0; + __u32 rel_info = ntohl(info); + int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; - __u32 rel_info = ntohl(info); - int err; err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); From ee0ab7a2b00fdb65e50ed9e252900b29c8fd802f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 11 Nov 2017 16:29:41 +0100 Subject: [PATCH 2110/2177] net: dsa: Fix dependencies on bridge DSA now uses one of the symbols exported by the bridge, br_vlan_enabled(). This has a stub, if the bridge is not enabled. 
However, if the bridge is enabled, we cannot have DSA built in and the bridge as a module, otherwise we get undefined symbols at link time: net/dsa/port.o: In function `dsa_port_vlan_add': net/dsa/port.c:255: undefined reference to `br_vlan_enabled' net/dsa/port.o: In function `dsa_port_vlan_del': net/dsa/port.c:270: undefined reference to `br_vlan_enabled' Reported-by: kbuild test robot Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 2fed892094bc..03c3bdf25468 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -7,6 +7,7 @@ config HAVE_NET_DSA config NET_DSA tristate "Distributed Switch Architecture" depends on HAVE_NET_DSA && MAY_USE_DEVLINK + depends on BRIDGE || BRIDGE=n select NET_SWITCHDEV select PHYLIB ---help--- From 663faeab5c7e205160f9dbeb9a699c5dbdb78ce2 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:26:53 +0300 Subject: [PATCH 2111/2177] af_key: replace BUG_ON on WARN_ON in net_exit hook Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index a00d607e7224..3dffb892d52c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3845,7 +3845,7 @@ static void __net_exit pfkey_net_exit(struct net *net) struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_exit_proc(net); - BUG_ON(!hlist_empty(&net_pfkey->table)); + WARN_ON(!hlist_empty(&net_pfkey->table)); } static struct pernet_operations pfkey_net_ops = { From ab384b63c76679712edf2578930a238329aa41a9 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:27:19 +0300 Subject: [PATCH 2112/2177] geneve: exit_net cleanup check added Be sure that sock_list initialized in net_init hook was return to initial state. Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- drivers/net/geneve.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 5ec39f113127..688906aad19c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1654,6 +1654,7 @@ static void __net_exit geneve_exit_net(struct net *net) /* unregister the devices gathered above */ unregister_netdevice_many(&list); rtnl_unlock(); + WARN_ON_ONCE(!list_empty(&gn->sock_list)); } static struct pernet_operations geneve_net_ops = { From 669f8f1a5c6849d4522de4038c8d1a3e09fcc4ce Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:27:49 +0300 Subject: [PATCH 2113/2177] packet: exit_net cleanup check added Be sure that packet.sklist initialized in net_init hook was return to initial state. Signed-off-by: Vasily Averin Signed-off-by: David S. 
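This and the surrounding exit_net patches (2111-2121) repeat one pattern across subsystems: whatever per-netns lists or hash tables the pernet ->init hook sets up must be empty again by the time ->exit runs, and a WARN_ON()/WARN_ON_ONCE() there turns a silent leak into a visible stack trace. A generic sketch of the pattern follows; the struct and names are placeholders, not any real subsystem:

    /* Placeholder example of the exit_net check pattern; not real kernel code. */
    struct example_pernet {
            struct list_head obj_list;      /* filled while the netns is alive */
    };

    static unsigned int example_net_id;

    static int __net_init example_net_init(struct net *net)
    {
            struct example_pernet *pn = net_generic(net, example_net_id);

            INIT_LIST_HEAD(&pn->obj_list);
            return 0;
    }

    static void __net_exit example_net_exit(struct net *net)
    {
            struct example_pernet *pn = net_generic(net, example_net_id);

            /* Anything registered in this netns must already be gone. */
            WARN_ON_ONCE(!list_empty(&pn->obj_list));
    }

    static struct pernet_operations example_net_ops = {
            .init = example_net_init,
            .exit = example_net_exit,
            .id   = &example_net_id,
            .size = sizeof(struct example_pernet),
    };
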
Miller --- net/packet/af_packet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9603f6ff17a4..737092ca9b4e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4560,6 +4560,7 @@ static int __net_init packet_net_init(struct net *net) static void __net_exit packet_net_exit(struct net *net) { remove_proc_entry("packet", net->proc_net); + WARN_ON_ONCE(!hlist_empty(&net->packet.sklist)); } static struct pernet_operations packet_net_ops = { From 0e4ec5acad8b9dde5a42c37ae3499f7d0f230e75 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:28:10 +0300 Subject: [PATCH 2114/2177] vxlan: exit_net cleanup checks added Be sure that sock_list array initialized in net_init hook was return to initial state Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index d7c49cf1d5e9..c02d85651eba 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3704,6 +3704,7 @@ static void __net_exit vxlan_exit_net(struct net *net) struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan, *next; struct net_device *dev, *aux; + unsigned int h; LIST_HEAD(list); rtnl_lock(); @@ -3723,6 +3724,9 @@ static void __net_exit vxlan_exit_net(struct net *net) unregister_netdevice_many(&list); rtnl_unlock(); + + for (h = 0; h < PORT_HASH_SIZE; ++h) + WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h])); } static struct pernet_operations vxlan_net_ops = { From ee21b18b6bdbb17a6e0b26255d2f662baf0d8409 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:28:46 +0300 Subject: [PATCH 2115/2177] netdev: exit_net cleanup check added Be sure that dev_base_head list initialized in net_init hook was return to initial state Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 30b5fe32c525..658337bf33e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8667,6 +8667,8 @@ static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); kfree(net->dev_index_head); + if (net != &init_net) + WARN_ON_ONCE(!list_empty(&net->dev_base_head)); } static struct pernet_operations __net_initdata netdev_net_ops = { From 0b6f59553544f47caa940c01015ac1f1113f046a Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:29:33 +0300 Subject: [PATCH 2116/2177] fib_notifier: exit_net cleanup check added Be sure that fib_notifier_ops list initilized in net_init hook was return to initial state. Signed-off-by: Vasily Averin Signed-off-by: David S. 
Miller --- net/core/fib_notifier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 4fc202dbdfb6..6b8cd494574f 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -161,8 +161,14 @@ static int __net_init fib_notifier_net_init(struct net *net) return 0; } +static void __net_exit fib_notifier_net_exit(struct net *net) +{ + WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops)); +} + static struct pernet_operations fib_notifier_net_ops = { .init = fib_notifier_net_init, + .exit = fib_notifier_net_exit, }; static int __init fib_notifier_init(void) From ce2b7db38af1ffefa6b04b5113b4d69ad36d38ba Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:30:01 +0300 Subject: [PATCH 2117/2177] fib_rules: exit_net cleanup check added Be sure that rules_ops list initialized in net_init hook was return to initial state. Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/core/fib_rules.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index fafd0a41e3f7..98e1066c3d55 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -1022,8 +1022,14 @@ static int __net_init fib_rules_net_init(struct net *net) return 0; } +static void __net_exit fib_rules_net_exit(struct net *net) +{ + WARN_ON_ONCE(!list_empty(&net->rules_ops)); +} + static struct pernet_operations fib_rules_net_ops = { .init = fib_rules_net_init, + .exit = fib_rules_net_exit, }; static int __init fib_rules_init(void) From 1e7af3b2cdf2dc9e4fd86c799414b3c504975270 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:30:31 +0300 Subject: [PATCH 2118/2177] l2tp: exit_net cleanup check added Be sure that l2tp_session_hlist array initialized in net_init hook was return to initial state. Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 350fcd39ebd8..115918ad8eca 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1833,6 +1833,7 @@ static __net_exit void l2tp_exit_net(struct net *net) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; + int hash; rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { @@ -1842,6 +1843,9 @@ static __net_exit void l2tp_exit_net(struct net *net) flush_workqueue(l2tp_wq); rcu_barrier(); + + for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) + WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash])); } static struct pernet_operations l2tp_net_ops = { From ae61e8cd061d6ce9a2e2a13e081e7d936139e01e Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:32:47 +0300 Subject: [PATCH 2119/2177] phonet: exit_net cleanup check added Be sure that pndevs.list initialized in net_init hook was return to initial state. Signed-off-by: Vasily Averin Signed-off-by: David S. 
Miller --- net/phonet/pn_dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 2cb4c5dfad6f..77787512fc32 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -331,7 +331,10 @@ static int __net_init phonet_init_net(struct net *net) static void __net_exit phonet_exit_net(struct net *net) { + struct phonet_net *pnn = phonet_pernet(net); + remove_proc_entry("phonet", net->proc_net); + WARN_ON_ONCE(!list_empty(&pnn->pndevs.list)); } static struct pernet_operations phonet_net_ops = { From e6675000f9a404f7651724c0b2e2e71f7247d3a1 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:33:22 +0300 Subject: [PATCH 2120/2177] ppp: exit_net cleanup checks added Be sure that lists initialized in net_init hook were return to initial state. Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 44891335f9af..d8e5747ff4e3 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -962,6 +962,8 @@ static __net_exit void ppp_exit_net(struct net *net) mutex_destroy(&pn->all_ppp_mutex); idr_destroy(&pn->units_idr); + WARN_ON_ONCE(!list_empty(&pn->all_channels)); + WARN_ON_ONCE(!list_empty(&pn->new_channels)); } static struct pernet_operations ppp_net_ops = { From baeb0dbbb56598a2ccd98b56e0da3e9d22869112 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 22:34:03 +0300 Subject: [PATCH 2121/2177] xfrm6_tunnel: exit_net cleanup check added Be sure that spi_byaddr and spi_byspi arrays initialized in net_init hook were return to initial state Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/ipv6/xfrm6_tunnel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 4e438bc7ee87..f85f0d7480ac 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -338,6 +338,14 @@ static int __net_init xfrm6_tunnel_net_init(struct net *net) static void __net_exit xfrm6_tunnel_net_exit(struct net *net) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); + unsigned int i; + + for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) + WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); + + for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) + WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byspi[i])); } static struct pernet_operations xfrm6_tunnel_net_ops = { From 8bff3685a4bbf175a96bc6a528f13455d8d38244 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 11 Nov 2017 19:58:50 +0800 Subject: [PATCH 2122/2177] vxlan: fix the issue that neigh proxy blocks all icmpv6 packets Commit f1fb08f6337c ("vxlan: fix ND proxy when skb doesn't have transport header offset") removed icmp6_code and icmp6_type check before calling neigh_reduce when doing neigh proxy. It means all icmpv6 packets would be blocked by this, not only ns packet. In Jianlin's env, even ping6 couldn't work through it. This patch is to bring the icmp6_code and icmp6_type check back and also removed the same check from neigh_reduce(). Fixes: f1fb08f6337c ("vxlan: fix ND proxy when skb doesn't have transport header offset") Reported-by: Jianlin Shi Signed-off-by: Xin Long Reviewed-by: Vincent Bernat Signed-off-by: David S. 
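The core of this vxlan fix is that the ND-specific validation now happens at the call site, before neigh_reduce() is entered, because neigh_reduce() itself no longer re-checks the ICMPv6 type and code. Condensed from the vxlan_xmit() hunk that follows, the call-site check is:

    /* Condensed from the vxlan_xmit() change below; shown for the check order. */
    if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
        pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)) &&
        ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
            struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1);

            /* Only neighbour solicitations are proxied; other ICMPv6
             * traffic (echo requests, RAs, ...) keeps flowing normally.
             */
            if (m->icmph.icmp6_code == 0 &&
                m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
                    return neigh_reduce(dev, skb, vni);
    }
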
Miller --- drivers/net/vxlan.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c02d85651eba..c437707a8549 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1623,26 +1623,19 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request, static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct nd_msg *msg; - const struct ipv6hdr *iphdr; const struct in6_addr *daddr; - struct neighbour *n; + const struct ipv6hdr *iphdr; struct inet6_dev *in6_dev; + struct neighbour *n; + struct nd_msg *msg; in6_dev = __in6_dev_get(dev); if (!in6_dev) goto out; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) - goto out; - iphdr = ipv6_hdr(skb); daddr = &iphdr->daddr; - msg = (struct nd_msg *)(iphdr + 1); - if (msg->icmph.icmp6_code != 0 || - msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - goto out; if (ipv6_addr_loopback(daddr) || ipv6_addr_is_multicast(&msg->target)) @@ -2240,11 +2233,11 @@ tx_error: static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - const struct ip_tunnel_info *info; - struct ethhdr *eth; - bool did_rsc = false; struct vxlan_rdst *rdst, *fdst = NULL; + const struct ip_tunnel_info *info; + bool did_rsc = false; struct vxlan_fdb *f; + struct ethhdr *eth; __be32 vni = 0; info = skb_tunnel_info(skb); @@ -2269,12 +2262,14 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) if (ntohs(eth->h_proto) == ETH_P_ARP) return arp_reduce(dev, skb, vni); #if IS_ENABLED(CONFIG_IPV6) - else if (ntohs(eth->h_proto) == ETH_P_IPV6) { - struct ipv6hdr *hdr, _hdr; - if ((hdr = skb_header_pointer(skb, - skb_network_offset(skb), - sizeof(_hdr), &_hdr)) && - hdr->nexthdr == IPPROTO_ICMPV6) + else if (ntohs(eth->h_proto) == ETH_P_IPV6 && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && + ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { + struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1); + + if (m->icmph.icmp6_code == 0 && + m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) return neigh_reduce(dev, skb, vni); } #endif From 03e98b9118bed1960993466f4d64f9f5a9146b66 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Sat, 11 Nov 2017 19:48:15 +0530 Subject: [PATCH 2123/2177] cxgb4: collect LE-TCAM dump Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 30 +++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 1 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 175 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 7 + .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 7 + drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 41 ++++ 6 files changed, 261 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 1de1d811fde3..f99db7b283fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -185,6 +185,36 @@ struct cudbg_vpd_data { u32 vpd_vers; }; +#define CUDBG_MAX_TCAM_TID 0x800 + +enum cudbg_le_entry_types { + LE_ET_UNKNOWN = 0, + LE_ET_TCAM_CON = 1, + LE_ET_TCAM_SERVER = 2, + LE_ET_TCAM_FILTER = 3, + LE_ET_TCAM_CLIP = 4, + LE_ET_TCAM_ROUTING = 5, + LE_ET_HASH_CON = 6, + LE_ET_INVALID_TID = 8, +}; + +struct cudbg_tcam { + u32 filter_start; + u32 server_start; + u32 clip_start; + u32 routing_start; + u32 tid_hash_base; + u32 max_tid; +}; + +struct cudbg_tid_data { + u32 tid; + u32 dbig_cmd; + u32 dbig_conf; + u32 dbig_rsp_stat; + u32 data[NUM_LE_DB_DBGI_RSP_DATA_INSTANCES]; +}; + #define CUDBG_NUM_ULPTX 11 #define CUDBG_NUM_ULPTX_READ 512 diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index e484c514e9ae..4e5d189eae62 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -65,6 +65,7 @@ enum cudbg_dbg_entity_type { CUDBG_TID_INFO = 54, CUDBG_MPS_TCAM = 57, CUDBG_VPD_DATA = 58, + CUDBG_LE_TCAM = 59, CUDBG_CCTRL = 60, CUDBG_MA_INDIRECT = 61, CUDBG_ULPTX_LA = 62, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 32c9858da110..dd7e26be98cf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1367,6 +1367,181 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, return rc; } +static int cudbg_read_tid(struct cudbg_init *pdbg_init, u32 tid, + struct cudbg_tid_data *tid_data) +{ + struct adapter *padap = pdbg_init->adap; + int i, cmd_retry = 8; + u32 val; + + /* Fill REQ_DATA regs with 0's */ + for (i = 0; i < NUM_LE_DB_DBGI_REQ_DATA_INSTANCES; i++) + t4_write_reg(padap, LE_DB_DBGI_REQ_DATA_A + (i << 2), 0); + + /* Write DBIG command */ + val = DBGICMD_V(4) | DBGITID_V(tid); + t4_write_reg(padap, LE_DB_DBGI_REQ_TCAM_CMD_A, val); + tid_data->dbig_cmd = val; + + val = DBGICMDSTRT_F | DBGICMDMODE_V(1); /* LE mode */ + t4_write_reg(padap, LE_DB_DBGI_CONFIG_A, val); + tid_data->dbig_conf = val; + + /* Poll the DBGICMDBUSY bit */ + val = 1; + while (val) { + val = t4_read_reg(padap, LE_DB_DBGI_CONFIG_A); + val = val & DBGICMDBUSY_F; + cmd_retry--; + if (!cmd_retry) + return CUDBG_SYSTEM_ERROR; + } + + /* Check RESP status */ + val = t4_read_reg(padap, LE_DB_DBGI_RSP_STATUS_A); + tid_data->dbig_rsp_stat = val; + if (!(val & 1)) + return CUDBG_SYSTEM_ERROR; + + /* Read RESP data */ + for (i = 0; i < NUM_LE_DB_DBGI_RSP_DATA_INSTANCES; i++) + tid_data->data[i] = t4_read_reg(padap, + LE_DB_DBGI_RSP_DATA_A + + (i << 2)); + tid_data->tid = tid; + return 0; +} + +static int cudbg_get_le_type(u32 tid, struct cudbg_tcam tcam_region) +{ + int type = LE_ET_UNKNOWN; + + if (tid < tcam_region.server_start) + type = LE_ET_TCAM_CON; + else if (tid < tcam_region.filter_start) + type = LE_ET_TCAM_SERVER; + else if (tid < 
tcam_region.clip_start) + type = LE_ET_TCAM_FILTER; + else if (tid < tcam_region.routing_start) + type = LE_ET_TCAM_CLIP; + else if (tid < tcam_region.tid_hash_base) + type = LE_ET_TCAM_ROUTING; + else if (tid < tcam_region.max_tid) + type = LE_ET_HASH_CON; + else + type = LE_ET_INVALID_TID; + + return type; +} + +static int cudbg_is_ipv6_entry(struct cudbg_tid_data *tid_data, + struct cudbg_tcam tcam_region) +{ + int ipv6 = 0; + int le_type; + + le_type = cudbg_get_le_type(tid_data->tid, tcam_region); + if (tid_data->tid & 1) + return 0; + + if (le_type == LE_ET_HASH_CON) { + ipv6 = tid_data->data[16] & 0x8000; + } else if (le_type == LE_ET_TCAM_CON) { + ipv6 = tid_data->data[16] & 0x8000; + if (ipv6) + ipv6 = tid_data->data[9] == 0x00C00000; + } else { + ipv6 = 0; + } + return ipv6; +} + +void cudbg_fill_le_tcam_info(struct adapter *padap, + struct cudbg_tcam *tcam_region) +{ + u32 value; + + /* Get the LE regions */ + value = t4_read_reg(padap, LE_DB_TID_HASHBASE_A); /* hash base index */ + tcam_region->tid_hash_base = value; + + /* Get routing table index */ + value = t4_read_reg(padap, LE_DB_ROUTING_TABLE_INDEX_A); + tcam_region->routing_start = value; + + /*Get clip table index */ + value = t4_read_reg(padap, LE_DB_CLIP_TABLE_INDEX_A); + tcam_region->clip_start = value; + + /* Get filter table index */ + value = t4_read_reg(padap, LE_DB_FILTER_TABLE_INDEX_A); + tcam_region->filter_start = value; + + /* Get server table index */ + value = t4_read_reg(padap, LE_DB_SERVER_INDEX_A); + tcam_region->server_start = value; + + /* Check whether hash is enabled and calculate the max tids */ + value = t4_read_reg(padap, LE_DB_CONFIG_A); + if ((value >> HASHEN_S) & 1) { + value = t4_read_reg(padap, LE_DB_HASH_CONFIG_A); + if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) { + tcam_region->max_tid = (value & 0xFFFFF) + + tcam_region->tid_hash_base; + } else { + value = HASHTIDSIZE_G(value); + value = 1 << value; + tcam_region->max_tid = value + + tcam_region->tid_hash_base; + } + } else { /* hash not enabled */ + tcam_region->max_tid = CUDBG_MAX_TCAM_TID; + } +} + +int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_tcam tcam_region = { 0 }; + struct cudbg_tid_data *tid_data; + u32 bytes = 0; + int rc, size; + u32 i; + + cudbg_fill_le_tcam_info(padap, &tcam_region); + + size = sizeof(struct cudbg_tid_data) * tcam_region.max_tid; + size += sizeof(struct cudbg_tcam); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + memcpy(temp_buff.data, &tcam_region, sizeof(struct cudbg_tcam)); + bytes = sizeof(struct cudbg_tcam); + tid_data = (struct cudbg_tid_data *)(temp_buff.data + bytes); + /* read all tid */ + for (i = 0; i < tcam_region.max_tid; ) { + rc = cudbg_read_tid(pdbg_init, i, tid_data); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + + /* ipv6 takes two tids */ + cudbg_is_ipv6_entry(tid_data, tcam_region) ? 
i += 2 : i++; + + tid_data++; + bytes += sizeof(struct cudbg_tid_data); + } + + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + int cudbg_collect_cctrl(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 230ba88a6a81..ebb2d9907fc9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -129,6 +129,9 @@ int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_cctrl(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); @@ -155,4 +158,8 @@ struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, struct cudbg_entity_hdr *entity_hdr); u32 cudbg_cim_obq_size(struct adapter *padap, int qid); + +struct cudbg_tcam; +void cudbg_fill_le_tcam_info(struct adapter *padap, + struct cudbg_tcam *tcam_region); #endif /* __CUDBG_LIB_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 7373617da635..05eb2d2ef592 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -62,6 +62,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_TID_INFO, cudbg_collect_tid }, { CUDBG_MPS_TCAM, cudbg_collect_mps_tcam }, { CUDBG_VPD_DATA, cudbg_collect_vpd_data }, + { CUDBG_LE_TCAM, cudbg_collect_le_tcam }, { CUDBG_CCTRL, cudbg_collect_cctrl }, { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, { CUDBG_ULPTX_LA, cudbg_collect_ulptx_la }, @@ -72,6 +73,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) { + struct cudbg_tcam tcam_region = { 0 }; u32 value, n = 0, len = 0; switch (entity) { @@ -223,6 +225,11 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) case CUDBG_VPD_DATA: len = sizeof(struct cudbg_vpd_data); break; + case CUDBG_LE_TCAM: + cudbg_fill_le_tcam_info(adap, &tcam_region); + len = sizeof(struct cudbg_tcam) + + sizeof(struct cudbg_tid_data) * tcam_region.max_tid; + break; case CUDBG_CCTRL: len = sizeof(u16) * NMTUS * NCCTRL_WIN; break; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 623f453bd327..f5576ce004fa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -65,6 +65,9 @@ #define PCIE_FW_REG(reg_addr, idx) ((reg_addr) + (idx) * 4) +#define NUM_LE_DB_DBGI_REQ_DATA_INSTANCES 17 +#define NUM_LE_DB_DBGI_RSP_DATA_INSTANCES 17 + #define SGE_PF_KDOORBELL_A 0x0 #define QID_S 15 @@ -2273,6 +2276,35 @@ #define CHNENABLE_V(x) ((x) << CHNENABLE_S) #define CHNENABLE_F CHNENABLE_V(1U) +#define LE_DB_DBGI_CONFIG_A 0x19cf0 + +#define DBGICMDBUSY_S 3 +#define DBGICMDBUSY_V(x) ((x) << DBGICMDBUSY_S) +#define DBGICMDBUSY_F DBGICMDBUSY_V(1U) + +#define DBGICMDSTRT_S 2 +#define DBGICMDSTRT_V(x) ((x) << DBGICMDSTRT_S) +#define DBGICMDSTRT_F DBGICMDSTRT_V(1U) + +#define DBGICMDMODE_S 0 +#define DBGICMDMODE_M 0x3U +#define DBGICMDMODE_V(x) ((x) 
<< DBGICMDMODE_S) + +#define LE_DB_DBGI_REQ_TCAM_CMD_A 0x19cf4 + +#define DBGICMD_S 20 +#define DBGICMD_M 0xfU +#define DBGICMD_V(x) ((x) << DBGICMD_S) + +#define DBGITID_S 0 +#define DBGITID_M 0xfffffU +#define DBGITID_V(x) ((x) << DBGITID_S) + +#define LE_DB_DBGI_REQ_DATA_A 0x19d00 +#define LE_DB_DBGI_RSP_STATUS_A 0x19d94 + +#define LE_DB_DBGI_RSP_DATA_A 0x19da0 + #define PRTENABLE_S 29 #define PRTENABLE_V(x) ((x) << PRTENABLE_S) #define PRTENABLE_F PRTENABLE_V(1U) @@ -2882,11 +2914,20 @@ #define T6_LIPMISS_F T6_LIPMISS_V(1U) #define LE_DB_CONFIG_A 0x19c04 +#define LE_DB_ROUTING_TABLE_INDEX_A 0x19c10 #define LE_DB_ACTIVE_TABLE_START_INDEX_A 0x19c10 +#define LE_DB_FILTER_TABLE_INDEX_A 0x19c14 #define LE_DB_SERVER_INDEX_A 0x19c18 #define LE_DB_SRVR_START_INDEX_A 0x19c18 +#define LE_DB_CLIP_TABLE_INDEX_A 0x19c1c #define LE_DB_ACT_CNT_IPV4_A 0x19c20 #define LE_DB_ACT_CNT_IPV6_A 0x19c24 +#define LE_DB_HASH_CONFIG_A 0x19c28 + +#define HASHTIDSIZE_S 16 +#define HASHTIDSIZE_M 0x3fU +#define HASHTIDSIZE_G(x) (((x) >> HASHTIDSIZE_S) & HASHTIDSIZE_M) + #define LE_DB_HASH_TID_BASE_A 0x19c30 #define LE_DB_HASH_TBL_BASE_ADDR_A 0x19c30 #define LE_DB_INT_CAUSE_A 0x19c3c From 9e5c598c720792e210f83964441ee1c99451e8d1 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Sat, 11 Nov 2017 19:48:16 +0530 Subject: [PATCH 2124/2177] cxgb4: collect SGE queue context dump Collect SGE freelist queue and congestion manager contexts. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_entity.h | 8 ++ drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h | 1 + .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 78 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cudbg_lib.h | 4 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 4 + .../net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 4 + drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 62 +++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 7 ++ drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 27 +++++++ 9 files changed, 195 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index f99db7b283fc..605689957496 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -145,6 +145,14 @@ struct cudbg_tid_info_region_rev1 { u32 reserved[16]; }; +#define CUDBG_MAX_FL_QIDS 1024 + +struct cudbg_ch_cntxt { + u32 cntxt_type; + u32 cntxt_id; + u32 data[SGE_CTXT_SIZE / 4]; +}; + #define CUDBG_MAX_RPLC_SIZE 128 struct cudbg_mps_tcam { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 4e5d189eae62..e10ff1ee62c5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -63,6 +63,7 @@ enum cudbg_dbg_entity_type { CUDBG_PCIE_INDIRECT = 50, CUDBG_PM_INDIRECT = 51, CUDBG_TID_INFO = 54, + CUDBG_DUMP_CONTEXT = 56, CUDBG_MPS_TCAM = 57, CUDBG_VPD_DATA = 58, CUDBG_LE_TCAM = 59, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index dd7e26be98cf..d699bf88d18f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1115,6 +1115,84 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init, return rc; } +int cudbg_dump_context_size(struct adapter *padap) +{ + u32 value, size; + u8 flq; + + value = t4_read_reg(padap, SGE_FLM_CFG_A); + + /* Get number of data freelist 
queues */ + flq = HDRSTARTFLQ_G(value); + size = CUDBG_MAX_FL_QIDS >> flq; + + /* Add extra space for congestion manager contexts. + * The number of CONM contexts are same as number of freelist + * queues. + */ + size += size; + return size * sizeof(struct cudbg_ch_cntxt); +} + +static void cudbg_read_sge_ctxt(struct cudbg_init *pdbg_init, u32 cid, + enum ctxt_type ctype, u32 *data) +{ + struct adapter *padap = pdbg_init->adap; + int rc = -1; + + /* Under heavy traffic, the SGE Queue contexts registers will be + * frequently accessed by firmware. + * + * To avoid conflicts with firmware, always ask firmware to fetch + * the SGE Queue contexts via mailbox. On failure, fallback to + * accessing hardware registers directly. + */ + if (is_fw_attached(pdbg_init)) + rc = t4_sge_ctxt_rd(padap, padap->mbox, cid, ctype, data); + if (rc) + t4_sge_ctxt_rd_bd(padap, cid, ctype, data); +} + +int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_ch_cntxt *buff; + u32 size, i = 0; + int rc; + + rc = cudbg_dump_context_size(padap); + if (rc <= 0) + return CUDBG_STATUS_ENTITY_NOT_FOUND; + + size = rc; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + buff = (struct cudbg_ch_cntxt *)temp_buff.data; + while (size > 0) { + buff->cntxt_type = CTXT_FLM; + buff->cntxt_id = i; + cudbg_read_sge_ctxt(pdbg_init, i, CTXT_FLM, buff->data); + buff++; + size -= sizeof(struct cudbg_ch_cntxt); + + buff->cntxt_type = CTXT_CNM; + buff->cntxt_id = i; + cudbg_read_sge_ctxt(pdbg_init, i, CTXT_CNM, buff->data); + buff++; + size -= sizeof(struct cudbg_ch_cntxt); + + i++; + } + + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + static inline void cudbg_tcamxy2valmask(u64 x, u64 y, u8 *addr, u64 *mask) { *mask = x | y; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index ebb2d9907fc9..caeee8e33e86 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -123,6 +123,9 @@ int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, int cudbg_collect_tid(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); @@ -158,6 +161,7 @@ struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, struct cudbg_entity_hdr *entity_hdr); u32 cudbg_cim_obq_size(struct adapter *padap, int qid); +int cudbg_dump_context_size(struct adapter *padap); struct cudbg_tcam; void cudbg_fill_le_tcam_info(struct adapter *padap, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0de1a4b2223e..6f9fa6e3c42a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1670,6 +1670,10 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED]); void t4_get_tx_sched(struct adapter *adap, unsigned int sched, unsigned int *kbps, unsigned int *ipg, bool sleep_ok); +int t4_sge_ctxt_rd(struct 
adapter *adap, unsigned int mbox, unsigned int cid, + enum ctxt_type ctype, u32 *data); +int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, + enum ctxt_type ctype, u32 *data); int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int class, int minrate, int maxrate, int weight, int pktsize); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 05eb2d2ef592..29cc625e9833 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -60,6 +60,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect }, { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect }, { CUDBG_TID_INFO, cudbg_collect_tid }, + { CUDBG_DUMP_CONTEXT, cudbg_collect_dump_context }, { CUDBG_MPS_TCAM, cudbg_collect_mps_tcam }, { CUDBG_VPD_DATA, cudbg_collect_vpd_data }, { CUDBG_LE_TCAM, cudbg_collect_le_tcam }, @@ -218,6 +219,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) case CUDBG_TID_INFO: len = sizeof(struct cudbg_tid_info_region_rev1); break; + case CUDBG_DUMP_CONTEXT: + len = cudbg_dump_context_size(adap); + break; case CUDBG_MPS_TCAM: len = sizeof(struct cudbg_mps_tcam) * adap->params.arch.mps_tcam_size; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index b4fad081ac78..f63210f15579 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -9647,6 +9647,68 @@ void t4_get_tx_sched(struct adapter *adap, unsigned int sched, } } +/* t4_sge_ctxt_rd - read an SGE context through FW + * @adap: the adapter + * @mbox: mailbox to use for the FW command + * @cid: the context id + * @ctype: the context type + * @data: where to store the context data + * + * Issues a FW command through the given mailbox to read an SGE context. + */ +int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid, + enum ctxt_type ctype, u32 *data) +{ + struct fw_ldst_cmd c; + int ret; + + if (ctype == CTXT_FLM) + ret = FW_LDST_ADDRSPC_SGE_FLMC; + else + ret = FW_LDST_ADDRSPC_SGE_CONMC; + + memset(&c, 0, sizeof(c)); + c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_LDST_CMD_ADDRSPACE_V(ret)); + c.cycles_to_len16 = cpu_to_be32(FW_LEN16(c)); + c.u.idctxt.physid = cpu_to_be32(cid); + + ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); + if (ret == 0) { + data[0] = be32_to_cpu(c.u.idctxt.ctxt_data0); + data[1] = be32_to_cpu(c.u.idctxt.ctxt_data1); + data[2] = be32_to_cpu(c.u.idctxt.ctxt_data2); + data[3] = be32_to_cpu(c.u.idctxt.ctxt_data3); + data[4] = be32_to_cpu(c.u.idctxt.ctxt_data4); + data[5] = be32_to_cpu(c.u.idctxt.ctxt_data5); + } + return ret; +} + +/** + * t4_sge_ctxt_rd_bd - read an SGE context bypassing FW + * @adap: the adapter + * @cid: the context id + * @ctype: the context type + * @data: where to store the context data + * + * Reads an SGE context directly, bypassing FW. This is only for + * debugging when FW is unavailable. 
+ */ +int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, + enum ctxt_type ctype, u32 *data) +{ + int i, ret; + + t4_write_reg(adap, SGE_CTXT_CMD_A, CTXTQID_V(cid) | CTXTTYPE_V(ctype)); + ret = t4_wait_op_done(adap, SGE_CTXT_CMD_A, BUSY_F, 0, 3, 1); + if (!ret) + for (i = SGE_CTXT_DATA0_A; i <= SGE_CTXT_DATA5_A; i += 4) + *data++ = t4_read_reg(adap, i); + return ret; +} + int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int class, int minrate, int maxrate, int weight, int pktsize) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 7c6af14905c2..a964ed184356 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -68,6 +68,12 @@ enum { ULPRX_LA_SIZE = 512, /* # of 256-bit words in ULP_RX LA */ }; +/* SGE context types */ +enum ctxt_type { + CTXT_FLM = 2, + CTXT_CNM, +}; + enum { SF_PAGE_SIZE = 256, /* serial flash page size */ SF_SEC_SIZE = 64 * 1024, /* serial flash sector size */ @@ -79,6 +85,7 @@ enum { MBOX_OWNER_NONE, MBOX_OWNER_FW, MBOX_OWNER_DRV }; /* mailbox owners */ enum { SGE_MAX_WR_LEN = 512, /* max WR size in bytes */ + SGE_CTXT_SIZE = 24, /* size of SGE context */ SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */ SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */ SGE_MAX_IQ_SIZE = 65520, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index f5576ce004fa..a7cfece72828 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -153,6 +153,23 @@ #define T6_DBVFIFO_SIZE_M 0x1fffU #define T6_DBVFIFO_SIZE_G(x) (((x) >> T6_DBVFIFO_SIZE_S) & T6_DBVFIFO_SIZE_M) +#define SGE_CTXT_CMD_A 0x11fc + +#define BUSY_S 31 +#define BUSY_V(x) ((x) << BUSY_S) +#define BUSY_F BUSY_V(1U) + +#define CTXTTYPE_S 24 +#define CTXTTYPE_M 0x3U +#define CTXTTYPE_V(x) ((x) << CTXTTYPE_S) + +#define CTXTQID_S 0 +#define CTXTQID_M 0x1ffffU +#define CTXTQID_V(x) ((x) << CTXTQID_S) + +#define SGE_CTXT_DATA0_A 0x1200 +#define SGE_CTXT_DATA5_A 0x1214 + #define GLOBALENABLE_S 0 #define GLOBALENABLE_V(x) ((x) << GLOBALENABLE_S) #define GLOBALENABLE_F GLOBALENABLE_V(1U) @@ -322,6 +339,16 @@ #define SGE_IMSG_CTXT_BADDR_A 0x1088 #define SGE_FLM_CACHE_BADDR_A 0x108c +#define SGE_FLM_CFG_A 0x1090 + +#define NOHDR_S 18 +#define NOHDR_V(x) ((x) << NOHDR_S) +#define NOHDR_F NOHDR_V(1U) + +#define HDRSTARTFLQ_S 11 +#define HDRSTARTFLQ_M 0x7U +#define HDRSTARTFLQ_G(x) (((x) >> HDRSTARTFLQ_S) & HDRSTARTFLQ_M) + #define SGE_INGRESS_RX_THRESHOLD_A 0x10a0 #define THRESHOLD_0_S 24 From 442866ff9743d51957685cecaa722a7fd47b02e2 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 11 Nov 2017 10:42:03 -0500 Subject: [PATCH 2125/2177] bnx2x: fix slowpath null crash When "NETDEV WATCHDOG: em4 (bnx2x): transmit queue 2 timed out" occurs, BNX2X_SP_RTNL_TX_TIMEOUT is set. In the function bnx2x_sp_rtnl_task, bnx2x_nic_unload and bnx2x_nic_load are executed to shutdown and open NIC. In the function bnx2x_nic_load, bnx2x_alloc_mem allocates dma failure. The message "bnx2x: [bnx2x_alloc_mem:8399(em4)]Can't allocate memory" pops out. The variable slowpath is set to NULL. When shutdown the NIC, the function bnx2x_nic_unload is called. In the function bnx2x_nic_unload, the following functions are executed. 
bnx2x_chip_cleanup bnx2x_set_storm_rx_mode bnx2x_set_q_rx_mode bnx2x_set_q_rx_mode bnx2x_config_rx_mode bnx2x_set_rx_mode_e2 In the function bnx2x_set_rx_mode_e2, the variable slowpath is operated. Then the crash occurs. To fix this crash, the variable slowpath is checked. And in the function bnx2x_sp_rtnl_task, after dma memory allocation fails, another shutdown and open NIC is executed. CC: Joe Jin CC: Junxiao Bi Signed-off-by: Zhu Yanjun Acked-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 54d1571384a0..be9fd7d184d0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9332,7 +9332,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) /* Schedule the rx_mode command */ if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state)) set_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state); - else + else if (bp->slowpath) bnx2x_set_storm_rx_mode(bp); /* Cleanup multicast configuration */ @@ -10271,8 +10271,15 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work) smp_mb(); bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); - bnx2x_nic_load(bp, LOAD_NORMAL); - + /* When ret value shows failure of allocation failure, + * the nic is rebooted again. If open still fails, a error + * message to notify the user. + */ + if (bnx2x_nic_load(bp, LOAD_NORMAL) == -ENOMEM) { + bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); + if (bnx2x_nic_load(bp, LOAD_NORMAL)) + BNX2X_ERR("Open the NIC fails again!\n"); + } rtnl_unlock(); return; } From 3a9b76fd0db9f0d426533f96a68a62a58753a51e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 11 Nov 2017 15:54:12 -0800 Subject: [PATCH 2126/2177] tcp: allow drivers to tweak TSQ logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I had many reports that TSQ logic breaks wifi aggregation. Current logic is to allow up to 1 ms of bytes to be queued into qdisc and drivers queues. But Wifi aggregation needs a bigger budget to allow bigger rates to be discovered by various TCP Congestion Controls algorithms. This patch adds an extra socket field, allowing wifi drivers to select another log scale to derive TCP Small Queue credit from current pacing rate. Initial value is 10, meaning that this patch does not change current behavior. We expect wifi drivers to set this field to smaller values (tests have been done with values from 6 to 9) They would have to use following template : if (skb->sk && skb->sk->sk_pacing_shift != MY_PACING_SHIFT) skb->sk->sk_pacing_shift = MY_PACING_SHIFT; Ref: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1670041 Signed-off-by: Eric Dumazet Cc: Johannes Berg Cc: Toke Høiland-Jørgensen Cc: Kir Kolyshkin Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ net/core/sock.c | 1 + net/ipv4/tcp_output.c | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 688a823dccc3..f8715c5af37d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -267,6 +267,7 @@ struct sock_common { * @sk_gso_type: GSO type (e.g. 
%SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments + * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct @@ -451,6 +452,7 @@ struct sock { kmemcheck_bitfield_end(flags); u16 sk_gso_max_segs; + u8 sk_pacing_shift; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; diff --git a/net/core/sock.c b/net/core/sock.c index 57bbd6040eb6..13719af7b4e3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2746,6 +2746,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0U; sk->sk_pacing_rate = ~0U; + sk->sk_pacing_shift = 10; sk->sk_incoming_cpu = -1; /* * Before updating sk_refcnt, we must commit prior changes to memory diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0256f7a41041..76dbe884f246 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1720,7 +1720,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, { u32 bytes, segs; - bytes = min(sk->sk_pacing_rate >> 10, + bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift, sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); /* Goal is to send at least one packet per ms, @@ -2198,7 +2198,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, { unsigned int limit; - limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); + limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift); limit = min_t(u32, limit, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); limit <<= factor; From 9fd29c08e52023252f0480ab8f6906a1ecc9a8d5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 12 Nov 2017 14:49:09 -0800 Subject: [PATCH 2127/2177] bpf: improve verifier ARG_CONST_SIZE_OR_ZERO semantics For helpers, the argument type ARG_CONST_SIZE_OR_ZERO permits the access size to be 0 when accessing the previous argument (arg). Right now, it requires the arg needs to be NULL when size passed is 0 or could be 0. It also requires a non-NULL arg when the size is proved to be non-0. This patch changes verifier ARG_CONST_SIZE_OR_ZERO behavior such that for size-0 or possible size-0, it is not required the arg equal to NULL. There are a couple of reasons for this semantics change, and all of them intends to simplify user bpf programs which may improve user experience and/or increase chances of verifier acceptance. Together with the next patch which changes bpf_probe_read arg2 type from ARG_CONST_SIZE to ARG_CONST_SIZE_OR_ZERO, the following two examples, which fail the verifier currently, are able to get verifier acceptance. Example 1: unsigned long len = pend - pstart; len = len > MAX_PAYLOAD_LEN ? MAX_PAYLOAD_LEN : len; len &= MAX_PAYLOAD_LEN; bpf_probe_read(data->payload, len, pstart); It does not have test for "len > 0" and it failed the verifier. Users may not be aware that they have to add this test. Converting the bpf_probe_read helper to have ARG_CONST_SIZE_OR_ZERO helps the above code get verifier acceptance. Example 2: Here is one example where llvm "messed up" the code and the verifier fails. ...... unsigned long len = pend - pstart; if (len > 0 && len <= MAX_PAYLOAD_LEN) bpf_probe_read(data->payload, len, pstart); ...... The compiler generates the following code and verifier fails: ...... 
39: (79) r2 = *(u64 *)(r10 -16) 40: (1f) r2 -= r8 41: (bf) r1 = r2 42: (07) r1 += -1 43: (25) if r1 > 0xffe goto pc+3 R0=inv(id=0) R1=inv(id=0,umax_value=4094,var_off=(0x0; 0xfff)) R2=inv(id=0) R6=map_value(id=0,off=0,ks=4,vs=4095,imm=0) R7=inv(id=0) R8=inv(id=0) R9=inv0 R10=fp0 44: (bf) r1 = r6 45: (bf) r3 = r8 46: (85) call bpf_probe_read#45 R2 min value is negative, either use unsigned or 'var &= const' ...... The compiler optimization is correct. If r1 = 0, r1 - 1 = 0xffffffffffffffff > 0xffe. If r1 != 0, r1 - 1 will not wrap. r1 > 0xffe at insn #43 can actually capture both "r1 > 0" and "len <= MAX_PAYLOAD_LEN". This however causes an issue in verifier as the value range of arg2 "r2" does not properly get refined and lead to verification failure. Relaxing bpf_prog_read arg2 from ARG_CONST_SIZE to ARG_CONST_SIZE_OR_ZERO allows the following simplied code: unsigned long len = pend - pstart; if (len <= MAX_PAYLOAD_LEN) bpf_probe_read(data->payload, len, pstart); The llvm compiler will generate less complex code and the verifier is able to verify that the program is okay. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4a942e2e753d..dd54d20ace2f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -799,12 +799,13 @@ static int check_stack_read(struct bpf_verifier_env *env, /* check read/write into map element returned by bpf_map_lookup_elem() */ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, - int size) + int size, bool zero_size_allowed) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_map *map = regs[regno].map_ptr; - if (off < 0 || size <= 0 || off + size > map->value_size) { + if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || + off + size > map->value_size) { verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", map->value_size, off, size); return -EACCES; @@ -814,7 +815,7 @@ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, /* check read/write into a map element with possible variable offset */ static int check_map_access(struct bpf_verifier_env *env, u32 regno, - int off, int size) + int off, int size, bool zero_size_allowed) { struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *reg = &state->regs[regno]; @@ -837,7 +838,8 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, regno); return -EACCES; } - err = __check_map_access(env, regno, reg->smin_value + off, size); + err = __check_map_access(env, regno, reg->smin_value + off, size, + zero_size_allowed); if (err) { verbose(env, "R%d min value is outside of the array range\n", regno); @@ -853,7 +855,8 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, regno); return -EACCES; } - err = __check_map_access(env, regno, reg->umax_value + off, size); + err = __check_map_access(env, regno, reg->umax_value + off, size, + zero_size_allowed); if (err) verbose(env, "R%d max value is outside of the array range\n", regno); @@ -889,12 +892,13 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, } static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, - int off, int size) + int off, int size, bool zero_size_allowed) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state 
*reg = ®s[regno]; - if (off < 0 || size <= 0 || (u64)off + size > reg->range) { + if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || + (u64)off + size > reg->range) { verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, reg->off, reg->range); return -EACCES; @@ -903,7 +907,7 @@ static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, } static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, - int size) + int size, bool zero_size_allowed) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; @@ -922,7 +926,7 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, regno); return -EACCES; } - err = __check_packet_access(env, regno, off, size); + err = __check_packet_access(env, regno, off, size, zero_size_allowed); if (err) { verbose(env, "R%d offset is outside of the packet\n", regno); return err; @@ -1097,7 +1101,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return -EACCES; } - err = check_map_access(env, regno, off, size); + err = check_map_access(env, regno, off, size, false); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); @@ -1184,7 +1188,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn value_regno); return -EACCES; } - err = check_packet_access(env, regno, off, size); + err = check_packet_access(env, regno, off, size, false); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else { @@ -1281,7 +1285,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, } off = regs[regno].off + regs[regno].var_off.value; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || - access_size <= 0) { + access_size < 0 || (access_size == 0 && !zero_size_allowed)) { verbose(env, "invalid stack type R%d off=%d access_size=%d\n", regno, off, access_size); return -EACCES; @@ -1319,9 +1323,11 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, switch (reg->type) { case PTR_TO_PACKET: case PTR_TO_PACKET_META: - return check_packet_access(env, regno, reg->off, access_size); + return check_packet_access(env, regno, reg->off, access_size, + zero_size_allowed); case PTR_TO_MAP_VALUE: - return check_map_access(env, regno, reg->off, access_size); + return check_map_access(env, regno, reg->off, access_size, + zero_size_allowed); default: /* scalar_value|ptr_to_stack or invalid ptr */ return check_stack_boundary(env, regno, access_size, zero_size_allowed, meta); @@ -1415,7 +1421,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, } if (type_is_pkt_pointer(type)) err = check_packet_access(env, regno, reg->off, - meta->map_ptr->key_size); + meta->map_ptr->key_size, + false); else err = check_stack_boundary(env, regno, meta->map_ptr->key_size, @@ -1431,7 +1438,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, } if (type_is_pkt_pointer(type)) err = check_packet_access(env, regno, reg->off, - meta->map_ptr->value_size); + meta->map_ptr->value_size, + false); else err = check_stack_boundary(env, regno, meta->map_ptr->value_size, From 9c019e2bc4b2bd8223c8c0d4b6962478b479834d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 12 Nov 2017 14:49:10 -0800 Subject: [PATCH 2128/2177] bpf: change helper bpf_probe_read arg2 type to ARG_CONST_SIZE_OR_ZERO The helper bpf_probe_read arg2 type is changed from 
ARG_CONST_SIZE to ARG_CONST_SIZE_OR_ZERO to permit size-0 buffer. Together with newer ARG_CONST_SIZE_OR_ZERO semantics which allows non-NULL buffer with size 0, this allows simpler bpf programs with verifier acceptance. The previous commit which changes ARG_CONST_SIZE_OR_ZERO semantics has details on examples. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 506efe6e8ed9..a5580c670866 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -78,12 +78,16 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { - int ret; + int ret = 0; + + if (unlikely(size == 0)) + goto out; ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); + out: return ret; } @@ -92,7 +96,7 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, - .arg2_type = ARG_CONST_SIZE, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, }; From b6ff63911232c2823e2763371ec4462ac64cc331 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 12 Nov 2017 14:49:11 -0800 Subject: [PATCH 2129/2177] bpf: fix and add test cases for ARG_CONST_SIZE_OR_ZERO semantics change Fix a few test cases to allow non-NULL map/packet/stack pointer with size = 0. Change a few tests using bpf_probe_read to use bpf_probe_write_user so ARG_CONST_SIZE arg can still be properly tested. One existing test case already covers size = 0 with non-NULL packet pointer, so add additional tests so all cases of size = 0 and 0 <= size <= legal_upper_bound with non-NULL map/packet/stack pointer are covered. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/testing/selftests/bpf/test_verifier.c | 131 +++++++++++++++++--- 1 file changed, 112 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index bb3c4ad8c59f..bf092b83e453 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3579,7 +3579,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to packet: test19, cls helper fail range zero", + "helper access to packet: test19, cls helper range zero", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -3599,8 +3599,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "invalid access to packet", + .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -4379,10 +4378,10 @@ static struct bpf_test tests[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_write_user), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, @@ -4486,9 +4485,10 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_write_user), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, @@ -4622,13 +4622,14 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_write_user), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R1 min value is outside of the array range", + .errstr = "R2 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4765,13 +4766,14 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4), BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EMIT_CALL(BPF_FUNC_probe_write_user), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R1 min value is outside of the array range", + .errstr = "R2 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5350,7 +5352,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-64 access_size=0", + .errstr = "invalid indirect read from stack off -64+0 size 64", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5505,7 +5507,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-64 access_size=0", + .errstr = "invalid indirect read from stack off -64+0 size 64", .result = REJECT, .prog_type = 
BPF_PROG_TYPE_TRACEPOINT, }, @@ -5668,7 +5670,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to variable memory: size = 0 not allowed on != NULL", + "helper access to variable memory: size = 0 allowed on != NULL stack pointer", .insns = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), @@ -5681,8 +5683,99 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_csum_diff), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-8 access_size=0", - .result = REJECT, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to variable memory: size = 0 allowed on != NULL map pointer", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to variable memory: size possible = 0 allowed on != NULL map pointer", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + 
}, + .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { From 61ef6da622aa7b66bf92991bd272490eea6c712e Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:44 +0200 Subject: [PATCH 2130/2177] tls: Use kzalloc for aead_request allocation Use kzalloc for aead_request allocation as we don't set all the bits in the request. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7d80040a37b6..f00383a37622 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -219,7 +219,7 @@ static int tls_do_encryption(struct tls_context *tls_ctx, struct aead_request *aead_req; int rc; - aead_req = kmalloc(req_size, flags); + aead_req = kzalloc(req_size, flags); if (!aead_req) return -ENOMEM; From 6d88207fcfddc002afe3e2e4a455e5201089d5d9 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:45 +0200 Subject: [PATCH 2131/2177] tls: Add function to update the TLS socket configuration The tx configuration is now stored in ctx->tx_conf. And sk->sk_prot is updated trough a function This will simplify things when we add rx and support for different possible tx and rx cross configurations. Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- include/net/tls.h | 2 ++ net/tls/tls_main.c | 46 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index b89d397dd62f..f058a6e08eaa 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -83,6 +83,8 @@ struct tls_context { void *priv_ctx; + u8 tx_conf:2; + u16 prepend_size; u16 tag_size; u16 overhead_size; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 60aff60e30ad..de6a1416bc41 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -45,8 +45,18 @@ MODULE_AUTHOR("Mellanox Technologies"); MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); -static struct proto tls_base_prot; -static struct proto tls_sw_prot; +enum { + TLS_BASE_TX, + TLS_SW_TX, + TLS_NUM_CONFIG, +}; + +static struct proto tls_prots[TLS_NUM_CONFIG]; + +static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) +{ + sk->sk_prot = &tls_prots[ctx->tx_conf]; +} int wait_on_pending_writer(struct sock *sk, long *timeo) { @@ -340,8 +350,8 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, { struct tls_crypto_info *crypto_info, tmp_crypto_info; struct tls_context *ctx = tls_get_ctx(sk); - struct proto *prot = NULL; int rc = 0; + int tx_conf; if (!optval || (optlen < sizeof(*crypto_info))) { rc = -EINVAL; @@ -396,11 +406,12 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); - prot = &tls_sw_prot; + tx_conf = TLS_SW_TX; if (rc) goto err_crypto_info; - sk->sk_prot = prot; + ctx->tx_conf = tx_conf; + update_sk_prot(sk, ctx); goto out; err_crypto_info: @@ -453,7 +464,9 @@ static int tls_init(struct sock *sk) icsk->icsk_ulp_data = ctx; ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; - sk->sk_prot = &tls_base_prot; + + ctx->tx_conf = TLS_BASE_TX; + update_sk_prot(sk, ctx); out: return rc; } @@ -464,16 +477,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .init = tls_init, }; +static void build_protos(struct proto *prot, struct 
proto *base) +{ + prot[TLS_BASE_TX] = *base; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + + prot[TLS_SW_TX] = prot[TLS_BASE_TX]; + prot[TLS_SW_TX].close = tls_sk_proto_close; + prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; + prot[TLS_SW_TX].sendpage = tls_sw_sendpage; +} + static int __init tls_register(void) { - tls_base_prot = tcp_prot; - tls_base_prot.setsockopt = tls_setsockopt; - tls_base_prot.getsockopt = tls_getsockopt; - - tls_sw_prot = tls_base_prot; - tls_sw_prot.sendmsg = tls_sw_sendmsg; - tls_sw_prot.sendpage = tls_sw_sendpage; - tls_sw_prot.close = tls_sk_proto_close; + build_protos(tls_prots, &tcp_prot); tcp_register_ulp(&tcp_tls_ulp_ops); From ff45d820a2df163957ad8ab459b6eb6976144c18 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:46 +0200 Subject: [PATCH 2132/2177] tls: Fix TLS ulp context leak, when TLS_TX setsockopt is not used. Previously the TLS ulp context would leak if we attached a TLS ulp to a socket but did not use the TLS_TX setsockopt, or did use it but it failed. This patch solves the issue by overriding prot[TLS_BASE_TX].close and fixing tls_sk_proto_close to work properly when its called with ctx->tx_conf == TLS_BASE_TX. This patch also removes ctx->free_resources as we can use ctx->tx_conf to obtain the relevant information. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- net/tls/tls_main.c | 22 ++++++++++++++-------- net/tls/tls_sw.c | 4 ++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index f058a6e08eaa..7cb58a6b8fd0 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -99,7 +99,6 @@ struct tls_context { u16 pending_open_record_frags; int (*push_pending_record)(struct sock *sk, int flags); - void (*free_resources)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -124,6 +123,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); +void tls_sw_free_tx_resources(struct sock *sk); void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); void tls_icsk_clean_acked(struct sock *sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index de6a1416bc41..13427ee7c582 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -226,6 +226,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) void (*sk_proto_close)(struct sock *sk, long timeout); lock_sock(sk); + sk_proto_close = ctx->sk_proto_close; + + if (ctx->tx_conf == TLS_BASE_TX) { + kfree(ctx); + goto skip_tx_cleanup; + } if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) tls_handle_open_record(sk, 0); @@ -242,13 +248,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) sg++; } } - ctx->free_resources(sk); + kfree(ctx->rec_seq); kfree(ctx->iv); - sk_proto_close = ctx->sk_proto_close; - kfree(ctx); + if (ctx->tx_conf == TLS_SW_TX) + tls_sw_free_tx_resources(sk); +skip_tx_cleanup: release_sock(sk); sk_proto_close(sk, timeout); } @@ -402,8 +409,6 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, ctx->sk_write_space = sk->sk_write_space; sk->sk_write_space = tls_write_space; - ctx->sk_proto_close = sk->sk_prot->close; - /* currently SW is default, we will have ethtool in future */ rc = 
tls_set_sw_offload(sk, ctx); tx_conf = TLS_SW_TX; @@ -464,6 +469,7 @@ static int tls_init(struct sock *sk) icsk->icsk_ulp_data = ctx; ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; + ctx->sk_proto_close = sk->sk_prot->close; ctx->tx_conf = TLS_BASE_TX; update_sk_prot(sk, ctx); @@ -480,11 +486,11 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { static void build_protos(struct proto *prot, struct proto *base) { prot[TLS_BASE_TX] = *base; - prot[TLS_BASE_TX].setsockopt = tls_setsockopt; - prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].close = tls_sk_proto_close; prot[TLS_SW_TX] = prot[TLS_BASE_TX]; - prot[TLS_SW_TX].close = tls_sk_proto_close; prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; prot[TLS_SW_TX].sendpage = tls_sw_sendpage; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f00383a37622..fcd92a9c2d06 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -639,7 +639,7 @@ sendpage_end: return ret; } -static void tls_sw_free_resources(struct sock *sk) +void tls_sw_free_tx_resources(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); @@ -650,6 +650,7 @@ static void tls_sw_free_resources(struct sock *sk) tls_free_both_sg(sk); kfree(ctx); + kfree(tls_ctx); } int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) @@ -679,7 +680,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) } ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; - ctx->free_resources = tls_sw_free_resources; crypto_info = &ctx->crypto_send; switch (crypto_info->cipher_type) { From 213ef6e7c9c063c482d77f12cc438872628d48ec Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:47 +0200 Subject: [PATCH 2133/2177] tls: Move tls_make_aad to header to allow sharing move tls_make_aad as it is going to be reused by the device offload code and rx path. Remove unused recv parameter. Signed-off-by: Ilya Lesokhin Signed-off-by: David S. 
Miller --- include/net/tls.h | 15 +++++++++++++++ net/tls/tls_sw.c | 18 +----------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 7cb58a6b8fd0..70becd0a9299 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -214,6 +214,21 @@ static inline void tls_fill_prepend(struct tls_context *ctx, ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); } +static inline void tls_make_aad(char *buf, + size_t size, + char *record_sequence, + int record_sequence_size, + unsigned char record_type) +{ + memcpy(buf, record_sequence, record_sequence_size); + + buf[8] = record_type; + buf[9] = TLS_1_2_VERSION_MAJOR; + buf[10] = TLS_1_2_VERSION_MINOR; + buf[11] = size >> 8; + buf[12] = size & 0xFF; +} + static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fcd92a9c2d06..73d19210dd49 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -39,22 +39,6 @@ #include -static inline void tls_make_aad(int recv, - char *buf, - size_t size, - char *record_sequence, - int record_sequence_size, - unsigned char record_type) -{ - memcpy(buf, record_sequence, record_sequence_size); - - buf[8] = record_type; - buf[9] = TLS_1_2_VERSION_MAJOR; - buf[10] = TLS_1_2_VERSION_MINOR; - buf[11] = size >> 8; - buf[12] = size & 0xFF; -} - static void trim_sg(struct sock *sk, struct scatterlist *sg, int *sg_num_elem, unsigned int *sg_size, int target_size) { @@ -249,7 +233,7 @@ static int tls_push_record(struct sock *sk, int flags, sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1); sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1); - tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size, + tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size, tls_ctx->rec_seq, tls_ctx->rec_seq_size, record_type); From 196c31b4b54474b31dee3c30352c45c2a93e9226 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:48 +0200 Subject: [PATCH 2134/2177] tls: Avoid copying crypto_info again after cipher_type check. Avoid copying crypto_info again after cipher_type check to avoid a TOCTOU exploits. The temporary array on the stack is removed as we don't really need it Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. 
Miller --- net/tls/tls_main.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 13427ee7c582..ab1bd167b63b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -355,7 +355,7 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, unsigned int optlen) { - struct tls_crypto_info *crypto_info, tmp_crypto_info; + struct tls_crypto_info *crypto_info; struct tls_context *ctx = tls_get_ctx(sk); int rc = 0; int tx_conf; @@ -365,36 +365,31 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, goto out; } - rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info)); + crypto_info = &ctx->crypto_send; + /* Currently we don't support set crypto info more than one time */ + if (TLS_CRYPTO_INFO_READY(crypto_info)) + goto out; + + rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto out; } /* check version */ - if (tmp_crypto_info.version != TLS_1_2_VERSION) { + if (crypto_info->version != TLS_1_2_VERSION) { rc = -ENOTSUPP; - goto out; + goto err_crypto_info; } - /* get user crypto info */ - crypto_info = &ctx->crypto_send; - - /* Currently we don't support set crypto info more than one time */ - if (TLS_CRYPTO_INFO_READY(crypto_info)) - goto out; - - switch (tmp_crypto_info.cipher_type) { + switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: { if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { rc = -EINVAL; goto out; } - rc = copy_from_user( - crypto_info, - optval, - sizeof(struct tls12_crypto_info_aes_gcm_128)); - + rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), + optlen - sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto err_crypto_info; From ee181e5201e640a4b92b217e9eab2531dab57d2c Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:49 +0200 Subject: [PATCH 2135/2177] tls: don't override sk_write_space if tls_set_sw_offload fails. If we fail to enable tls in the kernel we shouldn't override the sk_write_space callback Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- net/tls/tls_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ab1bd167b63b..e07ee3ae0023 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -401,9 +401,6 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, goto out; } - ctx->sk_write_space = sk->sk_write_space; - sk->sk_write_space = tls_write_space; - /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); tx_conf = TLS_SW_TX; @@ -412,6 +409,8 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, ctx->tx_conf = tx_conf; update_sk_prot(sk, ctx); + ctx->sk_write_space = sk->sk_write_space; + sk->sk_write_space = tls_write_space; goto out; err_crypto_info: From 51f299dd94bb1e28d03eefbc4fe0b9282f9ee2fa Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 13 Nov 2017 00:15:04 +0100 Subject: [PATCH 2136/2177] net: core: improve sanity checking in __dev_alloc_name __dev_alloc_name is called from the public (and exported) dev_alloc_name(), so we don't have a guarantee that strlen(name) is at most IFNAMSIZ. 
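A minimal userspace sketch of the hazard (illustrative only, not part of the patch; strnchr() is re-implemented here because it is a kernel helper, and the interface-name pattern is made up for the example):

#include <stdio.h>
#include <string.h>

#define IFNAMSIZ 16	/* matches the kernel's definition */

/* stand-in for the kernel's strnchr(): search only the first @count bytes */
static char *strnchr(const char *s, size_t count, int c)
{
	for (; count-- && *s; s++)
		if (*s == (char)c)
			return (char *)s;
	return NULL;
}

int main(void)
{
	/* the '%' sits beyond the first IFNAMSIZ-1 bytes */
	const char *name = "very-long-interface-name-%d";
	char buf[IFNAMSIZ];

	printf("old strnchr() bound sees the '%%': %s\n",
	       strnchr(name, IFNAMSIZ - 1, '%') ? "yes" : "no");	/* no  */
	printf("plain strchr() check sees the '%%': %s\n",
	       strchr(name, '%') ? "yes" : "no");			/* yes */

	/* snprintf() truncates the *output* to IFNAMSIZ bytes but still
	 * parses the whole format string; in the kernel an unvalidated
	 * "%p..." extension reached this way could consume the integer
	 * argument as a pointer and dereference it. */
	snprintf(buf, sizeof(buf), name, 0);
	printf("buf = \"%s\"\n", buf);
	return 0;
}

With the old length-limited scan the format string above would reach snprintf() unvalidated; scanning with plain strchr() catches the '%' wherever it sits.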
If somebody manages to get __dev_alloc_name called with a % char beyond the 31st character, we'd be making a snprintf() call that will very easily crash the kernel (using an appropriate %p extension, we'll likely dereference some completely bogus pointer). In the normal case where strlen() is sane, we don't even save anything by limiting to IFNAMSIZ, so just use strchr(). Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 658337bf33e4..1a5d31fdea27 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1064,7 +1064,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) unsigned long *inuse; struct net_device *d; - p = strnchr(name, IFNAMSIZ-1, '%'); + p = strchr(name, '%'); if (p) { /* * Verify the string as this thing may have come from From 2c88b855981481970b731bf3f4508400aac429fb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 13 Nov 2017 00:15:05 +0100 Subject: [PATCH 2137/2177] net: core: move dev_alloc_name_ns a little higher No functional change. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- net/core/dev.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1a5d31fdea27..4545685d2fa7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1107,6 +1107,19 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) return -ENFILE; } +static int dev_alloc_name_ns(struct net *net, + struct net_device *dev, + const char *name) +{ + char buf[IFNAMSIZ]; + int ret; + + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + /** * dev_alloc_name - allocate a name for a device * @dev: device @@ -1136,19 +1149,6 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -static int dev_alloc_name_ns(struct net *net, - struct net_device *dev, - const char *name) -{ - char buf[IFNAMSIZ]; - int ret; - - ret = __dev_alloc_name(net, name, buf); - if (ret >= 0) - strlcpy(dev->name, buf, IFNAMSIZ); - return ret; -} - int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { From c46d7642e915106b0301bc4d53a79e8e806c2eb9 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 13 Nov 2017 00:15:06 +0100 Subject: [PATCH 2138/2177] net: core: eliminate dev_alloc_name{,_ns} code duplication dev_alloc_name contained a BUG_ON(), which I moved to dev_alloc_name_ns; the only other caller of that already has the same BUG_ON. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. 
Miller --- net/core/dev.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4545685d2fa7..7580c2046c95 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1114,6 +1114,7 @@ static int dev_alloc_name_ns(struct net *net, char buf[IFNAMSIZ]; int ret; + BUG_ON(!net); ret = __dev_alloc_name(net, name, buf); if (ret >= 0) strlcpy(dev->name, buf, IFNAMSIZ); @@ -1136,16 +1137,7 @@ static int dev_alloc_name_ns(struct net *net, int dev_alloc_name(struct net_device *dev, const char *name) { - char buf[IFNAMSIZ]; - struct net *net; - int ret; - - BUG_ON(!dev_net(dev)); - net = dev_net(dev); - ret = __dev_alloc_name(net, name, buf); - if (ret >= 0) - strlcpy(dev->name, buf, IFNAMSIZ); - return ret; + return dev_alloc_name_ns(dev_net(dev), dev, name); } EXPORT_SYMBOL(dev_alloc_name); From 6224abda0db8845756571833744d4414f144ecb5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 13 Nov 2017 00:15:07 +0100 Subject: [PATCH 2139/2177] net: core: drop pointless check in __dev_alloc_name The only caller passes a stack buffer as buf, so it won't equal the passed-in name. Moreover, we're already using buf as a scratch buffer inside the if (p) {} block, so if buf and name were the same, that snprintf() call would be overwriting its own format string. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7580c2046c95..4cedc7595f1f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1095,8 +1095,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) free_page((unsigned long) inuse); } - if (buf != name) - snprintf(buf, IFNAMSIZ, name, i); + snprintf(buf, IFNAMSIZ, name, i); if (!__dev_get_by_name(net, buf)) return i; From 93809105cf9d43790839d8b8e29a8a505290ec68 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 13 Nov 2017 00:15:08 +0100 Subject: [PATCH 2140/2177] net: core: check dev_valid_name in __dev_alloc_name We currently only exclude non-sysfs-friendly names via dev_get_valid_name; there doesn't seem to be a reason to allow such names when we're called via dev_alloc_name. This does duplicate the dev_valid_name check in the dev_get_valid_name() case; we'll fix that shortly. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 4cedc7595f1f..cb3d95edf58d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1064,6 +1064,9 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) unsigned long *inuse; struct net_device *d; + if (!dev_valid_name(name)) + return -EINVAL; + p = strchr(name, '%'); if (p) { /* From d6f295e9def0bee85b37bdffb95153721935c342 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 13 Nov 2017 00:15:09 +0100 Subject: [PATCH 2141/2177] net: core: maybe return -EEXIST in __dev_alloc_name If we're given format string with no %d, -EEXIST is a saner error code. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index cb3d95edf58d..1bb856eaed1c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) * when the name is long and there isn't enough space left * for the digits, or if all bits are used. 
*/ - return -ENFILE; + return p ? -ENFILE : -EEXIST; } static int dev_alloc_name_ns(struct net *net, From 87c320e51519a83c496ab7bfb4e96c8f9c001e89 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 13 Nov 2017 00:15:10 +0100 Subject: [PATCH 2142/2177] net: core: dev_get_valid_name is now the same as dev_alloc_name_ns If name contains a %, it's easy to see that this patch doesn't change anything (other than eliminate the duplicate dev_valid_name call). Otherwise, we'll now just spend a little time in snprintf() copying name to the stack buffer allocated in dev_alloc_name_ns, and do the __dev_get_by_name using that buffer rather than name. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- net/core/dev.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1bb856eaed1c..ad5f90dacd92 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1146,19 +1146,7 @@ EXPORT_SYMBOL(dev_alloc_name); int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { - BUG_ON(!net); - - if (!dev_valid_name(name)) - return -EINVAL; - - if (strchr(name, '%')) - return dev_alloc_name_ns(net, dev, name); - else if (__dev_get_by_name(net, name)) - return -EEXIST; - else if (dev->name != name) - strlcpy(dev->name, name, IFNAMSIZ); - - return 0; + return dev_alloc_name_ns(net, dev, name); } EXPORT_SYMBOL(dev_get_valid_name); From 3ba88c477bac891a53ba5a0efb351285297a5e5b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 13 Nov 2017 07:18:45 +0900 Subject: [PATCH 2143/2177] net: Extend Kernel GTP-U tunneling documentation * clarify specification references for v0/v1 * add section "APN vs. Network device" * add section "Local GTP-U entity and tunnel identification" Signed-off-by: Andreas Schultz Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- Documentation/networking/gtp.txt | 103 +++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/gtp.txt b/Documentation/networking/gtp.txt index 93e96750f103..0d9c18f05ec6 100644 --- a/Documentation/networking/gtp.txt +++ b/Documentation/networking/gtp.txt @@ -1,6 +1,7 @@ The Linux kernel GTP tunneling module ====================================================================== -Documentation by Harald Welte +Documentation by Harald Welte and + Andreas Schultz In 'drivers/net/gtp.c' you are finding a kernel-level implementation of a GTP tunnel endpoint. @@ -91,9 +92,13 @@ http://git.osmocom.org/libgtpnl/ == Protocol Versions == -There are two different versions of GTP-U: v0 and v1. Both are -implemented in the Kernel GTP module. Version 0 is a legacy version, -and deprecated from recent 3GPP specifications. +There are two different versions of GTP-U: v0 [GSM TS 09.60] and v1 +[3GPP TS 29.281]. Both are implemented in the Kernel GTP module. +Version 0 is a legacy version, and deprecated from recent 3GPP +specifications. + +GTP-U uses UDP for transporting PDUs. The receiving UDP port is 2151 +for GTPv1-U and 3386 for GTPv0-U. There are three versions of GTP-C: v0, v1, and v2. As the kernel doesn't implement GTP-C, we don't have to worry about this. It's the @@ -133,3 +138,93 @@ doe to a lack of user interest, it never got merged. In 2015, Andreas Schultz came to the rescue and fixed lots more bugs, extended it with new features and finally pushed all of us to get it mainline, where it was merged in 4.7.0. 
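As an illustrative aside ahead of the new "Architectural Details" section that follows (this sketch is not from gtp.c and its names are invented): the tunnel-identification rule described below amounts to a receive-side lookup keyed on the TEID from the GTP-U header alone, with the sender's address and port ignored.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical, simplified PDP-context table; the real driver keeps
 * per-device hash tables, but the lookup rule sketched here is the
 * one the text describes: only the TEID selects the tunnel. */
struct pdp_ctx {
	uint32_t teid;		/* local tunnel endpoint identifier */
	const char *dev;	/* Gi/SGi network device (one per APN) */
};

static const struct pdp_ctx tunnels[] = {
	{ .teid = 0x100, .dev = "gtp0" },
	{ .teid = 0x200, .dev = "gtp1" },
};

/* The sender's IP and UDP source port are accepted but deliberately
 * unused -- they may change at any time without ending the tunnel. */
static const struct pdp_ctx *pdp_lookup(uint32_t teid,
					uint32_t src_ip, uint16_t src_port)
{
	size_t i;

	(void)src_ip;
	(void)src_port;
	for (i = 0; i < sizeof(tunnels) / sizeof(tunnels[0]); i++)
		if (tunnels[i].teid == teid)
			return &tunnels[i];
	return NULL;
}

int main(void)
{
	const struct pdp_ctx *pdp = pdp_lookup(0x200, 0x0a000001, 40000);

	if (pdp)
		printf("TEID 0x200 -> deliver on %s\n", pdp->dev);
	return 0;
}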
+ +== Architectural Details == + +=== Local GTP-U entity and tunnel identification === + +GTP-U uses UDP for transporting PDU's. The receiving UDP port is 2152 +for GTPv1-U and 3386 for GTPv0-U. + +There is only one GTP-U entity (and therefor SGSN/GGSN/S-GW/PDN-GW +instance) per IP address. Tunnel Endpoint Identifier (TEID) are unique +per GTP-U entity. + +A specific tunnel is only defined by the destination entity. Since the +destination port is constant, only the destination IP and TEID define +a tunnel. The source IP and Port have no meaning for the tunnel. + +Therefore: + + * when sending, the remote entity is defined by the remote IP and + the tunnel endpoint id. The source IP and port have no meaning and + can be changed at any time. + + * when receiving the local entity is defined by the local + destination IP and the tunnel endpoint id. The source IP and port + have no meaning and can change at any time. + +[3GPP TS 29.281] Section 4.3.0 defines this so: + +> The TEID in the GTP-U header is used to de-multiplex traffic +> incoming from remote tunnel endpoints so that it is delivered to the +> User plane entities in a way that allows multiplexing of different +> users, different packet protocols and different QoS levels. +> Therefore no two remote GTP-U endpoints shall send traffic to a +> GTP-U protocol entity using the same TEID value except +> for data forwarding as part of mobility procedures. + +The definition above only defines that two remote GTP-U endpoints +*should not* send to the same TEID, it *does not* forbid or exclude +such a scenario. In fact, the mentioned mobility procedures make it +necessary that the GTP-U entity accepts traffic for TEIDs from +multiple or unknown peers. + +Therefore, the receiving side identifies tunnels exclusively based on +TEIDs, not based on the source IP! + +== APN vs. Network Device == + +The GTP-U driver creates a Linux network device for each Gi/SGi +interface. + +[3GPP TS 29.281] calls the Gi/SGi reference point an interface. This +may lead to the impression that the GGSN/P-GW can have only one such +interface. + +Correct is that the Gi/SGi reference point defines the interworking +between +the 3GPP packet domain (PDN) based on GTP-U tunnel and IP +based networks. + +There is no provision in any of the 3GPP documents that limits the +number of Gi/SGi interfaces implemented by a GGSN/P-GW. + +[3GPP TS 29.061] Section 11.3 makes it clear that the selection of a +specific Gi/SGi interfaces is made through the Access Point Name +(APN): + +> 2. each private network manages its own addressing. In general this +> will result in different private networks having overlapping +> address ranges. A logically separate connection (e.g. an IP in IP +> tunnel or layer 2 virtual circuit) is used between the GGSN/P-GW +> and each private network. +> +> In this case the IP address alone is not necessarily unique. The +> pair of values, Access Point Name (APN) and IPv4 address and/or +> IPv6 prefixes, is unique. + +In order to support the overlapping address range use case, each APN +is mapped to a separate Gi/SGi interface (network device). + +NOTE: The Access Point Name is purely a control plane (GTP-C) concept. 
+At the GTP-U level, only Tunnel Endpoint Identifiers are present in +GTP-U packets and network devices are known + +Therefore for a given UE the mapping in IP to PDN network is: + * network device + MS IP -> Peer IP + Peer TEID, + +and from PDN to IP network: + * local GTP-U IP + TEID -> network device + +Furthermore, before a received T-PDU is injected into the network +device the MS IP is checked against the IP recorded in PDP context. From 8983487f5e4e377cfc873160f4b557907debd286 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 13 Nov 2017 07:21:34 +0900 Subject: [PATCH 2144/2177] net: Mention net-next status web page in netdev-FAQ.txt According to https://www.mail-archive.com/netdev@vger.kernel.org/msg177411.html there is a status page available at http://vger.kernel.org/~davem/net-next.html to obtain the current status of the net-next tree. Let's add this information to the netdev FAQ. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt index cfc66ea72329..2a3278d5cf35 100644 --- a/Documentation/networking/netdev-FAQ.txt +++ b/Documentation/networking/netdev-FAQ.txt @@ -64,7 +64,10 @@ A: To understand this, you need to know a bit of background information If you aren't subscribed to netdev and/or are simply unsure if net-next has re-opened yet, simply check the net-next git repository link above for - any new networking-related commits. + any new networking-related commits. You may also check the following + website for the current status: + + http://vger.kernel.org/~davem/net-next.html The "net" tree continues to collect fixes for the vX.Y content, and is fed back to Linus at regular (~weekly) intervals. Meaning that the From 2f53fbd521825078c0b801d416702cdd877af262 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Sun, 12 Nov 2017 09:01:24 +0100 Subject: [PATCH 2145/2177] mlxsw: spectrum: Update minimum firmware version to 13.1530.152 This new firmware contains: - Support Spectrum A1 revision - Batch deletion of IPv6 neighbours - Remove incorrect VPD capability Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index b2cd1ebf4e36..2d46ec84ebdf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -74,8 +74,8 @@ #include "../mlxfw/mlxfw.h" #define MLXSW_FWREV_MAJOR 13 -#define MLXSW_FWREV_MINOR 1420 -#define MLXSW_FWREV_SUBMINOR 122 +#define MLXSW_FWREV_MINOR 1530 +#define MLXSW_FWREV_SUBMINOR 152 static const struct mlxsw_fw_rev mlxsw_sp_supported_fw_rev = { .major = MLXSW_FWREV_MAJOR, From 63dd00fa3e524c27cc0509190084ab147ecc8ae2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 12 Nov 2017 09:02:56 +0100 Subject: [PATCH 2146/2177] mlxsw: spectrum_router: Add batch neighbour deletion In commit 4a3c67a6e7cd ("mlxsw: spectrum_router: Don't batch neighbour deletion") I removed the support for batch deletion of neighbours on a router interface (RIF) since at that time the firmware did not support it for IPv6 neighbours. 
This is now supported by the version enforced by the driver, so there is no reason to delete neighbours one by one anymore. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e9187841d82a..632c7b229054 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2416,16 +2416,25 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&mlxsw_sp->router->neigh_ht); } +static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif *rif) +{ + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + + mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, + rif->rif_index, rif->addr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; + mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, - rif_list_node) { - mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); + rif_list_node) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); - } } enum mlxsw_sp_nexthop_type { From 3697d058b08d5b874f0253de173ef72e5d648f9a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 12 Nov 2017 16:16:04 +0100 Subject: [PATCH 2147/2177] net: phy: realtek: fix RTL8211F interrupt mode After commit b94d22d94ad22 "ARM64: dts: meson-gx: add external PHY interrupt on some platforms" ethernet stopped working on my Odroid-C2 which has a RTL8211F phy. It turned out that no interrupts were triggered. Further analysis showed the register INER can't be altered on page 0. Because register INSR needs to be accessed via page 0xa43 I assumed that register INER needs to be accessed via some page too. Some brute force check resulted in page 0xa42 being the right one. With this patch the phy is working properly in interrupt mode. Fixes: 3447cf2e9a11 ("net/phy: Add support for Realtek RTL8211F") Signed-off-by: Heiner Kallweit Tested-by: Jerome Brunet Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index d4670ecdb1ba..eda0a6e86918 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -115,11 +115,13 @@ static int rtl8211f_config_intr(struct phy_device *phydev) { int err; + phy_write(phydev, RTL821x_PAGE_SELECT, 0xa42); if (phydev->interrupts == PHY_INTERRUPT_ENABLED) err = phy_write(phydev, RTL821x_INER, RTL8211F_INER_LINK_STATUS); else err = phy_write(phydev, RTL821x_INER, 0); + phy_write(phydev, RTL821x_PAGE_SELECT, 0); return err; } From 97438abcfb80656d4aedaca21b8ff8db4fcc93a1 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 12 Nov 2017 23:38:09 +0800 Subject: [PATCH 2148/2177] net: dsa: lan9303: correctly check return value of devm_gpiod_get_optional Function devm_gpiod_get_optional() returns an ERR_PTR on failure. Its return value should not be validated by a NULL check. Instead, use IS_ERR. Signed-off-by: Pan Bian Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/dsa/lan9303-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index fdfdb0edfe62..b24566bb74d2 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1301,7 +1301,7 @@ static void lan9303_probe_reset_gpio(struct lan9303 *chip, chip->reset_gpio = devm_gpiod_get_optional(chip->dev, "reset", GPIOD_OUT_LOW); - if (!chip->reset_gpio) { + if (IS_ERR(chip->reset_gpio)) { dev_dbg(chip->dev, "No reset GPIO defined\n"); return; } From 887c3820a3801a117a494aeca147ec52f95e1566 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Mon, 13 Nov 2017 11:39:38 +0000 Subject: [PATCH 2149/2177] net: hns3: Updates MSI/MSI-X alloc/free APIs(depricated) to new APIs This patch migrates the HNS3 driver code from use of depricated PCI MSI/MSI-X interrupt vector allocation/free APIs to new common APIs. Signed-off-by: Salil Mehta Suggested-by: Christoph Hellwig Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 113 ++++++------------ .../hisilicon/hns3/hns3pf/hclge_main.h | 17 ++- 2 files changed, 46 insertions(+), 84 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 781d5a8cbb6a..59ed806a52c3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -891,14 +891,14 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S; if (hnae3_dev_roce_supported(hdev)) { - hdev->num_roce_msix = + hdev->num_roce_msi = hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); /* PF should have NIC vectors and Roce vectors, * NIC vectors are queued before Roce vectors. 
*/ - hdev->num_msi = hdev->num_roce_msix + HCLGE_ROCE_VECTOR_OFFSET; + hdev->num_msi = hdev->num_roce_msi + HCLGE_ROCE_VECTOR_OFFSET; } else { hdev->num_msi = hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number), @@ -1950,7 +1950,7 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) struct hnae3_handle *roce = &vport->roce; struct hnae3_handle *nic = &vport->nic; - roce->rinfo.num_vectors = vport->back->num_roce_msix; + roce->rinfo.num_vectors = vport->back->num_roce_msi; if (vport->back->num_msi_left < vport->roce.rinfo.num_vectors || vport->back->num_msi_left == 0) @@ -1968,68 +1968,48 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) return 0; } -static int hclge_init_msix(struct hclge_dev *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - int ret, i; - - hdev->msix_entries = devm_kcalloc(&pdev->dev, hdev->num_msi, - sizeof(struct msix_entry), - GFP_KERNEL); - if (!hdev->msix_entries) - return -ENOMEM; - - hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi, - sizeof(u16), GFP_KERNEL); - if (!hdev->vector_status) - return -ENOMEM; - - for (i = 0; i < hdev->num_msi; i++) { - hdev->msix_entries[i].entry = i; - hdev->vector_status[i] = HCLGE_INVALID_VPORT; - } - - hdev->num_msi_left = hdev->num_msi; - hdev->base_msi_vector = hdev->pdev->irq; - hdev->roce_base_vector = hdev->base_msi_vector + - HCLGE_ROCE_VECTOR_OFFSET; - - ret = pci_enable_msix_range(hdev->pdev, hdev->msix_entries, - hdev->num_msi, hdev->num_msi); - if (ret < 0) { - dev_info(&hdev->pdev->dev, - "MSI-X vector alloc failed: %d\n", ret); - return ret; - } - - return 0; -} - static int hclge_init_msi(struct hclge_dev *hdev) { struct pci_dev *pdev = hdev->pdev; int vectors; int i; - hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi, - sizeof(u16), GFP_KERNEL); - if (!hdev->vector_status) - return -ENOMEM; - - for (i = 0; i < hdev->num_msi; i++) - hdev->vector_status[i] = HCLGE_INVALID_VPORT; - - vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, PCI_IRQ_MSI); + vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, + PCI_IRQ_MSI | PCI_IRQ_MSIX); if (vectors < 0) { - dev_err(&pdev->dev, "MSI vectors enable failed %d\n", vectors); - return -EINVAL; + dev_err(&pdev->dev, + "failed(%d) to allocate MSI/MSI-X vectors\n", + vectors); + return vectors; } + if (vectors < hdev->num_msi) + dev_warn(&hdev->pdev->dev, + "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n", + hdev->num_msi, vectors); + hdev->num_msi = vectors; hdev->num_msi_left = vectors; hdev->base_msi_vector = pdev->irq; hdev->roce_base_vector = hdev->base_msi_vector + HCLGE_ROCE_VECTOR_OFFSET; + hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi, + sizeof(u16), GFP_KERNEL); + if (!hdev->vector_status) { + pci_free_irq_vectors(pdev); + return -ENOMEM; + } + + for (i = 0; i < hdev->num_msi; i++) + hdev->vector_status[i] = HCLGE_INVALID_VPORT; + + hdev->vector_irq = devm_kcalloc(&pdev->dev, hdev->num_msi, + sizeof(int), GFP_KERNEL); + if (!hdev->vector_irq) { + pci_free_irq_vectors(pdev); + return -ENOMEM; + } + return 0; } @@ -2704,6 +2684,7 @@ static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num, vport->vport_id * HCLGE_VECTOR_VF_OFFSET; hdev->vector_status[i] = vport->vport_id; + hdev->vector_irq[i] = vector->vector; vector++; alloc++; @@ -2722,15 +2703,10 @@ static int hclge_get_vector_index(struct hclge_dev *hdev, int vector) { int i; - for (i = 0; i < hdev->num_msi; i++) { - if (hdev->msix_entries) { - if (vector == hdev->msix_entries[i].vector) - return i; - } else { - if 
(vector == (hdev->base_msi_vector + i)) - return i; - } - } + for (i = 0; i < hdev->num_msi; i++) + if (vector == hdev->vector_irq[i]) + return i; + return -EINVAL; } @@ -4664,14 +4640,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) { struct pci_dev *pdev = hdev->pdev; - if (hdev->flag & HCLGE_FLAG_USE_MSIX) { - pci_disable_msix(pdev); - devm_kfree(&pdev->dev, hdev->msix_entries); - hdev->msix_entries = NULL; - } else { - pci_disable_msi(pdev); - } - + pci_free_irq_vectors(pdev); pci_clear_master(pdev); pci_release_mem_regions(pdev); pci_disable_device(pdev); @@ -4689,7 +4658,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_hclge_dev; } - hdev->flag |= HCLGE_FLAG_USE_MSIX; hdev->pdev = pdev; hdev->ae_dev = ae_dev; hdev->reset_type = HNAE3_NONE_RESET; @@ -4726,12 +4694,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - if (hdev->flag & HCLGE_FLAG_USE_MSIX) - ret = hclge_init_msix(hdev); - else - ret = hclge_init_msi(hdev); + ret = hclge_init_msi(hdev); if (ret) { - dev_err(&pdev->dev, "Init msix/msi error, ret = %d.\n", ret); + dev_err(&pdev->dev, "Init MSI/MSI-X error, ret = %d.\n", ret); return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 742e6ee9efaf..7027814ea5d7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -425,9 +425,6 @@ struct hclge_dev { u16 num_tqps; /* Num task queue pairs of this PF */ u16 num_req_vfs; /* Num VFs requested for this PF */ - u16 num_roce_msix; /* Num of roce vectors for this PF */ - int roce_base_vector; - /* Base task tqp physical id of this PF */ u16 base_tqp_pid; u16 alloc_rss_size; /* Allocated RSS task queue */ @@ -457,8 +454,10 @@ struct hclge_dev { u16 num_msi_left; u16 num_msi_used; u32 base_msi_vector; - struct msix_entry *msix_entries; u16 *vector_status; + int *vector_irq; + u16 num_roce_msi; /* Num of roce vectors for this PF */ + int roce_base_vector; u16 pending_udp_bitmap; @@ -482,12 +481,10 @@ struct hclge_dev { struct hnae3_client *nic_client; struct hnae3_client *roce_client; -#define HCLGE_FLAG_USE_MSI 0x00000001 -#define HCLGE_FLAG_USE_MSIX 0x00000002 -#define HCLGE_FLAG_MAIN 0x00000004 -#define HCLGE_FLAG_DCB_CAPABLE 0x00000008 -#define HCLGE_FLAG_DCB_ENABLE 0x00000010 -#define HCLGE_FLAG_MQPRIO_ENABLE 0x00000020 +#define HCLGE_FLAG_MAIN BIT(0) +#define HCLGE_FLAG_DCB_CAPABLE BIT(1) +#define HCLGE_FLAG_DCB_ENABLE BIT(2) +#define HCLGE_FLAG_MQPRIO_ENABLE BIT(3) u32 flag; u32 pkt_buf_size; /* Total pf buf size for tx/rx */ From 1a48fbd9ec1483f5bf3da63dfd907f4272828b4a Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Mon, 13 Nov 2017 14:25:25 +0100 Subject: [PATCH 2150/2177] net: dsa: lan9303: calculate offload_fwd_mark from tag The lan9303 set bits in the host CPU tag indicating if a ingress frame is a trapped IGMP or STP frame. Use these bits to calculate skb->offload_fwd_mark more efficiently. Signed-off-by: Egil Hjelmeland Signed-off-by: David S. 
Miller --- net/dsa/tag_lan9303.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index b8c5e52b2eff..548c00254c07 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -42,6 +42,10 @@ #define LAN9303_TAG_LEN 4 # define LAN9303_TAG_TX_USE_ALR BIT(3) # define LAN9303_TAG_TX_STP_OVERRIDE BIT(4) +# define LAN9303_TAG_RX_IGMP BIT(3) +# define LAN9303_TAG_RX_STP BIT(4) +# define LAN9303_TAG_RX_TRAPPED_TO_CPU (LAN9303_TAG_RX_IGMP | \ + LAN9303_TAG_RX_STP) /* Decide whether to transmit using ALR lookup, or transmit directly to * port using tag. ALR learning is performed only when using ALR lookup. @@ -91,9 +95,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { u16 *lan9303_tag; + u16 lan9303_tag1; unsigned int source_port; - u16 ether_type_nw; - u8 ip_protocol; if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) { dev_warn_ratelimited(&dev->dev, @@ -114,7 +117,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, return NULL; } - source_port = ntohs(lan9303_tag[1]) & 0x3; + lan9303_tag1 = ntohs(lan9303_tag[1]); + source_port = lan9303_tag1 & 0x3; skb->dev = dsa_master_find_slave(dev, 0, source_port); if (!skb->dev) { @@ -128,19 +132,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, skb_pull_rcsum(skb, 2 + 2); memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), 2 * ETH_ALEN); - skb->offload_fwd_mark = !ether_addr_equal(skb->data - ETH_HLEN, - eth_stp_addr); - - /* We also need IGMP packets to have skb->offload_fwd_mark = 0. - * Solving this for all conceivable situations would add more cost to - * every packet. Instead we handle just the common case: - * No VLAN tag + Ethernet II framing. - * Test least probable term first. - */ - ether_type_nw = lan9303_tag[2]; - ip_protocol = *(skb->data + 9); - if (ip_protocol == IPPROTO_IGMP && ether_type_nw == htons(ETH_P_IP)) - skb->offload_fwd_mark = 0; + skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU); return skb; } From 0c4b9169781cf15c4c1b9171dab98ff96d5c7fd7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Nov 2017 15:57:30 +0100 Subject: [PATCH 2151/2177] netlink: remove unnecessary forward declaration netlink_skb_destructor() is actually defined before the first usage in the file, so remove the unnecessary forward declaration. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a3eab903a9cd..b9e0ee4e22f5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -128,7 +128,6 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { }; static int netlink_dump(struct sock *sk); -static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: * Lookup and traversal are protected with an RCU read-side lock. Insertion From 096d1dd0f03211fb42d6c2457f248827604b7f0e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Nov 2017 16:19:46 +0100 Subject: [PATCH 2152/2177] netlink: remove unused NETLINK SKB flags These flags are unused, remove them to be less confusing. Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6ddb4a5da371..49b4257ce1ea 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -17,9 +17,6 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) } enum netlink_skb_flags { - NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ - NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ - NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ NETLINK_SKB_DST = 0x8, /* Dst set in sendto or sendmsg */ }; From 0d63785c6b94b5d2f095f90755825f90eea791f5 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Mon, 13 Nov 2017 16:27:02 +0100 Subject: [PATCH 2153/2177] net: mvneta: fix handling of the Tx descriptor counter The mvneta controller provides a 8-bit register to update the pending Tx descriptor counter. Then, a maximum of 255 Tx descriptors can be added at once. In the current code the mvneta_txq_pend_desc_add function assumes the caller takes care of this limit. But it is not the case. In some situations (xmit_more flag), more than 255 descriptors are added. When this happens, the Tx descriptor counter register is updated with a wrong value, which breaks the whole Tx queue management. This patch fixes the issue by allowing the mvneta_txq_pend_desc_add function to process more than 255 Tx descriptors. Fixes: 2a90f7e1d5d0 ("net: mvneta: add xmit_more support") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Simon Guinot Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 64a04975bcf8..bc93b69cfd1e 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -816,11 +816,14 @@ static void mvneta_txq_pend_desc_add(struct mvneta_port *pp, { u32 val; - /* Only 255 descriptors can be added at once ; Assume caller - * process TX desriptors in quanta less than 256 - */ - val = pend_desc + txq->pending; - mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val); + pend_desc += txq->pending; + + /* Only 255 Tx descriptors can be added at once */ + do { + val = min(pend_desc, 255); + mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val); + pend_desc -= val; + } while (pend_desc > 0); txq->pending = 0; } From fbec443bfe44f58a40e00962e969b5a9cafde457 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 13 Nov 2017 18:30:55 +0200 Subject: [PATCH 2154/2177] net: bridge: add vlan_tunnel to bridge port policies Found another missing port flag policy entry for IFLA_BRPORT_VLAN_TUNNEL so add it now. CC: Roopa Prabhu Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support") Signed-off-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 67bae0f11c67..d0ef0a8e8831 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -657,6 +657,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 }, [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 }, [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 }, }; From 4e6759be28e4e69c397ab58c1e780b0a15d8a6fd Mon Sep 17 00:00:00 2001 From: Desnes Augusto Nunes do Rosario Date: Mon, 13 Nov 2017 15:59:19 -0200 Subject: [PATCH 2155/2177] ibmvnic: Feature implementation of Vital Product Data (VPD) for the ibmvnic driver This patch implements and enables VDP support for the ibmvnic driver. Moreover, it includes the implementation of suitable structs, signal transmission/handling and functions which allows the retrival of firmware information from the ibmvnic card through the ethtool command. Signed-off-by: Desnes A. Nunes do Rosario Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 153 ++++++++++++++++++++++++++++- drivers/net/ethernet/ibm/ibmvnic.h | 27 ++++- 2 files changed, 176 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b918bc2f2e4f..04aaacbc3d45 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -574,6 +574,15 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) return 0; } +static void release_vpd_data(struct ibmvnic_adapter *adapter) +{ + if (!adapter->vpd) + return; + + kfree(adapter->vpd->buff); + kfree(adapter->vpd); +} + static void release_tx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_tx_pool *tx_pool; @@ -754,6 +763,8 @@ static void release_resources(struct ibmvnic_adapter *adapter) { int i; + release_vpd_data(adapter); + release_tx_pools(adapter); release_rx_pools(adapter); @@ -834,6 +845,57 @@ static int set_real_num_queues(struct net_device *netdev) return rc; } +static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + dma_addr_t dma_addr; + int len = 0; + + if (adapter->vpd->buff) + len = adapter->vpd->len; + + reinit_completion(&adapter->fw_done); + crq.get_vpd_size.first = IBMVNIC_CRQ_CMD; + crq.get_vpd_size.cmd = GET_VPD_SIZE; + ibmvnic_send_crq(adapter, &crq); + wait_for_completion(&adapter->fw_done); + + if (!adapter->vpd->len) + return -ENODATA; + + if (!adapter->vpd->buff) + adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL); + else if (adapter->vpd->len != len) + adapter->vpd->buff = + krealloc(adapter->vpd->buff, + adapter->vpd->len, GFP_KERNEL); + + if (!adapter->vpd->buff) { + dev_err(dev, "Could allocate VPD buffer\n"); + return -ENOMEM; + } + + adapter->vpd->dma_addr = + dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_err(dev, "Could not map VPD buffer\n"); + kfree(adapter->vpd->buff); + return -ENOMEM; + } + + reinit_completion(&adapter->fw_done); + crq.get_vpd.first = IBMVNIC_CRQ_CMD; + crq.get_vpd.cmd = GET_VPD; + crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr); + crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len); + ibmvnic_send_crq(adapter, 
&crq); + wait_for_completion(&adapter->fw_done); + + return 0; +} + static int init_resources(struct ibmvnic_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -851,6 +913,10 @@ static int init_resources(struct ibmvnic_adapter *adapter) if (rc) return rc; + adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL); + if (!adapter->vpd) + return -ENOMEM; + adapter->map_id = 1; adapter->napi = kcalloc(adapter->req_rx_queues, sizeof(struct napi_struct), GFP_KERNEL); @@ -924,7 +990,7 @@ static int __ibmvnic_open(struct net_device *netdev) static int ibmvnic_open(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int rc; + int rc, vpd; mutex_lock(&adapter->reset_lock); @@ -951,6 +1017,12 @@ static int ibmvnic_open(struct net_device *netdev) rc = __ibmvnic_open(netdev); netif_carrier_on(netdev); + + /* Vital Product Data (VPD) */ + vpd = ibmvnic_get_vpd(adapter); + if (vpd) + netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); + mutex_unlock(&adapter->reset_lock); return rc; @@ -1879,11 +1951,15 @@ static int ibmvnic_get_link_ksettings(struct net_device *netdev, return 0; } -static void ibmvnic_get_drvinfo(struct net_device *dev, +static void ibmvnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + strlcpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); strlcpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, adapter->fw_version, + sizeof(info->fw_version)); } static u32 ibmvnic_get_msglevel(struct net_device *netdev) @@ -3140,6 +3216,73 @@ static void send_cap_queries(struct ibmvnic_adapter *adapter) ibmvnic_send_crq(adapter, &crq); } +static void handle_vpd_size_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + + if (crq->get_vpd_size_rsp.rc.code) { + dev_err(dev, "Error retrieving VPD size, rc=%x\n", + crq->get_vpd_size_rsp.rc.code); + complete(&adapter->fw_done); + return; + } + + adapter->vpd->len = be64_to_cpu(crq->get_vpd_size_rsp.len); + complete(&adapter->fw_done); +} + +static void handle_vpd_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + unsigned char *substr = NULL, *ptr = NULL; + u8 fw_level_len = 0; + + memset(adapter->fw_version, 0, 32); + + dma_unmap_single(dev, adapter->vpd->dma_addr, adapter->vpd->len, + DMA_FROM_DEVICE); + + if (crq->get_vpd_rsp.rc.code) { + dev_err(dev, "Error retrieving VPD from device, rc=%x\n", + crq->get_vpd_rsp.rc.code); + goto complete; + } + + /* get the position of the firmware version info + * located after the ASCII 'RM' substring in the buffer + */ + substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len); + if (!substr) { + dev_info(dev, "No FW level provided by VPD\n"); + goto complete; + } + + /* get length of firmware level ASCII substring */ + if ((substr + 2) < (adapter->vpd->buff + adapter->vpd->len)) { + fw_level_len = *(substr + 2); + } else { + dev_info(dev, "Length of FW substr extrapolated VDP buff\n"); + goto complete; + } + + /* copy firmware version string from vpd into adapter */ + if ((substr + 3 + fw_level_len) < + (adapter->vpd->buff + adapter->vpd->len)) { + ptr = strncpy((char *)adapter->fw_version, + substr + 3, fw_level_len); + + if (!ptr) + dev_err(dev, "Failed to isolate FW level string\n"); + } else { + dev_info(dev, "FW substr extrapolated VPD buff\n"); + } + +complete: + 
complete(&adapter->fw_done); +} + static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; @@ -3871,6 +4014,12 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, netdev_dbg(netdev, "Got Collect firmware trace Response\n"); complete(&adapter->fw_done); break; + case GET_VPD_SIZE_RSP: + handle_vpd_size_rsp(crq, adapter); + break; + case GET_VPD_RSP: + handle_vpd_rsp(crq, adapter); + break; default: netdev_err(netdev, "Got an invalid cmd type 0x%02x\n", gen_crq->cmd); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 8ed829c5b026..4487f1e2c266 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -560,6 +560,12 @@ struct ibmvnic_multicast_ctrl { struct ibmvnic_rc rc; } __packed __aligned(8); +struct ibmvnic_get_vpd_size { + u8 first; + u8 cmd; + u8 reserved[14]; +} __packed __aligned(8); + struct ibmvnic_get_vpd_size_rsp { u8 first; u8 cmd; @@ -577,6 +583,13 @@ struct ibmvnic_get_vpd { u8 reserved[4]; } __packed __aligned(8); +struct ibmvnic_get_vpd_rsp { + u8 first; + u8 cmd; + u8 reserved[10]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + struct ibmvnic_acl_change_indication { u8 first; u8 cmd; @@ -702,10 +715,10 @@ union ibmvnic_crq { struct ibmvnic_change_mac_addr change_mac_addr_rsp; struct ibmvnic_multicast_ctrl multicast_ctrl; struct ibmvnic_multicast_ctrl multicast_ctrl_rsp; - struct ibmvnic_generic_crq get_vpd_size; + struct ibmvnic_get_vpd_size get_vpd_size; struct ibmvnic_get_vpd_size_rsp get_vpd_size_rsp; struct ibmvnic_get_vpd get_vpd; - struct ibmvnic_generic_crq get_vpd_rsp; + struct ibmvnic_get_vpd_rsp get_vpd_rsp; struct ibmvnic_acl_change_indication acl_change_indication; struct ibmvnic_acl_query acl_query; struct ibmvnic_generic_crq acl_query_rsp; @@ -939,6 +952,12 @@ struct ibmvnic_error_buff { __be32 error_id; }; +struct ibmvnic_vpd { + unsigned char *buff; + dma_addr_t dma_addr; + u64 len; +}; + enum vnic_state {VNIC_PROBING = 1, VNIC_PROBED, VNIC_OPENING, @@ -980,6 +999,10 @@ struct ibmvnic_adapter { dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; + /* Vital Product Data (VPD) */ + struct ibmvnic_vpd *vpd; + char fw_version[32]; + /* Statistics */ struct ibmvnic_statistics stats; dma_addr_t stats_token; From fae45363ae4bac980b1d7451233c7bf3d66d9300 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Nov 2017 09:12:03 +0300 Subject: [PATCH 2156/2177] xdp: sample: Missing curly braces in read_route() The assert statement is supposed to be part of the else branch but the curly braces were accidentally left off. Fixes: 3e29cd0e6563 ("xdp: Sample xdp program implementing ip forward") Signed-off-by: Dan Carpenter Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- samples/bpf/xdp_router_ipv4_user.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 2c1fe3f4b1a4..916462112d55 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -206,12 +206,13 @@ static void read_route(struct nlmsghdr *nh, int nll) direct_entry.arp.mac = 0; direct_entry.arp.dst = 0; if (route.dst_len == 32) { - if (nh->nlmsg_type == RTM_DELROUTE) + if (nh->nlmsg_type == RTM_DELROUTE) { assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0); - else + } else { if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0) direct_entry.arp.dst = route.dst; assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0); + } } for (i = 0; i < 4; i++) prefix_key->data[i] = (route.dst >> i * 8) & 0xff; From 228aa0121c8897d97fd0864d4ee4851545f7ebac Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Nov 2017 09:14:17 +0300 Subject: [PATCH 2157/2177] liquidio: Missing error code in liquidio_init_nic_module() We accidentally return success if lio_vf_rep_modinit() fails instead of propogating the error code. Fixes: e20f469660ad ("liquidio: synchronize VF representor names with NIC firmware") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index f05045a69dcc..6aa0eee88ea5 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -4038,7 +4038,8 @@ static int liquidio_init_nic_module(struct octeon_device *oct) */ if (!oct->octeon_id && oct->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP) { - if (lio_vf_rep_modinit()) { + retval = lio_vf_rep_modinit(); + if (retval) { liquidio_stop_nic_module(oct); goto octnet_init_failure; } From 8a860c2bcc84a8e4fbcabb928cd97e4c51b17d93 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 14 Nov 2017 06:20:16 +0000 Subject: [PATCH 2158/2177] openvswitch: Fix return value check in ovs_meter_cmd_features() In case of error, the function ovs_meter_cmd_reply_start() returns ERR_PTR() not NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure") Signed-off-by: Wei Yongjun Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 2a5ba356c472..2e58b6c4c65f 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -166,7 +166,7 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, &ovs_reply_header); - if (!reply) + if (IS_ERR(reply)) return PTR_ERR(reply); if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || From 06c2351fdebb38803f10ace19ed8daf9b9c91e12 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 14 Nov 2017 06:27:03 +0000 Subject: [PATCH 2159/2177] openvswitch: Make local function ovs_nsh_key_attr_size() static Fixes the following sparse warnings: net/openvswitch/flow_netlink.c:340:8: warning: symbol 'ovs_nsh_key_attr_size' was not declared. Should it be static? Signed-off-by: Wei Yongjun Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index bb4dae198c78..dc424798ba6f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -338,7 +338,7 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ } -size_t ovs_nsh_key_attr_size(void) +static size_t ovs_nsh_key_attr_size(void) { /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider * updating this function. From 6dc14dc40a1d1dafd8491c349b5f3e15aabc4edb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 14 Nov 2017 06:27:12 +0000 Subject: [PATCH 2160/2177] openvswitch: Using kfree_rcu() to simplify the code The callback function of call_rcu() just calls a kfree(), so we can use kfree_rcu() instead of call_rcu() + callback function. Signed-off-by: Wei Yongjun Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 2e58b6c4c65f..52ddd6c408b3 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -42,19 +42,12 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, }; -static void rcu_free_ovs_meter_callback(struct rcu_head *rcu) -{ - struct dp_meter *meter = container_of(rcu, struct dp_meter, rcu); - - kfree(meter); -} - static void ovs_meter_free(struct dp_meter *meter) { if (!meter) return; - call_rcu(&meter->rcu, rcu_free_ovs_meter_callback); + kfree_rcu(meter, rcu); } static struct hlist_head *meter_hash_bucket(const struct datapath *dp, From c92eb77aff6a11c79059e2caffdd3baede218a9e Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Mon, 13 Nov 2017 23:21:36 -0800 Subject: [PATCH 2161/2177] net-sysfs: trigger netlink notification on ifalias change via sysfs This patch adds netlink notifications on iflias changes via sysfs. makes it consistent with the netlink path which also calls netdev_state_change. Also makes it consistent with other sysfs netdev_store operations. Signed-off-by: Roopa Prabhu Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 51d5836d8fb9..799b75268291 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -382,7 +382,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, struct net_device *netdev = to_net_dev(dev); struct net *net = dev_net(netdev); size_t count = len; - ssize_t ret; + ssize_t ret = 0; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -391,9 +391,20 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - ret = dev_set_alias(netdev, buf, count); + if (!rtnl_trylock()) + return restart_syscall(); - return ret < 0 ? 
ret : len; + if (dev_isalive(netdev)) { + ret = dev_set_alias(netdev, buf, count); + if (ret < 0) + goto err; + ret = len; + netdev_state_change(netdev); + } +err: + rtnl_unlock(); + + return ret; } static ssize_t ifalias_show(struct device *dev, From bde533f2ea607cbbbe76ef8738b36243939a7bc2 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 14 Nov 2017 13:42:38 +0530 Subject: [PATCH 2162/2177] atm: horizon: Fix irq release error atm_dev_register() can fail here and passed parameters to free irq which is not initialised. Initialization of 'dev->irq' happened after the 'goto out_free_irq'. So using 'irq' insted of 'dev->irq' in free_irq(). Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/atm/horizon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 7e76b35f422c..e121b8485731 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2803,7 +2803,7 @@ out: return err; out_free_irq: - free_irq(dev->irq, dev); + free_irq(irq, dev); out_free: kfree(dev); out_release: From 4497478c60c04d2bf37082e27fc98f4f835db96b Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 14 Nov 2017 11:15:54 +0100 Subject: [PATCH 2163/2177] net: stmmac: fix LPI transitioning for dwmac4 The LPI transitioning logic in stmmac_main uses priv->tx_path_in_lpi_mode to enter/exit LPI. However, priv->tx_path_in_lpi_mode is assigned using the return value from host_irq_status(). So for dwmac4, priv->tx_path_in_lpi_mode was always false, so stmmac_tx_clean() would always try to put us in eee mode, and stmmac_xmit() would never take us out of eee mode. To fix this, make host_irq_status() read and return the LPI irq status also for dwmac4. This also increments the existing LPI counters, so that ethtool --statistics shows LPI transitions also for dwmac4. For dwmac1000, irqs are enabled/disabled using the register named "Interrupt Mask Register", and thus setting a bit disables that specific irq. For dwmac4 the matching register is named "MAC_Interrupt_Enable", and thus setting a bit enables that specific irq. Looking at dwmac1000_core.c, the irqs that are always enabled are: LPI and PMT. Looking at dwmac4_core.c, the irqs that are always enabled are: PMT. To be able to read the LPI irq status, we need to enable the LPI irq also for dwmac4. Signed-off-by: Niklas Cassel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 7 ++++++- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index aeda3ab2d761..789dad8a07b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -98,7 +98,7 @@ #define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \ GMAC_INT_PCS_ANE) -#define GMAC_INT_DEFAULT_MASK GMAC_INT_PMT_EN +#define GMAC_INT_DEFAULT_MASK (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN) enum dwmac4_irq_status { time_stamp_irq = 0x00001000, @@ -106,6 +106,7 @@ enum dwmac4_irq_status { mmc_tx_irq = 0x00000400, mmc_rx_irq = 0x00000200, mmc_irq = 0x00000100, + lpi_irq = 0x00000020, pmt_irq = 0x00000010, }; @@ -132,6 +133,10 @@ enum power_event { #define GMAC4_LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */ #define GMAC4_LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */ #define GMAC4_LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */ +#define GMAC4_LPI_CTRL_STATUS_RLPIEX BIT(3) /* Receive LPI Exit */ +#define GMAC4_LPI_CTRL_STATUS_RLPIEN BIT(2) /* Receive LPI Entry */ +#define GMAC4_LPI_CTRL_STATUS_TLPIEX BIT(1) /* Transmit LPI Exit */ +#define GMAC4_LPI_CTRL_STATUS_TLPIEN BIT(0) /* Transmit LPI Entry */ /* MAC Debug bitmap */ #define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 2f7d7ec59962..f3ed8f7853eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -580,6 +580,25 @@ static int dwmac4_irq_status(struct mac_device_info *hw, x->irq_receive_pmt_irq_n++; } + /* MAC tx/rx EEE LPI entry/exit interrupts */ + if (intr_status & lpi_irq) { + /* Clear LPI interrupt by reading MAC_LPI_Control_Status */ + u32 status = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); + + if (status & GMAC4_LPI_CTRL_STATUS_TLPIEN) { + ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE; + x->irq_tx_path_in_lpi_mode_n++; + } + if (status & GMAC4_LPI_CTRL_STATUS_TLPIEX) { + ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE; + x->irq_tx_path_exit_lpi_mode_n++; + } + if (status & GMAC4_LPI_CTRL_STATUS_RLPIEN) + x->irq_rx_path_in_lpi_mode_n++; + if (status & GMAC4_LPI_CTRL_STATUS_RLPIEX) + x->irq_rx_path_exit_lpi_mode_n++; + } + dwmac_pcs_isr(ioaddr, GMAC_PCS_BASE, intr_status, x); if (intr_status & PCS_RGSMIIIS_IRQ) dwmac4_phystatus(ioaddr, x); From 0eef304bc9f7d079a1165e8cd2f24b078e9e1f2a Mon Sep 17 00:00:00 2001 From: "Dmitry V. 
Levin" Date: Mon, 13 Nov 2017 03:37:06 +0300 Subject: [PATCH 2164/2177] uapi: fix linux/rxrpc.h userspace compilation errors Consistently use types provided by to fix the following linux/rxrpc.h userspace compilation errors: /usr/include/linux/rxrpc.h:24:2: error: unknown type name 'u16' u16 srx_service; /* service desired */ /usr/include/linux/rxrpc.h:25:2: error: unknown type name 'u16' u16 transport_type; /* type of transport socket (SOCK_DGRAM) */ /usr/include/linux/rxrpc.h:26:2: error: unknown type name 'u16' u16 transport_len; /* length of transport address */ Use __kernel_sa_family_t instead of sa_family_t the same way as uapi/linux/in.h does, to fix the following linux/rxrpc.h userspace compilation errors: /usr/include/linux/rxrpc.h:23:2: error: unknown type name 'sa_family_t' sa_family_t srx_family; /* address family */ /usr/include/linux/rxrpc.h:28:3: error: unknown type name 'sa_family_t' sa_family_t family; /* transport address family */ Fixes: 727f8914477e ("rxrpc: Expose UAPI definitions to userspace") Cc: # v4.14 Signed-off-by: Dmitry V. Levin Signed-off-by: David S. Miller --- include/uapi/linux/rxrpc.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 9656aad8f8f7..9d4afea308a4 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -20,12 +20,12 @@ * RxRPC socket address */ struct sockaddr_rxrpc { - sa_family_t srx_family; /* address family */ - u16 srx_service; /* service desired */ - u16 transport_type; /* type of transport socket (SOCK_DGRAM) */ - u16 transport_len; /* length of transport address */ + __kernel_sa_family_t srx_family; /* address family */ + __u16 srx_service; /* service desired */ + __u16 transport_type; /* type of transport socket (SOCK_DGRAM) */ + __u16 transport_len; /* length of transport address */ union { - sa_family_t family; /* transport address family */ + __kernel_sa_family_t family; /* transport address family */ struct sockaddr_in sin; /* IPv4 transport address */ struct sockaddr_in6 sin6; /* IPv6 transport address */ } transport; From feb8892cb441c742d4220cf7ced001e7fa070731 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 13 Nov 2017 11:45:34 +0800 Subject: [PATCH 2165/2177] vhost_net: conditionally enable tx polling We always poll tx for socket, this is sub optimal since this will slightly increase the waitqueue traversing time and more important, vhost could not benefit from commit 9e641bdcfa4e ("net-tun: restructure tun_do_read for better sleep/wakeup efficiency") even if we've stopped rx polling during handle_rx(), tx poll were still left in the waitqueue. Pktgen from a remote host to VM over mlx4 on two 2.00GHz Xeon E5-2650 shows 11.7% improvements on rx PPS. (from 1.28Mpps to 1.44Mpps) Cc: Wei Xu Cc: Matthew Rosato Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- drivers/vhost/net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 68677d930e20..8d626d7c2e7e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -471,6 +471,7 @@ static void handle_tx(struct vhost_net *net) goto out; vhost_disable_notify(&net->dev, vq); + vhost_net_disable_vq(net, vq); hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; @@ -556,6 +557,7 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); + vhost_net_enable_vq(net, vq); break; } if (err != len) From bb1b40c7cb863f0800a6410c7dcb86cf3f28d3b1 Mon Sep 17 00:00:00 2001 From: Alexander Kappner Date: Mon, 13 Nov 2017 17:44:20 -0800 Subject: [PATCH 2166/2177] usbnet: ipheth: prevent TX queue timeouts when device not ready iOS devices require the host to be "trusted" before servicing network packets. Establishing trust requires the user to confirm a dialog on the iOS device.Until trust is established, the iOS device will silently discard network packets from the host. Currently, the ipheth driver does not detect whether an iOS device has established trust with the host, and immediately sets up the transmit queues. This causes the following problems: - Kernel taint due to WARN() in netdev watchdog. - Dmesg spam ("TX timeout"). - Disruption of user space networking activity (dhcpd, etc...) when new interface comes up but cannot be used. - Unnecessary host and device wakeups and USB traffic Example dmesg output: [ 1101.319778] NETDEV WATCHDOG: eth1 (ipheth): transmit queue 0 timed out [ 1101.319817] ------------[ cut here ]------------ [ 1101.319828] WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:316 dev_watchdog+0x20f/0x220 [ 1101.319831] Modules linked in: ipheth usbmon nvidia_drm(PO) nvidia_modeset(PO) nvidia(PO) iwlmvm mac80211 iwlwifi btusb btrtl btbcm btintel qmi_wwan bluetooth cfg80211 ecdh_generic thinkpad_acpi rfkill [last unloaded: ipheth] [ 1101.319861] CPU: 0 PID: 0 Comm: swapper/0 Tainted: P O 4.13.12.1 #1 [ 1101.319864] Hardware name: LENOVO 20ENCTO1WW/20ENCTO1WW, BIOS N1EET62W (1.35 ) 11/10/2016 [ 1101.319867] task: ffffffff81e11500 task.stack: ffffffff81e00000 [ 1101.319873] RIP: 0010:dev_watchdog+0x20f/0x220 [ 1101.319876] RSP: 0018:ffff8810a3c03e98 EFLAGS: 00010292 [ 1101.319880] RAX: 000000000000003a RBX: 0000000000000000 RCX: 0000000000000000 [ 1101.319883] RDX: ffff8810a3c15c48 RSI: ffffffff81ccbfc2 RDI: 00000000ffffffff [ 1101.319886] RBP: ffff880c04ebc41c R08: 0000000000000000 R09: 0000000000000379 [ 1101.319889] R10: 00000100696589d0 R11: 0000000000000378 R12: ffff880c04ebc000 [ 1101.319892] R13: 0000000000000000 R14: 0000000000000001 R15: ffff880c2865fc80 [ 1101.319896] FS: 0000000000000000(0000) GS:ffff8810a3c00000(0000) knlGS:0000000000000000 [ 1101.319899] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1101.319902] CR2: 00007f3ff24ac000 CR3: 0000000001e0a000 CR4: 00000000003406f0 [ 1101.319905] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1101.319908] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1101.319910] Call Trace: [ 1101.319914] [ 1101.319921] ? dev_graft_qdisc+0x70/0x70 [ 1101.319928] ? dev_graft_qdisc+0x70/0x70 [ 1101.319934] ? call_timer_fn+0x2e/0x170 [ 1101.319939] ? dev_graft_qdisc+0x70/0x70 [ 1101.319944] ? run_timer_softirq+0x1ea/0x440 [ 1101.319951] ? timerqueue_add+0x54/0x80 [ 1101.319956] ? enqueue_hrtimer+0x38/0xa0 [ 1101.319963] ? __do_softirq+0xed/0x2e7 [ 1101.319970] ? irq_exit+0xb4/0xc0 [ 1101.319976] ? 
smp_apic_timer_interrupt+0x39/0x50 [ 1101.319981] ? apic_timer_interrupt+0x8c/0xa0 [ 1101.319983] [ 1101.319992] ? cpuidle_enter_state+0xfa/0x2a0 [ 1101.319999] ? do_idle+0x1a3/0x1f0 [ 1101.320004] ? cpu_startup_entry+0x5f/0x70 [ 1101.320011] ? start_kernel+0x444/0x44c [ 1101.320017] ? early_idt_handler_array+0x120/0x120 [ 1101.320023] ? x86_64_start_kernel+0x145/0x154 [ 1101.320028] ? secondary_startup_64+0x9f/0x9f [ 1101.320033] Code: 20 04 00 00 eb 9f 4c 89 e7 c6 05 59 44 71 00 01 e8 a7 df fd ff 89 d9 4c 89 e6 48 c7 c7 70 b7 cd 81 48 89 c2 31 c0 e8 97 64 90 ff <0f> ff eb bf 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 [ 1101.320103] ---[ end trace 0cc4d251e2b57080 ]--- [ 1101.320110] ipheth 1-5:4.2: ipheth_tx_timeout: TX timeout The last message "TX timeout" is repeated every 5 seconds until trust is established or the device is disconnected, filling up dmesg. The proposed patch eliminates the problem by, upon connection, keeping the TX queue and carrier disabled until a packet is first received from the iOS device. This is reflected by the confirmed_pairing variable in the device structure. Only after at least one packet has been received from the iOS device, the transmit queue and carrier are brought up during the periodic device poll in ipheth_carrier_set. Because the iOS device will always send a packet immediately upon trust being established, this should not delay the interface becoming useable. To prevent failed UBRs in ipheth_rcvbulk_callback from perpetually re-enabling the queue if it was disabled, a new check is added so only successful transfers re-enable the queue, whereas failed transfers only trigger an immediate poll. This has the added benefit of removing the periodic control requests to the iOS device until trust has been established and thus should reduce wakeup events on both the host and the iOS device. Signed-off-by: Alexander Kappner Signed-off-by: David S. 
Miller --- drivers/net/usb/ipheth.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index d49c7103085e..ca71f6c03859 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -149,6 +149,7 @@ struct ipheth_device { u8 bulk_in; u8 bulk_out; struct delayed_work carrier_work; + bool confirmed_pairing; }; static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags); @@ -259,7 +260,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb) dev->net->stats.rx_packets++; dev->net->stats.rx_bytes += len; - + dev->confirmed_pairing = true; netif_rx(skb); ipheth_rx_submit(dev, GFP_ATOMIC); } @@ -281,14 +282,21 @@ static void ipheth_sndbulk_callback(struct urb *urb) __func__, status); dev_kfree_skb_irq(dev->tx_skb); - netif_wake_queue(dev->net); + if (status == 0) + netif_wake_queue(dev->net); + else + // on URB error, trigger immediate poll + schedule_delayed_work(&dev->carrier_work, 0); } static int ipheth_carrier_set(struct ipheth_device *dev) { struct usb_device *udev = dev->udev; int retval; - + if (!dev) + return 0; + if (!dev->confirmed_pairing) + return 0; retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, IPHETH_CTRL_ENDP), IPHETH_CMD_CARRIER_CHECK, /* request */ @@ -303,11 +311,14 @@ static int ipheth_carrier_set(struct ipheth_device *dev) return retval; } - if (dev->ctrl_buf[0] == IPHETH_CARRIER_ON) + if (dev->ctrl_buf[0] == IPHETH_CARRIER_ON) { netif_carrier_on(dev->net); - else + if (dev->tx_urb->status != -EINPROGRESS) + netif_wake_queue(dev->net); + } else { netif_carrier_off(dev->net); - + netif_stop_queue(dev->net); + } return 0; } @@ -387,7 +398,6 @@ static int ipheth_open(struct net_device *net) return retval; schedule_delayed_work(&dev->carrier_work, IPHETH_CARRIER_CHECK_TIMEOUT); - netif_start_queue(net); return retval; } @@ -491,7 +501,7 @@ static int ipheth_probe(struct usb_interface *intf, dev->udev = udev; dev->net = netdev; dev->intf = intf; - + dev->confirmed_pairing = false; /* Set up endpoints */ hintf = usb_altnum_to_altsetting(intf, IPHETH_ALT_INTFNUM); if (hintf == NULL) { @@ -542,7 +552,9 @@ static int ipheth_probe(struct usb_interface *intf, retval = -EIO; goto err_register_netdev; } - + // carrier down and transmit queues stopped until packet from device + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); dev_info(&intf->dev, "Apple iPhone USB Ethernet device attached\n"); return 0; From b9f3eb499d84f8d4adcb2f9212ec655700b28228 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 14 Nov 2017 06:30:11 +0300 Subject: [PATCH 2167/2177] uapi: fix linux/tls.h userspace compilation error Move inclusion of a private kernel header from uapi/linux/tls.h to its only user - net/tls.h, to fix the following linux/tls.h userspace compilation error: /usr/include/linux/tls.h:41:21: fatal error: net/tcp.h: No such file or directory As to this point uapi/linux/tls.h was totaly unusuable for userspace, cleanup this header file further by moving other redundant includes to net/tls.h. Fixes: 3c4d7559159b ("tls: kernel TLS support") Cc: # v4.13+ Signed-off-by: Dmitry V. Levin Signed-off-by: David S. 
Miller --- include/net/tls.h | 4 ++++ include/uapi/linux/tls.h | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 70becd0a9299..936cfc5cab7d 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -35,6 +35,10 @@ #define _TLS_OFFLOAD_H #include +#include +#include +#include +#include #include diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index d5e0682ab837..293b2cdad88d 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -35,10 +35,6 @@ #define _UAPI_LINUX_TLS_H #include -#include -#include -#include -#include /* TLS socket options */ #define TLS_TX 1 /* Set transmit parameters */ From 094009531612246d9e13f9e0c3ae2205d7f63a0a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 14 Nov 2017 14:21:32 +0100 Subject: [PATCH 2168/2177] ipv6: set all.accept_dad to 0 by default With commits 35e015e1f577 and a2d3f3e33853, the global 'accept_dad' flag is also taken into account (default value is 1). If either global or per-interface flag is non-zero, DAD will be enabled on a given interface. This is not backward compatible: before those patches, the user could disable DAD just by setting the per-interface flag to 0. Now, the user instead needs to set both flags to 0 to actually disable DAD. Restore the previous behaviour by setting the default for the global 'accept_dad' flag to 0. This way, DAD is still enabled by default, as per-interface flags are set to 1 on device creation, but setting them to 0 is enough to disable DAD on a given interface. - Before 35e015e1f57a7 and a2d3f3e33853: global per-interface DAD enabled [default] 1 1 yes X 0 no X 1 yes - After 35e015e1f577 and a2d3f3e33853: global per-interface DAD enabled [default] 1 1 yes 0 0 no 0 1 yes 1 0 yes - After this fix: global per-interface DAD enabled 1 1 yes 0 0 no [default] 0 1 yes 1 0 yes Fixes: 35e015e1f577 ("ipv6: fix net.ipv6.conf.all interface DAD handlers") Fixes: a2d3f3e33853 ("ipv6: fix net.ipv6.conf.all.accept_dad behaviour for real") CC: Stefano Brivio CC: Matteo Croce CC: Erik Kline Signed-off-by: Nicolas Dichtel Acked-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a6dffd65eb9d..a0ae1c9d37df 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -231,7 +231,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, - .accept_dad = 1, + .accept_dad = 0, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, .stable_secret = { From 11bf284f81b46f59d5f4a4522c13aa7852cfd560 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 14 Nov 2017 16:51:56 +0300 Subject: [PATCH 2169/2177] net: Protect iterations over net::fib_notifier_ops in fib_seq_sum() There is at least unlocked deletion of net->ipv4.fib_notifier_ops from net::fib_notifier_ops: ip_fib_net_exit() rtnl_unlock() fib4_notifier_exit() fib_notifier_ops_unregister(net->ipv4.notifier_ops) list_del_rcu(&ops->list) So fib_seq_sum() can't use rtnl_lock() only for protection. The possible solution could be to use rtnl_lock() in fib_notifier_ops_unregister(), but this adds a possible delay during net namespace creation, so we better use rcu_read_lock() till someone really needs the mutex (if that happens). Signed-off-by: Kirill Tkhai Signed-off-by: David S. 
Miller --- net/core/fib_notifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 6b8cd494574f..0c048bdeb016 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -34,12 +34,14 @@ static unsigned int fib_seq_sum(void) rtnl_lock(); for_each_net(net) { - list_for_each_entry(ops, &net->fib_notifier_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { if (!try_module_get(ops->owner)) continue; fib_seq += ops->fib_seq_read(net); module_put(ops->owner); } + rcu_read_unlock(); } rtnl_unlock(); From 6670e152447732ba90626f36dfc015a13fbf150e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 14 Nov 2017 08:25:49 -0800 Subject: [PATCH 2170/2177] tcp: Namespace-ify sysctl_tcp_default_congestion_control Make default TCP default congestion control to a per namespace value. This changes default congestion control to a pointer to congestion ops (rather than implicit as first element of available lsit). The congestion control setting of new namespaces is inherited from the current setting of the root namespace. Signed-off-by: Stephen Hemminger Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 6 +-- net/ipv4/fib_semantics.c | 4 +- net/ipv4/sysctl_net_ipv4.c | 19 ++++++---- net/ipv4/tcp_cong.c | 76 ++++++++++++++++++-------------------- net/ipv4/tcp_ipv4.c | 9 +++++ net/ipv6/route.c | 3 +- 7 files changed, 64 insertions(+), 54 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 5e12975fc658..44668c29701a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -160,6 +160,7 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; + const struct tcp_congestion_ops __rcu *tcp_congestion_control; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; spinlock_t tcp_fastopen_ctx_lock; unsigned int sysctl_tcp_fastopen_blackhole_timeout; diff --git a/include/net/tcp.h b/include/net/tcp.h index ed71511e67a6..35cc7d0d3d47 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1002,8 +1002,8 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); void tcp_assign_congestion_control(struct sock *sk); void tcp_init_congestion_control(struct sock *sk); void tcp_cleanup_congestion_control(struct sock *sk); -int tcp_set_default_congestion_control(const char *name); -void tcp_get_default_congestion_control(char *name); +int tcp_set_default_congestion_control(struct net *net, const char *name); +void tcp_get_default_congestion_control(struct net *net, char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); @@ -1017,7 +1017,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); +u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 589caaa90613..f04d944f8abe 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -710,7 +710,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) bool 
ecn_ca = false; nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); } else { val = nla_get_u32(nla); } @@ -1030,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) char tmp[TCP_CA_NAME_MAX]; nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) return -EINVAL; } else { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ef0ff3357a44..93e172118a94 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -201,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(ctl->data, struct net, + ipv4.tcp_congestion_control); char val[TCP_CA_NAME_MAX]; struct ctl_table tbl = { .data = val, @@ -208,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, }; int ret; - tcp_get_default_congestion_control(val); + tcp_get_default_congestion_control(net, val); ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) - ret = tcp_set_default_congestion_control(val); + ret = tcp_set_default_congestion_control(net, val); return ret; } @@ -447,12 +449,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_congestion_control", - .mode = 0644, - .maxlen = TCP_CA_NAME_MAX, - .proc_handler = proc_tcp_congestion_control, - }, #ifdef CONFIG_NETLABEL { .procname = "cipso_cache_enable", @@ -763,6 +759,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &one }, #endif + { + .procname = "tcp_congestion_control", + .data = &init_net.ipv4.tcp_congestion_control, + .mode = 0644, + .maxlen = TCP_CA_NAME_MAX, + .proc_handler = proc_tcp_congestion_control, + }, { .procname = "tcp_keepalive_time", .data = &init_net.ipv4.sysctl_tcp_keepalive_time, diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 2f26124fd160..bc6c02f16243 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) } /* Must be called with rcu lock held */ -static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) +static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, + const char *name) { - const struct tcp_congestion_ops *ca = tcp_ca_find(name); + struct tcp_congestion_ops *ca = tcp_ca_find(name); + #ifdef CONFIG_MODULES if (!ca && capable(CAP_NET_ADMIN)) { rcu_read_unlock(); @@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); -u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) +u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; u32 key = TCP_CA_UNSPEC; @@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) might_sleep(); rcu_read_lock(); - ca = __tcp_ca_find_autoload(name); + ca = tcp_ca_find_autoload(net, name); if (ca) { key = ca->key; *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; @@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); /* Assign choice of congestion control. 
*/ void tcp_assign_congestion_control(struct sock *sk) { + struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_congestion_ops *ca; + const struct tcp_congestion_ops *ca; rcu_read_lock(); - list_for_each_entry_rcu(ca, &tcp_cong_list, list) { - if (likely(try_module_get(ca->owner))) { - icsk->icsk_ca_ops = ca; - goto out; - } - /* Fallback to next available. The last really - * guaranteed fallback is Reno from this list. - */ - } -out: + ca = rcu_dereference(net->ipv4.tcp_congestion_control); + if (unlikely(!try_module_get(ca->owner))) + ca = &tcp_reno; + icsk->icsk_ca_ops = ca; rcu_read_unlock(); - memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) INET_ECN_xmit(sk); else @@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk) } /* Used by sysctl to change default congestion control */ -int tcp_set_default_congestion_control(const char *name) +int tcp_set_default_congestion_control(struct net *net, const char *name) { struct tcp_congestion_ops *ca; - int ret = -ENOENT; + const struct tcp_congestion_ops *prev; + int ret; - spin_lock(&tcp_cong_list_lock); - ca = tcp_ca_find(name); -#ifdef CONFIG_MODULES - if (!ca && capable(CAP_NET_ADMIN)) { - spin_unlock(&tcp_cong_list_lock); + rcu_read_lock(); + ca = tcp_ca_find_autoload(net, name); + if (!ca) { + ret = -ENOENT; + } else if (!try_module_get(ca->owner)) { + ret = -EBUSY; + } else { + prev = xchg(&net->ipv4.tcp_congestion_control, ca); + if (prev) + module_put(prev->owner); - request_module("tcp_%s", name); - spin_lock(&tcp_cong_list_lock); - ca = tcp_ca_find(name); - } -#endif - - if (ca) { - ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ - list_move(&ca->list, &tcp_cong_list); + ca->flags |= TCP_CONG_NON_RESTRICTED; ret = 0; } - spin_unlock(&tcp_cong_list_lock); + rcu_read_unlock(); return ret; } @@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name) /* Set default value from kernel configuration at bootup */ static int __init tcp_congestion_default(void) { - return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); + return tcp_set_default_congestion_control(&init_net, + CONFIG_DEFAULT_TCP_CONG); } late_initcall(tcp_congestion_default); @@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen) } /* Get current default congestion control */ -void tcp_get_default_congestion_control(char *name) +void tcp_get_default_congestion_control(struct net *net, char *name) { - struct tcp_congestion_ops *ca; - /* We will always have reno... 
*/ - BUG_ON(list_empty(&tcp_cong_list)); + const struct tcp_congestion_ops *ca; rcu_read_lock(); - ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list); + ca = rcu_dereference(net->ipv4.tcp_congestion_control); strncpy(name, ca->name, TCP_CA_NAME_MAX); rcu_read_unlock(); } @@ -351,12 +345,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo if (!load) ca = tcp_ca_find(name); else - ca = __tcp_ca_find_autoload(name); + ca = tcp_ca_find_autoload(sock_net(sk), name); + /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { icsk->icsk_ca_setsockopt = 1; goto out; } + if (!ca) { err = -ENOENT; } else if (!load) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1eac84b8044e..c6bc0c4d19c6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2430,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net) { int cpu; + module_put(net->ipv4.tcp_congestion_control->owner); + for_each_possible_cpu(cpu) inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); free_percpu(net->ipv4.tcp_sk); @@ -2522,6 +2524,13 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; atomic_set(&net->ipv4.tfo_active_disable_times, 0); + /* Reno is always built in */ + if (!net_eq(net, &init_net) && + try_module_get(init_net.ipv4.tcp_congestion_control->owner)) + net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control; + else + net->ipv4.tcp_congestion_control = &tcp_reno; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 70d9659fc1e9..05eb7bc36156 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2378,6 +2378,7 @@ out: static int ip6_convert_metrics(struct mx6_config *mxc, const struct fib6_config *cfg) { + struct net *net = cfg->fc_nlinfo.nl_net; bool ecn_ca = false; struct nlattr *nla; int remaining; @@ -2403,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, char tmp[TCP_CA_NAME_MAX]; nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) goto err; } else { From bce552fd6f6e37f9567c85c4f0d6d1987eef379f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 14 Nov 2017 11:27:01 -0800 Subject: [PATCH 2171/2177] netem: use 64 bit divide by rate Since times are now expressed in nanosecond, need to now do true 64 bit divide. Old code would truncate rate at 32 bits. Rename function to better express current usage. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/sch_netem.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index b686e755fda9..644323d6081c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -339,10 +339,8 @@ static s64 tabledist(s64 mu, s64 sigma, return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } -static u64 packet_len_2_sched_time(unsigned int len, - struct netem_sched_data *q) +static u64 packet_time_ns(u64 len, const struct netem_sched_data *q) { - u64 offset; len += q->packet_overhead; if (q->cell_size) { @@ -352,9 +350,8 @@ static u64 packet_len_2_sched_time(unsigned int len, cells++; len = cells * (q->cell_size + q->cell_overhead); } - offset = (u64)len * NSEC_PER_SEC; - do_div(offset, q->rate); - return offset; + + return div64_u64(len * NSEC_PER_SEC, q->rate); } static void tfifo_reset(struct Qdisc *sch) @@ -556,7 +553,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, now = last->time_to_send; } - delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); + delay += packet_time_ns(qdisc_pkt_len(skb), q); } cb->time_to_send = now + delay; From 9b0ed89172efec1d9f214d173ad6046f10f6b742 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 14 Nov 2017 11:27:02 -0800 Subject: [PATCH 2172/2177] netem: remove unnecessary 64 bit modulus Fix compilation on 32 bit platforms (where doing modulus operation with 64 bit requires extra glibc functions) by truncation. The jitter for table distribution is limited to a 32 bit value because random numbers are scaled as 32 bit value. Also fix some whitespace. Fixes: 99803171ef04 ("netem: add uapi to express delay and jitter in nanoseconds") Reported-by: Randy Dunlap Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 644323d6081c..dd70924cbcdf 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -312,9 +312,9 @@ static bool loss_event(struct netem_sched_data *q) * std deviation sigma. Uses table lookup to approximate the desired * distribution, and a uniformly-distributed pseudo-random source. */ -static s64 tabledist(s64 mu, s64 sigma, +static s64 tabledist(s64 mu, s32 sigma, struct crndstate *state, - const struct disttable *dist) + const struct disttable *dist) { s64 x; long t; @@ -327,7 +327,7 @@ static s64 tabledist(s64 mu, s64 sigma, /* default uniform distribution */ if (dist == NULL) - return (rnd % (2*sigma)) - sigma + mu; + return (rnd % (2 * sigma)) - sigma + mu; t = dist->table[rnd % dist->size]; x = (sigma % NETEM_DIST_SCALE) * t; From b74912a2fdae9aadd20da502644aa8848c861954 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 14 Nov 2017 14:26:16 -0600 Subject: [PATCH 2173/2177] openvswitch: meter: fix NULL pointer dereference in ovs_meter_cmd_reply_start It seems that the intention of the code is to null check the value returned by function genlmsg_put. But the current code is null checking the address of the pointer that holds the value returned by genlmsg_put. Fix this by properly null checking the value returned by function genlmsg_put in order to avoid a pontential null pointer dereference. Addresses-Coverity-ID: 1461561 ("Dereference before null check") Addresses-Coverity-ID: 1461562 ("Dereference null return value") Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/openvswitch/meter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 52ddd6c408b3..3fbfc78991ac 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -99,7 +99,7 @@ ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, *ovs_reply_header = genlmsg_put(skb, info->snd_portid, info->snd_seq, &dp_meter_genl_family, 0, cmd); - if (!ovs_reply_header) { + if (!*ovs_reply_header) { nlmsg_free(skb); return ERR_PTR(-EMSGSIZE); } From 6314dab4b8fb8493d810e175cb340376052c69b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 15 Nov 2017 09:35:02 +0100 Subject: [PATCH 2174/2177] net: cdc_ncm: GetNtbFormat endian fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GetNtbFormat and SetNtbFormat requests operate on 16 bit little endian values. We get away with ignoring this most of the time, because we only care about USB_CDC_NCM_NTB16_FORMAT which is 0x0000. This fails for USB_CDC_NCM_NTB32_FORMAT. Fix comparison between LE value from device and constant by converting the constant to LE. Reported-by: Ben Hutchings Fixes: 2b02c20ce0c2 ("cdc_ncm: Set NTB format again after altsetting switch for Huawei devices") Cc: Enrico Mioso Cc: Christian Panton Signed-off-by: Bjørn Mork Acked-By: Enrico Mioso Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 47cab1bde065..9e1b74590682 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -771,7 +771,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ int err; u8 iface_no; struct usb_cdc_parsed_header hdr; - u16 curr_ntb_format; + __le16 curr_ntb_format; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -889,7 +889,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ goto error2; } - if (curr_ntb_format == USB_CDC_NCM_NTB32_FORMAT) { + if (curr_ntb_format == cpu_to_le16(USB_CDC_NCM_NTB32_FORMAT)) { dev_info(&intf->dev, "resetting NTB format to 16-bit"); err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT, USB_TYPE_CLASS | USB_DIR_OUT From 89ad2fa3f043a1e8daae193bcb5fe34d5f8caf28 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Nov 2017 17:15:50 -0800 Subject: [PATCH 2175/2177] bpf: fix lockdep splat pcpu_freelist_pop() needs the same lockdep awareness than pcpu_freelist_populate() to avoid a false positive. [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] switchto-defaul/12508 [HC0[0]:SC0[6]:HE0:SE0] is trying to acquire: (&htab->buckets[i].lock){......}, at: [] __htab_percpu_map_update_elem+0x1cb/0x300 and this task is already holding: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...}, at: [] __dev_queue_xmit+0 x868/0x1240 which would create a new lock dependency: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...} -> (&htab->buckets[i].lock){......} but this new dependency connects a SOFTIRQ-irq-safe lock: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...} ... 
From 89ad2fa3f043a1e8daae193bcb5fe34d5f8caf28 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Nov 2017 17:15:50 -0800 Subject: [PATCH 2175/2177] bpf: fix lockdep splat pcpu_freelist_pop() needs the same lockdep awareness as pcpu_freelist_populate() to avoid a false positive. [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] switchto-defaul/12508 [HC0[0]:SC0[6]:HE0:SE0] is trying to acquire: (&htab->buckets[i].lock){......}, at: [] __htab_percpu_map_update_elem+0x1cb/0x300 and this task is already holding: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...}, at: [] __dev_queue_xmit+0x868/0x1240 which would create a new lock dependency: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...} -> (&htab->buckets[i].lock){......} but this new dependency connects a SOFTIRQ-irq-safe lock: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...} ... which became SOFTIRQ-irq-safe at: [] __lock_acquire+0x42b/0x1f10 [] lock_acquire+0xbc/0x1b0 [] _raw_spin_lock+0x38/0x50 [] __dev_queue_xmit+0x868/0x1240 [] dev_queue_xmit+0x10/0x20 [] ip_finish_output2+0x439/0x590 [] ip_finish_output+0x150/0x2f0 [] ip_output+0x7d/0x260 [] ip_local_out+0x5e/0xe0 [] ip_queue_xmit+0x205/0x620 [] tcp_transmit_skb+0x5a8/0xcb0 [] tcp_write_xmit+0x242/0x1070 [] __tcp_push_pending_frames+0x3c/0xf0 [] tcp_rcv_established+0x312/0x700 [] tcp_v4_do_rcv+0x11c/0x200 [] tcp_v4_rcv+0xaa2/0xc30 [] ip_local_deliver_finish+0xa7/0x240 [] ip_local_deliver+0x66/0x200 [] ip_rcv_finish+0xdd/0x560 [] ip_rcv+0x295/0x510 [] __netif_receive_skb_core+0x988/0x1020 [] __netif_receive_skb+0x21/0x70 [] process_backlog+0x6f/0x230 [] net_rx_action+0x229/0x420 [] __do_softirq+0xd8/0x43d [] do_softirq_own_stack+0x1c/0x30 [] do_softirq+0x55/0x60 [] __local_bh_enable_ip+0xa8/0xb0 [] cpu_startup_entry+0x1c7/0x500 [] start_secondary+0x113/0x140 to a SOFTIRQ-irq-unsafe lock: (&head->lock){+.+...} ... which became SOFTIRQ-irq-unsafe at: ... [] __lock_acquire+0x82f/0x1f10 [] lock_acquire+0xbc/0x1b0 [] _raw_spin_lock+0x38/0x50 [] pcpu_freelist_pop+0x7a/0xb0 [] htab_map_alloc+0x50c/0x5f0 [] SyS_bpf+0x265/0x1200 [] entry_SYSCALL_64_fastpath+0x12/0x17 other info that might help us debug this: Chain exists of: dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2 --> &htab->buckets[i].lock --> &head->lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&head->lock); local_irq_disable(); lock(dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2); lock(&htab->buckets[i].lock); lock(dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2); *** DEADLOCK *** Fixes: e19494edab82 ("bpf: introduce percpu_freelist") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- kernel/bpf/percpu_freelist.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 5c51d1985b51..673fa6fe2d73 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -78,8 +78,10 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; + unsigned long flags; int orig_cpu, cpu; + local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -87,14 +89,16 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock(&head->lock); + raw_spin_unlock_irqrestore(&head->lock, flags); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) + if (cpu == orig_cpu) { + local_irq_restore(flags); return NULL; + } } }
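Independent of the BPF data structures, the shape of the fix is the usual pattern for a lock that can also be taken from hard-IRQ or softirq context: disable interrupts once before scanning the per-CPU lists, then restore them on every exit path. A schematic kernel-style sketch (made-up names, not the actual pcpu_freelist code) might look like this:

#include <linux/irqflags.h>
#include <linux/spinlock.h>

struct item { struct item *next; };

/* Pop from the first non-empty list; safe even if the same locks can be
 * taken from IRQ/softirq context on this CPU. */
static struct item *pop_any(raw_spinlock_t *locks, struct item **heads, int n)
{
	unsigned long flags;
	struct item *it;
	int i;

	local_irq_save(flags);			/* block IRQs for the whole scan */
	for (i = 0; i < n; i++) {
		raw_spin_lock(&locks[i]);	/* IRQs are already off here */
		it = heads[i];
		if (it) {
			heads[i] = it->next;
			/* success path: drop the lock and restore IRQs together */
			raw_spin_unlock_irqrestore(&locks[i], flags);
			return it;
		}
		raw_spin_unlock(&locks[i]);
	}
	local_irq_restore(flags);		/* failure path: nothing found */
	return NULL;
}

This mirrors the hunk above: local_irq_save() before the loop, raw_spin_unlock_irqrestore() when a node is found, and local_irq_restore() before the NULL return.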
From fd7eafd02121d6ef501ef1a4a891e6061366c952 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 15 Nov 2017 09:43:09 +0800 Subject: [PATCH 2176/2177] geneve: fix fill_info when link down geneve->sock4/6 were added with geneve_open and released with geneve_stop. So when the geneve link is down, we are not able to show the remote address and checksum info after commit 11387fe4a98 ("geneve: fix fill_info when using collect_metadata"). Fix this by not passing *_REMOTE{,6} for COLLECT_METADATA, since they are mutually exclusive, and by always showing the UDP_ZERO_CSUM6_RX info. Fixes: 11387fe4a98 ("geneve: fix fill_info when using collect_metadata") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/geneve.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 688906aad19c..4e16d839c311 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1490,6 +1490,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = &geneve->info; + bool metadata = geneve->collect_md; __u8 tmp_vni[3]; __u32 vni; @@ -1498,32 +1499,24 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) goto nla_put_failure; - if (rtnl_dereference(geneve->sock4)) { + if (!metadata && ip_tunnel_info_af(info) == AF_INET) { if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, info->key.u.ipv4.dst)) goto nla_put_failure; - if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, !!(info->key.tun_flags & TUNNEL_CSUM))) goto nla_put_failure; - } - #if IS_ENABLED(CONFIG_IPV6) - if (rtnl_dereference(geneve->sock6)) { + } else if (!metadata) { if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, &info->key.u.ipv6.dst)) goto nla_put_failure; - if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, !(info->key.tun_flags & TUNNEL_CSUM))) goto nla_put_failure; - - if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, - !geneve->use_udp6_rx_checksums)) - goto nla_put_failure; - } #endif + } if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || @@ -1533,10 +1526,13 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst)) goto nla_put_failure; - if (geneve->collect_md) { - if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) + if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) goto nla_put_failure; - } + + if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + !geneve->use_udp6_rx_checksums)) + goto nla_put_failure; + return 0; nla_put_failure:
From 50895b9de1d3e0258e015e8e55128d835d9a9f19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Nov 2017 21:02:19 -0800 Subject: [PATCH 2177/2177] tcp: highest_sack fix syzbot easily found a regression added in our latest patches [1]. No longer set tp->highest_sack to the head of the send queue, since this is not logical and error-prone. Only SACK processing should maintain the pointer to an skb from the rtx queue. We might in the future only remember the sequence instead of a pointer to the skb, since the rb-tree should allow a fast lookup.
[1] BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1706 [inline] BUG: KASAN: use-after-free in tcp_ack+0x42bb/0x4fd0 net/ipv4/tcp_input.c:3537 Read of size 4 at addr ffff8801c154faa8 by task syz-executor4/12860 CPU: 0 PID: 12860 Comm: syz-executor4 Not tainted 4.14.0-next-20171113+ #41 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 tcp_highest_sack_seq include/net/tcp.h:1706 [inline] tcp_ack+0x42bb/0x4fd0 net/ipv4/tcp_input.c:3537 tcp_rcv_established+0x672/0x18a0 net/ipv4/tcp_input.c:5439 tcp_v4_do_rcv+0x2ab/0x7d0 net/ipv4/tcp_ipv4.c:1468 sk_backlog_rcv include/net/sock.h:909 [inline] __release_sock+0x124/0x360 net/core/sock.c:2264 release_sock+0xa4/0x2a0 net/core/sock.c:2778 tcp_sendmsg+0x3a/0x50 net/ipv4/tcp.c:1462 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763 sock_sendmsg_nosec net/socket.c:632 [inline] sock_sendmsg+0xca/0x110 net/socket.c:642 ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2048 __sys_sendmsg+0xe5/0x210 net/socket.c:2082 SYSC_sendmsg net/socket.c:2093 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2089 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x452879 RSP: 002b:00007fc9761bfbe8 EFLAGS: 00000212 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000758020 RCX: 0000000000452879 RDX: 0000000000000000 RSI: 0000000020917fc8 RDI: 0000000000000015 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000212 R12: 00000000006ee3a0 R13: 00000000ffffffff R14: 00007fc9761c06d4 R15: 0000000000000000 Allocated by task 12860: save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489 kmem_cache_alloc_node+0x144/0x760 mm/slab.c:3638 __alloc_skb+0xf1/0x780 net/core/skbuff.c:193 alloc_skb_fclone include/linux/skbuff.h:1023 [inline] sk_stream_alloc_skb+0x11d/0x900 net/ipv4/tcp.c:870 tcp_sendmsg_locked+0x1341/0x3b80 net/ipv4/tcp.c:1299 tcp_sendmsg+0x2f/0x50 net/ipv4/tcp.c:1461 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763 sock_sendmsg_nosec net/socket.c:632 [inline] sock_sendmsg+0xca/0x110 net/socket.c:642 SYSC_sendto+0x358/0x5a0 net/socket.c:1749 SyS_sendto+0x40/0x50 net/socket.c:1717 entry_SYSCALL_64_fastpath+0x1f/0x96 Freed by task 12860: save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3492 [inline] kmem_cache_free+0x77/0x280 mm/slab.c:3750 kfree_skbmem+0xdd/0x1d0 net/core/skbuff.c:603 __kfree_skb+0x1d/0x20 net/core/skbuff.c:642 sk_wmem_free_skb include/net/sock.h:1419 [inline] tcp_rtx_queue_unlink_and_free include/net/tcp.h:1682 [inline] tcp_clean_rtx_queue net/ipv4/tcp_input.c:3111 [inline] tcp_ack+0x1b17/0x4fd0 net/ipv4/tcp_input.c:3593 tcp_rcv_established+0x672/0x18a0 net/ipv4/tcp_input.c:5439 tcp_v4_do_rcv+0x2ab/0x7d0 net/ipv4/tcp_ipv4.c:1468 sk_backlog_rcv include/net/sock.h:909 [inline] __release_sock+0x124/0x360 net/core/sock.c:2264 release_sock+0xa4/0x2a0 net/core/sock.c:2778 tcp_sendmsg+0x3a/0x50 net/ipv4/tcp.c:1462 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763 sock_sendmsg_nosec net/socket.c:632 [inline] sock_sendmsg+0xca/0x110 
net/socket.c:642 ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2048 __sys_sendmsg+0xe5/0x210 net/socket.c:2082 SYSC_sendmsg net/socket.c:2093 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2089 entry_SYSCALL_64_fastpath+0x1f/0x96 The buggy address belongs to the object at ffff8801c154fa80 which belongs to the cache skbuff_fclone_cache of size 456 The buggy address is located 40 bytes inside of 456-byte region [ffff8801c154fa80, ffff8801c154fc48) The buggy address belongs to the page: page:ffffea00070553c0 count:1 mapcount:0 mapping:ffff8801c154f080 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffff8801c154f080 0000000000000000 0000000100000006 raw: ffffea00070a5a20 ffffea0006a18360 ffff8801d9ca0500 0000000000000000 page dumped because: kasan: bad access detected Fixes: 737ff314563c ("tcp: use sequence distance to detect reordering") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/tcp.h | 17 +++-------------- net/ipv4/tcp_input.c | 2 +- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 35cc7d0d3d47..85ea578195d4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1630,9 +1630,6 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli { if (tcp_write_queue_empty(sk)) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - - if (tcp_sk(sk)->highest_sack == skb_unlinked) - tcp_sk(sk)->highest_sack = NULL; } static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) @@ -1645,12 +1642,8 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb __tcp_add_write_queue_tail(sk, skb); /* Queue it, remembering where we must start sending. */ - if (sk->sk_write_queue.next == skb) { + if (sk->sk_write_queue.next == skb) tcp_chrono_start(sk, TCP_CHRONO_BUSY); - - if (tcp_sk(sk)->highest_sack == NULL) - tcp_sk(sk)->highest_sack = skb; - } } /* Insert new before skb on the write queue of sk. */ @@ -1708,9 +1701,7 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb) { - struct sk_buff *next = skb_rb_next(skb); - - tcp_sk(sk)->highest_sack = next ?: tcp_send_head(sk); + tcp_sk(sk)->highest_sack = skb_rb_next(skb); } static inline struct sk_buff *tcp_highest_sack(struct sock *sk) @@ -1720,9 +1711,7 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk) static inline void tcp_highest_sack_reset(struct sock *sk) { - struct sk_buff *skb = tcp_rtx_queue_head(sk); - - tcp_sk(sk)->highest_sack = skb ?: tcp_send_head(sk); + tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk); } /* Called when old skb is about to be deleted and replaced by new skb */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c3447c5512fd..f0b572fe959a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3534,7 +3534,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) icsk->icsk_retransmits = 0; } - prior_fack = tcp_highest_sack_seq(tp); + prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet